video_core: Switch to xxHash and improve ShaderSetup behaviour (#1592)

This commit is contained in:
PabloMK7 2026-01-09 18:01:49 +01:00 committed by GitHub
parent 8acdb35798
commit 7de2d7b846
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 188 additions and 520 deletions

3
.gitmodules vendored
View File

@ -100,3 +100,6 @@
[submodule "spirv-headers"]
path = externals/spirv-headers
url = https://github.com/KhronosGroup/SPIRV-Headers
[submodule "externals/xxHash"]
path = externals/xxHash
url = https://github.com/Cyan4973/xxHash.git

View File

@ -489,3 +489,17 @@ if (ENABLE_VULKAN)
add_subdirectory(libadrenotools)
endif()
endif()
# xxHash: pulled in from the externals/xxHash submodule through its cmake_unofficial project.
# XXHASH_BUILD_XXHSUM is set to OFF before entering the subdirectory
# (presumably this disables the xxhsum command-line tool - confirm against the xxHash CMake options).
set(XXHASH_BUILD_XXHSUM OFF)
# EXCLUDE_FROM_ALL: the xxHash targets are only built when another target depends on them.
add_subdirectory(xxHash/cmake_unofficial EXCLUDE_FROM_ALL)
# PRIVATE definitions below apply only while compiling the xxhash target itself,
# not to targets that merely link against it.
# NOTE(review): XXH_FORCE_MEMORY_ACCESS=2 looks like xxHash's direct unaligned-read mode -
# confirm it is safe on every CPU this project targets.
target_compile_definitions(xxhash PRIVATE XXH_FORCE_MEMORY_ACCESS=2)
# Select the xxHash vectorization backend from the target processor name.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64")
target_compile_definitions(xxhash PRIVATE XXH_VECTOR=XXH_SSE2)
message(STATUS "Enabling SSE2 for xxHash")
# NOTE(review): MATCHES is case-sensitive; a processor reported as lowercase "arm64"
# would not match this pattern and would take the scalar branch - verify this is intended.
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|armv8")
target_compile_definitions(xxhash PRIVATE XXH_VECTOR=XXH_NEON)
message(STATUS "Enabling NEON for xxHash")
else()
# Any other processor: use the scalar (non-SIMD) code path.
target_compile_definitions(xxhash PRIVATE XXH_VECTOR=XXH_SCALAR)
message(STATUS "Disabling SIMD for xxHash")
endif()

1
externals/xxHash vendored Submodule

@ -0,0 +1 @@
Subproject commit e626a72bc2321cd320e953a0ccf1584cad60f363

View File

@ -1,4 +1,4 @@
// Copyright 2015 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -63,8 +63,8 @@ void GraphicsTracingWidget::StartRecording() {
return;
auto& pica = system.GPU().PicaCore();
auto shader_binary = pica.vs_setup.program_code;
auto swizzle_data = pica.vs_setup.swizzle_data;
const auto& shader_binary = pica.vs_setup.GetProgramCode();
const auto& swizzle_data = pica.vs_setup.GetSwizzleData();
// Encode floating point numbers to 24-bit values
// TODO: Drop this explicit conversion once we store float24 values bit-correctly internally.
@ -86,7 +86,7 @@ void GraphicsTracingWidget::StartRecording() {
CiTrace::Recorder::InitialState state;
const auto copy = [&](std::vector<u32>& dest, auto& data) {
const auto copy = [&](std::vector<u32>& dest, const auto& data) {
dest.resize(sizeof(data));
std::memcpy(dest.data(), std::addressof(data), sizeof(data));
};

View File

@ -1,4 +1,4 @@
// Copyright 2014 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -508,11 +508,13 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, const void* ve
info.Clear();
auto& pica = system.GPU().PicaCore();
for (auto instr : pica.vs_setup.program_code)
const auto& program_code = pica.vs_setup.GetProgramCode();
const auto& swizzle_data = pica.vs_setup.GetSwizzleData();
for (auto instr : program_code)
info.code.push_back({instr});
int num_attributes = pica.regs.internal.vs.max_input_attribute_index + 1;
for (auto pattern : pica.vs_setup.swizzle_data) {
for (auto pattern : swizzle_data) {
const nihstro::SwizzleInfo swizzle_info = {.pattern = nihstro::SwizzlePattern{pattern}};
info.swizzle_info.push_back(swizzle_info);
}

View File

@ -70,8 +70,6 @@ add_library(citra_common STATIC
bit_field.h
bit_set.h
bounded_threadsafe_queue.h
cityhash.cpp
cityhash.h
color.h
common_funcs.h
common_paths.h
@ -216,3 +214,5 @@ if (SSE42_COMPILE_OPTION)
target_compile_definitions(citra_common PRIVATE CITRA_HAS_SSE42)
target_compile_options(citra_common PRIVATE ${SSE42_COMPILE_OPTION})
endif()
target_link_libraries(citra_common PUBLIC xxHash::xxhash)

View File

@ -1,338 +0,0 @@
// Copyright (c) 2011 Google, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// CityHash, by Geoff Pike and Jyrki Alakuijala
//
// This file provides CityHash64() and related functions.
//
// It's probably possible to create even faster hash functions by
// writing a program that systematically explores some of the space of
// possible hash functions, by using SIMD instructions, or by
// compromising on hash quality.
#include <algorithm>
#include <string.h> // for memcpy and memset
#include "cityhash.h"
#include "common/swap.h"
// #include "config.h"
#ifdef __GNUC__
#define HAVE_BUILTIN_EXPECT 1
#endif
#ifdef COMMON_BIG_ENDIAN
#define WORDS_BIGENDIAN 1
#endif
typedef std::uint8_t uint8;
typedef std::uint32_t uint32;
typedef std::uint64_t uint64;
namespace Common {
// Copies 8 bytes starting at p into a uint64. memcpy is used so that p does not
// have to be aligned for a 64-bit access.
static uint64 UNALIGNED_LOAD64(const char* p) {
    uint64 value;
    memcpy(&value, p, sizeof(uint64));
    return value;
}
// Copies 4 bytes starting at p into a uint32. memcpy is used so that p does not
// have to be aligned for a 32-bit access.
static uint32 UNALIGNED_LOAD32(const char* p) {
    uint32 value;
    memcpy(&value, p, sizeof(uint32));
    return value;
}
#ifdef WORDS_BIGENDIAN
#define uint32_in_expected_order(x) (swap32(x))
#define uint64_in_expected_order(x) (swap64(x))
#else
#define uint32_in_expected_order(x) (x)
#define uint64_in_expected_order(x) (x)
#endif
#if !defined(LIKELY)
#if HAVE_BUILTIN_EXPECT
#define LIKELY(x) (__builtin_expect(!!(x), 1))
#else
#define LIKELY(x) (x)
#endif
#endif
// Loads a 64-bit word from p and passes it through uint64_in_expected_order,
// which byte-swaps the value when WORDS_BIGENDIAN is defined.
static uint64 Fetch64(const char* p) {
    const uint64 raw = UNALIGNED_LOAD64(p);
    return uint64_in_expected_order(raw);
}
// Loads a 32-bit word from p and passes it through uint32_in_expected_order,
// which byte-swaps the value when WORDS_BIGENDIAN is defined.
static uint32 Fetch32(const char* p) {
    const uint32 raw = UNALIGNED_LOAD32(p);
    return uint32_in_expected_order(raw);
}
// Some primes between 2^63 and 2^64 for various uses.
static const uint64 k0 = 0xc3a5c85c97cb3127ULL;
static const uint64 k1 = 0xb492b66fbe98f273ULL;
static const uint64 k2 = 0x9ae16a3b2f90404fULL;
// Rotates val right by `shift` bits. With a constant shift this normally
// compiles down to a single rotate instruction.
static uint64 Rotate(uint64 val, int shift) {
    if (shift == 0) {
        // A zero rotation would need a left shift by 64 below, which is undefined.
        return val;
    }
    const uint64 low_part = val >> shift;
    const uint64 high_part = val << (64 - shift);
    return low_part | high_part;
}
// XORs val with itself shifted right by 47 bits, folding the high bits into the low bits.
static uint64 ShiftMix(uint64 val) {
    const uint64 folded = val >> 47;
    return val ^ folded;
}
// Hashes the two words (u, v) to 64 bits by packing them into a uint128
// and delegating to Hash128to64.
static uint64 HashLen16(uint64 u, uint64 v) {
    const uint128 packed(u, v);
    return Hash128to64(packed);
}
// Murmur-inspired mix of the two words (u, v) to 64 bits, using a
// caller-supplied multiplier instead of a fixed constant.
static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) {
    const uint64 first = (u ^ v) * mul;
    const uint64 first_mixed = first ^ (first >> 47);
    const uint64 second = (v ^ first_mixed) * mul;
    const uint64 second_mixed = second ^ (second >> 47);
    return second_mixed * mul;
}
// Hashes inputs of 0 to 16 bytes. Four length classes are handled separately:
// empty, 1-3 bytes, 4-7 bytes and 8-16 bytes.
static uint64 HashLen0to16(const char* s, std::size_t len) {
    if (len == 0) {
        return k2;
    }
    if (len < 4) {
        // 1-3 bytes: combine the first, middle and last byte with the length.
        const uint8 first = s[0];
        const uint8 middle = s[len >> 1];
        const uint8 last = s[len - 1];
        const uint32 y = static_cast<uint32>(first) + (static_cast<uint32>(middle) << 8);
        const uint32 z = static_cast<uint32>(len) + (static_cast<uint32>(last) << 2);
        return ShiftMix(y * k2 ^ z * k0) * k2;
    }
    // Both remaining classes use the same length-dependent multiplier.
    const uint64 mul = k2 + len * 2;
    if (len < 8) {
        // 4-7 bytes: two (possibly overlapping) 32-bit reads from each end.
        const uint64 head = Fetch32(s);
        return HashLen16(len + (head << 3), Fetch32(s + len - 4), mul);
    }
    // 8-16 bytes: two (possibly overlapping) 64-bit reads from each end.
    const uint64 a = Fetch64(s) + k2;
    const uint64 b = Fetch64(s + len - 8);
    const uint64 c = Rotate(b, 37) * mul + a;
    const uint64 d = (Rotate(a, 25) + b) * mul;
    return HashLen16(c, d, mul);
}
// Hashes inputs of 17 to 32 bytes using two 64-bit reads from the start and two
// from the end. This probably works well for 16-byte strings as well, but it may
// be overkill in that case.
static uint64 HashLen17to32(const char* s, std::size_t len) {
    const uint64 mul = k2 + len * 2;
    const uint64 a = Fetch64(s) * k1;
    const uint64 b = Fetch64(s + 8);
    const uint64 c = Fetch64(s + len - 8) * mul;
    const uint64 d = Fetch64(s + len - 16) * k2;
    const uint64 u = Rotate(a + b, 43) + Rotate(c, 30) + d;
    const uint64 v = a + Rotate(b + k2, 18) + c;
    return HashLen16(u, v, mul);
}
// Quick and dirty 16-byte hash of 48 bytes of input: the four data words
// w, x, y, z plus the two seeds a and b.
// Callers do best to use "random-looking" values for a and b.
static std::pair<uint64, uint64> WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z,
                                                        uint64 a, uint64 b) {
    const uint64 a_w = a + w;
    const uint64 a_wxy = a_w + x + y;
    const uint64 b_mixed = Rotate(b + a_w + z, 21) + Rotate(a_wxy, 44);
    return std::make_pair(a_wxy + z, b_mixed + a_w);
}
// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
static std::pair<uint64, uint64> WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) {
return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a,
b);
}
// Return an 8-byte hash for 33 to 64 bytes.
// Eight 64-bit words are read: four at fixed offsets from the start of s and four at
// fixed offsets from its end. The end-relative loads reach back to s + len - 32, so
// len must be at least 32 for every read to stay inside the buffer.
static uint64 HashLen33to64(const char* s, std::size_t len) {
// Length-dependent multiplier used throughout the mixing below.
uint64 mul = k2 + len * 2;
// a, b, e, f come from the first 32 bytes; c, d, g, h from the last 32 bytes.
uint64 a = Fetch64(s) * k2;
uint64 b = Fetch64(s + 8);
uint64 c = Fetch64(s + len - 24);
uint64 d = Fetch64(s + len - 32);
uint64 e = Fetch64(s + 16) * k2;
uint64 f = Fetch64(s + 24) * 9;
uint64 g = Fetch64(s + len - 8);
uint64 h = Fetch64(s + len - 16) * mul;
// The statements below depend on each other in order; a and b are reused as outputs.
uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
uint64 v = ((a + g) ^ d) + f + 1;
uint64 w = swap64((u + v) * mul) + h;
uint64 x = Rotate(e + f, 42) + c;
uint64 y = (swap64((v + w) * mul) + g) * mul;
uint64 z = e + f + c;
a = swap64((x + z) * mul + y) + b;
b = ShiftMix((z + a) * mul + d + h) * mul;
return b + x;
}
// Computes a 64-bit hash of the len bytes starting at s.
// Inputs of up to 64 bytes are dispatched to a length-specialised helper; longer inputs
// are processed in 64-byte chunks while carrying the state v, w, x, y, z.
uint64 CityHash64(const char* s, std::size_t len) {
if (len <= 32) {
if (len <= 16) {
return HashLen0to16(s, len);
} else {
return HashLen17to32(s, len);
}
} else if (len <= 64) {
return HashLen33to64(s, len);
}
// For strings over 64 bytes we hash the end first, and then as we
// loop we keep 56 bytes of state: v, w, x, y, and z.
uint64 x = Fetch64(s + len - 40);
uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
std::pair<uint64, uint64> v = WeakHashLen32WithSeeds(s + len - 64, len, z);
std::pair<uint64, uint64> w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
x = x * k1 + Fetch64(s);
// Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
// (len - 1) is used, so an exact multiple of 64 drops one whole chunk; the final
// 64 bytes were already consumed by the end-relative reads above.
len = (len - 1) & ~static_cast<std::size_t>(63);
do {
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
len -= 64;
} while (len != 0);
// Collapse the five state words into the final 64-bit result.
return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
HashLen16(v.second, w.second) + x);
}
// 64-bit hash of a byte array with one caller-supplied seed mixed into the result.
// The first seed of the two-seed variant is fixed to k2.
uint64 CityHash64WithSeed(const char* s, std::size_t len, uint64 seed) {
    const uint64 fixed_seed0 = k2;
    return CityHash64WithSeeds(s, len, fixed_seed0, seed);
}
// 64-bit hash of a byte array with two seeds mixed in: seed0 is subtracted from the
// unseeded hash and the difference is combined with seed1 through HashLen16.
uint64 CityHash64WithSeeds(const char* s, std::size_t len, uint64 seed0, uint64 seed1) {
    const uint64 unseeded = CityHash64(s, len);
    return HashLen16(unseeded - seed0, seed1);
}
// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings
// of any length representable in signed long. Based on City and Murmur.
// The two halves of `seed` initialise the state words a and b.
static uint128 CityMurmur(const char* s, std::size_t len, uint128 seed) {
uint64 a = Uint128Low64(seed);
uint64 b = Uint128High64(seed);
uint64 c = 0;
uint64 d = 0;
// Signed remaining-length counter; it goes non-positive once 16 or fewer bytes remain.
// NOTE(review): `long` may be narrower than std::size_t on some platforms - confirm
// callers only pass lengths that fit (the visible caller passes len < 128).
signed long l = static_cast<long>(len) - 16;
if (l <= 0) { // len <= 16
a = ShiftMix(a * k1) * k1;
c = b * k1 + HashLen0to16(s, len);
d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c));
} else { // len > 16
// Seed c and d from the last 16 bytes, then consume the input 16 bytes at a time.
c = HashLen16(Fetch64(s + len - 8) + k1, a);
d = HashLen16(b + len, c + Fetch64(s + len - 16));
a += d;
do {
a ^= ShiftMix(Fetch64(s) * k1) * k1;
a *= k1;
b ^= a;
c ^= ShiftMix(Fetch64(s + 8) * k1) * k1;
c *= k1;
d ^= c;
s += 16;
l -= 16;
} while (l > 0);
}
// Fold the four state words into the two 64-bit halves of the result.
a = HashLen16(a, c);
b = HashLen16(d, b);
return uint128(a ^ b, HashLen16(b, a));
}
// Computes a 128-bit hash of the len bytes starting at s, with a 128-bit seed mixed in.
// Inputs shorter than 128 bytes are delegated to CityMurmur; longer inputs are processed
// 128 bytes per loop iteration, followed by a tail pass over what remains.
uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) {
if (len < 128) {
return CityMurmur(s, len, seed);
}
// We expect len >= 128 to be the common case. Keep 56 bytes of state:
// v, w, x, y, and z.
std::pair<uint64, uint64> v, w;
uint64 x = Uint128Low64(seed);
uint64 y = Uint128High64(seed);
uint64 z = len * k1;
v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s);
v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8);
w.first = Rotate(y + z, 35) * k1 + x;
w.second = Rotate(x + Fetch64(s + 88), 53) * k1;
// This is the same inner loop as CityHash64(), manually unrolled.
do {
// First 64-byte half of the 128-byte step.
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
// Second 64-byte half of the 128-byte step.
x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
x ^= w.second;
y += v.first + Fetch64(s + 40);
z = Rotate(z + w.first, 33) * k1;
v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
std::swap(z, x);
s += 64;
len -= 128;
} while (LIKELY(len >= 128));
x += Rotate(v.first + z, 49) * k0;
y = y * k0 + Rotate(w.second, 37);
z = z * k0 + Rotate(w.first, 27);
w.first *= 9;
v.first *= k0;
// If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
// The chunk address is s + len - tail_done, so the last chunk can start before s and
// re-read bytes that the main loop already consumed.
for (std::size_t tail_done = 0; tail_done < len;) {
tail_done += 32;
y = Rotate(x + y, 42) * k0 + v.second;
w.first += Fetch64(s + len - tail_done + 16);
x = x * k0 + w.first;
z += w.second + Fetch64(s + len - tail_done);
w.second += v.first;
v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second);
v.first *= k0;
}
// At this point our 56 bytes of state should contain more than
// enough information for a strong 128-bit hash. We use two
// different 56-byte-to-8-byte hashes to get a 16-byte final result.
x = HashLen16(x, v.first);
y = HashLen16(y + z, w.first);
return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second));
}
// 128-bit hash of a byte array. Inputs of at least 16 bytes use their first 16 bytes
// as the seed and hash the remainder; shorter inputs are hashed with the fixed seed (k0, k1).
uint128 CityHash128(const char* s, std::size_t len) {
    if (len < 16) {
        return CityHash128WithSeed(s, len, uint128(k0, k1));
    }
    const uint128 seed(Fetch64(s), Fetch64(s + 8) + k0);
    return CityHash128WithSeed(s + 16, len - 16, seed);
}
} // namespace Common

View File

@ -1,111 +0,0 @@
// Copyright (c) 2011 Google, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// CityHash, by Geoff Pike and Jyrki Alakuijala
//
// http://code.google.com/p/cityhash/
//
// This file provides a few functions for hashing strings. All of them are
// high-quality functions in the sense that they pass standard tests such
// as Austin Appleby's SMHasher. They are also fast.
//
// For 64-bit x86 code, on short strings, we don't know of anything faster than
// CityHash64 that is of comparable quality. We believe our nearest competitor
// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash
// tables and most other hashing (excluding cryptography).
//
// For 64-bit x86 code, on long strings, the picture is more complicated.
// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc.,
// CityHashCrc128 appears to be faster than all competitors of comparable
// quality. CityHash128 is also good but not quite as fast. We believe our
// nearest competitor is Bob Jenkins' Spooky. We don't have great data for
// other 64-bit CPUs, but for long strings we know that Spooky is slightly
// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example.
// Note that CityHashCrc128 is declared in citycrc.h.
//
// For 32-bit x86 code, we don't know of anything faster than CityHash32 that
// is of comparable quality. We believe our nearest competitor is Murmur3A.
// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.)
//
// Functions in the CityHash family are not suitable for cryptography.
//
// Please see CityHash's README file for more details on our performance
// measurements and so on.
//
// WARNING: This code has been only lightly tested on big-endian platforms!
// It is known to work well on little-endian platforms that have a small penalty
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
// It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
// bug reports are welcome.
//
// By the way, for some hash functions, given strings a and b, the hash
// of a+b is easily derived from the hashes of a and b. This property
// doesn't hold for any hash functions in this file.
#pragma once
#include <cstddef>
#include <cstdint>
#include <utility>
namespace Common {
using uint128 = std::pair<uint64_t, uint64_t>;
[[nodiscard]] inline uint64_t Uint128Low64(const uint128& x) {
return x.first;
}
[[nodiscard]] inline uint64_t Uint128High64(const uint128& x) {
return x.second;
}
// Hash function for a byte array.
[[nodiscard]] uint64_t CityHash64(const char* buf, std::size_t len);
// Hash function for a byte array. For convenience, a 64-bit seed is also
// hashed into the result.
[[nodiscard]] uint64_t CityHash64WithSeed(const char* buf, std::size_t len, uint64_t seed);
// Hash function for a byte array. For convenience, two seeds are also
// hashed into the result.
[[nodiscard]] uint64_t CityHash64WithSeeds(const char* buf, std::size_t len, uint64_t seed0,
uint64_t seed1);
// Hash function for a byte array.
[[nodiscard]] uint128 CityHash128(const char* s, std::size_t len);
// Hash function for a byte array. For convenience, a 128-bit seed is also
// hashed into the result.
[[nodiscard]] uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed);
// Reduces a 128-bit value to a 64-bit hash with a Murmur-inspired
// multiply / xor-shift sequence. Intended to be a reasonably good hash function.
[[nodiscard]] inline uint64_t Hash128to64(const uint128& x) {
    const uint64_t kMul = 0x9ddfea08eb382d69ULL;
    const uint64_t low = Uint128Low64(x);
    const uint64_t high = Uint128High64(x);
    const uint64_t first = (low ^ high) * kMul;
    const uint64_t first_mixed = first ^ (first >> 47);
    const uint64_t second = (high ^ first_mixed) * kMul;
    const uint64_t second_mixed = second ^ (second >> 47);
    return second_mixed * kMul;
}
} // namespace Common

View File

@ -14,6 +14,7 @@ enum class HackType : int {
DECRYPTION_AUTHORIZED,
ONLINE_LLE_REQUIRED,
REGION_FROM_SECURE,
REQUIRES_SHADER_FIXUP,
};
class UserHackData {};

View File

@ -1,4 +1,4 @@
// Copyright 2015 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -6,7 +6,7 @@
#include <cstddef>
#include <cstring>
#include "common/cityhash.h"
#include <xxhash.h>
#include "common/common_types.h"
namespace Common {
@ -18,7 +18,7 @@ namespace Common {
* @returns 64-bit hash value that was computed over the data block
*/
static inline u64 ComputeHash64(const void* data, std::size_t len) noexcept {
return CityHash64(static_cast<const char*>(data), len);
return XXH3_64bits(data, len);
}
/**

View File

@ -20,6 +20,7 @@
#include "core/memory.h"
#include "video_core/gpu.h"
#include "video_core/gpu_debugger.h"
#include "video_core/pica/pica_core.h"
#include "video_core/pica/regs_lcd.h"
#include "video_core/renderer_base.h"
#include "video_core/right_eye_disabler.h"
@ -685,8 +686,16 @@ Result GSP_GPU::AcquireGpuRight(const Kernel::HLERequestContext& ctx,
Common::Hacks::hack_manager.GetHackAllowMode(Common::Hacks::HackType::RIGHT_EYE_DISABLE,
process->codeset->program_id) !=
Common::Hacks::HackAllowMode::DISALLOW;
bool requires_shader_fixup =
Common::Hacks::hack_manager.GetHackAllowMode(
Common::Hacks::HackType::REQUIRES_SHADER_FIXUP, process->codeset->program_id,
Common::Hacks::HackAllowMode::DISALLOW) != Common::Hacks::HackAllowMode::DISALLOW;
auto& gpu = system.GPU();
gpu.GetRightEyeDisabler().SetEnabled(right_eye_disable_allow);
gpu.PicaCore().vs_setup.requires_fixup = requires_shader_fixup;
gpu.PicaCore().gs_setup.requires_fixup = requires_shader_fixup;
if (active_thread_id == session_data->thread_id) {
return {ErrorDescription::AlreadyDone, ErrorModule::GX, ErrorSummary::Success,

View File

@ -1,4 +1,4 @@
// Copyright 2023 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -63,11 +63,15 @@ static std::unique_ptr<Pica::ShaderSetup> CompileShaderSetup(
const auto shbin = nihstro::InlineAsm::CompileToRawBinary(code);
auto shader = std::make_unique<Pica::ShaderSetup>();
std::transform(shbin.program.begin(), shbin.program.end(), shader->program_code.begin(),
Pica::ProgramCode program_code{};
Pica::SwizzleData swizzle_data{};
std::transform(shbin.program.begin(), shbin.program.end(), program_code.begin(),
[](const auto& x) { return x.hex; });
std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(),
shader->swizzle_data.begin(), [](const auto& x) { return x.hex; });
std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(), swizzle_data.begin(),
[](const auto& x) { return x.hex; });
shader->UpdateProgramCode(program_code);
shader->UpdateSwizzleData(swizzle_data);
return shader;
}
@ -143,12 +147,16 @@ private:
class ShaderJitTest : public ShaderTest {
public:
explicit ShaderJitTest(std::initializer_list<nihstro::InlineAsm> code) : ShaderTest(code) {
shader_jit.Compile(&shader_setup->program_code, &shader_setup->swizzle_data);
const auto& program_code = shader_setup->GetProgramCode();
const auto& swizzle_data = shader_setup->GetSwizzleData();
shader_jit.Compile(&program_code, &swizzle_data);
}
explicit ShaderJitTest(std::unique_ptr<Pica::ShaderSetup> input_shader_setup)
: ShaderTest(std::move(input_shader_setup)) {
shader_jit.Compile(&shader_setup->program_code, &shader_setup->swizzle_data);
const auto& program_code = shader_setup->GetProgramCode();
const auto& swizzle_data = shader_setup->GetSwizzleData();
shader_jit.Compile(&program_code, &swizzle_data);
}
void RunShader(Pica::ShaderUnit& shader_unit, std::span<const Common::Vec4f> inputs) override {
@ -210,12 +218,12 @@ SHADER_TEST_CASE("CALL", "[video_core][shader]") {
// call foo
CALL.flow_control.dest_offset = 2;
CALL.flow_control.num_instructions = 1;
shader_setup->program_code[0] = CALL.hex;
shader_setup->UpdateProgramCode(0, CALL.hex);
// call ex2
CALL.flow_control.dest_offset = 4;
CALL.flow_control.num_instructions = 1;
shader_setup->program_code[2] = CALL.hex;
shader_setup->UpdateProgramCode(2, CALL.hex);
auto shader = TestType(std::move(shader_setup));
@ -608,7 +616,7 @@ SHADER_TEST_CASE("MAD", "[video_core][shader]") {
MAD.mad.src2 = sh_input2;
MAD.mad.src3 = sh_input3;
MAD.mad.dest = sh_output;
shader_setup->program_code[0] = MAD.hex;
shader_setup->UpdateProgramCode(0, MAD.hex);
nihstro::SwizzlePattern swizzle = {};
swizzle.dest_mask = 0b1111;
@ -624,7 +632,7 @@ SHADER_TEST_CASE("MAD", "[video_core][shader]") {
swizzle.SetSelectorSrc3(1, SwizzlePattern::Selector::y);
swizzle.SetSelectorSrc3(2, SwizzlePattern::Selector::z);
swizzle.SetSelectorSrc3(3, SwizzlePattern::Selector::w);
shader_setup->swizzle_data[0] = swizzle.hex;
shader_setup->UpdateSwizzleData(0, swizzle.hex);
auto shader = TestType(std::move(shader_setup));
@ -713,7 +721,7 @@ SHADER_TEST_CASE("Conditional", "[video_core][shader]") {
{
auto shader_setup = CompileShaderSetup(assembly_template);
IFC.flow_control.op = nihstro::Instruction::FlowControlType::Op::JustX;
shader_setup->program_code[0] = IFC.hex;
shader_setup->UpdateProgramCode(0, IFC.hex);
const float result = result_x ? 1.0f : 0.0f;
auto shader_test = TestType(std::move(shader_setup));
@ -726,7 +734,7 @@ SHADER_TEST_CASE("Conditional", "[video_core][shader]") {
{
auto shader_setup = CompileShaderSetup(assembly_template);
IFC.flow_control.op = nihstro::Instruction::FlowControlType::Op::JustY;
shader_setup->program_code[0] = IFC.hex;
shader_setup->UpdateProgramCode(0, IFC.hex);
const float result = result_y ? 1.0f : 0.0f;
auto shader_test = TestType(std::move(shader_setup));
@ -739,7 +747,7 @@ SHADER_TEST_CASE("Conditional", "[video_core][shader]") {
{
auto shader_setup = CompileShaderSetup(assembly_template);
IFC.flow_control.op = nihstro::Instruction::FlowControlType::Op::Or;
shader_setup->program_code[0] = IFC.hex;
shader_setup->UpdateProgramCode(0, IFC.hex);
const float result = (result_x || result_y) ? 1.0f : 0.0f;
auto shader_test = TestType(std::move(shader_setup));
@ -752,7 +760,7 @@ SHADER_TEST_CASE("Conditional", "[video_core][shader]") {
{
auto shader_setup = CompileShaderSetup(assembly_template);
IFC.flow_control.op = nihstro::Instruction::FlowControlType::Op::And;
shader_setup->program_code[0] = IFC.hex;
shader_setup->UpdateProgramCode(0, IFC.hex);
const float result = (result_x && result_y) ? 1.0f : 0.0f;
auto shader_test = TestType(std::move(shader_setup));

View File

@ -184,17 +184,18 @@ void DumpShader(const std::string& filename, const ShaderRegs& config, const Sha
dvlb.dvle_offset = QueueForWriting(reinterpret_cast<const u8*>(&dvle), sizeof(dvle));
// TODO: Reduce the amount of binary code written to relevant portions
const auto& program_code = setup.GetProgramCode();
dvlp.binary_offset = write_offset - dvlp_offset;
dvlp.binary_size_words = static_cast<uint32_t>(setup.program_code.size());
QueueForWriting(reinterpret_cast<const u8*>(setup.program_code.data()),
static_cast<u32>(setup.program_code.size()) * sizeof(u32));
dvlp.binary_size_words = static_cast<uint32_t>(program_code.size());
QueueForWriting(reinterpret_cast<const u8*>(program_code.data()),
static_cast<u32>(program_code.size()) * sizeof(u32));
const auto& swizzle_data = setup.GetSwizzleData();
dvlp.swizzle_info_offset = write_offset - dvlp_offset;
dvlp.swizzle_info_num_entries = static_cast<uint32_t>(setup.swizzle_data.size());
dvlp.swizzle_info_num_entries = static_cast<uint32_t>(swizzle_data.size());
u32 dummy = 0;
for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) {
QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]),
sizeof(setup.swizzle_data[i]));
for (unsigned int i = 0; i < swizzle_data.size(); ++i) {
QueueForWriting(reinterpret_cast<const u8*>(&swizzle_data[i]), sizeof(swizzle_data[i]));
QueueForWriting(reinterpret_cast<const u8*>(&dummy), sizeof(dummy));
}

View File

@ -258,8 +258,7 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask, bool& stop_requeste
if (offset >= 4096) {
LOG_ERROR(HW_GPU, "Invalid GS program offset {}", offset);
} else {
gs_setup.program_code[offset] = value;
gs_setup.MarkProgramCodeDirty();
gs_setup.UpdateProgramCode(offset, value);
offset++;
}
break;
@ -274,11 +273,10 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask, bool& stop_requeste
case PICA_REG_INDEX(gs.swizzle_patterns.set_word[6]):
case PICA_REG_INDEX(gs.swizzle_patterns.set_word[7]): {
u32& offset = regs.internal.gs.swizzle_patterns.offset;
if (offset >= gs_setup.swizzle_data.size()) {
if (offset >= gs_setup.GetSwizzleData().size()) {
LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset {}", offset);
} else {
gs_setup.swizzle_data[offset] = value;
gs_setup.MarkSwizzleDataDirty();
gs_setup.UpdateSwizzleData(offset, value);
offset++;
}
break;
@ -340,12 +338,10 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask, bool& stop_requeste
if (offset >= 512) {
LOG_ERROR(HW_GPU, "Invalid VS program offset {}", offset);
} else {
vs_setup.program_code[offset] = value;
vs_setup.MarkProgramCodeDirty();
vs_setup.UpdateProgramCode(offset, value);
if (!regs.internal.pipeline.gs_unit_exclusive_configuration &&
regs.internal.pipeline.use_gs == PipelineRegs::UseGS::No) {
gs_setup.program_code[offset] = value;
gs_setup.MarkProgramCodeDirty();
gs_setup.UpdateProgramCode(offset, value);
}
offset++;
}
@ -361,15 +357,13 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask, bool& stop_requeste
case PICA_REG_INDEX(vs.swizzle_patterns.set_word[6]):
case PICA_REG_INDEX(vs.swizzle_patterns.set_word[7]): {
u32& offset = regs.internal.vs.swizzle_patterns.offset;
if (offset >= vs_setup.swizzle_data.size()) {
if (offset >= vs_setup.GetSwizzleData().size()) {
LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset {}", offset);
} else {
vs_setup.swizzle_data[offset] = value;
vs_setup.MarkSwizzleDataDirty();
vs_setup.UpdateSwizzleData(offset, value);
if (!regs.internal.pipeline.gs_unit_exclusive_configuration &&
regs.internal.pipeline.use_gs == PipelineRegs::UseGS::No) {
gs_setup.swizzle_data[offset] = value;
gs_setup.MarkSwizzleDataDirty();
gs_setup.UpdateSwizzleData(offset, value);
}
offset++;
}

View File

@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <utility>
#include "common/assert.h"
#include "common/bit_set.h"
#include "common/hash.h"
@ -51,7 +52,9 @@ std::optional<u32> ShaderSetup::WriteUniformFloatReg(ShaderRegs& config, u32 val
u64 ShaderSetup::GetProgramCodeHash() {
if (program_code_hash_dirty) {
program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
const auto& prog_code = GetProgramCode();
program_code_hash =
Common::ComputeHash64(prog_code.data(), biggest_program_size * sizeof(u32));
program_code_hash_dirty = false;
}
return program_code_hash;
@ -59,10 +62,18 @@ u64 ShaderSetup::GetProgramCodeHash() {
u64 ShaderSetup::GetSwizzleDataHash() {
if (swizzle_data_hash_dirty) {
swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data));
swizzle_data_hash =
Common::ComputeHash64(swizzle_data.data(), biggest_swizzle_size * sizeof(u32));
swizzle_data_hash_dirty = false;
}
return swizzle_data_hash;
}
// Clears the pending-fixup flag when this setup requires fixups and one is pending.
// No program words are modified here.
void ShaderSetup::DoProgramCodeFixup() {
    const bool fixup_due = requires_fixup && program_code_pending_fixup;
    if (fixup_due) {
        program_code_pending_fixup = false;
    }
}
} // namespace Pica

View File

@ -51,6 +51,17 @@ struct ShaderRegs;
* The geometry shader has a unique configuration, so when enabled it has its own setup.
*/
struct ShaderSetup {
private:
// Invalidates everything derived from the program code: the cached hash is marked
// stale, a fixup is marked pending and any existing fixed-up copy is no longer used.
void MakeProgramCodeDirty() {
    has_fixup = false;
    program_code_pending_fixup = true;
    program_code_hash_dirty = true;
}
// Marks the cached swizzle data hash as stale so that it is recomputed on next use.
void MakeSwizzleDataDirty()
{
    swizzle_data_hash_dirty = true;
}
public:
explicit ShaderSetup();
~ShaderSetup();
@ -65,28 +76,80 @@ public:
u64 GetSwizzleDataHash();
void MarkProgramCodeDirty() {
program_code_hash_dirty = true;
void DoProgramCodeFixup();
inline void UpdateProgramCode(size_t offset, u32 value) {
u32& inst = program_code[offset];
u32 old = inst;
if (old == value)
return;
inst = value;
if (!program_code_hash_dirty) {
MakeProgramCodeDirty();
}
if ((offset + 1) > biggest_program_size) {
biggest_program_size = offset + 1;
}
}
void MarkSwizzleDataDirty() {
swizzle_data_hash_dirty = true;
// Replaces the whole program with `other`, sets the tracked program size to the full
// container size and invalidates the cached hash and fixup state.
void UpdateProgramCode(const ProgramCode& other) {
    MakeProgramCodeDirty();
    program_code = other;
    biggest_program_size = program_code.size();
}
inline void UpdateSwizzleData(size_t offset, u32 value) {
u32& data = swizzle_data[offset];
u32 old = data;
if (old == value)
return;
data = value;
if (!swizzle_data_hash_dirty) {
MakeSwizzleDataDirty();
}
if ((offset + 1) > biggest_swizzle_size) {
biggest_swizzle_size = offset + 1;
}
}
/**
 * Replaces the entire swizzle data with a copy of `other`.
 *
 * The used size is set to the full container size and the cached swizzle hash is marked
 * stale, whether or not the contents actually differ.
 *
 * @param other Swizzle data to copy from.
 */
void UpdateSwizzleData(const SwizzleData& other) {
    swizzle_data = other;
    MakeSwizzleDataDirty();
    biggest_swizzle_size = swizzle_data.size();
}
/// Returns the program code to execute: the fixed-up copy while has_fixup is set, otherwise
/// the code exactly as it was written.
const ProgramCode& GetProgramCode() const {
    if (has_fixup) {
        return program_code_fixup;
    }
    return program_code;
}
/// Read-only access to the swizzle data; writes go through UpdateSwizzleData().
const SwizzleData& GetSwizzleData() const {
return swizzle_data;
}
public:
Uniforms uniforms;
PackedAttribute uniform_queue;
ProgramCode program_code{};
SwizzleData swizzle_data{};
u32 entry_point{};
const void* cached_shader{};
bool uniforms_dirty = true;
bool requires_fixup = false;
bool has_fixup = false;
private:
ProgramCode program_code{};
ProgramCode program_code_fixup{};
SwizzleData swizzle_data{};
bool program_code_hash_dirty{true};
bool swizzle_data_hash_dirty{true};
u64 program_code_hash{0xDEADC0DE};
u64 swizzle_data_hash{0xDEADC0DE};
bool program_code_pending_fixup{true};
u32 biggest_program_size = 0;
u32 biggest_swizzle_size = 0;
u64 program_code_hash{0};
u64 swizzle_data_hash{0};
friend class boost::serialization::access;
template <class Archive>
@ -94,11 +157,17 @@ private:
ar & uniforms;
ar & uniform_queue;
ar & program_code;
ar & program_code_fixup;
ar & swizzle_data;
ar & program_code_hash_dirty;
ar & swizzle_data_hash_dirty;
ar & program_code_pending_fixup;
ar & biggest_program_size;
ar & biggest_swizzle_size;
ar & program_code_hash;
ar & swizzle_data_hash;
ar & requires_fixup;
ar & has_fixup;
}
};

View File

@ -89,8 +89,8 @@ static std::tuple<PicaVSConfig, Pica::ShaderSetup> BuildVSConfigFromRaw(
std::copy_n(raw.GetProgramCode().begin() + Pica::MAX_PROGRAM_CODE_LENGTH,
Pica::MAX_SWIZZLE_DATA_LENGTH, swizzle_data.begin());
Pica::ShaderSetup setup;
setup.program_code = program_code;
setup.swizzle_data = swizzle_data;
setup.UpdateProgramCode(program_code);
setup.UpdateSwizzleData(swizzle_data);
// Enable the geometry-shader only if we are actually doing per-fragment lighting
// and care about proper quaternions. Otherwise just use standard vertex+fragment shaders
@ -354,12 +354,13 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::RegsInternal&
// Save VS to the disk cache if its a new shader
if (result) {
auto& disk_cache = impl->disk_cache;
ProgramCode program_code{setup.program_code.begin(), setup.program_code.end()};
program_code.insert(program_code.end(), setup.swizzle_data.begin(),
setup.swizzle_data.end());
const u64 unique_identifier = GetUniqueIdentifier(regs, program_code);
const auto& program_code = setup.GetProgramCode();
const auto& swizzle_data = setup.GetSwizzleData();
ProgramCode new_program_code{program_code.begin(), program_code.end()};
new_program_code.insert(new_program_code.end(), swizzle_data.begin(), swizzle_data.end());
const u64 unique_identifier = GetUniqueIdentifier(regs, new_program_code);
const ShaderDiskCacheRaw raw{unique_identifier, ProgramType::VS, regs,
std::move(program_code)};
std::move(new_program_code)};
disk_cache.SaveRaw(raw);
disk_cache.SaveDecompiled(unique_identifier, *result, accurate_mul);
}

View File

@ -178,7 +178,7 @@ std::string GenerateVertexShader(const ShaderSetup& setup, const PicaVSConfig& c
};
auto program_source =
DecompileProgram(setup.program_code, setup.swizzle_data, config.state.main_offset,
DecompileProgram(setup.GetProgramCode(), setup.GetSwizzleData(), config.state.main_offset,
get_input_reg, get_output_reg, config.state.sanitize_mul);
if (program_source.empty()) {

View File

@ -1,4 +1,4 @@
// Copyright 2023 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -42,6 +42,7 @@ void PicaVSConfigState::Init(const Pica::RegsInternal& regs, Pica::ShaderSetup&
use_geometry_shader = use_geometry_shader_;
sanitize_mul = accurate_mul_;
setup.DoProgramCodeFixup();
program_hash = setup.GetProgramCodeHash();
swizzle_hash = setup.GetSwizzleDataHash();
main_offset = regs.vs.main_offset;

View File

@ -105,8 +105,8 @@ static void RunInterpreter(const ShaderSetup& setup, ShaderUnit& state,
};
const auto& uniforms = setup.uniforms;
const auto& swizzle_data = setup.swizzle_data;
const auto& program_code = setup.program_code;
const auto& swizzle_data = setup.GetSwizzleData();
const auto& program_code = setup.GetProgramCode();
// Constants for handling invalid inputs
static f24 dummy_vec4_float24_zeros[4] = {f24::Zero(), f24::Zero(), f24::Zero(), f24::Zero()};
@ -736,6 +736,7 @@ static void RunInterpreter(const ShaderSetup& setup, ShaderUnit& state,
// Prepares `setup` for a batch run by the interpreter: runs the pending program-code fixup
// step and records the entry point the batch starts from.
void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
// The entry point is asserted to be below MAX_PROGRAM_CODE_LENGTH.
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
// Make sure GetProgramCode() reflects any pending fixup before the batch uses it.
setup.DoProgramCodeFixup();
setup.entry_point = entry_point;
}

View File

@ -1,4 +1,4 @@
// Copyright 2016 Citra Emulator Project
// Copyright Citra Emulator Project / Azahar Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -26,6 +26,7 @@ void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
setup.entry_point = entry_point;
setup.DoProgramCodeFixup();
const u64 code_hash = setup.GetProgramCodeHash();
const u64 swizzle_hash = setup.GetSwizzleDataHash();
@ -35,7 +36,7 @@ void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) {
setup.cached_shader = iter->second.get();
} else {
auto shader = std::make_unique<JitShader>();
shader->Compile(&setup.program_code, &setup.swizzle_data);
shader->Compile(&setup.GetProgramCode(), &setup.GetSwizzleData());
setup.cached_shader = shader.get();
cache.emplace_hint(iter, cache_key, std::move(shader));
}