From 7de2d7b846cf5f8ccc60820515864d69724dbc74 Mon Sep 17 00:00:00 2001 From: PabloMK7 Date: Fri, 9 Jan 2026 18:01:49 +0100 Subject: [PATCH] video_core: Switch to xxHash and improve ShaderSetup behaviour (#1592) --- .gitmodules | 3 + externals/CMakeLists.txt | 14 + externals/xxHash | 1 + .../debugger/graphics/graphics_tracing.cpp | 8 +- .../graphics/graphics_vertex_shader.cpp | 8 +- src/common/CMakeLists.txt | 4 +- src/common/cityhash.cpp | 338 ------------------ src/common/cityhash.h | 111 ------ src/common/hacks/hack_list.h | 1 + src/common/hash.h | 6 +- src/core/hle/service/gsp/gsp_gpu.cpp | 9 + src/tests/video_core/shader.cpp | 38 +- src/video_core/debug_utils/debug_utils.cpp | 15 +- src/video_core/pica/pica_core.cpp | 22 +- src/video_core/pica/shader_setup.cpp | 15 +- src/video_core/pica/shader_setup.h | 85 ++++- .../renderer_opengl/gl_shader_manager.cpp | 15 +- .../shader/generator/glsl_shader_gen.cpp | 2 +- .../shader/generator/shader_gen.cpp | 3 +- src/video_core/shader/shader_interpreter.cpp | 5 +- src/video_core/shader/shader_jit.cpp | 5 +- 21 files changed, 188 insertions(+), 520 deletions(-) create mode 160000 externals/xxHash delete mode 100644 src/common/cityhash.cpp delete mode 100644 src/common/cityhash.h diff --git a/.gitmodules b/.gitmodules index ed5d9f0f9..cb600a64c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -100,3 +100,6 @@ [submodule "spirv-headers"] path = externals/spirv-headers url = https://github.com/KhronosGroup/SPIRV-Headers +[submodule "externals/xxHash"] + path = externals/xxHash + url = https://github.com/Cyan4973/xxHash.git diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 506b1105c..01e76a095 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -489,3 +489,17 @@ if (ENABLE_VULKAN) add_subdirectory(libadrenotools) endif() endif() + +set(XXHASH_BUILD_XXHSUM OFF) +add_subdirectory(xxHash/cmake_unofficial EXCLUDE_FROM_ALL) +target_compile_definitions(xxhash PRIVATE XXH_FORCE_MEMORY_ACCESS=2) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64") + target_compile_definitions(xxhash PRIVATE XXH_VECTOR=XXH_SSE2) + message(STATUS "Enabling SSE2 for xxHash") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|armv8") + target_compile_definitions(xxhash PRIVATE XXH_VECTOR=XXH_NEON) + message(STATUS "Enabling NEON for xxHash") +else() + target_compile_definitions(xxhash PRIVATE XXH_VECTOR=XXH_SCALAR) + message(STATUS "Disabling SIMD for xxHash") +endif() \ No newline at end of file diff --git a/externals/xxHash b/externals/xxHash new file mode 160000 index 000000000..e626a72bc --- /dev/null +++ b/externals/xxHash @@ -0,0 +1 @@ +Subproject commit e626a72bc2321cd320e953a0ccf1584cad60f363 diff --git a/src/citra_qt/debugger/graphics/graphics_tracing.cpp b/src/citra_qt/debugger/graphics/graphics_tracing.cpp index e78acb096..9cf54940e 100644 --- a/src/citra_qt/debugger/graphics/graphics_tracing.cpp +++ b/src/citra_qt/debugger/graphics/graphics_tracing.cpp @@ -1,4 +1,4 @@ -// Copyright 2015 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -63,8 +63,8 @@ void GraphicsTracingWidget::StartRecording() { return; auto& pica = system.GPU().PicaCore(); - auto shader_binary = pica.vs_setup.program_code; - auto swizzle_data = pica.vs_setup.swizzle_data; + const auto& shader_binary = pica.vs_setup.GetProgramCode(); + const auto& swizzle_data = pica.vs_setup.GetSwizzleData(); // Encode floating point numbers to 24-bit values // TODO: Drop this explicit conversion once we store float24 values bit-correctly internally. @@ -86,7 +86,7 @@ void GraphicsTracingWidget::StartRecording() { CiTrace::Recorder::InitialState state; - const auto copy = [&](std::vector& dest, auto& data) { + const auto copy = [&](std::vector& dest, const auto& data) { dest.resize(sizeof(data)); std::memcpy(dest.data(), std::addressof(data), sizeof(data)); }; diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp index 46f8ec5cf..e25cf7733 100644 --- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp @@ -1,4 +1,4 @@ -// Copyright 2014 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -508,11 +508,13 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, const void* ve info.Clear(); auto& pica = system.GPU().PicaCore(); - for (auto instr : pica.vs_setup.program_code) + const auto& program_code = pica.vs_setup.GetProgramCode(); + const auto& swizzle_data = pica.vs_setup.GetSwizzleData(); + for (auto instr : program_code) info.code.push_back({instr}); int num_attributes = pica.regs.internal.vs.max_input_attribute_index + 1; - for (auto pattern : pica.vs_setup.swizzle_data) { + for (auto pattern : swizzle_data) { const nihstro::SwizzleInfo swizzle_info = {.pattern = nihstro::SwizzlePattern{pattern}}; info.swizzle_info.push_back(swizzle_info); } diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 44f0a67dc..afb67041d 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -70,8 +70,6 @@ add_library(citra_common STATIC bit_field.h bit_set.h bounded_threadsafe_queue.h - cityhash.cpp - cityhash.h color.h common_funcs.h common_paths.h @@ -216,3 +214,5 @@ if (SSE42_COMPILE_OPTION) target_compile_definitions(citra_common PRIVATE CITRA_HAS_SSE42) target_compile_options(citra_common PRIVATE ${SSE42_COMPILE_OPTION}) endif() + +target_link_libraries(citra_common PUBLIC xxHash::xxhash) \ No newline at end of file diff --git a/src/common/cityhash.cpp b/src/common/cityhash.cpp deleted file mode 100644 index 8a75c11c4..000000000 --- a/src/common/cityhash.cpp +++ /dev/null @@ -1,338 +0,0 @@ -// Copyright (c) 2011 Google, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -// -// CityHash, by Geoff Pike and Jyrki Alakuijala -// -// This file provides CityHash64() and related functions. -// -// It's probably possible to create even faster hash functions by -// writing a program that systematically explores some of the space of -// possible hash functions, by using SIMD instructions, or by -// compromising on hash quality. - -#include -#include // for memcpy and memset -#include "cityhash.h" -#include "common/swap.h" - -// #include "config.h" -#ifdef __GNUC__ -#define HAVE_BUILTIN_EXPECT 1 -#endif -#ifdef COMMON_BIG_ENDIAN -#define WORDS_BIGENDIAN 1 -#endif - -typedef std::uint8_t uint8; -typedef std::uint32_t uint32; -typedef std::uint64_t uint64; - -namespace Common { - -static uint64 UNALIGNED_LOAD64(const char* p) { - uint64 result; - memcpy(&result, p, sizeof(result)); - return result; -} - -static uint32 UNALIGNED_LOAD32(const char* p) { - uint32 result; - memcpy(&result, p, sizeof(result)); - return result; -} - -#ifdef WORDS_BIGENDIAN -#define uint32_in_expected_order(x) (swap32(x)) -#define uint64_in_expected_order(x) (swap64(x)) -#else -#define uint32_in_expected_order(x) (x) -#define uint64_in_expected_order(x) (x) -#endif - -#if !defined(LIKELY) -#if HAVE_BUILTIN_EXPECT -#define LIKELY(x) (__builtin_expect(!!(x), 1)) -#else -#define LIKELY(x) (x) -#endif -#endif - -static uint64 Fetch64(const char* p) { - return uint64_in_expected_order(UNALIGNED_LOAD64(p)); -} - -static uint32 Fetch32(const char* p) { - return uint32_in_expected_order(UNALIGNED_LOAD32(p)); -} - -// Some primes between 2^63 and 2^64 for various uses. -static const uint64 k0 = 0xc3a5c85c97cb3127ULL; -static const uint64 k1 = 0xb492b66fbe98f273ULL; -static const uint64 k2 = 0x9ae16a3b2f90404fULL; - -// Bitwise right rotate. Normally this will compile to a single -// instruction, especially if the shift is a manifest constant. -static uint64 Rotate(uint64 val, int shift) { - // Avoid shifting by 64: doing so yields an undefined result. - return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); -} - -static uint64 ShiftMix(uint64 val) { - return val ^ (val >> 47); -} - -static uint64 HashLen16(uint64 u, uint64 v) { - return Hash128to64(uint128(u, v)); -} - -static uint64 HashLen16(uint64 u, uint64 v, uint64 mul) { - // Murmur-inspired hashing. - uint64 a = (u ^ v) * mul; - a ^= (a >> 47); - uint64 b = (v ^ a) * mul; - b ^= (b >> 47); - b *= mul; - return b; -} - -static uint64 HashLen0to16(const char* s, std::size_t len) { - if (len >= 8) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch64(s) + k2; - uint64 b = Fetch64(s + len - 8); - uint64 c = Rotate(b, 37) * mul + a; - uint64 d = (Rotate(a, 25) + b) * mul; - return HashLen16(c, d, mul); - } - if (len >= 4) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch32(s); - return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); - } - if (len > 0) { - uint8 a = s[0]; - uint8 b = s[len >> 1]; - uint8 c = s[len - 1]; - uint32 y = static_cast(a) + (static_cast(b) << 8); - uint32 z = static_cast(len) + (static_cast(c) << 2); - return ShiftMix(y * k2 ^ z * k0) * k2; - } - return k2; -} - -// This probably works well for 16-byte strings as well, but it may be overkill -// in that case. -static uint64 HashLen17to32(const char* s, std::size_t len) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch64(s) * k1; - uint64 b = Fetch64(s + 8); - uint64 c = Fetch64(s + len - 8) * mul; - uint64 d = Fetch64(s + len - 16) * k2; - return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, a + Rotate(b + k2, 18) + c, mul); -} - -// Return a 16-byte hash for 48 bytes. Quick and dirty. -// Callers do best to use "random-looking" values for a and b. -static std::pair WeakHashLen32WithSeeds(uint64 w, uint64 x, uint64 y, uint64 z, - uint64 a, uint64 b) { - a += w; - b = Rotate(b + a + z, 21); - uint64 c = a; - a += x; - a += y; - b += Rotate(a, 44); - return std::make_pair(a + z, b + c); -} - -// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. -static std::pair WeakHashLen32WithSeeds(const char* s, uint64 a, uint64 b) { - return WeakHashLen32WithSeeds(Fetch64(s), Fetch64(s + 8), Fetch64(s + 16), Fetch64(s + 24), a, - b); -} - -// Return an 8-byte hash for 33 to 64 bytes. -static uint64 HashLen33to64(const char* s, std::size_t len) { - uint64 mul = k2 + len * 2; - uint64 a = Fetch64(s) * k2; - uint64 b = Fetch64(s + 8); - uint64 c = Fetch64(s + len - 24); - uint64 d = Fetch64(s + len - 32); - uint64 e = Fetch64(s + 16) * k2; - uint64 f = Fetch64(s + 24) * 9; - uint64 g = Fetch64(s + len - 8); - uint64 h = Fetch64(s + len - 16) * mul; - uint64 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9; - uint64 v = ((a + g) ^ d) + f + 1; - uint64 w = swap64((u + v) * mul) + h; - uint64 x = Rotate(e + f, 42) + c; - uint64 y = (swap64((v + w) * mul) + g) * mul; - uint64 z = e + f + c; - a = swap64((x + z) * mul + y) + b; - b = ShiftMix((z + a) * mul + d + h) * mul; - return b + x; -} - -uint64 CityHash64(const char* s, std::size_t len) { - if (len <= 32) { - if (len <= 16) { - return HashLen0to16(s, len); - } else { - return HashLen17to32(s, len); - } - } else if (len <= 64) { - return HashLen33to64(s, len); - } - - // For strings over 64 bytes we hash the end first, and then as we - // loop we keep 56 bytes of state: v, w, x, y, and z. - uint64 x = Fetch64(s + len - 40); - uint64 y = Fetch64(s + len - 16) + Fetch64(s + len - 56); - uint64 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); - std::pair v = WeakHashLen32WithSeeds(s + len - 64, len, z); - std::pair w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); - x = x * k1 + Fetch64(s); - - // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. - len = (len - 1) & ~static_cast(63); - do { - x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = Rotate(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - len -= 64; - } while (len != 0); - return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, - HashLen16(v.second, w.second) + x); -} - -uint64 CityHash64WithSeed(const char* s, std::size_t len, uint64 seed) { - return CityHash64WithSeeds(s, len, k2, seed); -} - -uint64 CityHash64WithSeeds(const char* s, std::size_t len, uint64 seed0, uint64 seed1) { - return HashLen16(CityHash64(s, len) - seed0, seed1); -} - -// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings -// of any length representable in signed long. Based on City and Murmur. -static uint128 CityMurmur(const char* s, std::size_t len, uint128 seed) { - uint64 a = Uint128Low64(seed); - uint64 b = Uint128High64(seed); - uint64 c = 0; - uint64 d = 0; - signed long l = static_cast(len) - 16; - if (l <= 0) { // len <= 16 - a = ShiftMix(a * k1) * k1; - c = b * k1 + HashLen0to16(s, len); - d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); - } else { // len > 16 - c = HashLen16(Fetch64(s + len - 8) + k1, a); - d = HashLen16(b + len, c + Fetch64(s + len - 16)); - a += d; - do { - a ^= ShiftMix(Fetch64(s) * k1) * k1; - a *= k1; - b ^= a; - c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; - c *= k1; - d ^= c; - s += 16; - l -= 16; - } while (l > 0); - } - a = HashLen16(a, c); - b = HashLen16(d, b); - return uint128(a ^ b, HashLen16(b, a)); -} - -uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed) { - if (len < 128) { - return CityMurmur(s, len, seed); - } - - // We expect len >= 128 to be the common case. Keep 56 bytes of state: - // v, w, x, y, and z. - std::pair v, w; - uint64 x = Uint128Low64(seed); - uint64 y = Uint128High64(seed); - uint64 z = len * k1; - v.first = Rotate(y ^ k1, 49) * k1 + Fetch64(s); - v.second = Rotate(v.first, 42) * k1 + Fetch64(s + 8); - w.first = Rotate(y + z, 35) * k1 + x; - w.second = Rotate(x + Fetch64(s + 88), 53) * k1; - - // This is the same inner loop as CityHash64(), manually unrolled. - do { - x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = Rotate(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = Rotate(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - len -= 128; - } while (LIKELY(len >= 128)); - x += Rotate(v.first + z, 49) * k0; - y = y * k0 + Rotate(w.second, 37); - z = z * k0 + Rotate(w.first, 27); - w.first *= 9; - v.first *= k0; - // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. - for (std::size_t tail_done = 0; tail_done < len;) { - tail_done += 32; - y = Rotate(x + y, 42) * k0 + v.second; - w.first += Fetch64(s + len - tail_done + 16); - x = x * k0 + w.first; - z += w.second + Fetch64(s + len - tail_done); - w.second += v.first; - v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); - v.first *= k0; - } - // At this point our 56 bytes of state should contain more than - // enough information for a strong 128-bit hash. We use two - // different 56-byte-to-8-byte hashes to get a 16-byte final result. - x = HashLen16(x, v.first); - y = HashLen16(y + z, w.first); - return uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); -} - -uint128 CityHash128(const char* s, std::size_t len) { - return len >= 16 - ? CityHash128WithSeed(s + 16, len - 16, uint128(Fetch64(s), Fetch64(s + 8) + k0)) - : CityHash128WithSeed(s, len, uint128(k0, k1)); -} - -} // namespace Common diff --git a/src/common/cityhash.h b/src/common/cityhash.h deleted file mode 100644 index a00804e01..000000000 --- a/src/common/cityhash.h +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2011 Google, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -// -// CityHash, by Geoff Pike and Jyrki Alakuijala -// -// http://code.google.com/p/cityhash/ -// -// This file provides a few functions for hashing strings. All of them are -// high-quality functions in the sense that they pass standard tests such -// as Austin Appleby's SMHasher. They are also fast. -// -// For 64-bit x86 code, on short strings, we don't know of anything faster than -// CityHash64 that is of comparable quality. We believe our nearest competitor -// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash -// tables and most other hashing (excluding cryptography). -// -// For 64-bit x86 code, on long strings, the picture is more complicated. -// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc., -// CityHashCrc128 appears to be faster than all competitors of comparable -// quality. CityHash128 is also good but not quite as fast. We believe our -// nearest competitor is Bob Jenkins' Spooky. We don't have great data for -// other 64-bit CPUs, but for long strings we know that Spooky is slightly -// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example. -// Note that CityHashCrc128 is declared in citycrc.h. -// -// For 32-bit x86 code, we don't know of anything faster than CityHash32 that -// is of comparable quality. We believe our nearest competitor is Murmur3A. -// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.) -// -// Functions in the CityHash family are not suitable for cryptography. -// -// Please see CityHash's README file for more details on our performance -// measurements and so on. -// -// WARNING: This code has been only lightly tested on big-endian platforms! -// It is known to work well on little-endian platforms that have a small penalty -// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. -// It should work on all 32-bit and 64-bit platforms that allow unaligned reads; -// bug reports are welcome. -// -// By the way, for some hash functions, given strings a and b, the hash -// of a+b is easily derived from the hashes of a and b. This property -// doesn't hold for any hash functions in this file. - -#pragma once - -#include -#include -#include - -namespace Common { - -using uint128 = std::pair; - -[[nodiscard]] inline uint64_t Uint128Low64(const uint128& x) { - return x.first; -} -[[nodiscard]] inline uint64_t Uint128High64(const uint128& x) { - return x.second; -} - -// Hash function for a byte array. -[[nodiscard]] uint64_t CityHash64(const char* buf, std::size_t len); - -// Hash function for a byte array. For convenience, a 64-bit seed is also -// hashed into the result. -[[nodiscard]] uint64_t CityHash64WithSeed(const char* buf, std::size_t len, uint64_t seed); - -// Hash function for a byte array. For convenience, two seeds are also -// hashed into the result. -[[nodiscard]] uint64_t CityHash64WithSeeds(const char* buf, std::size_t len, uint64_t seed0, - uint64_t seed1); - -// Hash function for a byte array. -[[nodiscard]] uint128 CityHash128(const char* s, std::size_t len); - -// Hash function for a byte array. For convenience, a 128-bit seed is also -// hashed into the result. -[[nodiscard]] uint128 CityHash128WithSeed(const char* s, std::size_t len, uint128 seed); - -// Hash 128 input bits down to 64 bits of output. -// This is intended to be a reasonably good hash function. -[[nodiscard]] inline uint64_t Hash128to64(const uint128& x) { - // Murmur-inspired hashing. - const uint64_t kMul = 0x9ddfea08eb382d69ULL; - uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; - a ^= (a >> 47); - uint64_t b = (Uint128High64(x) ^ a) * kMul; - b ^= (b >> 47); - b *= kMul; - return b; -} - -} // namespace Common diff --git a/src/common/hacks/hack_list.h b/src/common/hacks/hack_list.h index 43d7ea409..0617fa51c 100644 --- a/src/common/hacks/hack_list.h +++ b/src/common/hacks/hack_list.h @@ -14,6 +14,7 @@ enum class HackType : int { DECRYPTION_AUTHORIZED, ONLINE_LLE_REQUIRED, REGION_FROM_SECURE, + REQUIRES_SHADER_FIXUP, }; class UserHackData {}; diff --git a/src/common/hash.h b/src/common/hash.h index 1a222b22e..e70614ff8 100644 --- a/src/common/hash.h +++ b/src/common/hash.h @@ -1,4 +1,4 @@ -// Copyright 2015 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -6,7 +6,7 @@ #include #include -#include "common/cityhash.h" +#include #include "common/common_types.h" namespace Common { @@ -18,7 +18,7 @@ namespace Common { * @returns 64-bit hash value that was computed over the data block */ static inline u64 ComputeHash64(const void* data, std::size_t len) noexcept { - return CityHash64(static_cast(data), len); + return XXH3_64bits(data, len); } /** diff --git a/src/core/hle/service/gsp/gsp_gpu.cpp b/src/core/hle/service/gsp/gsp_gpu.cpp index b488a8ce2..4692e62fd 100644 --- a/src/core/hle/service/gsp/gsp_gpu.cpp +++ b/src/core/hle/service/gsp/gsp_gpu.cpp @@ -20,6 +20,7 @@ #include "core/memory.h" #include "video_core/gpu.h" #include "video_core/gpu_debugger.h" +#include "video_core/pica/pica_core.h" #include "video_core/pica/regs_lcd.h" #include "video_core/renderer_base.h" #include "video_core/right_eye_disabler.h" @@ -685,8 +686,16 @@ Result GSP_GPU::AcquireGpuRight(const Kernel::HLERequestContext& ctx, Common::Hacks::hack_manager.GetHackAllowMode(Common::Hacks::HackType::RIGHT_EYE_DISABLE, process->codeset->program_id) != Common::Hacks::HackAllowMode::DISALLOW; + + bool requires_shader_fixup = + Common::Hacks::hack_manager.GetHackAllowMode( + Common::Hacks::HackType::REQUIRES_SHADER_FIXUP, process->codeset->program_id, + Common::Hacks::HackAllowMode::DISALLOW) != Common::Hacks::HackAllowMode::DISALLOW; + auto& gpu = system.GPU(); gpu.GetRightEyeDisabler().SetEnabled(right_eye_disable_allow); + gpu.PicaCore().vs_setup.requires_fixup = requires_shader_fixup; + gpu.PicaCore().gs_setup.requires_fixup = requires_shader_fixup; if (active_thread_id == session_data->thread_id) { return {ErrorDescription::AlreadyDone, ErrorModule::GX, ErrorSummary::Success, diff --git a/src/tests/video_core/shader.cpp b/src/tests/video_core/shader.cpp index 8747808d2..db868a260 100644 --- a/src/tests/video_core/shader.cpp +++ b/src/tests/video_core/shader.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -63,11 +63,15 @@ static std::unique_ptr CompileShaderSetup( const auto shbin = nihstro::InlineAsm::CompileToRawBinary(code); auto shader = std::make_unique(); - - std::transform(shbin.program.begin(), shbin.program.end(), shader->program_code.begin(), + Pica::ProgramCode program_code{}; + Pica::SwizzleData swizzle_data{}; + std::transform(shbin.program.begin(), shbin.program.end(), program_code.begin(), [](const auto& x) { return x.hex; }); - std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(), - shader->swizzle_data.begin(), [](const auto& x) { return x.hex; }); + std::transform(shbin.swizzle_table.begin(), shbin.swizzle_table.end(), swizzle_data.begin(), + [](const auto& x) { return x.hex; }); + + shader->UpdateProgramCode(program_code); + shader->UpdateSwizzleData(swizzle_data); return shader; } @@ -143,12 +147,16 @@ private: class ShaderJitTest : public ShaderTest { public: explicit ShaderJitTest(std::initializer_list code) : ShaderTest(code) { - shader_jit.Compile(&shader_setup->program_code, &shader_setup->swizzle_data); + const auto& program_code = shader_setup->GetProgramCode(); + const auto& swizzle_data = shader_setup->GetSwizzleData(); + shader_jit.Compile(&program_code, &swizzle_data); } explicit ShaderJitTest(std::unique_ptr input_shader_setup) : ShaderTest(std::move(input_shader_setup)) { - shader_jit.Compile(&shader_setup->program_code, &shader_setup->swizzle_data); + const auto& program_code = shader_setup->GetProgramCode(); + const auto& swizzle_data = shader_setup->GetSwizzleData(); + shader_jit.Compile(&program_code, &swizzle_data); } void RunShader(Pica::ShaderUnit& shader_unit, std::span inputs) override { @@ -210,12 +218,12 @@ SHADER_TEST_CASE("CALL", "[video_core][shader]") { // call foo CALL.flow_control.dest_offset = 2; CALL.flow_control.num_instructions = 1; - shader_setup->program_code[0] = CALL.hex; + shader_setup->UpdateProgramCode(0, CALL.hex); // call ex2 CALL.flow_control.dest_offset = 4; CALL.flow_control.num_instructions = 1; - shader_setup->program_code[2] = CALL.hex; + shader_setup->UpdateProgramCode(2, CALL.hex); auto shader = TestType(std::move(shader_setup)); @@ -608,7 +616,7 @@ SHADER_TEST_CASE("MAD", "[video_core][shader]") { MAD.mad.src2 = sh_input2; MAD.mad.src3 = sh_input3; MAD.mad.dest = sh_output; - shader_setup->program_code[0] = MAD.hex; + shader_setup->UpdateProgramCode(0, MAD.hex); nihstro::SwizzlePattern swizzle = {}; swizzle.dest_mask = 0b1111; @@ -624,7 +632,7 @@ SHADER_TEST_CASE("MAD", "[video_core][shader]") { swizzle.SetSelectorSrc3(1, SwizzlePattern::Selector::y); swizzle.SetSelectorSrc3(2, SwizzlePattern::Selector::z); swizzle.SetSelectorSrc3(3, SwizzlePattern::Selector::w); - shader_setup->swizzle_data[0] = swizzle.hex; + shader_setup->UpdateSwizzleData(0, swizzle.hex); auto shader = TestType(std::move(shader_setup)); @@ -713,7 +721,7 @@ SHADER_TEST_CASE("Conditional", "[video_core][shader]") { { auto shader_setup = CompileShaderSetup(assembly_template); IFC.flow_control.op = nihstro::Instruction::FlowControlType::Op::JustX; - shader_setup->program_code[0] = IFC.hex; + shader_setup->UpdateProgramCode(0, IFC.hex); const float result = result_x ? 1.0f : 0.0f; auto shader_test = TestType(std::move(shader_setup)); @@ -726,7 +734,7 @@ SHADER_TEST_CASE("Conditional", "[video_core][shader]") { { auto shader_setup = CompileShaderSetup(assembly_template); IFC.flow_control.op = nihstro::Instruction::FlowControlType::Op::JustY; - shader_setup->program_code[0] = IFC.hex; + shader_setup->UpdateProgramCode(0, IFC.hex); const float result = result_y ? 1.0f : 0.0f; auto shader_test = TestType(std::move(shader_setup)); @@ -739,7 +747,7 @@ SHADER_TEST_CASE("Conditional", "[video_core][shader]") { { auto shader_setup = CompileShaderSetup(assembly_template); IFC.flow_control.op = nihstro::Instruction::FlowControlType::Op::Or; - shader_setup->program_code[0] = IFC.hex; + shader_setup->UpdateProgramCode(0, IFC.hex); const float result = (result_x || result_y) ? 1.0f : 0.0f; auto shader_test = TestType(std::move(shader_setup)); @@ -752,7 +760,7 @@ SHADER_TEST_CASE("Conditional", "[video_core][shader]") { { auto shader_setup = CompileShaderSetup(assembly_template); IFC.flow_control.op = nihstro::Instruction::FlowControlType::Op::And; - shader_setup->program_code[0] = IFC.hex; + shader_setup->UpdateProgramCode(0, IFC.hex); const float result = (result_x && result_y) ? 1.0f : 0.0f; auto shader_test = TestType(std::move(shader_setup)); diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 0331a6fea..9b76be5fc 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -184,17 +184,18 @@ void DumpShader(const std::string& filename, const ShaderRegs& config, const Sha dvlb.dvle_offset = QueueForWriting(reinterpret_cast(&dvle), sizeof(dvle)); // TODO: Reduce the amount of binary code written to relevant portions + const auto& program_code = setup.GetProgramCode(); dvlp.binary_offset = write_offset - dvlp_offset; - dvlp.binary_size_words = static_cast(setup.program_code.size()); - QueueForWriting(reinterpret_cast(setup.program_code.data()), - static_cast(setup.program_code.size()) * sizeof(u32)); + dvlp.binary_size_words = static_cast(program_code.size()); + QueueForWriting(reinterpret_cast(program_code.data()), + static_cast(program_code.size()) * sizeof(u32)); + const auto& swizzle_data = setup.GetSwizzleData(); dvlp.swizzle_info_offset = write_offset - dvlp_offset; - dvlp.swizzle_info_num_entries = static_cast(setup.swizzle_data.size()); + dvlp.swizzle_info_num_entries = static_cast(swizzle_data.size()); u32 dummy = 0; - for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { - QueueForWriting(reinterpret_cast(&setup.swizzle_data[i]), - sizeof(setup.swizzle_data[i])); + for (unsigned int i = 0; i < swizzle_data.size(); ++i) { + QueueForWriting(reinterpret_cast(&swizzle_data[i]), sizeof(swizzle_data[i])); QueueForWriting(reinterpret_cast(&dummy), sizeof(dummy)); } diff --git a/src/video_core/pica/pica_core.cpp b/src/video_core/pica/pica_core.cpp index 359fe0ff1..a5768365d 100644 --- a/src/video_core/pica/pica_core.cpp +++ b/src/video_core/pica/pica_core.cpp @@ -258,8 +258,7 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask, bool& stop_requeste if (offset >= 4096) { LOG_ERROR(HW_GPU, "Invalid GS program offset {}", offset); } else { - gs_setup.program_code[offset] = value; - gs_setup.MarkProgramCodeDirty(); + gs_setup.UpdateProgramCode(offset, value); offset++; } break; @@ -274,11 +273,10 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask, bool& stop_requeste case PICA_REG_INDEX(gs.swizzle_patterns.set_word[6]): case PICA_REG_INDEX(gs.swizzle_patterns.set_word[7]): { u32& offset = regs.internal.gs.swizzle_patterns.offset; - if (offset >= gs_setup.swizzle_data.size()) { + if (offset >= gs_setup.GetSwizzleData().size()) { LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset {}", offset); } else { - gs_setup.swizzle_data[offset] = value; - gs_setup.MarkSwizzleDataDirty(); + gs_setup.UpdateSwizzleData(offset, value); offset++; } break; @@ -340,12 +338,10 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask, bool& stop_requeste if (offset >= 512) { LOG_ERROR(HW_GPU, "Invalid VS program offset {}", offset); } else { - vs_setup.program_code[offset] = value; - vs_setup.MarkProgramCodeDirty(); + vs_setup.UpdateProgramCode(offset, value); if (!regs.internal.pipeline.gs_unit_exclusive_configuration && regs.internal.pipeline.use_gs == PipelineRegs::UseGS::No) { - gs_setup.program_code[offset] = value; - gs_setup.MarkProgramCodeDirty(); + gs_setup.UpdateProgramCode(offset, value); } offset++; } @@ -361,15 +357,13 @@ void PicaCore::WriteInternalReg(u32 id, u32 value, u32 mask, bool& stop_requeste case PICA_REG_INDEX(vs.swizzle_patterns.set_word[6]): case PICA_REG_INDEX(vs.swizzle_patterns.set_word[7]): { u32& offset = regs.internal.vs.swizzle_patterns.offset; - if (offset >= vs_setup.swizzle_data.size()) { + if (offset >= vs_setup.GetSwizzleData().size()) { LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset {}", offset); } else { - vs_setup.swizzle_data[offset] = value; - vs_setup.MarkSwizzleDataDirty(); + vs_setup.UpdateSwizzleData(offset, value); if (!regs.internal.pipeline.gs_unit_exclusive_configuration && regs.internal.pipeline.use_gs == PipelineRegs::UseGS::No) { - gs_setup.swizzle_data[offset] = value; - gs_setup.MarkSwizzleDataDirty(); + gs_setup.UpdateSwizzleData(offset, value); } offset++; } diff --git a/src/video_core/pica/shader_setup.cpp b/src/video_core/pica/shader_setup.cpp index 70808b02f..46de9aafc 100644 --- a/src/video_core/pica/shader_setup.cpp +++ b/src/video_core/pica/shader_setup.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include "common/assert.h" #include "common/bit_set.h" #include "common/hash.h" @@ -51,7 +52,9 @@ std::optional ShaderSetup::WriteUniformFloatReg(ShaderRegs& config, u32 val u64 ShaderSetup::GetProgramCodeHash() { if (program_code_hash_dirty) { - program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); + const auto& prog_code = GetProgramCode(); + program_code_hash = + Common::ComputeHash64(prog_code.data(), biggest_program_size * sizeof(u32)); program_code_hash_dirty = false; } return program_code_hash; @@ -59,10 +62,18 @@ u64 ShaderSetup::GetProgramCodeHash() { u64 ShaderSetup::GetSwizzleDataHash() { if (swizzle_data_hash_dirty) { - swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data)); + swizzle_data_hash = + Common::ComputeHash64(swizzle_data.data(), biggest_swizzle_size * sizeof(u32)); swizzle_data_hash_dirty = false; } return swizzle_data_hash; } +void ShaderSetup::DoProgramCodeFixup() { + if (!requires_fixup || !program_code_pending_fixup) { + return; + } + program_code_pending_fixup = false; +} + } // namespace Pica diff --git a/src/video_core/pica/shader_setup.h b/src/video_core/pica/shader_setup.h index ad4981b95..8845e0cb9 100644 --- a/src/video_core/pica/shader_setup.h +++ b/src/video_core/pica/shader_setup.h @@ -51,6 +51,17 @@ struct ShaderRegs; * The geometry shaders has a unique configuration so when enabled it has its own setup. */ struct ShaderSetup { +private: + void MakeProgramCodeDirty() { + program_code_hash_dirty = true; + program_code_pending_fixup = true; + has_fixup = false; + } + + void MakeSwizzleDataDirty() { + swizzle_data_hash_dirty = true; + } + public: explicit ShaderSetup(); ~ShaderSetup(); @@ -65,28 +76,80 @@ public: u64 GetSwizzleDataHash(); - void MarkProgramCodeDirty() { - program_code_hash_dirty = true; + void DoProgramCodeFixup(); + + inline void UpdateProgramCode(size_t offset, u32 value) { + u32& inst = program_code[offset]; + u32 old = inst; + + if (old == value) + return; + + inst = value; + if (!program_code_hash_dirty) { + MakeProgramCodeDirty(); + } + if ((offset + 1) > biggest_program_size) { + biggest_program_size = offset + 1; + } } - void MarkSwizzleDataDirty() { - swizzle_data_hash_dirty = true; + void UpdateProgramCode(const ProgramCode& other) { + program_code = other; + biggest_program_size = program_code.size(); + MakeProgramCodeDirty(); + } + + inline void UpdateSwizzleData(size_t offset, u32 value) { + u32& data = swizzle_data[offset]; + u32 old = data; + + if (old == value) + return; + + data = value; + if (!swizzle_data_hash_dirty) { + MakeSwizzleDataDirty(); + } + if ((offset + 1) > biggest_swizzle_size) { + biggest_swizzle_size = offset + 1; + } + } + + void UpdateSwizzleData(const SwizzleData& other) { + swizzle_data = other; + biggest_swizzle_size = swizzle_data.size(); + MakeSwizzleDataDirty(); + } + + const ProgramCode& GetProgramCode() const { + return (has_fixup) ? program_code_fixup : program_code; + } + + const SwizzleData& GetSwizzleData() const { + return swizzle_data; } public: Uniforms uniforms; PackedAttribute uniform_queue; - ProgramCode program_code{}; - SwizzleData swizzle_data{}; u32 entry_point{}; const void* cached_shader{}; bool uniforms_dirty = true; + bool requires_fixup = false; + bool has_fixup = false; private: + ProgramCode program_code{}; + ProgramCode program_code_fixup{}; + SwizzleData swizzle_data{}; bool program_code_hash_dirty{true}; bool swizzle_data_hash_dirty{true}; - u64 program_code_hash{0xDEADC0DE}; - u64 swizzle_data_hash{0xDEADC0DE}; + bool program_code_pending_fixup{true}; + u32 biggest_program_size = 0; + u32 biggest_swizzle_size = 0; + u64 program_code_hash{0}; + u64 swizzle_data_hash{0}; friend class boost::serialization::access; template @@ -94,11 +157,17 @@ private: ar & uniforms; ar & uniform_queue; ar & program_code; + ar & program_code_fixup; ar & swizzle_data; ar & program_code_hash_dirty; ar & swizzle_data_hash_dirty; + ar & program_code_pending_fixup; + ar & biggest_program_size; + ar & biggest_swizzle_size; ar & program_code_hash; ar & swizzle_data_hash; + ar & requires_fixup; + ar & has_fixup; } }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index e70319ef3..7b91a1639 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -89,8 +89,8 @@ static std::tuple BuildVSConfigFromRaw( std::copy_n(raw.GetProgramCode().begin() + Pica::MAX_PROGRAM_CODE_LENGTH, Pica::MAX_SWIZZLE_DATA_LENGTH, swizzle_data.begin()); Pica::ShaderSetup setup; - setup.program_code = program_code; - setup.swizzle_data = swizzle_data; + setup.UpdateProgramCode(program_code); + setup.UpdateSwizzleData(swizzle_data); // Enable the geometry-shader only if we are actually doing per-fragment lighting // and care about proper quaternions. Otherwise just use standard vertex+fragment shaders @@ -354,12 +354,13 @@ bool ShaderProgramManager::UseProgrammableVertexShader(const Pica::RegsInternal& // Save VS to the disk cache if its a new shader if (result) { auto& disk_cache = impl->disk_cache; - ProgramCode program_code{setup.program_code.begin(), setup.program_code.end()}; - program_code.insert(program_code.end(), setup.swizzle_data.begin(), - setup.swizzle_data.end()); - const u64 unique_identifier = GetUniqueIdentifier(regs, program_code); + const auto& program_code = setup.GetProgramCode(); + const auto& swizzle_data = setup.GetSwizzleData(); + ProgramCode new_program_code{program_code.begin(), program_code.end()}; + new_program_code.insert(new_program_code.end(), swizzle_data.begin(), swizzle_data.end()); + const u64 unique_identifier = GetUniqueIdentifier(regs, new_program_code); const ShaderDiskCacheRaw raw{unique_identifier, ProgramType::VS, regs, - std::move(program_code)}; + std::move(new_program_code)}; disk_cache.SaveRaw(raw); disk_cache.SaveDecompiled(unique_identifier, *result, accurate_mul); } diff --git a/src/video_core/shader/generator/glsl_shader_gen.cpp b/src/video_core/shader/generator/glsl_shader_gen.cpp index 154ff1908..d6cc390cc 100644 --- a/src/video_core/shader/generator/glsl_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_shader_gen.cpp @@ -178,7 +178,7 @@ std::string GenerateVertexShader(const ShaderSetup& setup, const PicaVSConfig& c }; auto program_source = - DecompileProgram(setup.program_code, setup.swizzle_data, config.state.main_offset, + DecompileProgram(setup.GetProgramCode(), setup.GetSwizzleData(), config.state.main_offset, get_input_reg, get_output_reg, config.state.sanitize_mul); if (program_source.empty()) { diff --git a/src/video_core/shader/generator/shader_gen.cpp b/src/video_core/shader/generator/shader_gen.cpp index 6d7e61190..dafbdd8e8 100644 --- a/src/video_core/shader/generator/shader_gen.cpp +++ b/src/video_core/shader/generator/shader_gen.cpp @@ -1,4 +1,4 @@ -// Copyright 2023 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -42,6 +42,7 @@ void PicaVSConfigState::Init(const Pica::RegsInternal& regs, Pica::ShaderSetup& use_geometry_shader = use_geometry_shader_; sanitize_mul = accurate_mul_; + setup.DoProgramCodeFixup(); program_hash = setup.GetProgramCodeHash(); swizzle_hash = setup.GetSwizzleDataHash(); main_offset = regs.vs.main_offset; diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index b8da94941..cb06a62bc 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -105,8 +105,8 @@ static void RunInterpreter(const ShaderSetup& setup, ShaderUnit& state, }; const auto& uniforms = setup.uniforms; - const auto& swizzle_data = setup.swizzle_data; - const auto& program_code = setup.program_code; + const auto& swizzle_data = setup.GetSwizzleData(); + const auto& program_code = setup.GetProgramCode(); // Constants for handling invalid inputs static f24 dummy_vec4_float24_zeros[4] = {f24::Zero(), f24::Zero(), f24::Zero(), f24::Zero()}; @@ -736,6 +736,7 @@ static void RunInterpreter(const ShaderSetup& setup, ShaderUnit& state, void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); + setup.DoProgramCodeFixup(); setup.entry_point = entry_point; } diff --git a/src/video_core/shader/shader_jit.cpp b/src/video_core/shader/shader_jit.cpp index 644586226..a45cf519c 100644 --- a/src/video_core/shader/shader_jit.cpp +++ b/src/video_core/shader/shader_jit.cpp @@ -1,4 +1,4 @@ -// Copyright 2016 Citra Emulator Project +// Copyright Citra Emulator Project / Azahar Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. @@ -26,6 +26,7 @@ void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) { ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); setup.entry_point = entry_point; + setup.DoProgramCodeFixup(); const u64 code_hash = setup.GetProgramCodeHash(); const u64 swizzle_hash = setup.GetSwizzleDataHash(); @@ -35,7 +36,7 @@ void JitEngine::SetupBatch(ShaderSetup& setup, u32 entry_point) { setup.cached_shader = iter->second.get(); } else { auto shader = std::make_unique(); - shader->Compile(&setup.program_code, &setup.swizzle_data); + shader->Compile(&setup.GetProgramCode(), &setup.GetSwizzleData()); setup.cached_shader = shader.get(); cache.emplace_hint(iter, cache_key, std::move(shader)); }