From f657314da5ee7b37ff1497134aa0819b9a51b0d7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 13 Dec 2025 14:35:31 +0300 Subject: [PATCH 01/13] vk/gl: Mark unused FP outputs as unused --- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 15 +++++++++++++-- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 18 ++++++++++++++---- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 4cbd92eecd..0879e5e987 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -103,8 +103,19 @@ void GLFragmentDecompilerThread::insertOutputs(std::stringstream & OS) const auto reg_type = float_type ? "vec4" : getHalfTypeName(4); for (uint i = 0; i < std::size(table); ++i) { - if (m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second)) - OS << "layout(location=" << i << ") out vec4 " << table[i].first << ";\n"; + if (!m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second)) + { + continue; + } + + if (i >= m_prog.mrt_buffers_count) + { + // Dead writes. Declare as temp variables for DCE to clean up. + OS << "vec4 " << table[i].first << "; // Unused\n"; + continue; + } + + OS << "layout(location=" << i << ") out vec4 " << table[i].first << ";\n"; } } diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index ea9d7cd0f1..ecca27f0df 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -150,17 +150,27 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) { "ocol3", m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS ? 
"r4" : "h8" }, }; - //NOTE: We do not skip outputs, the only possible combinations are a(0), b(0), ab(0,1), abc(0,1,2), abcd(0,1,2,3) + // NOTE: We do not skip outputs, the only possible combinations are a(0), b(0), ab(0,1), abc(0,1,2), abcd(0,1,2,3) u8 output_index = 0; const bool float_type = (m_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) || !device_props.has_native_half_support; const auto reg_type = float_type ? "vec4" : getHalfTypeName(4); for (uint i = 0; i < std::size(table); ++i) { - if (m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second)) + if (!m_parr.HasParam(PF_PARAM_NONE, reg_type, table[i].second)) { - OS << "layout(location=" << std::to_string(output_index++) << ") " << "out vec4 " << table[i].first << ";\n"; - vk_prog->output_color_masks[i] = -1; + continue; } + + if (i >= m_prog.mrt_buffers_count) + { + // Dead writes. Declare as temp variables for DCE to clean up. + OS << "vec4 " << table[i].first << "; // Unused\n"; + vk_prog->output_color_masks[i] = 0; + continue; + } + + OS << "layout(location=" << std::to_string(output_index++) << ") " << "out vec4 " << table[i].first << ";\n"; + vk_prog->output_color_masks[i] = -1; } } From 7c4a6118f8bca1fac9e204245504976cfb15661d Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 13 Dec 2025 14:37:01 +0300 Subject: [PATCH 02/13] rsx/fp: Stop referencing wpos when the value is just getting discarded. 
--- .../RSX/Program/FragmentProgramDecompiler.cpp | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp index 94b92ce98e..1ba30e45b3 100644 --- a/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/Program/FragmentProgramDecompiler.cpp @@ -551,18 +551,22 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) { std::string ret; u32 precision_modifier = 0; + u32 register_index = umax; if constexpr (std::is_same_v) { precision_modifier = src1.src0_prec_mod; + register_index = 0; } else if constexpr (std::is_same_v) { precision_modifier = src1.src1_prec_mod; + register_index = 1; } else if constexpr (std::is_same_v) { precision_modifier = src1.src2_prec_mod; + register_index = 2; } switch (src.reg_type) @@ -645,18 +649,29 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) { // TEX0 - TEX9 // Texcoord 2d mask seems to reset the last 2 arguments to 0 and w if set + + // Opt: Skip emitting w dependency unless w coord is actually being sampled + ensure(register_index != umax); + const auto lane_mask = FP::get_src_vector_lane_mask_shuffled(m_prog, m_instruction, register_index); + const auto touches_z = !!(lane_mask & (1u << 2)); + const bool touches_w = !!(lane_mask & (1u << 3)); + const u8 texcoord = u8(register_id) - 4; if (m_prog.texcoord_is_point_coord(texcoord)) { // Point sprite coord generation. Stacks with the 2D override mask. 
- if (m_prog.texcoord_is_2d(texcoord)) + if (!m_prog.texcoord_is_2d(texcoord)) { - ret += getFloatTypeName(4) + "(gl_PointCoord, 0., in_w)"; - properties.has_w_access = true; + ret += getFloatTypeName(4) + "(gl_PointCoord, 1., 0.)"; + } + else if (!touches_w) + { + ret += getFloatTypeName(4) + "(gl_PointCoord, 0., 0.)"; } else { - ret += getFloatTypeName(4) + "(gl_PointCoord, 1., 0.)"; + ret += getFloatTypeName(4) + "(gl_PointCoord, 0., in_w)"; + properties.has_w_access = true; } } else if (src2.perspective_corr) @@ -673,14 +688,19 @@ template std::string FragmentProgramDecompiler::GetSRC(T src) } else { - if (m_prog.texcoord_is_2d(texcoord)) + const bool skip_zw_load = !touches_z && !touches_w; + if (!m_prog.texcoord_is_2d(texcoord) || skip_zw_load) { - ret += getFloatTypeName(4) + "(" + reg_var + ".xy, 0., in_w)"; - properties.has_w_access = true; + ret += reg_var; + } + else if (!touches_w) + { + ret += getFloatTypeName(4) + "(" + reg_var + ".xy, 0., 0.)"; } else { - ret += reg_var; + ret += getFloatTypeName(4) + "(" + reg_var + ".xy, 0., in_w)"; + properties.has_w_access = true; } } break; From dacc8b5c21c678fe66cdeee2955dd09f714fd47b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 13 Dec 2025 14:59:54 +0300 Subject: [PATCH 03/13] rsx/vk/gl: Make texture parameter reads backend-dependent - Allows optimizations and improvements per-backend --- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 3 ++- rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp | 1 + .../RSXFragmentTextureDepthConversion.glsl | 6 +++--- .../RSXProg/RSXFragmentTextureMSAAOps.glsl | 2 +- .../RSXProg/RSXFragmentTextureOps.glsl | 16 ++++++++-------- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 3 ++- rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 1 + 7 files changed, 18 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index 0879e5e987..cc28c0c98f 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ 
-201,7 +201,8 @@ void GLFragmentDecompilerThread::insertConstants(std::stringstream & OS) " uvec4 stipple_pattern[8];\n" "};\n\n" - "#define texture_base_index 0\n\n"; + "#define texture_base_index 0\n" + "#define TEX_PARAM(index) texture_parameters[index]\n\n"; } void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) diff --git a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp index 9606ca7b16..c7f9ec2622 100644 --- a/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/GL/GLShaderInterpreter.cpp @@ -300,6 +300,7 @@ namespace gl } builder << "\n" + "#define TEX_PARAM(index) texture_parameters[index + texture_base_index]\n" "#define IS_TEXTURE_RESIDENT(index) (texture_handles[index] < 0xFF)\n" "#define SAMPLER1D(index) sampler1D_array[texture_handles[index]]\n" "#define SAMPLER2D(index) sampler2D_array[texture_handles[index]]\n" diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureDepthConversion.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureDepthConversion.glsl index ef77ae179d..a9737e97fa 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureDepthConversion.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureDepthConversion.glsl @@ -1,8 +1,8 @@ R"( #define ZS_READ(index, coord) vec2(texture(TEX_NAME(index), coord).r, float(texture(TEX_NAME_STENCIL(index), coord).x)) -#define TEX1D_Z24X8_RGBA8(index, coord1) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE1(index, coord1)), texture_parameters[index + texture_base_index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) -#define TEX2D_Z24X8_RGBA8(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE2(index, coord2)), texture_parameters[index + texture_base_index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) -#define TEX3D_Z24X8_RGBA8(index, coord3) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE3(index, coord3)), 
texture_parameters[index + texture_base_index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) +#define TEX1D_Z24X8_RGBA8(index, coord1) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE1(index, coord1)), TEX_PARAM(index).remap, TEX_FLAGS(index)), TEX_FLAGS(index)) +#define TEX2D_Z24X8_RGBA8(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE2(index, coord2)), TEX_PARAM(index).remap, TEX_FLAGS(index)), TEX_FLAGS(index)) +#define TEX3D_Z24X8_RGBA8(index, coord3) _process_texel(convert_z24x8_to_rgba8(ZS_READ(index, COORD_SCALE3(index, coord3)), TEX_PARAM(index).remap, TEX_FLAGS(index)), TEX_FLAGS(index)) // NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS) // The A component (Z) is useless (should contain stencil8 or just 1) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl index 37f2427ba0..965eac5276 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl @@ -4,7 +4,7 @@ R"( #define TEX2D_MS(index, coord2) _process_texel(sampleTexture2DMS(TEX_NAME(index), coord2, index), TEX_FLAGS(index)) #define TEX2D_SHADOW_MS(index, coord3) vec4(comparison_passes(sampleTexture2DMS(TEX_NAME(index), coord3.xy, index).x, coord3.z, ZCOMPARE_FUNC(index))) #define TEX2D_SHADOWPROJ_MS(index, coord4) TEX2D_SHADOW_MS(index, (coord4.xyz / coord4.w)) -#define TEX2D_Z24X8_RGBA8_MS(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), texture_parameters[index + texture_base_index].remap, TEX_FLAGS(index)), TEX_FLAGS(index)) +#define TEX2D_Z24X8_RGBA8_MS(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), TEX_PARAM(index).remap, TEX_FLAGS(index)), TEX_FLAGS(index)) vec3 compute2x2DownsampleWeights(const in float coord, const in float uv_step, 
const in float actual_step) { diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl index 7ef54a244d..e231333184 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl @@ -24,17 +24,17 @@ R"( uint _texture_flag_override = 0; #define _enable_texture_expand() _texture_flag_override = SIGN_EXPAND_MASK #define _disable_texture_expand() _texture_flag_override = 0 - #define TEX_FLAGS(index) (texture_parameters[index + texture_base_index].flags | _texture_flag_override) + #define TEX_FLAGS(index) (TEX_PARAM(index).flags | _texture_flag_override) #else - #define TEX_FLAGS(index) texture_parameters[index + texture_base_index].flags + #define TEX_FLAGS(index) TEX_PARAM(index).flags #endif #define TEX_NAME(index) tex##index #define TEX_NAME_STENCIL(index) tex##index##_stencil -#define COORD_SCALE1(index, coord1) _texcoord_xform(coord1, texture_parameters[index + texture_base_index]) -#define COORD_SCALE2(index, coord2) _texcoord_xform(coord2, texture_parameters[index + texture_base_index]) -#define COORD_SCALE3(index, coord3) _texcoord_xform(coord3, texture_parameters[index + texture_base_index]) +#define COORD_SCALE1(index, coord1) _texcoord_xform(coord1, TEX_PARAM(index)) +#define COORD_SCALE2(index, coord2) _texcoord_xform(coord2, TEX_PARAM(index)) +#define COORD_SCALE3(index, coord3) _texcoord_xform(coord3, TEX_PARAM(index)) #define COORD_PROJ1(index, coord2) COORD_SCALE1(index, coord2.x / coord2.y) #define COORD_PROJ2(index, coord3) COORD_SCALE2(index, coord3.xy / coord3.z) #define COORD_PROJ3(index, coord4) COORD_SCALE3(index, coord4.xyz / coord4.w) @@ -57,9 +57,9 @@ R"( #ifdef _ENABLE_SHADOW #ifdef _EMULATED_TEXSHADOW - #define SHADOW_COORD(index, coord3) _texcoord_xform_shadow(coord3, texture_parameters[index + texture_base_index]) - #define SHADOW_COORD4(index, 
coord4) _texcoord_xform_shadow(coord4, texture_parameters[index + texture_base_index]) - #define SHADOW_COORD_PROJ(index, coord4) _texcoord_xform_shadow(coord4.xyz / coord4.w, texture_parameters[index + texture_base_index]) + #define SHADOW_COORD(index, coord3) _texcoord_xform_shadow(coord3, TEX_PARAM(index)) + #define SHADOW_COORD4(index, coord4) _texcoord_xform_shadow(coord4, TEX_PARAM(index)) + #define SHADOW_COORD_PROJ(index, coord4) _texcoord_xform_shadow(coord4.xyz / coord4.w, TEX_PARAM(index)) #define TEX2D_SHADOW(index, coord3) texture(TEX_NAME(index), SHADOW_COORD(index, coord3)) #define TEX3D_SHADOW(index, coord4) texture(TEX_NAME(index), SHADOW_COORD4(index, coord4)) diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index ecca27f0df..0cba2fa80b 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -346,7 +346,8 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) } OS << - "#define texture_base_index _fs_texture_base_index\n\n"; + "#define texture_base_index _fs_texture_base_index\n" + "#define TEX_PARAM(index) texture_parameters[index + texture_base_index]\n\n"; glsl::insert_glsl_legacy_function(OS, m_shader_props); } diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index 083c3f8c7f..9315ac1593 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -264,6 +264,7 @@ namespace vk } builder << "\n" + "#define TEX_PARAM(index) texture_parameters[index + texture_base_index]\n" "#define IS_TEXTURE_RESIDENT(index) true\n" "#define SAMPLER1D(index) sampler1D_array[index]\n" "#define SAMPLER2D(index) sampler2D_array[index]\n" From 765429b61efb1fc5a52b395c5e771823ade38069 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 13 Dec 2025 15:18:43 +0300 Subject: [PATCH 04/13] vk: Hoist texture param loads --- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 12 
+++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 0cba2fa80b..c720ae8550 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -347,7 +347,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) OS << "#define texture_base_index _fs_texture_base_index\n" - "#define TEX_PARAM(index) texture_parameters[index + texture_base_index]\n\n"; + "#define TEX_PARAM(index) texture_parameters_##index\n\n"; glsl::insert_glsl_legacy_function(OS, m_shader_props); } @@ -433,6 +433,16 @@ void VKFragmentDecompilerThread::insertMainStart(std::stringstream & OS) if (properties.in_register_mask & in_spec_color) OS << " vec4 spec_color = gl_FrontFacing ? spec_color1 : spec_color0;\n"; } + + for (u16 i = 0, mask = (properties.common_access_sampler_mask | properties.shadow_sampler_mask); mask != 0; ++i, mask >>= 1) + { + if (!(mask & 1)) + { + continue; + } + + OS << " const sampler_info texture_parameters_" << i << " = texture_parameters[texture_base_index + " << i << "];\n"; + } } void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) From 19e57c9525c62d66acbadf4f2fba712af514a655 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 14 Dec 2025 07:26:30 +0300 Subject: [PATCH 05/13] rsx: Move heavy codegen capabilities to permutation flags from runtime checks --- rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp | 68 ------------------- rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp | 12 +++- rpcs3/Emu/RSX/Program/GLSLCommon.cpp | 30 ++++++-- .../RSXProg/RSXFragmentPrologue.glsl | 4 +- .../RSXProg/RSXFragmentTextureOps.glsl | 2 + .../GLSLSnippets/RSXProg/RSXROPEpilogue.glsl | 42 +++++------- rpcs3/Emu/RSX/Program/GLSLTypes.h | 9 ++- rpcs3/Emu/RSX/RSXThread.cpp | 39 +++++++++++ rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 12 +++- rpcs3/Emu/RSX/gcm_enums.h | 14 +++- 10 files changed, 123 insertions(+), 109 deletions(-) diff 
--git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp index 449900cef1..ed2447ab24 100644 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp @@ -687,73 +687,6 @@ namespace rsx rop_control.enable_alpha_test(); } - if (REGS(m_ctx)->polygon_stipple_enabled()) - { - rop_control.enable_polygon_stipple(); - } - - auto can_use_hw_a2c = [&]() -> bool - { - const auto& config = RSX(m_ctx)->get_backend_config(); - if (!config.supports_hw_a2c) - { - return false; - } - - if (config.supports_hw_a2c_1spp) - { - return true; - } - - return REGS(m_ctx)->surface_antialias() != rsx::surface_antialiasing::center_1_sample; - }; - - if (REGS(m_ctx)->msaa_alpha_to_coverage_enabled() && !can_use_hw_a2c()) - { - // TODO: Properly support alpha-to-coverage and alpha-to-one behavior in shaders - // Alpha values generate a coverage mask for order independent blending - // Requires hardware AA to work properly (or just fragment sample stage in fragment shaders) - // Simulated using combined alpha blend and alpha test - rop_control.enable_alpha_to_coverage(); - if (REGS(m_ctx)->msaa_sample_mask()) - { - rop_control.enable_MSAA_writes(); - } - - // Sample configuration bits - switch (REGS(m_ctx)->surface_antialias()) - { - case rsx::surface_antialiasing::center_1_sample: - break; - case rsx::surface_antialiasing::diagonal_centered_2_samples: - rop_control.set_msaa_control(1u); - break; - default: - rop_control.set_msaa_control(3u); - break; - } - } - - // Check if framebuffer is actually an XRGB format and not a WZYX format - switch (REGS(m_ctx)->surface_color()) - { - case rsx::surface_color_format::w16z16y16x16: - case rsx::surface_color_format::w32z32y32x32: - case rsx::surface_color_format::x32: - // These behave very differently from "normal" formats. - break; - default: - // Integer framebuffer formats. - rop_control.enable_framebuffer_INT(); - - // Check if we want sRGB conversion. 
- if (REGS(m_ctx)->framebuffer_srgb_enabled()) - { - rop_control.enable_framebuffer_sRGB(); - } - break; - } - // Generate wpos coefficients // wpos equation is now as follows (ignoring pixel center offset): // wpos.y = (frag_coord / resolution_scale) * ((window_origin!=top)?-1.: 1.) + ((window_origin!=top)? window_height : 0) @@ -766,7 +699,6 @@ namespace rsx payload.rop_control = rop_control.value; payload.alpha_ref = REGS(m_ctx)->alpha_ref(); - const auto window_origin = REGS(m_ctx)->shader_window_origin(); const u32 window_height = REGS(m_ctx)->shader_window_height(); const auto pixel_center = REGS(m_ctx)->pixel_center(); diff --git a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp index cc28c0c98f..047b362bae 100644 --- a/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/GL/GLFragmentProgram.cpp @@ -219,16 +219,24 @@ void GLFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.require_srgb_to_linear = properties.has_upg; m_shader_props.require_linear_to_srgb = properties.has_pkg; m_shader_props.require_fog_read = properties.in_register_mask & in_fogc; - m_shader_props.emulate_coverage_tests = !rsx::get_renderer_backend_config().supports_hw_a2c_1spp; m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare; + m_shader_props.low_precision_tests = ::gl::get_driver_caps().vendor_NVIDIA && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION); m_shader_props.disable_early_discard = !::gl::get_driver_caps().vendor_NVIDIA; m_shader_props.supports_native_fp16 = device_props.has_native_half_support; - m_shader_props.ROP_output_rounding = g_cfg.video.shader_precision != gpu_preset_level::low; + + m_shader_props.ROP_output_rounding = (g_cfg.video.shader_precision != gpu_preset_level::low) && !!(m_prog.ctrl & RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER); + m_shader_props.ROP_sRGB_packing = !!(m_prog.ctrl & RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER); + m_shader_props.ROP_alpha_test = 
!!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TEST); + m_shader_props.ROP_alpha_to_coverage_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE); + m_shader_props.ROP_polygon_stipple_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_POLYGON_STIPPLE); + m_shader_props.ROP_discard = !!(m_prog.ctrl & RSX_SHADER_CONTROL_USES_KIL); + m_shader_props.require_tex1D_ops = properties.has_tex1D; m_shader_props.require_tex2D_ops = properties.has_tex2D; m_shader_props.require_tex3D_ops = properties.has_tex3D; m_shader_props.require_shadowProj_ops = properties.shadow_sampler_mask != 0 && properties.has_texShadowProj; + m_shader_props.require_alpha_kill = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL); glsl::insert_glsl_legacy_function(OS, m_shader_props); } diff --git a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp index afebcce7ed..1421b179fc 100644 --- a/rpcs3/Emu/RSX/Program/GLSLCommon.cpp +++ b/rpcs3/Emu/RSX/Program/GLSLCommon.cpp @@ -216,12 +216,12 @@ namespace glsl enabled_options.push_back("_32_BIT_OUTPUT"); } - if (!props.fp32_outputs) + if (props.ROP_sRGB_packing) { enabled_options.push_back("_ENABLE_FRAMEBUFFER_SRGB"); } - if (props.disable_early_discard) + if (props.disable_early_discard && props.ROP_discard) { enabled_options.push_back("_DISABLE_EARLY_DISCARD"); } @@ -231,7 +231,15 @@ namespace glsl enabled_options.push_back("_ENABLE_ROP_OUTPUT_ROUNDING"); } - enabled_options.push_back("_ENABLE_POLYGON_STIPPLE"); + if (props.ROP_alpha_test) + { + enabled_options.push_back("_ENABLE_ALPHA_TEST"); + } + + if (props.ROP_polygon_stipple_test) + { + enabled_options.push_back("_ENABLE_POLYGON_STIPPLE"); + } } // Import common header @@ -276,12 +284,12 @@ namespace glsl return; } - if (props.emulate_coverage_tests) + if (props.ROP_alpha_to_coverage_test) { - enabled_options.push_back("_EMULATE_COVERAGE_TEST"); + enabled_options.push_back("_ENABLE_ALPHA_TO_COVERAGE_TEST"); } - if (!props.fp32_outputs || props.require_linear_to_srgb) + if 
(props.ROP_sRGB_packing || props.require_linear_to_srgb) { enabled_options.push_back("_ENABLE_LINEAR_TO_SRGB"); } @@ -296,6 +304,11 @@ namespace glsl enabled_options.push_back("_ENABLE_WPOS"); } + if (props.ROP_alpha_test || (props.require_msaa_ops && props.require_tex_shadow_ops)) + { + enabled_options.push_back("_ENABLE_COMPARISON_FUNC"); + } + if (props.require_fog_read) { program_common::define_glsl_constants(OS, @@ -385,6 +398,11 @@ namespace glsl enabled_options.push_back("_ENABLE_SHADOWPROJ"); } + if (props.require_alpha_kill) + { + enabled_options.push_back("_ENABLE_TEXTURE_ALPHA_KILL"); + } + program_common::define_glsl_switches(OS, enabled_options); enabled_options.clear(); diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl index e40373f64c..2fe1277ea9 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentPrologue.glsl @@ -81,7 +81,7 @@ vec4 fetch_fog_value(const in uint mode) } #endif -#ifdef _EMULATE_COVERAGE_TEST +#ifdef _ENABLE_ALPHA_TO_COVERAGE_TEST // Purely stochastic bool coverage_test_passes(const in vec4 _sample) { @@ -109,6 +109,7 @@ vec4 srgb_to_linear(const in vec4 cs) } #endif +#ifdef _ENABLE_COMPARISON_FUNC // Required by all fragment shaders for alpha test bool comparison_passes(const in float a, const in float b, const in uint func) { @@ -125,5 +126,6 @@ bool comparison_passes(const in float a, const in float b, const in uint func) case 7: return true; //always } } +#endif )" diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl index e231333184..f0457f0034 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureOps.glsl @@ -188,6 +188,7 @@ vec4 _process_texel(in 
vec4 rgba, const in uint control_bits) return rgba; } +#ifdef _ENABLE_TEXTURE_ALPHA_KILL if (_test_bit(control_bits, ALPHAKILL)) { // Alphakill @@ -197,6 +198,7 @@ vec4 _process_texel(in vec4 rgba, const in uint control_bits) return rgba; } } +#endif if (_test_bit(control_bits, RENORMALIZE)) { diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl index aaaaa559ca..283371d0ed 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl @@ -8,43 +8,33 @@ R"( #endif #ifdef _ENABLE_FRAMEBUFFER_SRGB - if (_test_bit(rop_control, SRGB_FRAMEBUFFER_BIT)) - { - col0.rgb = _mrt_color_t(linear_to_srgb(col0)).rgb; - col1.rgb = _mrt_color_t(linear_to_srgb(col1)).rgb; - col2.rgb = _mrt_color_t(linear_to_srgb(col2)).rgb; - col3.rgb = _mrt_color_t(linear_to_srgb(col3)).rgb; - } + col0.rgb = _mrt_color_t(linear_to_srgb(col0)).rgb; + col1.rgb = _mrt_color_t(linear_to_srgb(col1)).rgb; + col2.rgb = _mrt_color_t(linear_to_srgb(col2)).rgb; + col3.rgb = _mrt_color_t(linear_to_srgb(col3)).rgb; #endif #ifdef _ENABLE_ROP_OUTPUT_ROUNDING - if (_test_bit(rop_control, INT_FRAMEBUFFER_BIT)) - { - col0 = round_to_8bit(col0); - col1 = round_to_8bit(col1); - col2 = round_to_8bit(col2); - col3 = round_to_8bit(col3); - } + col0 = round_to_8bit(col0); + col1 = round_to_8bit(col1); + col2 = round_to_8bit(col2); + col3 = round_to_8bit(col3); #endif // Post-output stages // Alpha Testing - if (_test_bit(rop_control, ALPHA_TEST_ENABLE_BIT)) +#ifdef _ENABLE_ALPHA_TEST + const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH); + if (!comparison_passes(col0.a, alpha_ref, alpha_func)) { - const uint alpha_func = _get_bits(rop_control, ALPHA_TEST_FUNC_OFFSET, ALPHA_TEST_FUNC_LENGTH); - if (!comparison_passes(col0.a, alpha_ref, alpha_func)) - { - discard; - } + discard; } +#endif -#ifdef 
_EMULATE_COVERAGE_TEST - if (_test_bit(rop_control, ALPHA_TO_COVERAGE_ENABLE_BIT)) +#ifdef _ENABLE_ALPHA_TO_COVERAGE_TEST + if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) || !coverage_test_passes(col0)) { - if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) || !coverage_test_passes(col0)) - { - discard; - } + discard; } #endif diff --git a/rpcs3/Emu/RSX/Program/GLSLTypes.h b/rpcs3/Emu/RSX/Program/GLSLTypes.h index 4716c845d2..5b1b61396e 100644 --- a/rpcs3/Emu/RSX/Program/GLSLTypes.h +++ b/rpcs3/Emu/RSX/Program/GLSLTypes.h @@ -36,12 +36,18 @@ namespace glsl bool require_srgb_to_linear : 1; bool require_linear_to_srgb : 1; bool require_fog_read : 1; - bool emulate_coverage_tests : 1; bool emulate_shadow_compare : 1; bool low_precision_tests : 1; bool disable_early_discard : 1; bool supports_native_fp16 : 1; + + // ROP control flags bool ROP_output_rounding : 1; + bool ROP_sRGB_packing : 1; + bool ROP_alpha_test : 1; + bool ROP_alpha_to_coverage_test : 1; + bool ROP_polygon_stipple_test : 1; + bool ROP_discard : 1; // Texturing spec bool require_texture_ops : 1; // Global switch to enable/disable all texture code @@ -53,5 +59,6 @@ namespace glsl bool require_tex2D_ops : 1; // Include 2D texture stuff bool require_tex3D_ops : 1; // Include 3D texture stuff (including cubemap) bool require_shadowProj_ops : 1; // Include shadow2DProj projection textures (1D is unsupported anyway) + bool require_alpha_kill : 1; // Include alpha kill checking code }; }; diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 73c86907f1..dc4f01f010 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2063,6 +2063,26 @@ namespace rsx { current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION; } + + if (method_registers.alpha_test_enabled()) + { + current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ALPHA_TEST; + } + + if (method_registers.polygon_stipple_enabled()) + { + current_fragment_program.ctrl |= 
RSX_SHADER_CONTROL_POLYGON_STIPPLE; + } + + if (method_registers.msaa_alpha_to_coverage_enabled()) + { + const bool is_multiple_samples = method_registers.surface_antialias() != rsx::surface_antialiasing::center_1_sample; + if (!backend_config.supports_hw_a2c || (!is_multiple_samples && !backend_config.supports_hw_a2c_1spp)) + { + // Emulation required + current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE; + } + } } else if (method_registers.point_sprite_enabled() && method_registers.current_draw_clause.primitive == primitive_type::points) @@ -2071,6 +2091,24 @@ namespace rsx current_fragment_program.texcoord_control_mask |= u32(method_registers.point_sprite_control_mask()) << 16; } + // Check if framebuffer is actually an XRGB format and not a WZYX format + switch (method_registers.surface_color()) + { + case rsx::surface_color_format::w16z16y16x16: + case rsx::surface_color_format::w32z32y32x32: + case rsx::surface_color_format::x32: + // These behave very differently from "normal" formats. + break; + default: + // Integer framebuffer formats. These can support sRGB output as well as some special rules for output quantization. 
+ current_fragment_program.ctrl |= RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER; + if (method_registers.framebuffer_srgb_enabled()) + { + current_fragment_program.ctrl |= RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER; + } + break; + } + for (u32 textures_ref = current_fp_metadata.referenced_textures_mask, i = 0; textures_ref; textures_ref >>= 1, ++i) { if (!(textures_ref & 1)) continue; @@ -2098,6 +2136,7 @@ namespace rsx { //alphakill can be ignored unless a valid comparison function is set texture_control |= (1 << texture_control_bits::ALPHAKILL); + current_fragment_program.ctrl |= RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL; } //const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index c720ae8550..ab06fbdade 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -318,16 +318,24 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS) m_shader_props.require_srgb_to_linear = properties.has_upg; m_shader_props.require_linear_to_srgb = properties.has_pkg; m_shader_props.require_fog_read = properties.in_register_mask & in_fogc; - m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none; m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare; + m_shader_props.low_precision_tests = device_props.has_low_precision_rounding && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION); m_shader_props.disable_early_discard = !vk::is_NVIDIA(vk::get_driver_vendor()); m_shader_props.supports_native_fp16 = device_props.has_native_half_support; - m_shader_props.ROP_output_rounding = g_cfg.video.shader_precision != gpu_preset_level::low; + + m_shader_props.ROP_output_rounding = (g_cfg.video.shader_precision != gpu_preset_level::low) && !!(m_prog.ctrl & RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER); + m_shader_props.ROP_sRGB_packing = !!(m_prog.ctrl & 
RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER); + m_shader_props.ROP_alpha_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TEST); + m_shader_props.ROP_alpha_to_coverage_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE); + m_shader_props.ROP_polygon_stipple_test = !!(m_prog.ctrl & RSX_SHADER_CONTROL_POLYGON_STIPPLE); + m_shader_props.ROP_discard = !!(m_prog.ctrl & RSX_SHADER_CONTROL_USES_KIL); + m_shader_props.require_tex1D_ops = properties.has_tex1D; m_shader_props.require_tex2D_ops = properties.has_tex2D; m_shader_props.require_tex3D_ops = properties.has_tex3D; m_shader_props.require_shadowProj_ops = properties.shadow_sampler_mask != 0 && properties.has_texShadowProj; + m_shader_props.require_alpha_kill = !!(m_prog.ctrl & RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL); // Declare global constants if (m_shader_props.require_fog_read) diff --git a/rpcs3/Emu/RSX/gcm_enums.h b/rpcs3/Emu/RSX/gcm_enums.h index 61b51c3857..7fd9437f66 100644 --- a/rpcs3/Emu/RSX/gcm_enums.h +++ b/rpcs3/Emu/RSX/gcm_enums.h @@ -454,9 +454,17 @@ namespace gcm RSX_SHADER_CONTROL_UNKNOWN1 = 0x8000, // seemingly set when srgb packer is used?? // Custom - RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x10000, // Rasterizing triangles and not lines or points - RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x20000, // Support instance ID offsets when loading constants - RSX_SHADER_CONTROL_INTERPRETER_MODEL = 0x40000, // Compile internals expecting interpreter + RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION = 0x0010000, // Rasterizing triangles and not lines or points + RSX_SHADER_CONTROL_INSTANCED_CONSTANTS = 0x0020000, // Support instance ID offsets when loading constants + RSX_SHADER_CONTROL_INTERPRETER_MODEL = 0x0040000, // Compile internals expecting interpreter + + RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER = 0x0080000, // Quantize outputs to 8-bit FBO + RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER = 0x0100000, // Outputs are SRGB. We could reuse UNKNOWN1 but we just keep the namespaces separate. 
+ + RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL = 0x0200000, // Uses alpha kill on texture input + RSX_SHADER_CONTROL_ALPHA_TEST = 0x0400000, // Uses alpha test on the outputs + RSX_SHADER_CONTROL_POLYGON_STIPPLE = 0x0800000, // Uses polygon stipple for dithered rendering + RSX_SHADER_CONTROL_ALPHA_TO_COVERAGE = 0x1000000, // Alpha to coverage }; // GCM Reports From f5cca049c2565b1241a421d4b25921b98c8efa88 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 14 Dec 2025 15:23:56 +0300 Subject: [PATCH 06/13] rsx: Fix MSAA sampling operations --- .../RSXProg/RSXFragmentTextureMSAAOps.glsl | 6 ++--- .../RSXFragmentTextureMSAAOpsInternal.glsl | 24 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl index 965eac5276..00e81c39c5 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOps.glsl @@ -1,8 +1,8 @@ R"( #define ZCOMPARE_FUNC(index) _get_bits(TEX_FLAGS(index), DEPTH_COMPARE, 3) -#define ZS_READ_MS(index, coord) vec2(sampleTexture2DMS(TEX_NAME(index), coord, index).r, float(sampleTexture2DMS(TEX_NAME_STENCIL(index), coord, index).x)) -#define TEX2D_MS(index, coord2) _process_texel(sampleTexture2DMS(TEX_NAME(index), coord2, index), TEX_FLAGS(index)) -#define TEX2D_SHADOW_MS(index, coord3) vec4(comparison_passes(sampleTexture2DMS(TEX_NAME(index), coord3.xy, index).x, coord3.z, ZCOMPARE_FUNC(index))) +#define ZS_READ_MS(index, coord) vec2(sampleTexture2DMS(TEX_NAME(index), coord, TEX_PARAM(index)).r, float(sampleTexture2DMS(TEX_NAME_STENCIL(index), coord, TEX_PARAM(index)).x)) +#define TEX2D_MS(index, coord2) _process_texel(sampleTexture2DMS(TEX_NAME(index), coord2, TEX_PARAM(index)), TEX_FLAGS(index)) +#define TEX2D_SHADOW_MS(index, coord3) vec4(comparison_passes(sampleTexture2DMS(TEX_NAME(index), 
coord3.xy, TEX_PARAM(index)).x, coord3.z, ZCOMPARE_FUNC(index))) #define TEX2D_SHADOWPROJ_MS(index, coord4) TEX2D_SHADOW_MS(index, (coord4.xyz / coord4.w)) #define TEX2D_Z24X8_RGBA8_MS(index, coord2) _process_texel(convert_z24x8_to_rgba8(ZS_READ_MS(index, coord2), TEX_PARAM(index).remap, TEX_FLAGS(index)), TEX_FLAGS(index)) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOpsInternal.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOpsInternal.glsl index 0bf734ab35..5bf5ab9f9a 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOpsInternal.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXFragmentTextureMSAAOpsInternal.glsl @@ -1,5 +1,5 @@ R"( -vec4 texelFetch2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 sample_count, const in ivec2 icoords, const in int index, const in ivec2 offset) +vec4 texelFetch2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 sample_count, const in ivec2 icoords, const in ivec2 offset) { const vec2 resolve_coords = vec2(icoords + offset); const vec2 aa_coords = floor(resolve_coords / sample_count); // AA coords = real_coords / sample_count @@ -8,15 +8,15 @@ vec4 texelFetch2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 sample_count, cons return texelFetch(tex, ivec2(aa_coords), int(sample_index)); } -vec4 sampleTexture2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 coords, const in int index) +vec4 sampleTexture2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 coords, const in sampler_info tex_params) { - const uint flags = TEX_FLAGS(index); - const vec2 scaled_coords = COORD_SCALE2(index, coords); + const uint flags = tex_params.flags; + const vec2 scaled_coords = _texcoord_xform(coords, tex_params); const vec2 normalized_coords = texture2DMSCoord(scaled_coords, flags); const vec2 sample_count = vec2(2., textureSamples(tex) * 0.5); const vec2 image_size = textureSize(tex) * sample_count; const ivec2 icoords = ivec2(normalized_coords * image_size); - const vec4 
sample0 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(0)); + const vec4 sample0 = texelFetch2DMS(tex, sample_count, icoords, ivec2(0)); if (_get_bits(flags, FILTERED_MAG_BIT, 2) == 0) { @@ -35,7 +35,7 @@ vec4 sampleTexture2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 coords, const i vec4 a, b; float factor; - const vec4 sample2 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(0, 1)); // Top left + const vec4 sample2 = texelFetch2DMS(tex, sample_count, icoords, ivec2(0, 1)); // Top left if (no_filter.x) { @@ -46,21 +46,21 @@ vec4 sampleTexture2DMS(in _MSAA_SAMPLER_TYPE_ tex, const in vec2 coords, const i else { // Filter required, sample more data - const vec4 sample1 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(1, 0)); // Bottom right - const vec4 sample3 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(1, 1)); // Top right + const vec4 sample1 = texelFetch2DMS(tex, sample_count, icoords, ivec2(1, 0)); // Bottom right + const vec4 sample3 = texelFetch2DMS(tex, sample_count, icoords, ivec2(1, 1)); // Top right if (actual_step.x > uv_step.x) { // Downscale in X, centered const vec3 weights = compute2x2DownsampleWeights(normalized_coords.x, uv_step.x, actual_step.x); - const vec4 sample4 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(2, 0)); // Further bottom right - a = fma(sample0, weights.xxxx, sample1 * weights.y) + (sample4 * weights.z); // Weighted sum + const vec4 sample4 = texelFetch2DMS(tex, sample_count, icoords, ivec2(2, 0)); // Further bottom right + a = fma(sample0, weights.xxxx, sample1 * weights.y) + (sample4 * weights.z); // Weighted sum if (!no_filter.y) { - const vec4 sample5 = texelFetch2DMS(tex, sample_count, icoords, index, ivec2(2, 1)); // Further top right - b = fma(sample2, weights.xxxx, sample3 * weights.y) + (sample5 * weights.z); // Weighted sum + const vec4 sample5 = texelFetch2DMS(tex, sample_count, icoords, ivec2(2, 1)); // Further top right + b = fma(sample2, weights.xxxx, sample3 * 
weights.y) + (sample5 * weights.z); // Weighted sum } } else if (actual_step.x < uv_step.x) From 858e8f423a0a5d5a89ae5bde140d8b1599297a5b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 14 Dec 2025 15:24:32 +0300 Subject: [PATCH 07/13] rsx: Propagate shader kill flag and refactor texture flags processing a bit --- rpcs3/Emu/RSX/RSXThread.cpp | 378 ++++++++++++++++++------------------ 1 file changed, 190 insertions(+), 188 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index dc4f01f010..dd272d5f84 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2052,7 +2052,7 @@ namespace rsx m_graphics_state.clear(rsx::pipeline_state::fragment_program_dirty); - current_fragment_program.ctrl = m_ctx->register_state->shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT); + current_fragment_program.ctrl = m_ctx->register_state->shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT | RSX_SHADER_CONTROL_USES_KIL); current_fragment_program.texcoord_control_mask = m_ctx->register_state->texcoord_control_mask(); current_fragment_program.two_sided_lighting = m_ctx->register_state->two_side_light_en(); current_fragment_program.mrt_buffers_count = rsx::utility::get_mrt_buffers_count(m_ctx->register_state->surface_color_target()); @@ -2116,195 +2116,197 @@ namespace rsx auto &tex = rsx::method_registers.fragment_textures[i]; current_fp_texture_state.clear(i); - if (tex.enabled() && sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_UNDEFINED) + if (!tex.enabled() || sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_UNDEFINED) { - std::memcpy(current_fragment_program.texture_params[i].scale, sampler_descriptors[i]->texcoord_xform.scale, 6 * sizeof(f32)); - current_fragment_program.texture_params[i].remap = tex.remap(); - - m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty; - - u32 texture_control = 0; - 
current_fp_texture_state.set_dimension(sampler_descriptors[i]->image_type, i); - - if (sampler_descriptors[i]->texcoord_xform.clamp) - { - std::memcpy(current_fragment_program.texture_params[i].clamp_min, sampler_descriptors[i]->texcoord_xform.clamp_min, 4 * sizeof(f32)); - texture_control |= (1 << rsx::texture_control_bits::CLAMP_TEXCOORDS_BIT); - } - - if (tex.alpha_kill_enabled()) - { - //alphakill can be ignored unless a valid comparison function is set - texture_control |= (1 << texture_control_bits::ALPHAKILL); - current_fragment_program.ctrl |= RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL; - } - - //const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); - const u32 raw_format = tex.format(); - const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); - - if (raw_format & CELL_GCM_TEXTURE_UN) - { - if (tex.min_filter() == rsx::texture_minify_filter::nearest || - tex.mag_filter() == rsx::texture_magnify_filter::nearest) - { - // Subpixel offset so that (X + bias) * scale will round correctly. - // This is done to work around fdiv precision issues in some GPUs (NVIDIA) - // We apply the simplification where (x + bias) * z = xz + zbias here. 
- constexpr auto subpixel_bias = 0.01f; - current_fragment_program.texture_params[i].bias[0] += (subpixel_bias * current_fragment_program.texture_params[i].scale[0]); - current_fragment_program.texture_params[i].bias[1] += (subpixel_bias * current_fragment_program.texture_params[i].scale[1]); - current_fragment_program.texture_params[i].bias[2] += (subpixel_bias * current_fragment_program.texture_params[i].scale[2]); - } - } - - if (backend_config.supports_hw_msaa && sampler_descriptors[i]->samples > 1) - { - current_fp_texture_state.multisampled_textures |= (1 << i); - texture_control |= (static_cast(tex.zfunc()) << texture_control_bits::DEPTH_COMPARE_OP); - texture_control |= (static_cast(tex.mag_filter() != rsx::texture_magnify_filter::nearest) << texture_control_bits::FILTERED_MAG); - texture_control |= (static_cast(tex.min_filter() != rsx::texture_minify_filter::nearest) << texture_control_bits::FILTERED_MIN); - texture_control |= (((tex.format() & CELL_GCM_TEXTURE_UN) >> 6) << texture_control_bits::UNNORMALIZED_COORDS); - - if (rsx::is_texcoord_wrapping_mode(tex.wrap_s())) - { - texture_control |= (1 << texture_control_bits::WRAP_S); - } - - if (rsx::is_texcoord_wrapping_mode(tex.wrap_t())) - { - texture_control |= (1 << texture_control_bits::WRAP_T); - } - - if (rsx::is_texcoord_wrapping_mode(tex.wrap_r())) - { - texture_control |= (1 << texture_control_bits::WRAP_R); - } - } - - if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR) - { - switch (sampler_descriptors[i]->format_class) - { - case RSX_FORMAT_CLASS_DEPTH16_FLOAT: - case RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32: - texture_control |= (1 << texture_control_bits::DEPTH_FLOAT); - break; - default: - break; - } - - switch (format) - { - case CELL_GCM_TEXTURE_A8R8G8B8: - case CELL_GCM_TEXTURE_D8R8G8B8: - { - // Emulate bitcast in shader - current_fp_texture_state.redirected_textures |= (1 << i); - const auto float_en = (sampler_descriptors[i]->format_class == 
RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32)? 1 : 0; - texture_control |= (float_en << texture_control_bits::DEPTH_FLOAT); - break; - } - case CELL_GCM_TEXTURE_X16: - { - // A simple way to quickly read DEPTH16 data without shadow comparison - break; - } - case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH24_D8: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: - case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - { - // Natively supported Z formats with shadow comparison feature - const auto compare_mode = tex.zfunc(); - if (!tex.alpha_kill_enabled() && - compare_mode < rsx::comparison_function::always && - compare_mode > rsx::comparison_function::never) - { - current_fp_texture_state.shadow_textures |= (1 << i); - } - break; - } - default: - rsx_log.error("Depth texture bound to pipeline with unexpected format 0x%X", format); - } - } - else if (!backend_config.supports_hw_renormalization /* && - tex.min_filter() == rsx::texture_minify_filter::nearest && - tex.mag_filter() == rsx::texture_magnify_filter::nearest*/) - { - // FIXME: This check should only apply to point-sampled textures. However, it severely regresses some games (id tech 5). - // This is because even when filtering is active, the error from the PS3 texture expansion still applies. - // A proper fix is to expand these formats into BGRA8 when high texture precision is required. That requires different GUI settings and inflation shaders, so it will be handled separately. 
- - switch (format) - { - case CELL_GCM_TEXTURE_A1R5G5B5: - case CELL_GCM_TEXTURE_A4R4G4B4: - case CELL_GCM_TEXTURE_D1R5G5B5: - case CELL_GCM_TEXTURE_R5G5B5A1: - case CELL_GCM_TEXTURE_R5G6B5: - case CELL_GCM_TEXTURE_R6G5B5: - texture_control |= (1 << texture_control_bits::RENORMALIZE); - break; - default: - break; - } - } - - if (rsx::is_int8_remapped_format(format)) - { - // Special operations applied to 8-bit formats such as gamma correction and sign conversion - // NOTE: The unsigned_remap=bias flag being set flags the texture as being compressed normal (2n-1 / BX2) (UE3) - // NOTE: The ARGB8_signed flag means to reinterpret the raw bytes as signed. This is different than unsigned_remap=bias which does range decompression. - // This is a separate method of setting the format to signed mode without doing so per-channel - // Precedence = SNORM > GAMMA > UNSIGNED_REMAP (See Resistance 3 for GAMMA/BX2 relationship, UE3 for BX2 effect) - - const u32 argb8_signed = tex.argb_signed(); // _SNROM - const u32 gamma = tex.gamma() & ~argb8_signed; // _SRGB - const u32 unsigned_remap = (tex.unsigned_remap() == CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL)? 
0u : (~(gamma | argb8_signed) & 0xF); // _BX2 - u32 argb8_convert = gamma; - - // The options are mutually exclusive - ensure((argb8_signed & gamma) == 0); - ensure((argb8_signed & unsigned_remap) == 0); - ensure((gamma & unsigned_remap) == 0); - - // Helper function to apply a per-channel mask based on an input mask - const auto apply_sign_convert_mask = [&](u32 mask, u32 bit_offset) - { - // TODO: Use actual remap mask to account for 0 and 1 overrides in default mapping - // TODO: Replace this clusterfuck of texture control with matrix transformation - const auto remap_ctrl = (tex.remap() >> 8) & 0xAA; - if (remap_ctrl == 0xAA) - { - argb8_convert |= (mask & 0xFu) << bit_offset; - return; - } - - if ((remap_ctrl & 0x03) == 0x02) argb8_convert |= (mask & 0x1u) << bit_offset; - if ((remap_ctrl & 0x0C) == 0x08) argb8_convert |= (mask & 0x2u) << bit_offset; - if ((remap_ctrl & 0x30) == 0x20) argb8_convert |= (mask & 0x4u) << bit_offset; - if ((remap_ctrl & 0xC0) == 0x80) argb8_convert |= (mask & 0x8u) << bit_offset; - }; - - if (argb8_signed) - { - // Apply integer sign extension from uint8 to sint8 and renormalize - apply_sign_convert_mask(argb8_signed, texture_control_bits::SEXT_OFFSET); - } - - if (unsigned_remap) - { - // Apply sign expansion, compressed normal-map style (2n - 1) - apply_sign_convert_mask(unsigned_remap, texture_control_bits::EXPAND_OFFSET); - } - - texture_control |= argb8_convert; - } - - current_fragment_program.texture_params[i].control = texture_control; + continue; } + + std::memcpy(current_fragment_program.texture_params[i].scale, sampler_descriptors[i]->texcoord_xform.scale, 6 * sizeof(f32)); + current_fragment_program.texture_params[i].remap = tex.remap(); + + m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty; + + u32 texture_control = 0; + current_fp_texture_state.set_dimension(sampler_descriptors[i]->image_type, i); + + if (sampler_descriptors[i]->texcoord_xform.clamp) + { + 
std::memcpy(current_fragment_program.texture_params[i].clamp_min, sampler_descriptors[i]->texcoord_xform.clamp_min, 4 * sizeof(f32)); + texture_control |= (1 << rsx::texture_control_bits::CLAMP_TEXCOORDS_BIT); + } + + if (tex.alpha_kill_enabled()) + { + //alphakill can be ignored unless a valid comparison function is set + texture_control |= (1 << texture_control_bits::ALPHAKILL); + current_fragment_program.ctrl |= RSX_SHADER_CONTROL_TEXTURE_ALPHA_KILL; + } + + //const u32 texaddr = rsx::get_address(tex.offset(), tex.location()); + const u32 raw_format = tex.format(); + const u32 format = raw_format & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + + if (raw_format & CELL_GCM_TEXTURE_UN) + { + if (tex.min_filter() == rsx::texture_minify_filter::nearest || + tex.mag_filter() == rsx::texture_magnify_filter::nearest) + { + // Subpixel offset so that (X + bias) * scale will round correctly. + // This is done to work around fdiv precision issues in some GPUs (NVIDIA) + // We apply the simplification where (x + bias) * z = xz + zbias here. 
+ constexpr auto subpixel_bias = 0.01f; + current_fragment_program.texture_params[i].bias[0] += (subpixel_bias * current_fragment_program.texture_params[i].scale[0]); + current_fragment_program.texture_params[i].bias[1] += (subpixel_bias * current_fragment_program.texture_params[i].scale[1]); + current_fragment_program.texture_params[i].bias[2] += (subpixel_bias * current_fragment_program.texture_params[i].scale[2]); + } + } + + if (backend_config.supports_hw_msaa && sampler_descriptors[i]->samples > 1) + { + current_fp_texture_state.multisampled_textures |= (1 << i); + texture_control |= (static_cast(tex.zfunc()) << texture_control_bits::DEPTH_COMPARE_OP); + texture_control |= (static_cast(tex.mag_filter() != rsx::texture_magnify_filter::nearest) << texture_control_bits::FILTERED_MAG); + texture_control |= (static_cast(tex.min_filter() != rsx::texture_minify_filter::nearest) << texture_control_bits::FILTERED_MIN); + texture_control |= (((tex.format() & CELL_GCM_TEXTURE_UN) >> 6) << texture_control_bits::UNNORMALIZED_COORDS); + + if (rsx::is_texcoord_wrapping_mode(tex.wrap_s())) + { + texture_control |= (1 << texture_control_bits::WRAP_S); + } + + if (rsx::is_texcoord_wrapping_mode(tex.wrap_t())) + { + texture_control |= (1 << texture_control_bits::WRAP_T); + } + + if (rsx::is_texcoord_wrapping_mode(tex.wrap_r())) + { + texture_control |= (1 << texture_control_bits::WRAP_R); + } + } + + if (sampler_descriptors[i]->format_class != RSX_FORMAT_CLASS_COLOR) + { + switch (sampler_descriptors[i]->format_class) + { + case RSX_FORMAT_CLASS_DEPTH16_FLOAT: + case RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32: + texture_control |= (1 << texture_control_bits::DEPTH_FLOAT); + break; + default: + break; + } + + switch (format) + { + case CELL_GCM_TEXTURE_A8R8G8B8: + case CELL_GCM_TEXTURE_D8R8G8B8: + { + // Emulate bitcast in shader + current_fp_texture_state.redirected_textures |= (1 << i); + const auto float_en = (sampler_descriptors[i]->format_class == 
RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32)? 1 : 0; + texture_control |= (float_en << texture_control_bits::DEPTH_FLOAT); + break; + } + case CELL_GCM_TEXTURE_X16: + { + // A simple way to quickly read DEPTH16 data without shadow comparison + break; + } + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH24_D8: + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + { + // Natively supported Z formats with shadow comparison feature + const auto compare_mode = tex.zfunc(); + if (!tex.alpha_kill_enabled() && + compare_mode < rsx::comparison_function::always && + compare_mode > rsx::comparison_function::never) + { + current_fp_texture_state.shadow_textures |= (1 << i); + } + break; + } + default: + rsx_log.error("Depth texture bound to pipeline with unexpected format 0x%X", format); + } + } + else if (!backend_config.supports_hw_renormalization /* && + tex.min_filter() == rsx::texture_minify_filter::nearest && + tex.mag_filter() == rsx::texture_magnify_filter::nearest*/) + { + // FIXME: This check should only apply to point-sampled textures. However, it severely regresses some games (id tech 5). + // This is because even when filtering is active, the error from the PS3 texture expansion still applies. + // A proper fix is to expand these formats into BGRA8 when high texture precision is required. That requires different GUI settings and inflation shaders, so it will be handled separately. 
+ + switch (format) + { + case CELL_GCM_TEXTURE_A1R5G5B5: + case CELL_GCM_TEXTURE_A4R4G4B4: + case CELL_GCM_TEXTURE_D1R5G5B5: + case CELL_GCM_TEXTURE_R5G5B5A1: + case CELL_GCM_TEXTURE_R5G6B5: + case CELL_GCM_TEXTURE_R6G5B5: + texture_control |= (1 << texture_control_bits::RENORMALIZE); + break; + default: + break; + } + } + + if (rsx::is_int8_remapped_format(format)) + { + // Special operations applied to 8-bit formats such as gamma correction and sign conversion + // NOTE: The unsigned_remap=bias flag being set flags the texture as being compressed normal (2n-1 / BX2) (UE3) + // NOTE: The ARGB8_signed flag means to reinterpret the raw bytes as signed. This is different than unsigned_remap=bias which does range decompression. + // This is a separate method of setting the format to signed mode without doing so per-channel + // Precedence = SNORM > GAMMA > UNSIGNED_REMAP (See Resistance 3 for GAMMA/BX2 relationship, UE3 for BX2 effect) + + const u32 argb8_signed = tex.argb_signed(); // _SNROM + const u32 gamma = tex.gamma() & ~argb8_signed; // _SRGB + const u32 unsigned_remap = (tex.unsigned_remap() == CELL_GCM_TEXTURE_UNSIGNED_REMAP_NORMAL)? 
0u : (~(gamma | argb8_signed) & 0xF); // _BX2 + u32 argb8_convert = gamma; + + // The options are mutually exclusive + ensure((argb8_signed & gamma) == 0); + ensure((argb8_signed & unsigned_remap) == 0); + ensure((gamma & unsigned_remap) == 0); + + // Helper function to apply a per-channel mask based on an input mask + const auto apply_sign_convert_mask = [&](u32 mask, u32 bit_offset) + { + // TODO: Use actual remap mask to account for 0 and 1 overrides in default mapping + // TODO: Replace this clusterfuck of texture control with matrix transformation + const auto remap_ctrl = (tex.remap() >> 8) & 0xAA; + if (remap_ctrl == 0xAA) + { + argb8_convert |= (mask & 0xFu) << bit_offset; + return; + } + + if ((remap_ctrl & 0x03) == 0x02) argb8_convert |= (mask & 0x1u) << bit_offset; + if ((remap_ctrl & 0x0C) == 0x08) argb8_convert |= (mask & 0x2u) << bit_offset; + if ((remap_ctrl & 0x30) == 0x20) argb8_convert |= (mask & 0x4u) << bit_offset; + if ((remap_ctrl & 0xC0) == 0x80) argb8_convert |= (mask & 0x8u) << bit_offset; + }; + + if (argb8_signed) + { + // Apply integer sign extension from uint8 to sint8 and renormalize + apply_sign_convert_mask(argb8_signed, texture_control_bits::SEXT_OFFSET); + } + + if (unsigned_remap) + { + // Apply sign expansion, compressed normal-map style (2n - 1) + apply_sign_convert_mask(unsigned_remap, texture_control_bits::EXPAND_OFFSET); + } + + texture_control |= argb8_convert; + } + + current_fragment_program.texture_params[i].control = texture_control; } // Update texture configuration From 7dce07c945b022552fc6431474242d67d685e915 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 14 Dec 2025 20:38:12 +0300 Subject: [PATCH 08/13] rsx: Fix sRGB control flag propagation to shader control - Also clean up the texturing code and fix some memcpy overflow bugs. 
--- rpcs3/Emu/RSX/RSXThread.cpp | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index dd272d5f84..638814fda1 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2057,26 +2057,26 @@ namespace rsx current_fragment_program.two_sided_lighting = m_ctx->register_state->two_side_light_en(); current_fragment_program.mrt_buffers_count = rsx::utility::get_mrt_buffers_count(m_ctx->register_state->surface_color_target()); - if (method_registers.current_draw_clause.classify_mode() == primitive_class::polygon) + if (m_ctx->register_state->current_draw_clause.classify_mode() == primitive_class::polygon) { if (!backend_config.supports_normalized_barycentrics) { current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION; } - if (method_registers.alpha_test_enabled()) + if (m_ctx->register_state->alpha_test_enabled()) { current_fragment_program.ctrl |= RSX_SHADER_CONTROL_ALPHA_TEST; } - if (method_registers.polygon_stipple_enabled()) + if (m_ctx->register_state->polygon_stipple_enabled()) { current_fragment_program.ctrl |= RSX_SHADER_CONTROL_POLYGON_STIPPLE; } - if (method_registers.msaa_alpha_to_coverage_enabled()) + if (m_ctx->register_state->msaa_alpha_to_coverage_enabled()) { - const bool is_multiple_samples = method_registers.surface_antialias() != rsx::surface_antialiasing::center_1_sample; + const bool is_multiple_samples = m_ctx->register_state->surface_antialias() != rsx::surface_antialiasing::center_1_sample; if (!backend_config.supports_hw_a2c || (!is_multiple_samples && !backend_config.supports_hw_a2c_1spp)) { // Emulation required @@ -2084,15 +2084,15 @@ namespace rsx } } } - else if (method_registers.point_sprite_enabled() && - method_registers.current_draw_clause.primitive == primitive_type::points) + else if (m_ctx->register_state->point_sprite_enabled() && + 
m_ctx->register_state->current_draw_clause.primitive == primitive_type::points) { // Set high word of the control mask to store point sprite control - current_fragment_program.texcoord_control_mask |= u32(method_registers.point_sprite_control_mask()) << 16; + current_fragment_program.texcoord_control_mask |= u32(m_ctx->register_state->point_sprite_control_mask()) << 16; } // Check if framebuffer is actually an XRGB format and not a WZYX format - switch (method_registers.surface_color()) + switch (m_ctx->register_state->surface_color()) { case rsx::surface_color_format::w16z16y16x16: case rsx::surface_color_format::w32z32y32x32: @@ -2102,7 +2102,8 @@ namespace rsx default: // Integer framebuffer formats. These can support sRGB output as well as some special rules for output quantization. current_fragment_program.ctrl |= RSX_SHADER_CONTROL_8BIT_FRAMEBUFFER; - if (method_registers.framebuffer_srgb_enabled()) + if (!(current_fragment_program.ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) && // Cannot output sRGB from 32-bit registers + m_ctx->register_state->framebuffer_srgb_enabled()) { current_fragment_program.ctrl |= RSX_SHADER_CONTROL_SRGB_FRAMEBUFFER; } @@ -2113,7 +2114,7 @@ namespace rsx { if (!(textures_ref & 1)) continue; - auto &tex = rsx::method_registers.fragment_textures[i]; + auto &tex = m_ctx->register_state->fragment_textures[i]; current_fp_texture_state.clear(i); if (!tex.enabled() || sampler_descriptors[i]->format_class == RSX_FORMAT_CLASS_UNDEFINED) @@ -2121,7 +2122,11 @@ namespace rsx continue; } - std::memcpy(current_fragment_program.texture_params[i].scale, sampler_descriptors[i]->texcoord_xform.scale, 6 * sizeof(f32)); + std::memcpy( + current_fragment_program.texture_params[i].scale, + sampler_descriptors[i]->texcoord_xform.scale, + sizeof(sampler_descriptors[i]->texcoord_xform.scale)); + current_fragment_program.texture_params[i].remap = tex.remap(); m_graphics_state |= rsx::pipeline_state::fragment_texture_state_dirty; @@ -2131,7 +2136,11 @@ 
namespace rsx if (sampler_descriptors[i]->texcoord_xform.clamp) { - std::memcpy(current_fragment_program.texture_params[i].clamp_min, sampler_descriptors[i]->texcoord_xform.clamp_min, 4 * sizeof(f32)); + std::memcpy( + current_fragment_program.texture_params[i].clamp_min, + sampler_descriptors[i]->texcoord_xform.clamp_min, + sizeof(sampler_descriptors[i]->texcoord_xform.clamp_min)); + texture_control |= (1 << rsx::texture_control_bits::CLAMP_TEXCOORDS_BIT); } @@ -2316,7 +2325,7 @@ namespace rsx if (current_fragment_program.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) { //Check that the depth stage is not disabled - if (!rsx::method_registers.depth_test_enabled()) + if (!m_ctx->register_state->depth_test_enabled()) { rsx_log.trace("FS exports depth component but depth test is disabled (INVALID_OPERATION)"); } From 5cab7ccce0653fa8c9c9573207552ee960331dfc Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 14 Dec 2025 21:25:37 +0300 Subject: [PATCH 09/13] rsx: Propagate state changes to trigger permutation lookup - Alpha test - MSAA (for CSAA enable flag) - Polygon stipple --- rpcs3/Emu/RSX/NV47/HW/nv4097.cpp | 8 ++++++-- rpcs3/Emu/RSX/rsx_methods.cpp | 8 ++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index d0298cf359..8db588da60 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -250,8 +250,12 @@ namespace rsx const auto current = REGS(ctx)->decode(arg); const auto previous = REGS(ctx)->decode(REGS(ctx)->latch); - if (*current.antialias() != *previous.antialias() || // Antialias control has changed, update ROP parameters - current.is_integer_color_format() != previous.is_integer_color_format()) // The type of color format also requires ROP control update + if (current.is_integer_color_format() != previous.is_integer_color_format()) // Different ROP emulation + { + RSX(ctx)->m_graphics_state |= 
rsx::pipeline_state::fragment_program_state_dirty; + } + + if (*current.antialias() != *previous.antialias()) // Antialias control has changed, update ROP parameters { RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty; } diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 903a10c582..3d12440882 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -716,9 +716,9 @@ namespace rsx state_signals[NV4097_SET_POINT_SIZE] = rsx::vertex_state_dirty; state_signals[NV4097_SET_ALPHA_FUNC] = rsx::fragment_state_dirty; state_signals[NV4097_SET_ALPHA_REF] = rsx::fragment_state_dirty; - state_signals[NV4097_SET_ALPHA_TEST_ENABLE] = rsx::fragment_state_dirty; - state_signals[NV4097_SET_ANTI_ALIASING_CONTROL] = rsx::fragment_state_dirty | rsx::pipeline_config_dirty; - state_signals[NV4097_SET_SHADER_PACKER] = rsx::fragment_state_dirty; + state_signals[NV4097_SET_ALPHA_TEST_ENABLE] = rsx::fragment_program_state_dirty; + state_signals[NV4097_SET_ANTI_ALIASING_CONTROL] = rsx::fragment_program_state_dirty; + state_signals[NV4097_SET_SHADER_PACKER] = rsx::fragment_program_state_dirty; state_signals[NV4097_SET_SHADER_WINDOW] = rsx::fragment_state_dirty; state_signals[NV4097_SET_FOG_MODE] = rsx::fragment_state_dirty; state_signals[NV4097_SET_SCISSOR_HORIZONTAL] = rsx::scissor_config_state_dirty; @@ -733,7 +733,7 @@ namespace rsx state_signals[NV4097_SET_VIEWPORT_OFFSET + 0] = rsx::vertex_state_dirty; state_signals[NV4097_SET_VIEWPORT_OFFSET + 1] = rsx::vertex_state_dirty; state_signals[NV4097_SET_VIEWPORT_OFFSET + 2] = rsx::vertex_state_dirty; - state_signals[NV4097_SET_POLYGON_STIPPLE] = rsx::fragment_state_dirty; + state_signals[NV4097_SET_POLYGON_STIPPLE] = rsx::fragment_program_state_dirty; state_signals[NV4097_SET_POLYGON_STIPPLE_PATTERN + 0] = rsx::polygon_stipple_pattern_dirty; state_signals[NV4097_SET_POLYGON_STIPPLE_PATTERN + 1] = rsx::polygon_stipple_pattern_dirty; 
state_signals[NV4097_SET_POLYGON_STIPPLE_PATTERN + 2] = rsx::polygon_stipple_pattern_dirty; From 570f6a25e3168709308b1853dc589e4c57b947b2 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 14 Dec 2025 21:40:14 +0300 Subject: [PATCH 10/13] rsx: Improve alpha-to-coverage emulation --- rpcs3/Emu/RSX/NV47/HW/nv4097.cpp | 28 +++++++++++++++++++ rpcs3/Emu/RSX/NV47/HW/nv4097.h | 2 ++ .../GLSLSnippets/RSXProg/RSXROPEpilogue.glsl | 2 +- rpcs3/Emu/RSX/rsx_methods.cpp | 2 +- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 8db588da60..929925bcb1 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -306,6 +306,34 @@ namespace rsx REGS(ctx)->decode(reg, REGS(ctx)->latch); } + void set_aa_control(context* ctx, u32 reg, u32 arg) + { + const auto latch = REGS(ctx)->latch; + if (arg == latch) + { + return; + } + + // Reconfigure pipeline. + RSX(ctx)->m_graphics_state |= rsx::pipeline_config_dirty; + + // If we support A2C in hardware, leave the rest up to the hardware. The pipeline config should take care of it. + const auto& backend_config = RSX(ctx)->get_backend_config(); + if (backend_config.supports_hw_a2c && + backend_config.supports_hw_a2c_1spp) + { + return; + } + + // No A2C hardware support or partial hardware support. Invalidate the current program if A2C state changed. 
+ const auto a2c_old = REGS(ctx)->decode(latch).msaa_alpha_to_coverage(); + const auto a2c_new = REGS(ctx)->decode(arg).msaa_alpha_to_coverage(); + if (a2c_old != a2c_new) + { + RSX(ctx)->m_graphics_state |= rsx::fragment_program_state_dirty; + } + } + ///// Draw call setup (vertex, etc) void set_array_element16(context* ctx, u32, u32 arg) diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.h b/rpcs3/Emu/RSX/NV47/HW/nv4097.h index db736396b9..a5a434e47d 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.h +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.h @@ -87,6 +87,8 @@ namespace rsx void set_transform_constant_load(context* ctx, u32 reg, u32 arg); + void set_aa_control(context* ctx, u32 reg, u32 arg); + #define RSX(ctx) ctx->rsxthr #define REGS(ctx) (&rsx::method_registers) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl index 283371d0ed..240186cd29 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPEpilogue.glsl @@ -32,7 +32,7 @@ R"( #endif #ifdef _ENABLE_ALPHA_TO_COVERAGE_TEST - if (!_test_bit(rop_control, MSAA_WRITE_ENABLE_BIT) || !coverage_test_passes(col0)) + if (!coverage_test_passes(col0)) { discard; } diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 3d12440882..03bbd1e52a 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -717,7 +717,6 @@ namespace rsx state_signals[NV4097_SET_ALPHA_FUNC] = rsx::fragment_state_dirty; state_signals[NV4097_SET_ALPHA_REF] = rsx::fragment_state_dirty; state_signals[NV4097_SET_ALPHA_TEST_ENABLE] = rsx::fragment_program_state_dirty; - state_signals[NV4097_SET_ANTI_ALIASING_CONTROL] = rsx::fragment_program_state_dirty; state_signals[NV4097_SET_SHADER_PACKER] = rsx::fragment_program_state_dirty; state_signals[NV4097_SET_SHADER_WINDOW] = rsx::fragment_state_dirty; state_signals[NV4097_SET_FOG_MODE] = rsx::fragment_state_dirty; @@ 
-1714,6 +1713,7 @@ namespace rsx bind(NV4097_SET_BLEND_EQUATION, nv4097::set_blend_equation); bind(NV4097_SET_BLEND_FUNC_SFACTOR, nv4097::set_blend_factor); bind(NV4097_SET_BLEND_FUNC_DFACTOR, nv4097::set_blend_factor); + bind(NV4097_SET_ANTI_ALIASING_CONTROL, nv4097::set_aa_control); //NV308A (0xa400..0xbffc!) bind_array(NV308A_COLOR, 1, 256 * 7, nv308a::color::impl); From 0ca7e3fdb2b38c2fec7eb336aa30b25259f69b60 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 14 Dec 2025 21:40:46 +0300 Subject: [PATCH 11/13] rsx: Drop pointless ROP control bit check --- .../GLSLSnippets/RSXProg/RSXROPPrologue.glsl | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPPrologue.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPPrologue.glsl index 5b0798e43e..f24a1ca201 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPPrologue.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/RSXProg/RSXROPPrologue.glsl @@ -1,24 +1,21 @@ R"( #ifdef _ENABLE_POLYGON_STIPPLE - if (_test_bit(rop_control, POLYGON_STIPPLE_ENABLE_BIT)) - { - // Convert x,y to linear address - const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32); - const uint address = stipple_coord.y * 32u + stipple_coord.x; - const uint bit_offset = (address & 31u); - #ifdef VULKAN - // In vulkan we have a unified array with a dynamic offset - const uint word_index = _get_bits(address, 7, 3) + _fs_stipple_pattern_array_offset; - #else - const uint word_index = _get_bits(address, 7, 3); - #endif - const uint sub_index = _get_bits(address, 5, 2); + // Convert x,y to linear address + const uvec2 stipple_coord = uvec2(gl_FragCoord.xy) % uvec2(32, 32); + const uint address = stipple_coord.y * 32u + stipple_coord.x; + const uint bit_offset = (address & 31u); +#ifdef VULKAN + // In vulkan we have a unified array with a dynamic offset + const uint word_index = _get_bits(address, 7, 3) + _fs_stipple_pattern_array_offset; +#else + const 
uint word_index = _get_bits(address, 7, 3); +#endif + const uint sub_index = _get_bits(address, 5, 2); - if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset))) - { - _kill(); - } + if (!_test_bit(stipple_pattern[word_index][sub_index], int(bit_offset))) + { + _kill(); } #endif From 4fd3e536d0b60a99941159ab10bc045e8d6fe502 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 14 Dec 2025 22:50:35 +0300 Subject: [PATCH 12/13] rsx: Fix texcoord clamp regression --- rpcs3/Emu/RSX/RSXThread.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 638814fda1..80ae166467 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -2125,7 +2125,7 @@ namespace rsx std::memcpy( current_fragment_program.texture_params[i].scale, sampler_descriptors[i]->texcoord_xform.scale, - sizeof(sampler_descriptors[i]->texcoord_xform.scale)); + sizeof(sampler_descriptors[i]->texcoord_xform.scale) * 2); // Copy scale and bias together current_fragment_program.texture_params[i].remap = tex.remap(); @@ -2139,7 +2139,7 @@ namespace rsx std::memcpy( current_fragment_program.texture_params[i].clamp_min, sampler_descriptors[i]->texcoord_xform.clamp_min, - sizeof(sampler_descriptors[i]->texcoord_xform.clamp_min)); + sizeof(sampler_descriptors[i]->texcoord_xform.clamp_min) * 2); // Copy clamp_min and clamp_max together texture_control |= (1 << rsx::texture_control_bits::CLAMP_TEXCOORDS_BIT); } From a7e0e4d76e9ff47eabdd69379f402fa53de2642c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 15 Dec 2025 00:43:26 +0300 Subject: [PATCH 13/13] rsx: Fix alpha testing - We no longer reload fragment state when toggling alpha-test. Instead, a different shader variant is picked. 
--- rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp index ed2447ab24..53f6ce31e7 100644 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp @@ -680,12 +680,9 @@ namespace rsx ROP_control_t rop_control{}; alignas(16) fragment_context_t payload{}; - if (REGS(m_ctx)->alpha_test_enabled()) - { - const u32 alpha_func = static_cast<u32>(REGS(m_ctx)->alpha_func()); - rop_control.set_alpha_test_func(alpha_func); - rop_control.enable_alpha_test(); - } + // Always encode the alpha function. Toggling alpha-test is not guaranteed to trigger context param reload anymore. + const u32 alpha_func = static_cast<u32>(REGS(m_ctx)->alpha_func()); + rop_control.set_alpha_test_func(alpha_func); // Generate wpos coefficients // wpos equation is now as follows (ignoring pixel center offset):