From 54dbdc2d963d2c6529e259d4f4e5e883af4ec998 Mon Sep 17 00:00:00 2001 From: jbm11208 <81182113+jbm11208@users.noreply.github.com> Date: Sat, 10 May 2025 16:03:11 -0400 Subject: [PATCH] video_core/shader: Optimize fragment shader by skipping passthrough TEV stages This change adds a fast-path optimization in the fragment shader generator to detect and skip TEV stages that simply pass through their input unchanged. This reduces shader complexity and improves performance for common rendering cases where TEV stages are configured as passthrough. The optimization checks for: - Replace operation for both color and alpha - Source::Previous as the first color and alpha source - Identity modifiers on that source (SourceColor / SourceAlpha) - Color and alpha multipliers equal to 1 This is a safe optimization as it preserves exact PICA behavior while reducing unnecessary shader instructions. --- .../shader/generator/glsl_fs_shader_gen.cpp | 94 ++++++++++--------- .../shader/generator/pica_fs_config.cpp | 5 +- .../shader/generator/spv_fs_shader_gen.cpp | 62 +++++------- 3 files changed, 77 insertions(+), 84 deletions(-) diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp index 96be02d05..9d156d93a 100644 --- a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp @@ -3,6 +3,8 @@ // Refer to the license.txt file included.
#include "video_core/shader/generator/glsl_fs_shader_gen.h" +#include "video_core/pica/regs_texturing.h" +#include "common/common_types.h" namespace Pica::Shader::Generator::GLSL { @@ -25,17 +27,6 @@ enum class Semantic : u32 { View, }; -static bool IsPassThroughTevStage(const Pica::TexturingRegs::TevStageConfig& stage) { - using TevStageConfig = Pica::TexturingRegs::TevStageConfig; - return (stage.color_op == TevStageConfig::Operation::Replace && - stage.alpha_op == TevStageConfig::Operation::Replace && - stage.color_source1 == TevStageConfig::Source::Previous && - stage.alpha_source1 == TevStageConfig::Source::Previous && - stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && - stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && - stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); -} - // High precision may or may not be supported in GLES3. If it isn't, use medium precision instead. static constexpr char fragment_shader_precision_OES[] = R"( #if GL_ES @@ -433,42 +424,57 @@ void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) out += fmt::format("if ({}) discard;\n", get_cond()); } +// Helper to detect passthrough TEV stages for optimization +static bool IsPassThroughTevStage(const Pica::TexturingRegs::TevStageConfig& stage) { + using TevStageConfig = Pica::TexturingRegs::TevStageConfig; + return (stage.color_op == TevStageConfig::Operation::Replace && + stage.alpha_op == TevStageConfig::Operation::Replace && + stage.color_source1 == TevStageConfig::Source::Previous && + stage.alpha_source1 == TevStageConfig::Source::Previous && + stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && + stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && + stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); +} + void FragmentModule::WriteTevStage(u32 index) { const TexturingRegs::TevStageConfig stage = config.texture.tev_stages[index]; - if 
(!IsPassThroughTevStage(stage)) { - out += "color_results_1 = "; - AppendColorModifier(stage.color_modifier1, stage.color_source1, index); - out += ";\ncolor_results_2 = "; - AppendColorModifier(stage.color_modifier2, stage.color_source2, index); - out += ";\ncolor_results_3 = "; - AppendColorModifier(stage.color_modifier3, stage.color_source3, index); - - // Round the output of each TEV stage to maintain the PICA's 8 bits of precision - out += fmt::format(";\nvec3 color_output_{} = byteround(", index); - AppendColorCombiner(stage.color_op); - out += ");\n"; - - if (stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { - // result of Dot3_RGBA operation is also placed to the alpha component - out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index); - } else { - out += "alpha_results_1 = "; - AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index); - out += ";\nalpha_results_2 = "; - AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index); - out += ";\nalpha_results_3 = "; - AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index); - - out += fmt::format(";\nfloat alpha_output_{} = byteround(", index); - AppendAlphaCombiner(stage.alpha_op); - out += ");\n"; - } - - out += fmt::format("combiner_output = vec4(" - "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), " - "clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n", - index, stage.GetColorMultiplier(), index, stage.GetAlphaMultiplier()); + if (IsPassThroughTevStage(stage)) { + // Skip passthrough stage for optimization + return; } + + out += "color_results_1 = "; + AppendColorModifier(stage.color_modifier1, stage.color_source1, index); + out += ";\ncolor_results_2 = "; + AppendColorModifier(stage.color_modifier2, stage.color_source2, index); + out += ";\ncolor_results_3 = "; + AppendColorModifier(stage.color_modifier3, stage.color_source3, index); + + // Round the output of each TEV stage to maintain the PICA's 8 bits of 
precision + out += fmt::format(";\nvec3 color_output_{} = byteround(", index); + AppendColorCombiner(stage.color_op); + out += ");\n"; + + if (stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { + // result of Dot3_RGBA operation is also placed to the alpha component + out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index); + } else { + out += "alpha_results_1 = "; + AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index); + out += ";\nalpha_results_2 = "; + AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index); + out += ";\nalpha_results_3 = "; + AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index); + + out += fmt::format(";\nfloat alpha_output_{} = byteround(", index); + AppendAlphaCombiner(stage.alpha_op); + out += ");\n"; + } + + out += fmt::format("combiner_output = vec4(" + "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), " + "clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n", + index, stage.GetColorMultiplier(), index, stage.GetAlphaMultiplier()); out += "combiner_buffer = next_combiner_buffer;\n"; if (config.TevStageUpdatesCombinerBufferColor(index)) { diff --git a/src/video_core/shader/generator/pica_fs_config.cpp b/src/video_core/shader/generator/pica_fs_config.cpp index 34dee8712..9a11be15a 100644 --- a/src/video_core/shader/generator/pica_fs_config.cpp +++ b/src/video_core/shader/generator/pica_fs_config.cpp @@ -61,13 +61,16 @@ TextureConfig::TextureConfig(const Pica::TexturingRegs& regs, const Profile& pro } const auto& stages = regs.GetTevStages(); + using Op = Pica::TexturingRegs::TevStageConfig::Operation; + using TevStageConfig = Pica::TexturingRegs::TevStageConfig; for (std::size_t i = 0; i < tev_stages.size(); i++) { const auto& tev_stage = stages[i]; tev_stages[i].sources_raw = tev_stage.sources_raw; tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; tev_stages[i].ops_raw = tev_stage.ops_raw; tev_stages[i].scales_raw = tev_stage.scales_raw; - 
if (tev_stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { + // Special handling for Dot3_RGBA operation + if (tev_stage.color_op == Op::Dot3_RGBA) { tev_stages[i].sources_raw &= 0xFFF; tev_stages[i].modifiers_raw &= 0xFFF; tev_stages[i].ops_raw &= 0xF; diff --git a/src/video_core/shader/generator/spv_fs_shader_gen.cpp b/src/video_core/shader/generator/spv_fs_shader_gen.cpp index 0c8d32623..2ff18efe1 100644 --- a/src/video_core/shader/generator/spv_fs_shader_gen.cpp +++ b/src/video_core/shader/generator/spv_fs_shader_gen.cpp @@ -632,50 +632,34 @@ void FragmentModule::WriteLighting() { void FragmentModule::WriteTevStage(s32 index) { const TexturingRegs::TevStageConfig stage = config.texture.tev_stages[index]; - // Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) - const auto is_passthrough_tev_stage = [](const TevStageConfig& stage) { - return (stage.color_op == TevStageConfig::Operation::Replace && - stage.alpha_op == TevStageConfig::Operation::Replace && - stage.color_source1 == TevStageConfig::Source::Previous && - stage.alpha_source1 == TevStageConfig::Source::Previous && - stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && - stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && - stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); - }; + color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index); + color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index); + color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index); - if (!is_passthrough_tev_stage(stage)) { - color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index); - color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index); - color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index); + // Round the output 
of each TEV stage to maintain the PICA's 8 bits of precision + Id color_output{Byteround(AppendColorCombiner(stage.color_op), 3)}; + Id alpha_output{}; - // Round the output of each TEV stage to maintain the PICA's 8 bits of precision - Id color_output{Byteround(AppendColorCombiner(stage.color_op), 3)}; - Id alpha_output{}; + if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { + // result of Dot3_RGBA operation is also placed to the alpha component + alpha_output = OpCompositeExtract(f32_id, color_output, 0); + } else { + alpha_results_1 = AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index); + alpha_results_2 = AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index); + alpha_results_3 = AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index); - if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { - // result of Dot3_RGBA operation is also placed to the alpha component - alpha_output = OpCompositeExtract(f32_id, color_output, 0); - } else { - alpha_results_1 = - AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index); - alpha_results_2 = - AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index); - alpha_results_3 = - AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index); - - alpha_output = Byteround(AppendAlphaCombiner(stage.alpha_op)); - } - - color_output = OpVectorTimesScalar( - vec_ids.Get(3), color_output, ConstF32(static_cast(stage.GetColorMultiplier()))); - color_output = OpFClamp(vec_ids.Get(3), color_output, ConstF32(0.f, 0.f, 0.f), - ConstF32(1.f, 1.f, 1.f)); - alpha_output = - OpFMul(f32_id, alpha_output, ConstF32(static_cast(stage.GetAlphaMultiplier()))); - alpha_output = OpFClamp(f32_id, alpha_output, ConstF32(0.f), ConstF32(1.f)); - combiner_output = OpCompositeConstruct(vec_ids.Get(4), color_output, alpha_output); + alpha_output = Byteround(AppendAlphaCombiner(stage.alpha_op)); } + color_output = OpVectorTimesScalar( + vec_ids.Get(3), 
color_output, ConstF32(static_cast(stage.GetColorMultiplier()))); + color_output = OpFClamp(vec_ids.Get(3), color_output, ConstF32(0.f, 0.f, 0.f), + ConstF32(1.f, 1.f, 1.f)); + alpha_output = + OpFMul(f32_id, alpha_output, ConstF32(static_cast(stage.GetAlphaMultiplier()))); + alpha_output = OpFClamp(f32_id, alpha_output, ConstF32(0.f), ConstF32(1.f)); + combiner_output = OpCompositeConstruct(vec_ids.Get(4), color_output, alpha_output); + combiner_buffer = next_combiner_buffer; if (config.TevStageUpdatesCombinerBufferColor(index)) { next_combiner_buffer =