diff --git a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp index 96be02d05..9d156d93a 100644 --- a/src/video_core/shader/generator/glsl_fs_shader_gen.cpp +++ b/src/video_core/shader/generator/glsl_fs_shader_gen.cpp @@ -3,6 +3,8 @@ // Refer to the license.txt file included. #include "video_core/shader/generator/glsl_fs_shader_gen.h" +#include "video_core/pica/regs_texturing.h" +#include "common/common_types.h" namespace Pica::Shader::Generator::GLSL { @@ -25,17 +27,6 @@ enum class Semantic : u32 { View, }; -static bool IsPassThroughTevStage(const Pica::TexturingRegs::TevStageConfig& stage) { - using TevStageConfig = Pica::TexturingRegs::TevStageConfig; - return (stage.color_op == TevStageConfig::Operation::Replace && - stage.alpha_op == TevStageConfig::Operation::Replace && - stage.color_source1 == TevStageConfig::Source::Previous && - stage.alpha_source1 == TevStageConfig::Source::Previous && - stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && - stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && - stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); -} - // High precision may or may not be supported in GLES3. If it isn't, use medium precision instead. static constexpr char fragment_shader_precision_OES[] = R"( #if GL_ES @@ -433,42 +424,57 @@ void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) out += fmt::format("if ({}) discard;\n", get_cond()); } +// Helper to detect passthrough TEV stages for optimization +static bool IsPassThroughTevStage(const Pica::TexturingRegs::TevStageConfig& stage) { + using TevStageConfig = Pica::TexturingRegs::TevStageConfig; + return (stage.color_op == TevStageConfig::Operation::Replace && + stage.alpha_op == TevStageConfig::Operation::Replace && + stage.color_source1 == TevStageConfig::Source::Previous && + stage.alpha_source1 == TevStageConfig::Source::Previous && + stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && + stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && + stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); +} + void FragmentModule::WriteTevStage(u32 index) { const TexturingRegs::TevStageConfig stage = config.texture.tev_stages[index]; - if (!IsPassThroughTevStage(stage)) { - out += "color_results_1 = "; - AppendColorModifier(stage.color_modifier1, stage.color_source1, index); - out += ";\ncolor_results_2 = "; - AppendColorModifier(stage.color_modifier2, stage.color_source2, index); - out += ";\ncolor_results_3 = "; - AppendColorModifier(stage.color_modifier3, stage.color_source3, index); - - // Round the output of each TEV stage to maintain the PICA's 8 bits of precision - out += fmt::format(";\nvec3 color_output_{} = byteround(", index); - AppendColorCombiner(stage.color_op); - out += ");\n"; - - if (stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { - // result of Dot3_RGBA operation is also placed to the alpha component - out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index); - } else { - out += "alpha_results_1 = "; - AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index); - out += ";\nalpha_results_2 = "; - AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index); - out += ";\nalpha_results_3 = "; - AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index); - - out += fmt::format(";\nfloat alpha_output_{} = byteround(", index); - AppendAlphaCombiner(stage.alpha_op); - out += ");\n"; - } - - out += fmt::format("combiner_output = vec4(" - "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), " - "clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n", - index, stage.GetColorMultiplier(), index, stage.GetAlphaMultiplier()); + if (IsPassThroughTevStage(stage)) { + // Skip passthrough stage for optimization + return; } + + out += "color_results_1 = "; + AppendColorModifier(stage.color_modifier1, stage.color_source1, index); + out += ";\ncolor_results_2 = "; + AppendColorModifier(stage.color_modifier2, stage.color_source2, index); + out += ";\ncolor_results_3 = "; + AppendColorModifier(stage.color_modifier3, stage.color_source3, index); + + // Round the output of each TEV stage to maintain the PICA's 8 bits of precision + out += fmt::format(";\nvec3 color_output_{} = byteround(", index); + AppendColorCombiner(stage.color_op); + out += ");\n"; + + if (stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { + // result of Dot3_RGBA operation is also placed to the alpha component + out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index); + } else { + out += "alpha_results_1 = "; + AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index); + out += ";\nalpha_results_2 = "; + AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index); + out += ";\nalpha_results_3 = "; + AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index); + + out += fmt::format(";\nfloat alpha_output_{} = byteround(", index); + AppendAlphaCombiner(stage.alpha_op); + out += ");\n"; + } + + out += fmt::format("combiner_output = vec4(" + "clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), " + "clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n", + index, stage.GetColorMultiplier(), index, stage.GetAlphaMultiplier()); out += "combiner_buffer = next_combiner_buffer;\n"; if (config.TevStageUpdatesCombinerBufferColor(index)) { diff --git a/src/video_core/shader/generator/pica_fs_config.cpp b/src/video_core/shader/generator/pica_fs_config.cpp index 34dee8712..9a11be15a 100644 --- a/src/video_core/shader/generator/pica_fs_config.cpp +++ b/src/video_core/shader/generator/pica_fs_config.cpp @@ -61,13 +61,16 @@ TextureConfig::TextureConfig(const Pica::TexturingRegs& regs, const Profile& pro } const auto& stages = regs.GetTevStages(); + using Op = Pica::TexturingRegs::TevStageConfig::Operation; + using TevStageConfig = Pica::TexturingRegs::TevStageConfig; for (std::size_t i = 0; i < tev_stages.size(); i++) { const auto& tev_stage = stages[i]; tev_stages[i].sources_raw = tev_stage.sources_raw; tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; tev_stages[i].ops_raw = tev_stage.ops_raw; tev_stages[i].scales_raw = tev_stage.scales_raw; - if (tev_stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) { + // Special handling for Dot3_RGBA operation + if (tev_stage.color_op == Op::Dot3_RGBA) { tev_stages[i].sources_raw &= 0xFFF; tev_stages[i].modifiers_raw &= 0xFFF; tev_stages[i].ops_raw &= 0xF; diff --git a/src/video_core/shader/generator/spv_fs_shader_gen.cpp b/src/video_core/shader/generator/spv_fs_shader_gen.cpp index 0c8d32623..2ff18efe1 100644 --- a/src/video_core/shader/generator/spv_fs_shader_gen.cpp +++ b/src/video_core/shader/generator/spv_fs_shader_gen.cpp @@ -632,50 +632,34 @@ void FragmentModule::WriteLighting() { void FragmentModule::WriteTevStage(s32 index) { const TexturingRegs::TevStageConfig stage = config.texture.tev_stages[index]; - // Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) - const auto is_passthrough_tev_stage = [](const TevStageConfig& stage) { - return (stage.color_op == TevStageConfig::Operation::Replace && - stage.alpha_op == TevStageConfig::Operation::Replace && - stage.color_source1 == TevStageConfig::Source::Previous && - stage.alpha_source1 == TevStageConfig::Source::Previous && - stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor && - stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha && - stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1); - }; + color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index); + color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index); + color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index); - if (!is_passthrough_tev_stage(stage)) { - color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index); - color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index); - color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index); + // Round the output of each TEV stage to maintain the PICA's 8 bits of precision + Id color_output{Byteround(AppendColorCombiner(stage.color_op), 3)}; + Id alpha_output{}; - // Round the output of each TEV stage to maintain the PICA's 8 bits of precision - Id color_output{Byteround(AppendColorCombiner(stage.color_op), 3)}; - Id alpha_output{}; + if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { + // result of Dot3_RGBA operation is also placed to the alpha component + alpha_output = OpCompositeExtract(f32_id, color_output, 0); + } else { + alpha_results_1 = AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index); + alpha_results_2 = AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index); + alpha_results_3 = AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index); - if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) { - // result of Dot3_RGBA operation is also placed to the alpha component - alpha_output = OpCompositeExtract(f32_id, color_output, 0); - } else { - alpha_results_1 = - AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index); - alpha_results_2 = - AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index); - alpha_results_3 = - AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index); - - alpha_output = Byteround(AppendAlphaCombiner(stage.alpha_op)); - } - - color_output = OpVectorTimesScalar( - vec_ids.Get(3), color_output, ConstF32(static_cast(stage.GetColorMultiplier()))); - color_output = OpFClamp(vec_ids.Get(3), color_output, ConstF32(0.f, 0.f, 0.f), - ConstF32(1.f, 1.f, 1.f)); - alpha_output = - OpFMul(f32_id, alpha_output, ConstF32(static_cast(stage.GetAlphaMultiplier()))); - alpha_output = OpFClamp(f32_id, alpha_output, ConstF32(0.f), ConstF32(1.f)); - combiner_output = OpCompositeConstruct(vec_ids.Get(4), color_output, alpha_output); + alpha_output = Byteround(AppendAlphaCombiner(stage.alpha_op)); } + color_output = OpVectorTimesScalar( + vec_ids.Get(3), color_output, ConstF32(static_cast(stage.GetColorMultiplier()))); + color_output = OpFClamp(vec_ids.Get(3), color_output, ConstF32(0.f, 0.f, 0.f), + ConstF32(1.f, 1.f, 1.f)); + alpha_output = + OpFMul(f32_id, alpha_output, ConstF32(static_cast(stage.GetAlphaMultiplier()))); + alpha_output = OpFClamp(f32_id, alpha_output, ConstF32(0.f), ConstF32(1.f)); + combiner_output = OpCompositeConstruct(vec_ids.Get(4), color_output, alpha_output); + combiner_buffer = next_combiner_buffer; if (config.TevStageUpdatesCombinerBufferColor(index)) { next_combiner_buffer =