video_core/shader: Optimize fragment shader by skipping passthrough TEV stages

This change adds a fast-path optimization in the fragment shader generator to detect and skip TEV stages that simply pass through their input unchanged. This reduces shader complexity and improves performance for common rendering cases where TEV stages are configured as passthrough.

The optimization checks for:
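- Replace operation for both color and alpha
- Previous stage output (`Source::Previous`) as the first color and alpha source
- Identity modifiers on that source (`SourceColor` / `SourceAlpha`)
- Unity color and alpha multipliers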

This is a safe optimization as it preserves exact PICA behavior while reducing unnecessary shader instructions.
This commit is contained in:
jbm11208 2025-05-10 16:03:11 -04:00 committed by OpenSauce
parent e0b8e8440a
commit 54dbdc2d96
3 changed files with 77 additions and 84 deletions

View File

@ -3,6 +3,8 @@
// Refer to the license.txt file included.
#include "video_core/shader/generator/glsl_fs_shader_gen.h"
#include "video_core/pica/regs_texturing.h"
#include "common/common_types.h"
namespace Pica::Shader::Generator::GLSL {
@ -25,17 +27,6 @@ enum class Semantic : u32 {
View,
};
// Reports whether a TEV stage is configured as a pure passthrough, i.e. every one of the
// following holds: Replace for both combiners, Source::Previous as the first color and alpha
// source, the SourceColor / SourceAlpha modifiers on that source, and multipliers equal to 1.
// Callers use this to avoid emitting shader code for stages that would change nothing.
static bool IsPassThroughTevStage(const Pica::TexturingRegs::TevStageConfig& stage) {
    using Stage = Pica::TexturingRegs::TevStageConfig;

    // Color and alpha combiners must both be Replace.
    const bool replaces_both = stage.color_op == Stage::Operation::Replace &&
                               stage.alpha_op == Stage::Operation::Replace;
    if (!replaces_both) {
        return false;
    }

    // The first source of each combiner must be Source::Previous.
    const bool reads_previous = stage.color_source1 == Stage::Source::Previous &&
                                stage.alpha_source1 == Stage::Source::Previous;
    if (!reads_previous) {
        return false;
    }

    // That source must be read through the SourceColor / SourceAlpha modifiers.
    const bool unmodified = stage.color_modifier1 == Stage::ColorModifier::SourceColor &&
                            stage.alpha_modifier1 == Stage::AlphaModifier::SourceAlpha;
    if (!unmodified) {
        return false;
    }

    // Finally, neither channel may be scaled.
    return stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1;
}
// High precision may or may not be supported in GLES3. If it isn't, use medium precision instead.
static constexpr char fragment_shader_precision_OES[] = R"(
#if GL_ES
@ -433,42 +424,57 @@ void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func)
out += fmt::format("if ({}) discard;\n", get_cond());
}
// Helper used by the TEV stage writer: returns true when a stage is configured as a pure
// passthrough (Replace for both combiners, Source::Previous as the first color and alpha
// source, SourceColor / SourceAlpha modifiers on that source, and multipliers equal to 1),
// so that no combiner code needs to be generated for it.
static bool IsPassThroughTevStage(const Pica::TexturingRegs::TevStageConfig& stage) {
    using Stage = Pica::TexturingRegs::TevStageConfig;

    // Color and alpha combiners must both be Replace.
    const bool replaces_both = stage.color_op == Stage::Operation::Replace &&
                               stage.alpha_op == Stage::Operation::Replace;
    if (!replaces_both) {
        return false;
    }

    // The first source of each combiner must be Source::Previous.
    const bool reads_previous = stage.color_source1 == Stage::Source::Previous &&
                                stage.alpha_source1 == Stage::Source::Previous;
    if (!reads_previous) {
        return false;
    }

    // That source must be read through the SourceColor / SourceAlpha modifiers.
    const bool unmodified = stage.color_modifier1 == Stage::ColorModifier::SourceColor &&
                            stage.alpha_modifier1 == Stage::AlphaModifier::SourceAlpha;
    if (!unmodified) {
        return false;
    }

    // Finally, neither channel may be scaled.
    return stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1;
}
void FragmentModule::WriteTevStage(u32 index) {
const TexturingRegs::TevStageConfig stage = config.texture.tev_stages[index];
if (!IsPassThroughTevStage(stage)) {
out += "color_results_1 = ";
AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
out += ";\ncolor_results_2 = ";
AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
out += ";\ncolor_results_3 = ";
AppendColorModifier(stage.color_modifier3, stage.color_source3, index);
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
out += fmt::format(";\nvec3 color_output_{} = byteround(", index);
AppendColorCombiner(stage.color_op);
out += ");\n";
if (stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
// result of Dot3_RGBA operation is also placed to the alpha component
out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index);
} else {
out += "alpha_results_1 = ";
AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
out += ";\nalpha_results_2 = ";
AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
out += ";\nalpha_results_3 = ";
AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);
out += fmt::format(";\nfloat alpha_output_{} = byteround(", index);
AppendAlphaCombiner(stage.alpha_op);
out += ");\n";
}
out += fmt::format("combiner_output = vec4("
"clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), "
"clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n",
index, stage.GetColorMultiplier(), index, stage.GetAlphaMultiplier());
if (IsPassThroughTevStage(stage)) {
// Skip passthrough stage for optimization
return;
}
out += "color_results_1 = ";
AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
out += ";\ncolor_results_2 = ";
AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
out += ";\ncolor_results_3 = ";
AppendColorModifier(stage.color_modifier3, stage.color_source3, index);
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
out += fmt::format(";\nvec3 color_output_{} = byteround(", index);
AppendColorCombiner(stage.color_op);
out += ");\n";
if (stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
// result of Dot3_RGBA operation is also placed to the alpha component
out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index);
} else {
out += "alpha_results_1 = ";
AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
out += ";\nalpha_results_2 = ";
AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
out += ";\nalpha_results_3 = ";
AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);
out += fmt::format(";\nfloat alpha_output_{} = byteround(", index);
AppendAlphaCombiner(stage.alpha_op);
out += ");\n";
}
out += fmt::format("combiner_output = vec4("
"clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), "
"clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n",
index, stage.GetColorMultiplier(), index, stage.GetAlphaMultiplier());
out += "combiner_buffer = next_combiner_buffer;\n";
if (config.TevStageUpdatesCombinerBufferColor(index)) {

View File

@ -61,13 +61,16 @@ TextureConfig::TextureConfig(const Pica::TexturingRegs& regs, const Profile& pro
}
const auto& stages = regs.GetTevStages();
using Op = Pica::TexturingRegs::TevStageConfig::Operation;
using TevStageConfig = Pica::TexturingRegs::TevStageConfig;
for (std::size_t i = 0; i < tev_stages.size(); i++) {
const auto& tev_stage = stages[i];
tev_stages[i].sources_raw = tev_stage.sources_raw;
tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
tev_stages[i].ops_raw = tev_stage.ops_raw;
tev_stages[i].scales_raw = tev_stage.scales_raw;
if (tev_stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
// Special handling for Dot3_RGBA operation
if (tev_stage.color_op == Op::Dot3_RGBA) {
tev_stages[i].sources_raw &= 0xFFF;
tev_stages[i].modifiers_raw &= 0xFFF;
tev_stages[i].ops_raw &= 0xF;

View File

@ -632,50 +632,34 @@ void FragmentModule::WriteLighting() {
void FragmentModule::WriteTevStage(s32 index) {
const TexturingRegs::TevStageConfig stage = config.texture.tev_stages[index];
// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
const auto is_passthrough_tev_stage = [](const TevStageConfig& stage) {
return (stage.color_op == TevStageConfig::Operation::Replace &&
stage.alpha_op == TevStageConfig::Operation::Replace &&
stage.color_source1 == TevStageConfig::Source::Previous &&
stage.alpha_source1 == TevStageConfig::Source::Previous &&
stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
};
color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index);
if (!is_passthrough_tev_stage(stage)) {
color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index);
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
Id color_output{Byteround(AppendColorCombiner(stage.color_op), 3)};
Id alpha_output{};
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
Id color_output{Byteround(AppendColorCombiner(stage.color_op), 3)};
Id alpha_output{};
if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
// result of Dot3_RGBA operation is also placed to the alpha component
alpha_output = OpCompositeExtract(f32_id, color_output, 0);
} else {
alpha_results_1 = AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
alpha_results_2 = AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
alpha_results_3 = AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);
if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
// result of Dot3_RGBA operation is also placed to the alpha component
alpha_output = OpCompositeExtract(f32_id, color_output, 0);
} else {
alpha_results_1 =
AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
alpha_results_2 =
AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
alpha_results_3 =
AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);
alpha_output = Byteround(AppendAlphaCombiner(stage.alpha_op));
}
color_output = OpVectorTimesScalar(
vec_ids.Get(3), color_output, ConstF32(static_cast<float>(stage.GetColorMultiplier())));
color_output = OpFClamp(vec_ids.Get(3), color_output, ConstF32(0.f, 0.f, 0.f),
ConstF32(1.f, 1.f, 1.f));
alpha_output =
OpFMul(f32_id, alpha_output, ConstF32(static_cast<float>(stage.GetAlphaMultiplier())));
alpha_output = OpFClamp(f32_id, alpha_output, ConstF32(0.f), ConstF32(1.f));
combiner_output = OpCompositeConstruct(vec_ids.Get(4), color_output, alpha_output);
alpha_output = Byteround(AppendAlphaCombiner(stage.alpha_op));
}
color_output = OpVectorTimesScalar(
vec_ids.Get(3), color_output, ConstF32(static_cast<float>(stage.GetColorMultiplier())));
color_output = OpFClamp(vec_ids.Get(3), color_output, ConstF32(0.f, 0.f, 0.f),
ConstF32(1.f, 1.f, 1.f));
alpha_output =
OpFMul(f32_id, alpha_output, ConstF32(static_cast<float>(stage.GetAlphaMultiplier())));
alpha_output = OpFClamp(f32_id, alpha_output, ConstF32(0.f), ConstF32(1.f));
combiner_output = OpCompositeConstruct(vec_ids.Get(4), color_output, alpha_output);
combiner_buffer = next_combiner_buffer;
if (config.TevStageUpdatesCombinerBufferColor(index)) {
next_combiner_buffer =