mirror of
https://github.com/Lime3DS/Lime3DS.git
synced 2026-06-10 01:55:01 -06:00
video_core/shader: Optimize fragment shader by skipping passthrough TEV stages
This change adds a fast-path optimization in the fragment shader generator to detect and skip TEV stages that simply pass through their input unchanged. This reduces shader complexity and improves performance for common rendering cases where TEV stages are configured as passthrough.

The optimization checks for:
- The Replace operation for both color and alpha
- `Source::Previous` as the first color and alpha source
- No color/alpha modifiers (i.e. plain `SourceColor` / `SourceAlpha`)
- Unity multipliers

This is a safe optimization, as it preserves exact PICA behavior while reducing unnecessary shader instructions.
This commit is contained in:
parent
e0b8e8440a
commit
54dbdc2d96
@ -3,6 +3,8 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/shader/generator/glsl_fs_shader_gen.h"
|
||||
#include "video_core/pica/regs_texturing.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Pica::Shader::Generator::GLSL {
|
||||
|
||||
@ -25,17 +27,6 @@ enum class Semantic : u32 {
|
||||
View,
|
||||
};
|
||||
|
||||
// Reports whether a TEV stage is configured as a passthrough.
//
// Returns true only when every one of the following holds for `stage`:
//   - both the color and the alpha operation are Operation::Replace,
//   - the first color source and the first alpha source are Source::Previous,
//   - the first color modifier is ColorModifier::SourceColor and the first
//     alpha modifier is AlphaModifier::SourceAlpha,
//   - GetColorMultiplier() and GetAlphaMultiplier() both return 1.
//
// Only the first source/modifier of each channel is inspected.
// NOTE(review): presumably because Replace consumes only its first operand --
// confirm against AppendColorCombiner / AppendAlphaCombiner.
static bool IsPassThroughTevStage(const Pica::TexturingRegs::TevStageConfig& stage) {
|
||||
using TevStageConfig = Pica::TexturingRegs::TevStageConfig;
|
||||
return (stage.color_op == TevStageConfig::Operation::Replace &&
|
||||
stage.alpha_op == TevStageConfig::Operation::Replace &&
|
||||
stage.color_source1 == TevStageConfig::Source::Previous &&
|
||||
stage.alpha_source1 == TevStageConfig::Source::Previous &&
|
||||
stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
|
||||
stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
|
||||
stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
|
||||
}
|
||||
|
||||
// High precision may or may not be supported in GLES3. If it isn't, use medium precision instead.
|
||||
static constexpr char fragment_shader_precision_OES[] = R"(
|
||||
#if GL_ES
|
||||
@ -433,42 +424,57 @@ void FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func)
|
||||
out += fmt::format("if ({}) discard;\n", get_cond());
|
||||
}
|
||||
|
||||
// Helper to detect passthrough TEV stages for optimization.
//
// Returns true only when every one of the following holds for `stage`:
//   - both the color and the alpha operation are Operation::Replace,
//   - the first color source and the first alpha source are Source::Previous,
//   - the first color modifier is ColorModifier::SourceColor and the first
//     alpha modifier is AlphaModifier::SourceAlpha,
//   - GetColorMultiplier() and GetAlphaMultiplier() both return 1.
//
// Only the first source/modifier of each channel is inspected.
// NOTE(review): presumably because Replace consumes only its first operand --
// confirm against AppendColorCombiner / AppendAlphaCombiner.
|
||||
static bool IsPassThroughTevStage(const Pica::TexturingRegs::TevStageConfig& stage) {
|
||||
using TevStageConfig = Pica::TexturingRegs::TevStageConfig;
|
||||
return (stage.color_op == TevStageConfig::Operation::Replace &&
|
||||
stage.alpha_op == TevStageConfig::Operation::Replace &&
|
||||
stage.color_source1 == TevStageConfig::Source::Previous &&
|
||||
stage.alpha_source1 == TevStageConfig::Source::Previous &&
|
||||
stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
|
||||
stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
|
||||
stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
|
||||
}
|
||||
|
||||
void FragmentModule::WriteTevStage(u32 index) {
|
||||
const TexturingRegs::TevStageConfig stage = config.texture.tev_stages[index];
|
||||
if (!IsPassThroughTevStage(stage)) {
|
||||
out += "color_results_1 = ";
|
||||
AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
|
||||
out += ";\ncolor_results_2 = ";
|
||||
AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
|
||||
out += ";\ncolor_results_3 = ";
|
||||
AppendColorModifier(stage.color_modifier3, stage.color_source3, index);
|
||||
|
||||
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
|
||||
out += fmt::format(";\nvec3 color_output_{} = byteround(", index);
|
||||
AppendColorCombiner(stage.color_op);
|
||||
out += ");\n";
|
||||
|
||||
if (stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
|
||||
// result of Dot3_RGBA operation is also placed to the alpha component
|
||||
out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index);
|
||||
} else {
|
||||
out += "alpha_results_1 = ";
|
||||
AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
|
||||
out += ";\nalpha_results_2 = ";
|
||||
AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
|
||||
out += ";\nalpha_results_3 = ";
|
||||
AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);
|
||||
|
||||
out += fmt::format(";\nfloat alpha_output_{} = byteround(", index);
|
||||
AppendAlphaCombiner(stage.alpha_op);
|
||||
out += ");\n";
|
||||
}
|
||||
|
||||
out += fmt::format("combiner_output = vec4("
|
||||
"clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), "
|
||||
"clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n",
|
||||
index, stage.GetColorMultiplier(), index, stage.GetAlphaMultiplier());
|
||||
if (IsPassThroughTevStage(stage)) {
|
||||
// Skip passthrough stage for optimization
|
||||
return;
|
||||
}
|
||||
|
||||
out += "color_results_1 = ";
|
||||
AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
|
||||
out += ";\ncolor_results_2 = ";
|
||||
AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
|
||||
out += ";\ncolor_results_3 = ";
|
||||
AppendColorModifier(stage.color_modifier3, stage.color_source3, index);
|
||||
|
||||
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
|
||||
out += fmt::format(";\nvec3 color_output_{} = byteround(", index);
|
||||
AppendColorCombiner(stage.color_op);
|
||||
out += ");\n";
|
||||
|
||||
if (stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
|
||||
// result of Dot3_RGBA operation is also placed to the alpha component
|
||||
out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index);
|
||||
} else {
|
||||
out += "alpha_results_1 = ";
|
||||
AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
|
||||
out += ";\nalpha_results_2 = ";
|
||||
AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
|
||||
out += ";\nalpha_results_3 = ";
|
||||
AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);
|
||||
|
||||
out += fmt::format(";\nfloat alpha_output_{} = byteround(", index);
|
||||
AppendAlphaCombiner(stage.alpha_op);
|
||||
out += ");\n";
|
||||
}
|
||||
|
||||
out += fmt::format("combiner_output = vec4("
|
||||
"clamp(color_output_{} * {}.0, vec3(0.0), vec3(1.0)), "
|
||||
"clamp(alpha_output_{} * {}.0, 0.0, 1.0));\n",
|
||||
index, stage.GetColorMultiplier(), index, stage.GetAlphaMultiplier());
|
||||
|
||||
out += "combiner_buffer = next_combiner_buffer;\n";
|
||||
if (config.TevStageUpdatesCombinerBufferColor(index)) {
|
||||
|
||||
@ -61,13 +61,16 @@ TextureConfig::TextureConfig(const Pica::TexturingRegs& regs, const Profile& pro
|
||||
}
|
||||
|
||||
const auto& stages = regs.GetTevStages();
|
||||
using Op = Pica::TexturingRegs::TevStageConfig::Operation;
|
||||
using TevStageConfig = Pica::TexturingRegs::TevStageConfig;
|
||||
for (std::size_t i = 0; i < tev_stages.size(); i++) {
|
||||
const auto& tev_stage = stages[i];
|
||||
tev_stages[i].sources_raw = tev_stage.sources_raw;
|
||||
tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
|
||||
tev_stages[i].ops_raw = tev_stage.ops_raw;
|
||||
tev_stages[i].scales_raw = tev_stage.scales_raw;
|
||||
if (tev_stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
|
||||
// Special handling for Dot3_RGBA operation
|
||||
if (tev_stage.color_op == Op::Dot3_RGBA) {
|
||||
tev_stages[i].sources_raw &= 0xFFF;
|
||||
tev_stages[i].modifiers_raw &= 0xFFF;
|
||||
tev_stages[i].ops_raw &= 0xF;
|
||||
|
||||
@ -632,50 +632,34 @@ void FragmentModule::WriteLighting() {
|
||||
void FragmentModule::WriteTevStage(s32 index) {
|
||||
const TexturingRegs::TevStageConfig stage = config.texture.tev_stages[index];
|
||||
|
||||
// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
|
||||
const auto is_passthrough_tev_stage = [](const TevStageConfig& stage) {
|
||||
return (stage.color_op == TevStageConfig::Operation::Replace &&
|
||||
stage.alpha_op == TevStageConfig::Operation::Replace &&
|
||||
stage.color_source1 == TevStageConfig::Source::Previous &&
|
||||
stage.alpha_source1 == TevStageConfig::Source::Previous &&
|
||||
stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
|
||||
stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
|
||||
stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
|
||||
};
|
||||
color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
|
||||
color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
|
||||
color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index);
|
||||
|
||||
if (!is_passthrough_tev_stage(stage)) {
|
||||
color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
|
||||
color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
|
||||
color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index);
|
||||
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
|
||||
Id color_output{Byteround(AppendColorCombiner(stage.color_op), 3)};
|
||||
Id alpha_output{};
|
||||
|
||||
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
|
||||
Id color_output{Byteround(AppendColorCombiner(stage.color_op), 3)};
|
||||
Id alpha_output{};
|
||||
if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
|
||||
// result of Dot3_RGBA operation is also placed to the alpha component
|
||||
alpha_output = OpCompositeExtract(f32_id, color_output, 0);
|
||||
} else {
|
||||
alpha_results_1 = AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
|
||||
alpha_results_2 = AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
|
||||
alpha_results_3 = AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);
|
||||
|
||||
if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
|
||||
// result of Dot3_RGBA operation is also placed to the alpha component
|
||||
alpha_output = OpCompositeExtract(f32_id, color_output, 0);
|
||||
} else {
|
||||
alpha_results_1 =
|
||||
AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
|
||||
alpha_results_2 =
|
||||
AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
|
||||
alpha_results_3 =
|
||||
AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);
|
||||
|
||||
alpha_output = Byteround(AppendAlphaCombiner(stage.alpha_op));
|
||||
}
|
||||
|
||||
color_output = OpVectorTimesScalar(
|
||||
vec_ids.Get(3), color_output, ConstF32(static_cast<float>(stage.GetColorMultiplier())));
|
||||
color_output = OpFClamp(vec_ids.Get(3), color_output, ConstF32(0.f, 0.f, 0.f),
|
||||
ConstF32(1.f, 1.f, 1.f));
|
||||
alpha_output =
|
||||
OpFMul(f32_id, alpha_output, ConstF32(static_cast<float>(stage.GetAlphaMultiplier())));
|
||||
alpha_output = OpFClamp(f32_id, alpha_output, ConstF32(0.f), ConstF32(1.f));
|
||||
combiner_output = OpCompositeConstruct(vec_ids.Get(4), color_output, alpha_output);
|
||||
alpha_output = Byteround(AppendAlphaCombiner(stage.alpha_op));
|
||||
}
|
||||
|
||||
color_output = OpVectorTimesScalar(
|
||||
vec_ids.Get(3), color_output, ConstF32(static_cast<float>(stage.GetColorMultiplier())));
|
||||
color_output = OpFClamp(vec_ids.Get(3), color_output, ConstF32(0.f, 0.f, 0.f),
|
||||
ConstF32(1.f, 1.f, 1.f));
|
||||
alpha_output =
|
||||
OpFMul(f32_id, alpha_output, ConstF32(static_cast<float>(stage.GetAlphaMultiplier())));
|
||||
alpha_output = OpFClamp(f32_id, alpha_output, ConstF32(0.f), ConstF32(1.f));
|
||||
combiner_output = OpCompositeConstruct(vec_ids.Get(4), color_output, alpha_output);
|
||||
|
||||
combiner_buffer = next_combiner_buffer;
|
||||
if (config.TevStageUpdatesCombinerBufferColor(index)) {
|
||||
next_combiner_buffer =
|
||||
|
||||
Loading…
Reference in New Issue
Block a user