rsx/cfg: Fix delay-slot detection when copying from the same register index but different precision.

This commit is contained in:
kd-11 2025-12-10 01:40:26 +03:00 committed by kd-11
parent 2c6d3dde67
commit 1ea3c121fa
3 changed files with 54 additions and 4 deletions

View File

@ -24,6 +24,7 @@ namespace rsx::assembler
// Arithmetic // Arithmetic
{ "NOP", { .op = RSX_FP_OPCODE_NOP, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, { "NOP", { .op = RSX_FP_OPCODE_NOP, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
{ "MOV", { .op = RSX_FP_OPCODE_MOV, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, { "MOV", { .op = RSX_FP_OPCODE_MOV, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
{ "MUL", { .op = RSX_FP_OPCODE_MUL, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
{ "ADD", { .op = RSX_FP_OPCODE_ADD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, { "ADD", { .op = RSX_FP_OPCODE_ADD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
{ "MAD", { .op = RSX_FP_OPCODE_MAD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, { "MAD", { .op = RSX_FP_OPCODE_MAD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
{ "FMA", { .op = RSX_FP_OPCODE_MAD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } }, { "FMA", { .op = RSX_FP_OPCODE_MAD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },

View File

@ -1,4 +1,5 @@
#include "stdafx.h" #include "stdafx.h"
#include "RegisterAnnotationPass.h" #include "RegisterAnnotationPass.h"
#include "Emu/RSX/Program/Assembler/FPOpcodes.h" #include "Emu/RSX/Program/Assembler/FPOpcodes.h"
#include "Emu/RSX/Program/RSXFragmentProgram.h" #include "Emu/RSX/Program/RSXFragmentProgram.h"
@ -24,14 +25,27 @@ namespace rsx::assembler::FP
dst.no_dest || // Must have a sink dst.no_dest || // Must have a sink
src0.reg_type != RSX_FP_REGISTER_TYPE_TEMP || // Must read from reg src0.reg_type != RSX_FP_REGISTER_TYPE_TEMP || // Must read from reg
dst.dest_reg != src0.tmp_reg_index || // Must be a write-to-self dst.dest_reg != src0.tmp_reg_index || // Must be a write-to-self
dst.fp16 || // Always full lane. We need to collect more data on this but it won't matter dst.fp16 != src0.fp16 || // Must really be the same register
dst.saturate || // Precision modifier src0.abs || src0.neg ||
(dst.prec != RSX_FP_PRECISION_REAL && dst.saturate) // Precision modifier
dst.prec != RSX_FP_PRECISION_UNKNOWN)) // Cannot have precision modifiers
{ {
return false; return false;
} }
switch (dst.prec)
{
case RSX_FP_PRECISION_REAL:
case RSX_FP_PRECISION_UNKNOWN:
break;
case RSX_FP_PRECISION_HALF:
if (!src0.fp16) return false;
break;
case RSX_FP_PRECISION_FIXED12:
case RSX_FP_PRECISION_FIXED9:
case RSX_FP_PRECISION_SATURATE:
return false;
}
// Check if we have precision modifiers on the source // Check if we have precision modifiers on the source
if (src0.abs || src0.neg || src1.scale) if (src0.abs || src0.neg || src1.scale)
{ {

View File

@ -731,4 +731,39 @@ namespace rsx::assembler
EXPECT_EQ(get_graph_block(graph, 5)->instructions.size(), 1); EXPECT_EQ(get_graph_block(graph, 5)->instructions.size(), 1);
EXPECT_EQ(get_graph_block(graph, 6)->instructions.size(), 1); EXPECT_EQ(get_graph_block(graph, 6)->instructions.size(), 1);
} }
TEST(TestFPIR, RegisterDependencyPass_SplinterCell_DelaySlot)
{
    // Regression case built from a shader pattern observed in Splinter Cell: Blacklist.
    // The original TEX instructions are substituted with MOV to keep the program simple.
    // Nothing in this sequence is a dependent read, so no barriers should be emitted.
    // In the game, instruction 4 was wrongly treated as a delay slot, so a clobber was skipped.
    auto shader_ir = FPIR::from_source(R"(
MOV R0.w, #{ 0.25 }
MOV H0, H8
MUL R0.w, H0.w, R0.w
MOV R0.xyz, H0.xyz
MOV R1, #{ 0.25 }
FMA H0, R0, #{ 0.125 }, R1
)");

    // Assemble the IR and wrap the resulting bytecode in a fragment program descriptor.
    auto compiled = shader_ir.compile();

    RSXFragmentProgram program{};
    program.data = compiled.data();

    auto cfg = deconstruct_fragment_program(program);

    // Precondition: a single block holding all six instructions.
    ASSERT_EQ(cfg.blocks.size(), 1);
    EXPECT_EQ(get_graph_block(cfg, 0)->instructions.size(), 6);

    // Both passes are constructed up front, then run in order: annotation first, dependencies second.
    FP::RegisterAnnotationPass annotate{ program, {.skip_delay_slots = true } };
    FP::RegisterDependencyPass resolve_deps{};

    annotate.run(cfg);
    resolve_deps.run(cfg);

    // Postcondition: the instruction count is unchanged and the epilogue stays empty.
    EXPECT_EQ(get_graph_block(cfg, 0)->instructions.size(), 6);
    EXPECT_EQ(get_graph_block(cfg, 0)->epilogue.size(), 0);
}
} }