rsx/cfg: Fix delay-slot detection when copying from the same register index but different precision.

2025-12-16 04:09:07 +00:00 · 2025-12-10 01:40:26 +03:00 · 2025-12-10 01:40:26 +03:00 · 1ea3c121fa
commit 1ea3c121fa
parent 2c6d3dde67
3 changed files with 54 additions and 4 deletions
--- a/rpcs3/Emu/RSX/Program/Assembler/FPASM.cpp
+++ b/rpcs3/Emu/RSX/Program/Assembler/FPASM.cpp
@ -24,6 +24,7 @@ namespace rsx::assembler
 		// Arithmetic
 		{ "NOP", { .op = RSX_FP_OPCODE_NOP, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
 		{ "MOV", { .op = RSX_FP_OPCODE_MOV, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
+		{ "MUL", { .op = RSX_FP_OPCODE_MUL, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
 		{ "ADD", { .op = RSX_FP_OPCODE_ADD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
 		{ "MAD", { .op = RSX_FP_OPCODE_MAD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
 		{ "FMA", { .op = RSX_FP_OPCODE_MAD, .exec_if_lt = true, .exec_if_eq = true, .exec_if_gt = true, .set_cond = false } },
--- a/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterAnnotationPass.cpp
+++ b/rpcs3/Emu/RSX/Program/Assembler/Passes/FP/RegisterAnnotationPass.cpp
@ -1,4 +1,5 @@
 #include "stdafx.h"
+
 #include "RegisterAnnotationPass.h"
 #include "Emu/RSX/Program/Assembler/FPOpcodes.h"
 #include "Emu/RSX/Program/RSXFragmentProgram.h"
@ -24,14 +25,27 @@ namespace rsx::assembler::FP
 			dst.no_dest ||                                // Must have a sink
 			src0.reg_type != RSX_FP_REGISTER_TYPE_TEMP || // Must read from reg
 			dst.dest_reg != src0.tmp_reg_index ||         // Must be a write-to-self
-			dst.fp16 ||                                   // Always full lane. We need to collect more data on this but it won't matter
-			dst.saturate ||                               // Precision modifier
-			(dst.prec != RSX_FP_PRECISION_REAL &&
-				dst.prec != RSX_FP_PRECISION_UNKNOWN))    // Cannot have precision modifiers
+			dst.fp16 != src0.fp16 ||                      // Must really be the same register
+			src0.abs || src0.neg ||
+			dst.saturate)                                 // Precision modifier
 		{
 			return false;
 		}

+		switch (dst.prec)
+		{
+		case RSX_FP_PRECISION_REAL:
+		case RSX_FP_PRECISION_UNKNOWN:
+			break;
+		case RSX_FP_PRECISION_HALF:
+			if (!src0.fp16) return false;
+			break;
+		case RSX_FP_PRECISION_FIXED12:
+		case RSX_FP_PRECISION_FIXED9:
+		case RSX_FP_PRECISION_SATURATE:
+			return false;
+		}
+
 		// Check if we have precision modifiers on the source
 		if (src0.abs || src0.neg || src1.scale)
 		{
--- a/rpcs3/tests/test_rsx_fp_asm.cpp
+++ b/rpcs3/tests/test_rsx_fp_asm.cpp
@ -731,4 +731,39 @@ namespace rsx::assembler
 		EXPECT_EQ(get_graph_block(graph, 5)->instructions.size(), 1);
 		EXPECT_EQ(get_graph_block(graph, 6)->instructions.size(), 1);
 	}
+
+	TEST(TestFPIR, RegisterDependencyPass_SplinterCell_DelaySlot)
+	{
+		// Real shader pattern found in Splinter Cell Blacklist.
+		// TEX instructions were replaced with MOV for simplicity.
+		// There are no dependent reads here, so no barriers are expected.
+		// In the game, the 4th instruction (MOV R0.xyz, H0.xyz) was misclassified as a delay slot, causing a skipped clobber.
+		auto ir = FPIR::from_source(R"(
+			MOV R0.w, #{ 0.25 }
+			MOV H0, H8
+			MUL R0.w, H0.w, R0.w
+			MOV R0.xyz, H0.xyz
+			MOV R1, #{ 0.25 }
+			FMA H0, R0, #{ 0.125 }, R1
+		)");
+
+		auto bytecode = ir.compile();
+		RSXFragmentProgram prog{};
+		prog.data = bytecode.data();
+		auto graph = deconstruct_fragment_program(prog);
+
+		// Verify state before running the passes: a single block holding all 6 instructions
+		ASSERT_EQ(graph.blocks.size(), 1);
+		EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 6);
+
+		FP::RegisterAnnotationPass annotation_pass{ prog, {.skip_delay_slots = true } };
+		FP::RegisterDependencyPass deps_pass{};
+
+		annotation_pass.run(graph);
+		deps_pass.run(graph);
+
+		// Verify state after the passes: instruction count unchanged and the epilogue left empty
+		EXPECT_EQ(get_graph_block(graph, 0)->instructions.size(), 6);
+		EXPECT_EQ(get_graph_block(graph, 0)->epilogue.size(), 0);
+	}
 }