mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-06-02 04:36:57 -06:00
rsx/cfg/fp: Add delay-slot detection to remove unnecessary barriers
- Reduces emitted barriers by roughly 99%
This commit is contained in:
parent
93f89b8a74
commit
1e6fe1f4ab
@ -1,6 +1,7 @@
|
|||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "RegisterAnnotationPass.h"
|
#include "RegisterAnnotationPass.h"
|
||||||
#include "Emu/RSX/Program/Assembler/FPOpcodes.h"
|
#include "Emu/RSX/Program/Assembler/FPOpcodes.h"
|
||||||
|
#include "Emu/RSX/Program/RSXFragmentProgram.h"
|
||||||
|
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
@ -13,6 +14,38 @@ namespace rsx::assembler::FP
|
|||||||
static constexpr char content_float16 = 'H';
|
static constexpr char content_float16 = 'H';
|
||||||
static constexpr char content_dual = 'D';
|
static constexpr char content_dual = 'D';
|
||||||
|
|
||||||
|
// Returns true when the instruction looks like a "delay slot": a MOV that copies a
// temp register onto itself without altering the value, so it can be ignored by the
// annotation pass.
//
// The three bytecode words are decoded as OPDEST (word 0), SRC0 (word 1) and SRC1 (word 2).
// The instruction qualifies only when ALL of the following hold:
//  - the opcode is MOV and a destination is actually written (no_dest is clear)
//  - source 0 is a temp register whose index equals the destination register index
//  - the destination is not flagged fp16, does not saturate, and its precision is
//    either REAL or UNKNOWN
//  - source 0 has neither abs nor neg set, and src1.scale is zero
//  - every component enabled in the destination write mask reads the same component
//    from the source (identity swizzle); masked-off components are not checked
//
// NOTE(review): only dst.fp16 is tested. If SRC0 carries its own half-precision flag,
// a MOV from the half register with the same index would still pass — confirm whether
// that case can occur and whether it should be rejected. Likewise, any condition-code
// update the destination word may request is not inspected here — TODO confirm.
bool is_delay_slot(const Instruction& instruction)
|
||||||
|
{
|
||||||
|
OPDEST dst{ .HEX = instruction.bytecode[0] };
|
||||||
|
SRC0 src0{ .HEX = instruction.bytecode[1] };
|
||||||
|
SRC1 src1{ .HEX = instruction.bytecode[2] };
|
||||||
|
|
||||||
|
if (dst.opcode != RSX_FP_OPCODE_MOV || // These slots are always populated with MOV
|
||||||
|
dst.no_dest || // Must have a sink
|
||||||
|
src0.reg_type != RSX_FP_REGISTER_TYPE_TEMP || // Must read from reg
|
||||||
|
dst.dest_reg != src0.tmp_reg_index || // Must be a write-to-self
|
||||||
|
dst.fp16 || // Always full lane. We need to collect more data on this but it won't matter
|
||||||
|
dst.saturate || // Precision modifier
|
||||||
|
(dst.prec != RSX_FP_PRECISION_REAL &&
|
||||||
|
dst.prec != RSX_FP_PRECISION_UNKNOWN)) // Cannot have precision modifiers
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we have precision modifiers on the source
|
||||||
|
if (src0.abs || src0.neg || src1.scale)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dst.mask_x && src0.swizzle_x != 0) return false;
|
||||||
|
if (dst.mask_y && src0.swizzle_y != 1) return false;
|
||||||
|
if (dst.mask_z && src0.swizzle_z != 2) return false;
|
||||||
|
if (dst.mask_w && src0.swizzle_w != 3) return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<RegisterRef> compile_register_file(const std::array<char, 48 * 8>& file)
|
std::vector<RegisterRef> compile_register_file(const std::array<char, 48 * 8>& file)
|
||||||
{
|
{
|
||||||
std::vector<RegisterRef> results;
|
std::vector<RegisterRef> results;
|
||||||
@ -90,10 +123,15 @@ namespace rsx::assembler::FP
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Decay instructions into register references
|
// Decay instructions into register references
|
||||||
void annotate_instructions(BasicBlock* block, const RSXFragmentProgram& prog)
|
void annotate_instructions(BasicBlock* block, const RSXFragmentProgram& prog, bool skip_delay_slots)
|
||||||
{
|
{
|
||||||
for (auto& instruction : block->instructions)
|
for (auto& instruction : block->instructions)
|
||||||
{
|
{
|
||||||
|
if (skip_delay_slots && is_delay_slot(instruction))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
const u32 operand_count = get_operand_count(static_cast<FP_opcode>(instruction.opcode));
|
const u32 operand_count = get_operand_count(static_cast<FP_opcode>(instruction.opcode));
|
||||||
for (u32 i = 0; i < operand_count; i++)
|
for (u32 i = 0; i < operand_count; i++)
|
||||||
{
|
{
|
||||||
@ -178,7 +216,7 @@ namespace rsx::assembler::FP
|
|||||||
{
|
{
|
||||||
for (auto& block : graph.blocks)
|
for (auto& block : graph.blocks)
|
||||||
{
|
{
|
||||||
annotate_instructions(&block, m_prog);
|
annotate_instructions(&block, m_prog, m_config.skip_delay_slots);
|
||||||
annotate_block_io(&block);
|
annotate_block_io(&block);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -6,6 +6,11 @@ struct RSXFragmentProgram;
|
|||||||
|
|
||||||
namespace rsx::assembler::FP
|
namespace rsx::assembler::FP
|
||||||
{
|
{
|
||||||
|
// Configuration for RegisterAnnotationPass. Passed to the pass constructor, which
// defaults it to `{}` so existing callers keep the previous behavior (all options off).
struct RegisterAnnotationPassOptions
|
||||||
|
{
|
||||||
|
bool skip_delay_slots = false; // When enabled, detect delay slots and ignore annotating them.
|
||||||
|
};
|
||||||
|
|
||||||
// The annotation pass annotates each basic block with 2 pieces of information:
|
// The annotation pass annotates each basic block with 2 pieces of information:
|
||||||
// 1. The "input" register list for a block.
|
// 1. The "input" register list for a block.
|
||||||
// 2. The "output" register list for a block (clobber list).
|
// 2. The "output" register list for a block (clobber list).
|
||||||
@ -14,13 +19,16 @@ namespace rsx::assembler::FP
|
|||||||
class RegisterAnnotationPass : public CFGPass
|
class RegisterAnnotationPass : public CFGPass
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
RegisterAnnotationPass(const RSXFragmentProgram& prog)
|
RegisterAnnotationPass(
|
||||||
: m_prog(prog)
|
const RSXFragmentProgram& prog,
|
||||||
|
const RegisterAnnotationPassOptions& options = {})
|
||||||
|
: m_prog(prog), m_config(options)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
void run(FlowGraph& graph) override;
|
void run(FlowGraph& graph) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const RSXFragmentProgram& m_prog;
|
const RSXFragmentProgram& m_prog;
|
||||||
|
RegisterAnnotationPassOptions m_config;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1294,7 +1294,7 @@ std::string FragmentProgramDecompiler::Decompile()
|
|||||||
const auto rop_inputs = get_fragment_program_output_set(m_prog.ctrl, m_prog.mrt_buffers_count);
|
const auto rop_inputs = get_fragment_program_output_set(m_prog.ctrl, m_prog.mrt_buffers_count);
|
||||||
rop_block->input_list.insert(rop_block->input_list.end(), rop_inputs.begin(), rop_inputs.end());
|
rop_block->input_list.insert(rop_block->input_list.end(), rop_inputs.begin(), rop_inputs.end());
|
||||||
|
|
||||||
FP::RegisterAnnotationPass annotation_pass{ m_prog };
|
FP::RegisterAnnotationPass annotation_pass{ m_prog, { .skip_delay_slots = true } };
|
||||||
FP::RegisterDependencyPass dependency_pass{};
|
FP::RegisterDependencyPass dependency_pass{};
|
||||||
|
|
||||||
annotation_pass.run(graph);
|
annotation_pass.run(graph);
|
||||||
|
|||||||
@ -568,4 +568,30 @@ namespace rsx::assembler
|
|||||||
EXPECT_EQ(src1.fp16, 1);
|
EXPECT_EQ(src1.fp16, 1);
|
||||||
EXPECT_EQ(src1.swizzle_x, 1);
|
EXPECT_EQ(src1.swizzle_x, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs the annotation pass with skip_delay_slots enabled, followed by the dependency
// pass, over a single block of three instructions, and checks that the block still
// holds exactly three instructions afterwards (i.e. nothing was inserted).
// NOTE(review): the second assembly line ("MOV H1, R1") has no trailing ';' unlike
// its neighbours — confirm the assembler accepts this and it is intentional.
TEST(TestFPIR, RegisterDependencyPass_SkipDelaySlots)
|
||||||
|
{
|
||||||
|
// Instruction 2 clobbers H1 which in turn clobbers R0.
|
||||||
|
// Instruction 3 reads from R0 but is a delay slot that does nothing and can be NOPed.
|
||||||
|
auto graph = CFG_from_source(R"(
|
||||||
|
ADD R1, R0, R1;
|
||||||
|
MOV H1, R1
|
||||||
|
MOV R0, R0;
|
||||||
|
)");
|
||||||
|
|
||||||
|
ASSERT_EQ(graph.blocks.size(), 1);
|
||||||
|
ASSERT_EQ(graph.blocks.front().instructions.size(), 3);
|
||||||
|
|
||||||
|
auto& block = graph.blocks.front();
|
||||||
|
RSXFragmentProgram prog{};
|
||||||
|
|
||||||
|
FP::RegisterAnnotationPass annotation_pass{ prog, { .skip_delay_slots = true } };
|
||||||
|
FP::RegisterDependencyPass deps_pass{};
|
||||||
|
|
||||||
|
annotation_pass.run(graph);
|
||||||
|
deps_pass.run(graph);
|
||||||
|
|
||||||
|
// Delay slot detection will cause no dependency injection
|
||||||
|
ASSERT_EQ(block.instructions.size(), 3);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user