rsx/cfg: Increase valid register file size to 768 bytes per pixel pipe

This commit is contained in:
kd-11 2025-12-16 01:18:31 +03:00 committed by Ani
parent e0cdafddb5
commit 03a91aa052
4 changed files with 25 additions and 22 deletions

View File

@ -402,12 +402,10 @@ namespace rsx::assembler::FP
return {}; return {};
} }
constexpr u32 register_file_max_len = 48 * 8; // H0 - H47, R0 - R23
const u32 lane_width = reg.reg.f16 ? 2 : 4; const u32 lane_width = reg.reg.f16 ? 2 : 4;
const u32 file_offset = reg.reg.id * lane_width * 4; const u32 file_offset = reg.reg.id * lane_width * 4;
ensure(file_offset < register_file_max_len, "Invalid register index"); ensure(file_offset < constants::register_file_max_len, "Invalid register index");
rsx::simple_array<u32> result{}; rsx::simple_array<u32> result{};
auto insert_lane = [&](u32 word_offset) auto insert_lane = [&](u32 word_offset)

View File

@ -85,6 +85,21 @@ namespace rsx::assembler
namespace FP namespace FP
{ {
namespace constants
{
	// Size of the fragment program register file, in bytes (48 * 16 = 768).
	// The ISA can encode 48 registers of any width. At 16 bytes per 32-bit register and
	// 8 bytes per 16-bit register, this covers R0-R47 and H0-H95, though there aren't
	// enough addressing bits to actually reach the upper half-registers.
	inline constexpr u32 register_file_max_len = 48 * 16;

	// Tags stored in a register file map by the analysis passes.
	inline constexpr char content_unknown = 0;   // Initial fill value used by the passes
	inline constexpr char content_float32 = 'R';
	inline constexpr char content_float16 = 'H';
	inline constexpr char content_dual = 'D';
}
// Register file map shared by the analysis passes: one char per entry, register_file_max_len entries.
// The passes initialise every entry to content_unknown; presumably the other content_* tags are written as registers are accessed.
using register_file_t = std::array<char, constants::register_file_max_len>;
// Returns number of operands consumed by an instruction // Returns number of operands consumed by an instruction
u8 get_operand_count(FP_opcode opcode); u8 get_operand_count(FP_opcode opcode);

View File

@ -9,11 +9,7 @@
namespace rsx::assembler::FP namespace rsx::assembler::FP
{ {
static constexpr u32 register_file_length = 48 * 8; // 24 F32 or 48 F16 registers using namespace constants;
static constexpr char content_unknown = 0;
static constexpr char content_float32 = 'R';
static constexpr char content_float16 = 'H';
static constexpr char content_dual = 'D';
bool is_delay_slot(const Instruction& instruction) bool is_delay_slot(const Instruction& instruction)
{ {
@ -60,7 +56,7 @@ namespace rsx::assembler::FP
return true; return true;
} }
std::vector<RegisterRef> compile_register_file(const std::array<char, 48 * 8>& file) std::vector<RegisterRef> compile_register_file(const register_file_t& file)
{ {
std::vector<RegisterRef> results; std::vector<RegisterRef> results;
@ -163,11 +159,11 @@ namespace rsx::assembler::FP
// Annotate each block with input and output lanes (read and clobber list) // Annotate each block with input and output lanes (read and clobber list)
void annotate_block_io(BasicBlock* block) void annotate_block_io(BasicBlock* block)
{ {
alignas(16) std::array<char, register_file_length> output_register_file; alignas(16) register_file_t output_register_file;
alignas(16) std::array<char, register_file_length> input_register_file; // We'll eventually replace with a bitfield mask, but for ease of debugging, we use char for now alignas(16) register_file_t input_register_file; // We'll eventually replace with a bitfield mask, but for ease of debugging, we use char for now
std::memset(output_register_file.data(), content_unknown, register_file_length); std::memset(output_register_file.data(), content_unknown, register_file_max_len);
std::memset(input_register_file.data(), content_unknown, register_file_length); std::memset(input_register_file.data(), content_unknown, register_file_max_len);
for (const auto& instruction : block->instructions) for (const auto& instruction : block->instructions)
{ {

View File

@ -9,13 +9,7 @@
namespace rsx::assembler::FP namespace rsx::assembler::FP
{ {
static constexpr u32 register_file_length = 48 * 8; // 24 F32 or 48 F16 registers using namespace constants;
static constexpr char content_unknown = 0;
static constexpr char content_float32 = 'R';
static constexpr char content_float16 = 'H';
static constexpr char content_dual = 'D';
using register_file_t = std::array<char, register_file_length>;
struct DependencyPassContext struct DependencyPassContext
{ {
@ -293,7 +287,7 @@ namespace rsx::assembler::FP
void insert_dependency_barriers(DependencyPassContext& ctx, BasicBlock* block) void insert_dependency_barriers(DependencyPassContext& ctx, BasicBlock* block)
{ {
register_file_t& register_file = ctx.exec_register_map[block]; register_file_t& register_file = ctx.exec_register_map[block];
std::memset(register_file.data(), content_unknown, register_file_length); std::memset(register_file.data(), content_unknown, register_file_max_len);
std::unordered_set<u32> barrier16; std::unordered_set<u32> barrier16;
std::unordered_set<u32> barrier32; std::unordered_set<u32> barrier32;
@ -403,7 +397,7 @@ namespace rsx::assembler::FP
if (ctx.sync_register_map.find(target) == ctx.sync_register_map.end()) if (ctx.sync_register_map.find(target) == ctx.sync_register_map.end())
{ {
auto& blob = ctx.sync_register_map[target]; auto& blob = ctx.sync_register_map[target];
std::memset(blob.data(), content_unknown, register_file_length); std::memset(blob.data(), content_unknown, register_file_max_len);
} }
auto& sync_register_file = ctx.sync_register_map[target]; auto& sync_register_file = ctx.sync_register_map[target];