mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-06-08 08:05:06 -06:00
rsx/cfg: Increase valid register file size to 768 bytes per pixel pipe
This commit is contained in:
parent
e0cdafddb5
commit
03a91aa052
@ -402,12 +402,10 @@ namespace rsx::assembler::FP
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u32 register_file_max_len = 48 * 8; // H0 - H47, R0 - R23
|
|
||||||
|
|
||||||
const u32 lane_width = reg.reg.f16 ? 2 : 4;
|
const u32 lane_width = reg.reg.f16 ? 2 : 4;
|
||||||
const u32 file_offset = reg.reg.id * lane_width * 4;
|
const u32 file_offset = reg.reg.id * lane_width * 4;
|
||||||
|
|
||||||
ensure(file_offset < register_file_max_len, "Invalid register index");
|
ensure(file_offset < constants::register_file_max_len, "Invalid register index");
|
||||||
|
|
||||||
rsx::simple_array<u32> result{};
|
rsx::simple_array<u32> result{};
|
||||||
auto insert_lane = [&](u32 word_offset)
|
auto insert_lane = [&](u32 word_offset)
|
||||||
|
|||||||
@ -85,6 +85,21 @@ namespace rsx::assembler
|
|||||||
|
|
||||||
namespace FP
|
namespace FP
|
||||||
{
|
{
|
||||||
|
namespace constants
|
||||||
|
{
|
||||||
|
// The ISA can encode for 48 registers of any width.
|
||||||
|
// This allows encoding R0-R47 and H0-H95, though there aren't enough addressing bits for the latter.
|
||||||
|
constexpr u32 register_file_max_len = 48 * 16;
|
||||||
|
|
||||||
|
// Enums for analysis passes.
|
||||||
|
constexpr char content_unknown = 0;
|
||||||
|
constexpr char content_float32 = 'R';
|
||||||
|
constexpr char content_float16 = 'H';
|
||||||
|
constexpr char content_dual = 'D';
|
||||||
|
}
|
||||||
|
|
||||||
|
using register_file_t = std::array<char, constants::register_file_max_len>;
|
||||||
|
|
||||||
// Returns number of operands consumed by an instruction
|
// Returns number of operands consumed by an instruction
|
||||||
u8 get_operand_count(FP_opcode opcode);
|
u8 get_operand_count(FP_opcode opcode);
|
||||||
|
|
||||||
|
|||||||
@ -9,11 +9,7 @@
|
|||||||
|
|
||||||
namespace rsx::assembler::FP
|
namespace rsx::assembler::FP
|
||||||
{
|
{
|
||||||
static constexpr u32 register_file_length = 48 * 8; // 24 F32 or 48 F16 registers
|
using namespace constants;
|
||||||
static constexpr char content_unknown = 0;
|
|
||||||
static constexpr char content_float32 = 'R';
|
|
||||||
static constexpr char content_float16 = 'H';
|
|
||||||
static constexpr char content_dual = 'D';
|
|
||||||
|
|
||||||
bool is_delay_slot(const Instruction& instruction)
|
bool is_delay_slot(const Instruction& instruction)
|
||||||
{
|
{
|
||||||
@ -60,7 +56,7 @@ namespace rsx::assembler::FP
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<RegisterRef> compile_register_file(const std::array<char, 48 * 8>& file)
|
std::vector<RegisterRef> compile_register_file(const register_file_t& file)
|
||||||
{
|
{
|
||||||
std::vector<RegisterRef> results;
|
std::vector<RegisterRef> results;
|
||||||
|
|
||||||
@ -163,11 +159,11 @@ namespace rsx::assembler::FP
|
|||||||
// Annotate each block with input and output lanes (read and clobber list)
|
// Annotate each block with input and output lanes (read and clobber list)
|
||||||
void annotate_block_io(BasicBlock* block)
|
void annotate_block_io(BasicBlock* block)
|
||||||
{
|
{
|
||||||
alignas(16) std::array<char, register_file_length> output_register_file;
|
alignas(16) register_file_t output_register_file;
|
||||||
alignas(16) std::array<char, register_file_length> input_register_file; // We'll eventually replace with a bitfield mask, but for ease of debugging, we use char for now
|
alignas(16) register_file_t input_register_file; // We'll eventually replace with a bitfield mask, but for ease of debugging, we use char for now
|
||||||
|
|
||||||
std::memset(output_register_file.data(), content_unknown, register_file_length);
|
std::memset(output_register_file.data(), content_unknown, register_file_max_len);
|
||||||
std::memset(input_register_file.data(), content_unknown, register_file_length);
|
std::memset(input_register_file.data(), content_unknown, register_file_max_len);
|
||||||
|
|
||||||
for (const auto& instruction : block->instructions)
|
for (const auto& instruction : block->instructions)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -9,13 +9,7 @@
|
|||||||
|
|
||||||
namespace rsx::assembler::FP
|
namespace rsx::assembler::FP
|
||||||
{
|
{
|
||||||
static constexpr u32 register_file_length = 48 * 8; // 24 F32 or 48 F16 registers
|
using namespace constants;
|
||||||
static constexpr char content_unknown = 0;
|
|
||||||
static constexpr char content_float32 = 'R';
|
|
||||||
static constexpr char content_float16 = 'H';
|
|
||||||
static constexpr char content_dual = 'D';
|
|
||||||
|
|
||||||
using register_file_t = std::array<char, register_file_length>;
|
|
||||||
|
|
||||||
struct DependencyPassContext
|
struct DependencyPassContext
|
||||||
{
|
{
|
||||||
@ -293,7 +287,7 @@ namespace rsx::assembler::FP
|
|||||||
void insert_dependency_barriers(DependencyPassContext& ctx, BasicBlock* block)
|
void insert_dependency_barriers(DependencyPassContext& ctx, BasicBlock* block)
|
||||||
{
|
{
|
||||||
register_file_t& register_file = ctx.exec_register_map[block];
|
register_file_t& register_file = ctx.exec_register_map[block];
|
||||||
std::memset(register_file.data(), content_unknown, register_file_length);
|
std::memset(register_file.data(), content_unknown, register_file_max_len);
|
||||||
|
|
||||||
std::unordered_set<u32> barrier16;
|
std::unordered_set<u32> barrier16;
|
||||||
std::unordered_set<u32> barrier32;
|
std::unordered_set<u32> barrier32;
|
||||||
@ -403,7 +397,7 @@ namespace rsx::assembler::FP
|
|||||||
if (ctx.sync_register_map.find(target) == ctx.sync_register_map.end())
|
if (ctx.sync_register_map.find(target) == ctx.sync_register_map.end())
|
||||||
{
|
{
|
||||||
auto& blob = ctx.sync_register_map[target];
|
auto& blob = ctx.sync_register_map[target];
|
||||||
std::memset(blob.data(), content_unknown, register_file_length);
|
std::memset(blob.data(), content_unknown, register_file_max_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& sync_register_file = ctx.sync_register_map[target];
|
auto& sync_register_file = ctx.sync_register_map[target];
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user