shadPS4/tests/stubs/resource_tracking_pass_stub.cpp
Marcin Mikołajczyk 963d10f220
Test framework for GCN pipeline (#4272)
* Test framework for GCN pipeline

* Try fixing test compilation on CI
2026-04-19 22:15:08 +03:00

336 lines
13 KiB
C++

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/operand_helper.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/profile.h"
#include "video_core/amdgpu/resource.h"
namespace Shader::Optimization {
namespace {
using SharpLocation = u32;
/// Returns true when the instruction is a buffer atomic read-modify-write op.
bool IsBufferAtomic(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    // 64-bit integer atomics.
    case IR::Opcode::BufferAtomicIAdd64:
    case IR::Opcode::BufferAtomicSMin64:
    case IR::Opcode::BufferAtomicUMin64:
    case IR::Opcode::BufferAtomicSMax64:
    case IR::Opcode::BufferAtomicUMax64:
    // 32-bit integer atomics.
    case IR::Opcode::BufferAtomicIAdd32:
    case IR::Opcode::BufferAtomicISub32:
    case IR::Opcode::BufferAtomicSMin32:
    case IR::Opcode::BufferAtomicUMin32:
    case IR::Opcode::BufferAtomicSMax32:
    case IR::Opcode::BufferAtomicUMax32:
    case IR::Opcode::BufferAtomicInc32:
    case IR::Opcode::BufferAtomicDec32:
    case IR::Opcode::BufferAtomicAnd32:
    case IR::Opcode::BufferAtomicOr32:
    case IR::Opcode::BufferAtomicXor32:
    case IR::Opcode::BufferAtomicSwap32:
    case IR::Opcode::BufferAtomicCmpSwap32:
    // Floating-point atomics.
    case IR::Opcode::BufferAtomicFMin32:
    case IR::Opcode::BufferAtomicFMax32:
    case IR::Opcode::BufferAtomicFCmpSwap32:
        return true;
    default:
        return false;
    }
}
/// Returns true when the instruction writes buffer memory, either through a
/// plain store or through an atomic (atomics also read back the old value).
bool IsBufferStore(const IR::Inst& inst) {
    if (IsBufferAtomic(inst)) {
        return true;
    }
    switch (inst.GetOpcode()) {
    case IR::Opcode::StoreBufferU8:
    case IR::Opcode::StoreBufferU16:
    case IR::Opcode::StoreBufferU32:
    case IR::Opcode::StoreBufferU32x2:
    case IR::Opcode::StoreBufferU32x3:
    case IR::Opcode::StoreBufferU32x4:
    case IR::Opcode::StoreBufferU64:
    case IR::Opcode::StoreBufferF32:
    case IR::Opcode::StoreBufferF32x2:
    case IR::Opcode::StoreBufferF32x3:
    case IR::Opcode::StoreBufferF32x4:
    case IR::Opcode::StoreBufferFormatF32:
        return true;
    default:
        return false;
    }
}
/// Returns true when the instruction accesses buffer memory in any way:
/// loads, constant-buffer reads, stores, or atomics.
bool IsBufferInstruction(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadBufferU8:
    case IR::Opcode::LoadBufferU16:
    case IR::Opcode::LoadBufferU32:
    case IR::Opcode::LoadBufferU32x2:
    case IR::Opcode::LoadBufferU32x3:
    case IR::Opcode::LoadBufferU32x4:
    case IR::Opcode::LoadBufferU64:
    case IR::Opcode::LoadBufferF32:
    case IR::Opcode::LoadBufferF32x2:
    case IR::Opcode::LoadBufferF32x3:
    case IR::Opcode::LoadBufferF32x4:
    case IR::Opcode::LoadBufferFormatF32:
    case IR::Opcode::ReadConstBuffer:
        return true;
    default:
        break;
    }
    // Not a load; stores and atomics are covered by IsBufferStore.
    return IsBufferStore(inst);
}
/// Returns log2 of the element size, in bytes, addressed by a buffer
/// instruction. The returned shift converts a byte offset into an element
/// offset (byte_offset >> shift).
///
/// For format (typed) accesses the element size comes from the data format;
/// for everything else it is implied by the opcode. ReadConstBuffer addresses
/// are already expressed in dwords, so no shift is applied.
u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadBufferU8:
    case IR::Opcode::StoreBufferU8:
        return 0; // 1-byte elements
    case IR::Opcode::LoadBufferU16:
    case IR::Opcode::StoreBufferU16:
        return 1; // 2-byte elements
    case IR::Opcode::LoadBufferU64:
    case IR::Opcode::StoreBufferU64:
    case IR::Opcode::BufferAtomicIAdd64:
    case IR::Opcode::BufferAtomicSMax64:
    case IR::Opcode::BufferAtomicSMin64:
    case IR::Opcode::BufferAtomicUMax64:
    case IR::Opcode::BufferAtomicUMin64:
        return 3; // 8-byte elements
    case IR::Opcode::LoadBufferFormatF32:
    case IR::Opcode::StoreBufferFormatF32: {
        // Element size of a typed access depends on the format's total width.
        switch (data_format) {
        case AmdGpu::DataFormat::Format8:
            return 0;
        case AmdGpu::DataFormat::Format8_8:
        case AmdGpu::DataFormat::Format16:
            return 1;
        case AmdGpu::DataFormat::Format8_8_8_8:
        case AmdGpu::DataFormat::Format16_16:
        case AmdGpu::DataFormat::Format10_11_11:
        case AmdGpu::DataFormat::Format2_10_10_10:
        case AmdGpu::DataFormat::Format16_16_16_16:
        case AmdGpu::DataFormat::Format32:
        case AmdGpu::DataFormat::Format32_32:
        case AmdGpu::DataFormat::Format32_32_32:
        case AmdGpu::DataFormat::Format32_32_32_32:
            return 2;
        default:
            return 0;
        }
    }
    case IR::Opcode::ReadConstBuffer:
        // Provided address is already in dwords
        return 0;
    default:
        return 2; // Remaining opcodes access 4-byte (dword) elements.
    }
}
/// Deduplicating collector for shader resource descriptors. Each Add() either
/// returns the binding index of an equivalent existing descriptor or appends
/// the new one; for buffers/images the usage flags of duplicates are merged
/// into the stored descriptor.
class Descriptors {
public:
    explicit Descriptors(Info& info_)
        : info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images},
          sampler_resources{info_.samplers}, fmask_resources(info_.fmasks) {}

    /// Adds (or finds) a buffer descriptor, ORing usage flags into the match.
    u32 Add(const BufferResource& desc) {
        const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
            return desc.sharp_idx == existing.sharp_idx &&
                   desc.inline_cbuf == existing.inline_cbuf &&
                   desc.buffer_type == existing.buffer_type;
        })};
        // Merge usage of the new occurrence into the canonical descriptor.
        auto& buffer = buffer_resources[index];
        buffer.used_types |= desc.used_types;
        buffer.is_written |= desc.is_written;
        buffer.is_formatted |= desc.is_formatted;
        return index;
    }

    /// Adds (or finds) an image descriptor, ORing usage flags into the match.
    u32 Add(const ImageResource& desc) {
        const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
            return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array &&
                   desc.mip_fallback_mode == existing.mip_fallback_mode &&
                   desc.constant_mip_index == existing.constant_mip_index;
        })};
        auto& image = image_resources[index];
        image.is_atomic |= desc.is_atomic;
        image.is_written |= desc.is_written;
        return index;
    }

    /// Adds (or finds) a sampler descriptor.
    u32 Add(const SamplerResource& desc) {
        // Note: `this` was previously captured here but never used by the predicate.
        return Add(sampler_resources, desc, [&desc](const auto& existing) {
            return desc.sharp_idx == existing.sharp_idx &&
                   desc.is_inline_sampler == existing.is_inline_sampler &&
                   desc.inline_sampler == existing.inline_sampler;
        });
    }

    /// Adds (or finds) an FMask descriptor, keyed by sharp index only.
    u32 Add(const FMaskResource& desc) {
        return Add(fmask_resources, desc, [&desc](const auto& existing) {
            return desc.sharp_idx == existing.sharp_idx;
        });
    }

private:
    /// Returns the index of the first descriptor matching `pred`, appending
    /// `desc` when no match exists.
    template <typename Descriptors, typename Descriptor, typename Func>
    static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
        const auto it{std::ranges::find_if(descriptors, pred)};
        if (it != descriptors.end()) {
            return static_cast<u32>(std::distance(descriptors.begin(), it));
        }
        descriptors.push_back(desc);
        return static_cast<u32>(descriptors.size()) - 1;
    }

    const Info& info; // Kept for parity with the real pass; unused in this stub.
    BufferResourceList& buffer_resources;
    ImageResourceList& image_resources;
    SamplerResourceList& sampler_resources;
    FMaskResourceList& fmask_resources;
};
} // Anonymous namespace
// Registers a stub guest-buffer descriptor for this instruction and rewrites
// its handle argument to the binding slot in the buffer resource list.
void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors,
                      const Profile& profile) {
    const BufferResource stub_resource{.sharp_idx = 0,
                                       .used_types = IR::Type::U32,
                                       .buffer_type = BufferType::Guest,
                                       .is_written = true,
                                       .is_formatted = false};
    const u32 binding = descriptors.Add(stub_resource);
    // Replace handle with binding index in buffer resource list.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(0, ir.Imm32(binding));
}
/// Emits IR computing the element offset for a buffer instruction from its
/// index/offset operands and the buffer sharp's stride/swizzle parameters.
/// The result is in units of the element size (a byte offset shifted right by
/// BufferAddressShift), ready to replace the instruction's address argument.
IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
                               const AmdGpu::Buffer& buffer, u32 stride) {
    const auto inst_info = inst.Flags<IR::BufferInstInfo>();
    const u32 inst_offset = inst_info.inst_offset.Value();
    // Typed (format) instructions carry their own data format; otherwise the
    // format comes from the buffer sharp.
    const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid;
    const auto data_format = is_inst_typed
                                 ? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value())
                                 : buffer.GetDataFmt();
    // shift = log2(element size); mask selects the sub-element byte bits.
    const u32 shift = BufferAddressShift(inst, data_format);
    const u32 mask = (1 << shift) - 1;
    const IR::U32 soffset = IR::GetBufferSOffsetArg(&inst);
    // If address calculation is of the form "index * const_stride + offset" with
    // offset constant and both const_stride and offset are divisible with the
    // element size, apply shift directly.
    if (inst_info.index_enable && !inst_info.voffset_enable && soffset.IsImmediate() &&
        !buffer.swizzle_enable && !buffer.add_tid_enable && (stride & mask) == 0) {
        const u32 total_offset = soffset.U32() + inst_offset;
        if ((total_offset & mask) == 0) {
            // buffer_offset = index * (const_stride >> shift) + (offset >> shift)
            const IR::U32 index = IR::GetBufferIndexArg(&inst);
            return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)),
                           ir.Imm32(total_offset >> shift));
        }
    }
    // General path: build the full byte offset, then shift at the end.
    // index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ?
    // thread_id[5:0] : 0)
    IR::U32 index = ir.Imm32(0U);
    if (inst_info.index_enable) {
        const IR::U32 vgpr_index = IR::GetBufferIndexArg(&inst);
        index = ir.IAdd(index, vgpr_index);
    }
    if (buffer.add_tid_enable) {
        ASSERT_MSG(info.l_stage == LogicalStage::Compute,
                   "Thread ID buffer addressing is not supported outside of compute.");
        const IR::U32 thread_id{ir.LaneId()};
        index = ir.IAdd(index, thread_id);
    }
    // offset = (inst_offen ? vgpr_offset : 0) + inst_offset
    IR::U32 offset = ir.Imm32(inst_offset);
    offset = ir.IAdd(offset, soffset);
    if (inst_info.voffset_enable) {
        const IR::U32 voffset = IR::GetBufferVOffsetArg(&inst);
        offset = ir.IAdd(offset, voffset);
    }
    const IR::U32 const_stride = ir.Imm32(stride);
    IR::U32 buffer_offset;
    if (buffer.swizzle_enable) {
        // Swizzled layout: split index/offset into MSB/LSB parts around the
        // index stride and element size, then interleave per the V# rules.
        const IR::U32 const_index_stride = ir.Imm32(buffer.GetIndexStride());
        const IR::U32 const_element_size = ir.Imm32(buffer.GetElementSize());
        // index_msb = index / const_index_stride
        const IR::U32 index_msb{ir.IDiv(index, const_index_stride)};
        // index_lsb = index % const_index_stride
        const IR::U32 index_lsb{ir.IMod(index, const_index_stride)};
        // offset_msb = offset / const_element_size
        const IR::U32 offset_msb{ir.IDiv(offset, const_element_size)};
        // offset_lsb = offset % const_element_size
        const IR::U32 offset_lsb{ir.IMod(offset, const_element_size)};
        // buffer_offset =
        //   (index_msb * const_stride + offset_msb * const_element_size) *
        //   const_index_stride
        //   + index_lsb * const_element_size + offset_lsb
        const IR::U32 buffer_offset_msb = ir.IMul(
            ir.IAdd(ir.IMul(index_msb, const_stride), ir.IMul(offset_msb, const_element_size)),
            const_index_stride);
        const IR::U32 buffer_offset_lsb =
            ir.IAdd(ir.IMul(index_lsb, const_element_size), offset_lsb);
        buffer_offset = ir.IAdd(buffer_offset_msb, buffer_offset_lsb);
    } else {
        // buffer_offset = index * const_stride + offset
        buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset);
    }
    // Convert the byte offset into an element offset.
    if (shift != 0) {
        buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift));
    }
    return buffer_offset;
}
/// Rewrites a buffer instruction's address argument to the computed element
/// offset, using a null buffer sharp (this is the stub pass).
void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
    // Address of constant buffer reads can be calculated at IR emission time.
    // Check first so we do no work at all for ReadConstBuffer. (The previous
    // version also read info.buffers[inst.Arg(0).U32()] into an unused local.)
    if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
        return;
    }
    const auto buffer = AmdGpu::Buffer::Null();
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(IR::LoadBufferArgs::Address,
                CalculateBufferAddress(ir, inst, info, buffer, buffer.stride));
}
/// Stub resource tracking pass: first walk registers buffer sharps into the
/// descriptor lists, second walk rewrites instruction address arguments.
/// Two separate walks are required because the address patching relies on the
/// bindings established by the first walk.
void ResourceTrackingPassStub(IR::Program& program, const Profile& profile) {
    auto& info = program.info;
    Descriptors descriptors{info};
    // Shared traversal: invoke `func` on every buffer instruction in program order.
    const auto for_each_buffer_inst = [&program](auto&& func) {
        for (IR::Block* const blk : program.blocks) {
            for (IR::Inst& inst : blk->Instructions()) {
                if (IsBufferInstruction(inst)) {
                    func(*blk, inst);
                }
            }
        }
    };
    // Pass 1: Track resource sharps
    for_each_buffer_inst([&](IR::Block& blk, IR::Inst& inst) {
        PatchBufferSharp(blk, inst, info, descriptors, profile);
    });
    // Pass 2: Patch instruction args
    for_each_buffer_inst([&](IR::Block& blk, IR::Inst& inst) {
        PatchBufferArgs(blk, inst, info);
    });
}
} // namespace Shader::Optimization