shader_recompiler: Strip out manual bounds checking (#4380)

Authored by squidbus on 2026-05-09 10:05:18 -07:00, committed by GitHub
parent 82c760135f
commit ac61f4aee2
10 changed files with 53 additions and 167 deletions
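
For context on what is being stripped: the removed helpers emitted a size comparison into the shader around every buffer access, selecting zero for out-of-bounds loads and skipping out-of-bounds stores, to emulate hardware robust buffer access in software. A minimal standalone sketch of that pattern, in plain C++ with integers standing in for SPIR-V IDs (the helper names here are illustrative, not the project's `AccessBoundsCheck`):

    #include <cstdint>
    #include <vector>

    // Select-style guard removed from loads: an out-of-bounds index yields
    // zero instead of reading memory, mirroring the OpULessThan + OpSelect
    // sequence deleted from EmitReadConstBuffer below.
    uint32_t BoundsCheckedLoad(const std::vector<uint32_t>& buf, uint32_t index) {
        const bool in_bounds = index < buf.size();
        return in_bounds ? buf[index] : 0u;
    }

    // Branch-style guard removed from stores and atomics: the access is
    // skipped entirely when the index is past the end of the buffer.
    void BoundsCheckedStore(std::vector<uint32_t>& buf, uint32_t index, uint32_t value) {
        if (index < buf.size()) {
            buf[index] = value;
        }
    }

With the driver's robustBufferAccess guarantees relied on instead, both guards, and the buffer sizes they compared against, can be dropped from the generated SPIR-V.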

View File

@@ -77,9 +77,7 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
     const auto [id, pointer_type] = buffer.Alias(is_float ? PointerType::F32 : PointerType::U32);
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32, 1, is_float>(ctx, address, buffer.Size(PointerSize::B32), [&] {
-        return (ctx.*atomic_func)(type, ptr, scope, semantics, value);
-    });
+    return (ctx.*atomic_func)(type, ptr, scope, semantics, value);
 }

 Id BufferAtomicU32IncDec(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
@@ -91,9 +89,7 @@ Id BufferAtomicU32IncDec(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
     const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
-        return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics);
-    });
+    return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics);
 }

 Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
@@ -106,9 +102,7 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
     const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
-        return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
-    });
+    return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
 }

 Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
@@ -120,9 +114,7 @@ Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
     const auto [id, pointer_type] = buffer.Alias(PointerType::U64);
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<64>(ctx, address, buffer.Size(PointerSize::B64), [&] {
-        return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
-    });
+    return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
 }

 Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,

View File

@@ -78,10 +78,6 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
     const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
     const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
-    if (const Id size = buffer.Size(PointerSize::B32); Sirit::ValidId(size)) {
-        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, size);
-        return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
-    }
     return result;
 }
@@ -320,8 +316,6 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
 template <u32 N, PointerType alias>
 static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    constexpr bool is_float = alias == PointerType::F32;
-    const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
     if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, offset);
@@ -334,21 +328,10 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
         const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
         const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
         const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
-        if (!flags.typed) {
-            // Untyped loads have bounds checking per-component.
-            ids.push_back(LoadAccessBoundsCheck<32, 1, is_float>(
-                ctx, index_i, spv_buffer.Size(PointerSize::B32), result_i));
-        } else {
-            ids.push_back(result_i);
-        }
+        ids.push_back(result_i);
     }

     const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
-    if (flags.typed) {
-        // Typed loads have single bounds check for the whole load.
-        return LoadAccessBoundsCheck<32, N, is_float>(ctx, address,
-                                                      spv_buffer.Size(PointerSize::B32), result);
-    }
     return result;
 }
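
The two comments deleted above encode the only subtle part of this path: typed buffer instructions got one range check for the whole N-dword access, while untyped ones were checked per component. One plausible formulation of that difference, as a standalone sketch (function names are illustrative, not the semantics of the project's `LoadAccessBoundsCheck` template):

    #include <cstdint>

    // Typed access: a single check covers the full range [address, address + n).
    bool TypedAccessInBounds(uint32_t address, uint32_t n, uint32_t size_dwords) {
        return address + n <= size_dwords;
    }

    // Untyped access: each component is checked on its own, so component i can
    // still be read or written even when component i + 1 falls out of bounds.
    bool UntypedComponentInBounds(uint32_t address, uint32_t i, uint32_t size_dwords) {
        return address + i < size_dwords;
    }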
@@ -360,7 +343,7 @@ Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
     const Id result{ctx.OpLoad(ctx.U8, ptr)};
-    return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), result);
+    return result;
 }

 Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@@ -371,7 +354,7 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
     const Id result{ctx.OpLoad(ctx.U16, ptr)};
-    return LoadAccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), result);
+    return result;
 }

 Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@@ -398,7 +381,7 @@ Id EmitLoadBufferU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
     const Id result{ctx.OpLoad(ctx.U64, ptr)};
-    return LoadAccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), result);
+    return result;
 }

 Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@@ -424,8 +407,6 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr
 template <u32 N, PointerType alias>
 static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
                                  Id value) {
-    constexpr bool is_float = alias == PointerType::F32;
-    const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
     if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, offset);
@@ -433,31 +414,11 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
     const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
     const auto [id, pointer_type] = spv_buffer.Alias(alias);
-    auto store = [&] {
-        for (u32 i = 0; i < N; i++) {
-            const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
-            const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
-            const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
-            auto store_i = [&] {
-                ctx.OpStore(ptr_i, value_i);
-                return Id{};
-            };
-            if (!flags.typed) {
-                // Untyped stores have bounds checking per-component.
-                AccessBoundsCheck<32, 1, is_float>(ctx, index_i, spv_buffer.Size(PointerSize::B32),
-                                                   store_i);
-            } else {
-                store_i();
-            }
-        }
-        return Id{};
-    };
-    if (flags.typed) {
-        // Typed stores have single bounds check for the whole store.
-        AccessBoundsCheck<32, N, is_float>(ctx, address, spv_buffer.Size(PointerSize::B32), store);
-    } else {
-        store();
-    }
+    for (u32 i = 0; i < N; i++) {
+        const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
+        const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
+        const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
+        ctx.OpStore(ptr_i, value_i);
+    }
 }
@@ -468,10 +429,7 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
     }
     const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    AccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), [&] {
-        ctx.OpStore(ptr, value);
-        return Id{};
-    });
+    ctx.OpStore(ptr, value);
 }

 void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
@@ -481,10 +439,7 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
     }
     const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    AccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), [&] {
-        ctx.OpStore(ptr, value);
-        return Id{};
-    });
+    ctx.OpStore(ptr, value);
 }

 void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
@@ -510,10 +465,7 @@ void EmitStoreBufferU64(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id
     }
     const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
-    AccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), [&] {
-        ctx.OpStore(ptr, value);
-        return Id{};
-    });
+    ctx.OpStore(ptr, value);
 }

 void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {

View File

@@ -231,7 +231,7 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
 }

 void EmitContext::DefineBufferProperties() {
-    if (!profile.needs_buffer_offsets && profile.supports_robust_buffer_access) {
+    if (!profile.needs_buffer_offsets) {
         return;
     }
     for (u32 i = 0; i < buffers.size(); i++) {
@@ -242,59 +242,31 @@ void EmitContext::DefineBufferProperties() {
             continue;
         }
         // Only load and apply buffer offsets if host GPU alignment is larger than guest.
-        if (profile.needs_buffer_offsets) {
-            const u32 half = PushData::BufOffsetIndex + (binding >> 4);
-            const u32 comp = (binding & 0xf) >> 2;
-            const u32 offset = (binding & 0x3) << 3;
-            const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
-                                       push_data_block, ConstU32(half), ConstU32(comp))};
-            const Id value{OpLoad(U32[1], ptr)};
-            const Id buf_offset{OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U))};
-            Name(buf_offset, fmt::format("buf{}_off", binding));
-            buffer.Offset(PointerSize::B8) = buf_offset;
-            if (True(desc.used_types & IR::Type::U16)) {
-                const Id buf_word_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(1U))};
-                Name(buf_word_offset, fmt::format("buf{}_word_off", binding));
-                buffer.Offset(PointerSize::B16) = buf_word_offset;
-            }
-            if (True(desc.used_types & IR::Type::U32)) {
-                const Id buf_dword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(2U))};
-                Name(buf_dword_offset, fmt::format("buf{}_dword_off", binding));
-                buffer.Offset(PointerSize::B32) = buf_dword_offset;
-            }
-            if (True(desc.used_types & IR::Type::U64)) {
-                const Id buf_qword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(3U))};
-                Name(buf_qword_offset, fmt::format("buf{}_qword_off", binding));
-                buffer.Offset(PointerSize::B64) = buf_qword_offset;
-            }
-        }
-        // Only load size if performing bounds checks.
-        if (!profile.supports_robust_buffer_access) {
-            const Id buf_size{desc.sharp_idx == std::numeric_limits<u32>::max()
-                                  ? ConstU32(desc.inline_cbuf.GetSize())
-                                  : GetBufferSize(desc.sharp_idx)};
-            Name(buf_size, fmt::format("buf{}_size", binding));
-            buffer.Size(PointerSize::B8) = buf_size;
-            if (True(desc.used_types & IR::Type::U16)) {
-                const Id buf_word_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(1U))};
-                Name(buf_word_size, fmt::format("buf{}_short_size", binding));
-                buffer.Size(PointerSize::B16) = buf_word_size;
-            }
-            if (True(desc.used_types & IR::Type::U32)) {
-                const Id buf_dword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(2U))};
-                Name(buf_dword_size, fmt::format("buf{}_dword_size", binding));
-                buffer.Size(PointerSize::B32) = buf_dword_size;
-            }
-            if (True(desc.used_types & IR::Type::U64)) {
-                const Id buf_qword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(3U))};
-                Name(buf_qword_size, fmt::format("buf{}_qword_size", binding));
-                buffer.Size(PointerSize::B64) = buf_qword_size;
-            }
-        }
+        const u32 half = PushData::BufOffsetIndex + (binding >> 4);
+        const u32 comp = (binding & 0xf) >> 2;
+        const u32 offset = (binding & 0x3) << 3;
+        const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
+                                   push_data_block, ConstU32(half), ConstU32(comp))};
+        const Id value{OpLoad(U32[1], ptr)};
+        const Id buf_offset{OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U))};
+        Name(buf_offset, fmt::format("buf{}_off", binding));
+        buffer.Offset(PointerSize::B8) = buf_offset;
+        if (True(desc.used_types & IR::Type::U16)) {
+            const Id buf_word_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(1U))};
+            Name(buf_word_offset, fmt::format("buf{}_word_off", binding));
+            buffer.Offset(PointerSize::B16) = buf_word_offset;
+        }
+        if (True(desc.used_types & IR::Type::U32)) {
+            const Id buf_dword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(2U))};
+            Name(buf_dword_offset, fmt::format("buf{}_dword_off", binding));
+            buffer.Offset(PointerSize::B32) = buf_dword_offset;
+        }
+        if (True(desc.used_types & IR::Type::U64)) {
+            const Id buf_qword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(3U))};
+            Name(buf_qword_offset, fmt::format("buf{}_qword_off", binding));
+            buffer.Offset(PointerSize::B64) = buf_qword_offset;
+        }
     }
 }
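
The offset-loading code kept above packs one byte-wide buffer offset per binding into an array of u32 push constants, and the `half`/`comp`/`offset` arithmetic locates that byte. A standalone sketch of the same index math (with `BufOffsetIndex` as a stand-in for the project's `PushData::BufOffsetIndex`):

    #include <cstdint>
    #include <cstdio>

    // Stand-in for PushData::BufOffsetIndex; the real value depends on the
    // push-constant layout.
    constexpr uint32_t BufOffsetIndex = 0;

    struct PackedOffsetLocation {
        uint32_t half;  // which 4-dword push-constant element group
        uint32_t comp;  // which u32 component within that group
        uint32_t shift; // bit position of the byte within the u32
    };

    PackedOffsetLocation LocateBufferOffset(uint32_t binding) {
        return {
            .half = BufOffsetIndex + (binding >> 4), // 16 bindings per group
            .comp = (binding & 0xf) >> 2,            // 4 bindings per u32
            .shift = (binding & 0x3) << 3,           // 8 bits per binding
        };
    }

    int main() {
        // Binding 5 lands in group 0, component 1, bits [8, 16).
        const auto loc = LocateBufferOffset(5);
        std::printf("half=%u comp=%u shift=%u\n", loc.half, loc.comp, loc.shift);
    }

That byte is exactly what the OpBitFieldUExtract above pulls out, before the shift-right variants derive the word/dword/qword offsets from it.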

View File

@@ -327,7 +327,6 @@ public:
         u32 binding;
         BufferType buffer_type;
         std::array<Id, u32(PointerSize::NumClass)> offsets;
-        std::array<Id, u32(PointerSize::NumClass)> sizes;
         std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;

         template <class Self>
@@ -339,11 +338,6 @@ public:
         auto& Offset(this Self& self, PointerSize size) {
             return self.offsets[u32(size)];
         }
-
-        template <class Self>
-        auto& Size(this Self& self, PointerSize size) {
-            return self.sizes[u32(size)];
-        }
     };

     Bindings& binding;
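
Worth noting in passing: the surviving `Offset` accessor uses C++23 explicit object parameters ("deducing this"), so one template serves const and non-const objects, returning a reference with matching const-ness. A minimal illustration of the idiom, with `uint32_t` standing in for `Id`:

    #include <array>
    #include <cstdint>

    struct Buffer {
        std::array<uint32_t, 4> offsets{};

        // Self deduces as Buffer or const Buffer, so auto& becomes
        // uint32_t& or const uint32_t& accordingly.
        template <class Self>
        auto& Offset(this Self& self, uint32_t size_class) {
            return self.offsets[size_class];
        }
    };

    int main() {
        Buffer buf;
        buf.Offset(0) = 42;                   // non-const: uint32_t&
        const Buffer& cref = buf;
        return cref.Offset(0) == 42 ? 0 : 1;  // const: const uint32_t&
    }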

View File

@@ -1038,24 +1038,23 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra
 void Translator::EmitFetch(const GcnInst& inst) {
     const auto code_sgpr_base = inst.src[0].code;

-    // The fetch shader must be inlined to access as regular buffers, so that
-    // bounds checks can be emitted to emulate robust buffer access.
-    if (!profile.supports_robust_buffer_access) {
-        const auto* code = GetFetchShaderCode(info, code_sgpr_base);
-        GcnCodeSlice slice(code, code + std::numeric_limits<u32>::max());
-        GcnDecodeContext decoder;
-
-        // Decode and save instructions
-        while (!slice.atEnd()) {
-            const auto sub_inst = decoder.decodeInstruction(slice);
-            if (sub_inst.opcode == Opcode::S_SETPC_B64) {
-                // Assume we're swapping back to the main shader.
-                break;
-            }
-            TranslateInstruction(sub_inst);
-        }
-        return;
-    }
+#if 0
+    // Translate fetch shader inline using regular buffer bindings; useful for debugging.
+    const auto* code = GetFetchShaderCode(info, code_sgpr_base);
+    GcnCodeSlice slice(code, code + std::numeric_limits<u32>::max());
+    GcnDecodeContext decoder;
+
+    // Decode and save instructions
+    while (!slice.atEnd()) {
+        const auto sub_inst = decoder.decodeInstruction(slice);
+        if (sub_inst.opcode == Opcode::S_SETPC_B64) {
+            // Assume we're swapping back to the main shader.
+            break;
+        }
+        TranslateInstruction(sub_inst);
+    }
+    return;
+#endif

     info.has_fetch_shader = true;
     info.fetch_shader_sgpr_base = code_sgpr_base;

View File

@@ -162,20 +162,6 @@ void CollectShaderInfoPass(IR::Program& program, const Profile& profile) {
         }
     }

-    // In case Flatbuf has not already been bound by IR and is needed
-    // to query buffer sizes, bind it now.
-    if (!profile.supports_robust_buffer_access && !info.uses_dma) {
-        info.buffers.push_back({
-            .used_types = IR::Type::U32,
-            // We can't guarantee that flatbuf will not grow past UBO
-            // limit if there are a lot of ReadConsts. (We could specialize)
-            .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
-            .buffer_type = BufferType::Flatbuf,
-        });
-        // In the future we may want to read buffer sizes from GPU memory if available.
-        // info.readconst_types |= Info::ReadConstType::Immediate;
-    }
     if (!Config::directMemoryAccess()) {
         info.uses_dma = false;
         info.readconst_types = Info::ReadConstType::None;

View File

@@ -37,7 +37,6 @@ struct Profile {
     bool supports_image_load_store_lod{};
     bool supports_native_cube_calc{};
     bool supports_trinary_minmax{};
-    bool supports_robust_buffer_access{};
     bool supports_buffer_fp32_atomic_min_max{};
     bool supports_image_fp32_atomic_min_max{};
     bool supports_buffer_int64_atomics{};

View File

@@ -165,11 +165,6 @@ public:
         return vertex_input_dynamic_state;
     }

-    /// Returns true when the robustBufferAccess2 feature of VK_EXT_robustness2 is supported.
-    bool IsRobustBufferAccess2Supported() const {
-        return robustness2 && robustness2_features.robustBufferAccess2;
-    }
-
     /// Returns true when the nullDescriptor feature of VK_EXT_robustness2 is supported.
     bool IsNullDescriptorSupported() const {
         return robustness2 && robustness2_features.nullDescriptor;
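
Since `IsRobustBufferAccess2Supported()` is deleted here and the corresponding profile flag is hardcoded in the pipeline cache below, for reference this is roughly how the feature was (and could again be) queried through VK_EXT_robustness2. A sketch against the raw Vulkan API rather than the project's wrapper types:

    #include <vulkan/vulkan.h>

    // Query robustBufferAccess2 via the standard features2 chain. Requires
    // the VK_EXT_robustness2 extension to be present; a real implementation
    // would check the device's extension list first.
    bool QueryRobustBufferAccess2(VkPhysicalDevice physical_device) {
        VkPhysicalDeviceRobustness2FeaturesEXT robustness2{};
        robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;

        VkPhysicalDeviceFeatures2 features2{};
        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        features2.pNext = &robustness2;

        vkGetPhysicalDeviceFeatures2(physical_device, &features2);
        return robustness2.robustBufferAccess2 == VK_TRUE;
    }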

View File

@@ -289,8 +289,6 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
         .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
         .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
         .supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(),
-        // TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.
-        .supports_robust_buffer_access = true, // instance_.IsRobustBufferAccess2Supported(),
         .supports_buffer_fp32_atomic_min_max =
             instance_.IsShaderAtomicFloatBuffer32MinMaxSupported(),
         .supports_image_fp32_atomic_min_max = instance_.IsShaderAtomicFloatImage32MinMaxSupported(),

View File

@@ -60,7 +60,6 @@ std::vector<u32> TranslateToSpirv(u64 raw_gcn_inst) {
     Profile profile{};
     profile.supported_spirv = 0x00010600;
     profile.subgroup_size = 32;
-    profile.supports_robust_buffer_access = true;

     RuntimeInfo runtime_info{};
     runtime_info.Initialize(Stage::Compute);