IMAGE_STORE_MIP fallback (#4075)

* fallback for IMAGE_STORE_MIP when not natively supported

* Lod should be treated as absolute, independent of sharp's base_level (judging by other implemented instructions)

* fix descriptor set layouts

* dumb error

* force fallback for testing

* treat Lod as relative to base_level

* optimization when lod index is constant
This commit is contained in:
baggins183 2026-03-17 12:47:19 -07:00 committed by GitHub
parent 88c3437240
commit 1bb152d976
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 168 additions and 40 deletions

View File

@ -220,20 +220,33 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id color_type = texture.data_types->Get(4);
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Sample, ms);
Id texel;
if (!texture.is_storage) {
const Id image = ctx.OpLoad(texture.image_type, texture.id);
operands.Add(spv::ImageOperandsMask::Lod, lod);
texel = ctx.OpImageFetch(color_type, image, coords, operands.mask, operands.operands);
} else {
Id image_ptr = texture.id;
if (ctx.profile.supports_image_load_store_lod) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
} else if (Sirit::ValidId(lod)) {
LOG_WARNING(Render, "Image read with LOD not supported by driver");
#if 1
// It's confusing what interactions will cause this code path so leave it as
// unreachable until a case is found.
// Normally IMAGE_LOAD_MIP should translate -> OpImageFetch
UNREACHABLE_MSG("Unsupported ImageRead with Lod");
#else
LOG_WARNING(Render, "Fallback for ImageRead with LOD");
ASSERT(texture.mip_fallback_mode == MipStorageFallbackMode::DynamicIndex);
const Id single_image_ptr_type =
ctx.TypePointer(spv::StorageClass::UniformConstant, texture.image_type);
image_ptr = ctx.OpAccessChain(single_image_ptr_type, image_ptr, std::array{lod});
#endif
}
const Id image = ctx.OpLoad(texture.image_type, image_ptr);
texel = ctx.OpImageRead(color_type, image, coords, operands.mask, operands.operands);
}
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texel) : texel;
@ -242,15 +255,20 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod
// Writes a texel to a storage image, honoring an explicit LOD operand when present.
// When the driver lacks native image-store-with-LOD support, falls back to selecting
// the per-mip descriptor from the bound descriptor array (set up by the resource
// tracking pass for MipStorageFallbackMode::DynamicIndex).
// NOTE(review): this diff view interleaved removed and added lines (duplicate
// `image` definition and stale LOG_WARNING); this is the coherent post-change body.
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms,
                    Id color) {
    const auto& texture = ctx.images[handle & 0xFFFF];
    Id image_ptr = texture.id;
    const Id color_type = texture.data_types->Get(4);
    ImageOperands operands;
    operands.Add(spv::ImageOperandsMask::Sample, ms);
    if (ctx.profile.supports_image_load_store_lod) {
        // Native support: pass the LOD directly as an image operand.
        operands.Add(spv::ImageOperandsMask::Lod, lod);
    } else if (Sirit::ValidId(lod)) {
        LOG_WARNING(Render, "Fallback for ImageWrite with LOD");
        ASSERT(texture.mip_fallback_mode == MipStorageFallbackMode::DynamicIndex);
        // Index into the array of single-mip image descriptors with the LOD value.
        const Id single_image_ptr_type =
            ctx.TypePointer(spv::StorageClass::UniformConstant, texture.image_type);
        image_ptr = ctx.OpAccessChain(single_image_ptr_type, image_ptr, std::array{lod});
    }
    const Id image = ctx.OpLoad(texture.image_type, image_ptr);
    const Id texel = texture.is_integer ? ctx.OpBitcast(color_type, color) : color;
    ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands);
}

View File

@ -961,23 +961,33 @@ void EmitContext::DefineImagesAndSamplers() {
const auto nfmt = sharp.GetNumberFmt();
const bool is_integer = AmdGpu::IsInteger(nfmt);
const bool is_storage = image_desc.is_written;
const MipStorageFallbackMode mip_fallback_mode = image_desc.mip_fallback_mode;
const VectorIds& data_types = GetAttributeType(*this, nfmt);
const Id sampled_type = data_types[1];
const Id image_type{ImageType(*this, image_desc, sampled_type)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
const u32 num_bindings = image_desc.NumBindings(info);
Id pointee_type = image_type;
if (mip_fallback_mode == MipStorageFallbackMode::DynamicIndex) {
pointee_type = TypeArray(pointee_type, ConstU32(num_bindings));
}
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, pointee_type)};
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::Binding, binding.unified);
binding.unified += num_bindings;
Decorate(id, spv::Decoration::DescriptorSet, 0U);
// TODO better naming for resources (flattened sharp_idx is not informative)
Name(id, fmt::format("{}_{}{}", stage, "img", image_desc.sharp_idx));
images.push_back({
.data_types = &data_types,
.id = id,
.sampled_type = is_storage ? sampled_type : TypeSampledImage(image_type),
.pointer_type = pointer_type,
.image_type = image_type,
.view_type = sharp.GetViewType(image_desc.is_array),
.is_integer = is_integer,
.is_storage = is_storage,
.mip_fallback_mode = mip_fallback_mode,
});
interfaces.push_back(id);
}

View File

@ -293,11 +293,11 @@ public:
const VectorIds* data_types;
Id id;
Id sampled_type;
Id pointer_type;
Id image_type;
AmdGpu::ImageType view_type;
bool is_integer = false;
bool is_storage = false;
MipStorageFallbackMode mip_fallback_mode{};
};
enum class PointerType : u32 {

View File

@ -19,7 +19,7 @@ void DeadCodeEliminationPass(IR::Program& program);
void ConstantPropagationPass(IR::BlockList& program);
void FlattenExtendedUserdataPass(IR::Program& program);
void ReadLaneEliminationPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program, const Profile& profile);
void CollectShaderInfoPass(IR::Program& program, const Profile& profile);
void LowerBufferFormatToRaw(IR::Program& program);
void LowerFp64ToFp32(IR::Program& program);

View File

@ -9,6 +9,7 @@
#include "shader_recompiler/ir/operand_helper.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/profile.h"
#include "video_core/amdgpu/resource.h"
namespace Shader::Optimization {
@ -255,7 +256,9 @@ public:
u32 Add(const ImageResource& desc) {
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array;
return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array &&
desc.mip_fallback_mode == existing.mip_fallback_mode &&
desc.constant_mip_index == existing.constant_mip_index;
})};
auto& image = image_resources[index];
image.is_atomic |= desc.is_atomic;
@ -529,14 +532,21 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
inst.SetArg(0, ir.Imm32(buffer_binding));
}
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors,
const Profile& profile) {
// Read image sharp.
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const IR::Inst* image_handle = inst.Arg(0).InstRecursive();
const auto tsharp = TrackSharp(image_handle, block, inst_info.pc);
const bool is_atomic = IsImageAtomicInstruction(inst);
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic;
const ImageResource image_res = {
const bool is_storage =
inst.GetOpcode() == IR::Opcode::ImageRead || inst.GetOpcode() == IR::Opcode::ImageWrite;
// ImageRead with !is_written gets emitted as OpImageFetch with LOD operand, doesn't
// need fallback (TODO is this 100% true?)
const bool needs_mip_storage_fallback =
inst_info.has_lod && is_written && !profile.supports_image_load_store_lod;
ImageResource image_res = {
.sharp_idx = tsharp,
.is_depth = bool(inst_info.is_depth),
.is_atomic = is_atomic,
@ -544,9 +554,42 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
.is_written = is_written,
.is_r128 = bool(inst_info.is_r128),
};
auto image = image_res.GetSharp(info);
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
if (needs_mip_storage_fallback) {
// If the mip level to IMAGE_(LOAD/STORE)_MIP is a constant, set up ImageResource
// so that we will only bind a single level.
// If index is dynamic, we will bind levels as an array
const auto view_type = image.GetViewType(image_res.is_array);
IR::Inst* body = inst.Arg(1).InstRecursive();
const auto lod_arg = [&] -> IR::Value {
switch (view_type) {
case AmdGpu::ImageType::Color1D: // x, [lod]
return body->Arg(1);
case AmdGpu::ImageType::Color1DArray: // x, slice, [lod]
case AmdGpu::ImageType::Color2D: // x, y, [lod]
return body->Arg(2);
case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod]
case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
return body->Arg(3);
case AmdGpu::ImageType::Color2DMsaa:
case AmdGpu::ImageType::Color2DMsaaArray:
default:
UNREACHABLE_MSG("Invalid image type {}", view_type);
}
}();
if (lod_arg.IsImmediate()) {
image_res.mip_fallback_mode = MipStorageFallbackMode::ConstantIndex;
image_res.constant_mip_index = lod_arg.U32();
} else {
image_res.mip_fallback_mode = MipStorageFallbackMode::DynamicIndex;
}
}
// Patch image instruction if image is FMask.
if (AmdGpu::IsFmask(image.GetDataFmt())) {
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
@ -1080,7 +1123,11 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
const auto has_ms = view_type == AmdGpu::ImageType::Color2DMsaa ||
view_type == AmdGpu::ImageType::Color2DMsaaArray;
ASSERT(!inst_info.has_lod || !has_ms);
const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{};
// If we are binding a single mip level as fallback, drop the argument
const auto lod =
(inst_info.has_lod && image_res.mip_fallback_mode != MipStorageFallbackMode::ConstantIndex)
? IR::U32{arg}
: IR::U32{};
const auto ms = has_ms ? IR::U32{arg} : IR::U32{};
const auto is_storage = image_res.is_written;
@ -1111,7 +1158,7 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
}
}
void ResourceTrackingPass(IR::Program& program) {
void ResourceTrackingPass(IR::Program& program, const Profile& profile) {
// Iterate resource instructions and patch them after finding the sharp.
auto& info = program.info;
@ -1122,7 +1169,7 @@ void ResourceTrackingPass(IR::Program& program) {
if (IsBufferInstruction(inst)) {
PatchBufferSharp(*block, inst, info, descriptors);
} else if (IsImageInstruction(inst)) {
PatchImageSharp(*block, inst, info, descriptors);
PatchImageSharp(*block, inst, info, descriptors, profile);
}
}
}

View File

@ -80,7 +80,7 @@ IR::Program TranslateProgram(const std::span<const u32>& code, Pools& pools, Inf
Shader::Optimization::RingAccessElimination(program, runtime_info);
Shader::Optimization::ReadLaneEliminationPass(program);
Shader::Optimization::FlattenExtendedUserdataPass(program);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::ResourceTrackingPass(program, profile);
Shader::Optimization::LowerBufferFormatToRaw(program);
Shader::Optimization::SharedMemorySimplifyPass(program, profile);
Shader::Optimization::SharedMemoryToStoragePass(program, runtime_info, profile);

View File

@ -71,6 +71,8 @@ struct BufferResource {
};
using BufferResourceList = boost::container::static_vector<BufferResource, NUM_BUFFERS>;
// How IMAGE_(LOAD/STORE)_MIP is emulated when the driver lacks native image
// load/store with an LOD operand:
//  - None: no fallback required (native support, or no LOD used).
//  - DynamicIndex: bind one descriptor per mip level; the shader indexes the
//    descriptor array with the runtime LOD value.
//  - ConstantIndex: the LOD is a compile-time constant, so only that single
//    mip level is bound and the LOD argument is dropped from the instruction.
enum class MipStorageFallbackMode : u32 { None, DynamicIndex, ConstantIndex };
struct ImageResource {
u32 sharp_idx;
bool is_depth{};
@ -78,6 +80,8 @@ struct ImageResource {
bool is_array{};
bool is_written{};
bool is_r128{};
MipStorageFallbackMode mip_fallback_mode{};
u32 constant_mip_index{};
constexpr AmdGpu::Image GetSharp(const auto& info) const noexcept {
AmdGpu::Image image{};
@ -102,6 +106,13 @@ struct ImageResource {
}
return image;
}
// Number of descriptor bindings this image consumes in the set layout.
// For the dynamic-index mip storage fallback, one descriptor is bound per mip
// level in [base_level, last_level]; every other mode uses a single descriptor.
u32 NumBindings(const auto& info) const {
const AmdGpu::Image tsharp = GetSharp(info);
return (mip_fallback_mode == MipStorageFallbackMode::DynamicIndex)
? (tsharp.last_level - tsharp.base_level + 1)
: 1;
}
};
using ImageResourceList = boost::container::static_vector<ImageResource, NUM_IMAGES>;

View File

@ -52,6 +52,8 @@ struct ImageSpecialization {
bool is_srgb = false;
AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{};
// FIXME any pipeline cache changes needed?
u32 num_bindings = 0;
bool operator==(const ImageSpecialization&) const = default;
};
@ -133,7 +135,7 @@ struct StageSpecialization {
}
});
ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
[&](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetViewType(desc.is_array);
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
spec.is_storage = desc.is_written;
@ -144,6 +146,7 @@ struct StageSpecialization {
spec.is_srgb = sharp.GetNumberFmt() == AmdGpu::NumberFormat::Srgb;
}
spec.num_conversion = sharp.GetNumberConversion();
spec.num_bindings = desc.NumBindings(*info);
});
ForEachSharp(binding, fmasks, info->fmasks,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {

View File

@ -48,13 +48,15 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
});
}
for (const auto& image : info->images) {
const u32 num_bindings = image.NumBindings(*info);
bindings.push_back({
.binding = binding++,
.binding = binding,
.descriptorType = image.is_written ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.descriptorCount = num_bindings,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
binding += num_bindings;
}
for (const auto& sampler : info->samplers) {
bindings.push_back({

View File

@ -457,13 +457,15 @@ void GraphicsPipeline::BuildDescSetLayout(bool preloading) {
});
}
for (const auto& image : stage->images) {
const u32 num_bindings = image.NumBindings(*stage);
bindings.push_back({
.binding = binding++,
.binding = binding,
.descriptorType = image.is_written ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.descriptorCount = num_bindings,
.stageFlags = stage_bit,
});
binding += num_bindings;
}
for (const auto& sampler : stage->samplers) {
bindings.push_back({

View File

@ -246,7 +246,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
.support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32),
.support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32),
.support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(),
// Query the real driver capability. The forced-`false` testing override
// ("force fallback for testing" in the commit log) must not ship: it disables
// native image load/store-with-LOD for every driver that supports it.
.supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(),
.supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(),
.supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(),
// TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed.

View File

@ -662,6 +662,13 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding) {
image_bindings.clear();
const u32 first_image_idx = image_infos.size();
// For loading/storing to explicit mip levels, when no native instruction support, bind an array
// of descriptors consecutively, 1 for each mip level. The shader can index this with LOD
// operand.
// This array holds the size of each consecutive array with the number of bindings consumed.
// This is currently always 1 for anything other than mip fallback arrays.
boost::container::small_vector<u32, 8> image_descriptor_array_sizes;
for (const auto& image_desc : stage.images) {
const auto tsharp = image_desc.GetSharp(stage);
@ -671,25 +678,43 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
if (tsharp.GetDataFmt() == AmdGpu::DataFormat::FormatInvalid) {
image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{});
image_descriptor_array_sizes.push_back(1);
continue;
}
auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{},
std::tuple{tsharp, image_desc});
image_id = texture_cache.FindImage(desc);
auto* image = &texture_cache.GetImage(image_id);
if (image->depth_id) {
// If this image has an associated depth image, it's a stencil attachment.
// Redirect the access to the actual depth-stencil buffer.
image_id = image->depth_id;
image = &texture_cache.GetImage(image_id);
const Shader::MipStorageFallbackMode mip_fallback_mode = image_desc.mip_fallback_mode;
const u32 num_bindings = image_desc.NumBindings(stage);
for (auto i = 0; i < num_bindings; i++) {
auto& [image_id, desc] = image_bindings.emplace_back(
std::piecewise_construct, std::tuple{}, std::tuple{tsharp, image_desc});
if (mip_fallback_mode == Shader::MipStorageFallbackMode::ConstantIndex) {
ASSERT(num_bindings == 1);
desc.view_info.range.base.level += image_desc.constant_mip_index;
desc.view_info.range.extent.levels = 1;
} else if (mip_fallback_mode == Shader::MipStorageFallbackMode::DynamicIndex) {
desc.view_info.range.base.level += i;
desc.view_info.range.extent.levels = 1;
}
image_id = texture_cache.FindImage(desc);
auto* image = &texture_cache.GetImage(image_id);
if (image->depth_id) {
// If this image has an associated depth image, it's a stencil attachment.
// Redirect the access to the actual depth-stencil buffer.
image_id = image->depth_id;
image = &texture_cache.GetImage(image_id);
}
if (image->binding.is_bound) {
// The image is already bound. In case if it is about to be used as storage we
// need to force general layout on it.
image->binding.force_general |= image_desc.is_written;
}
image->binding.is_bound = 1u;
}
if (image->binding.is_bound) {
// The image is already bound. In case if it is about to be used as storage we need
// to force general layout on it.
image->binding.force_general |= image_desc.is_written;
}
image->binding.is_bound = 1u;
image_descriptor_array_sizes.push_back(num_bindings);
}
// Second pass to re-bind images that were updated after binding
@ -749,16 +774,26 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
image.backing->state.layout);
}
}
u32 image_info_idx = first_image_idx;
u32 image_binding_idx = 0;
for (u32 array_size : image_descriptor_array_sizes) {
const auto& [_, desc] = image_bindings[image_binding_idx];
const bool is_storage = desc.type == VideoCore::TextureCache::BindingType::Storage;
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstBinding = binding.unified,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorCount = array_size,
.descriptorType =
is_storage ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos.back(),
.pImageInfo = &image_infos[image_info_idx],
});
image_info_idx += array_size;
image_binding_idx += array_size;
binding.unified += array_size;
}
for (const auto& sampler : stage.samplers) {