Test framework for GCN pipeline (#4272)

* Test framework for GCN pipeline

* Try fixing test compilation on CI
This commit is contained in:
Marcin Mikołajczyk 2026-04-19 21:15:08 +02:00 committed by GitHub
parent a95e71e156
commit 963d10f220
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 1194 additions and 30 deletions

View File

@ -104,7 +104,7 @@ jobs:
if: runner.os == 'Linux'
run: |
sudo apt-get update
sudo apt-get install -y ninja-build libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev libxrandr-dev libxfixes-dev libudev-dev uuid-dev uuid-dev
sudo apt-get install -y libc++-dev libc++abi-dev ninja-build libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev libxrandr-dev libxfixes-dev libudev-dev uuid-dev
- name: Install dependencies (macOS)
if: runner.os == 'macOS'
@ -118,6 +118,7 @@ jobs:
-DCMAKE_C_COMPILER="${{ matrix.compiler_c }}" \
-DCMAKE_BUILD_TYPE=Debug \
-DENABLE_TESTS=ON \
${{ runner.os == 'Linux' && '-DCMAKE_CXX_FLAGS=-stdlib=libc++' || '' }} \
${{ runner.os == 'macOS' && '-DCMAKE_OSX_ARCHITECTURES=x86_64' || '' }}
shell: bash
@ -139,7 +140,7 @@ jobs:
shell: bash
- name: Run tests with CTest
run: ctest --test-dir build --output-on-failure --progress
run: ctest --test-dir build --output-on-failure --progress -E 'GcnTest'
shell: bash
windows-sdl:

View File

@ -9,6 +9,8 @@ include(FetchContent)
)
FetchContent_MakeAvailable(googletest)
set(TEST_TARGETS shadps4_settings_test shadps4_gcn_test)
set(SETTINGS_TEST_SOURCES
# Under test
${CMAKE_SOURCE_DIR}/src/core/emulator_settings.cpp
@ -30,13 +32,111 @@ set(SETTINGS_TEST_SOURCES
test_emulator_settings.cpp
)
add_executable(shadps4_settings_test ${SETTINGS_TEST_SOURCES})
set(GCN_TEST_SOURCES
# Under test
${CMAKE_SOURCE_DIR}/src/core/emulator_settings.cpp
${CMAKE_SOURCE_DIR}/src/core/emulator_state.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/spirv_emit_context.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/format.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/decode.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/decode.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/fetch_shader.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/fetch_shader.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/data_share.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/export.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/translate.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/translate.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/scalar_alu.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/scalar_flow.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/scalar_memory.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/vector_alu.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/vector_interpolation.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/vector_memory.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/abstract_syntax_list.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/abstract_syntax_list.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/basic_block.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/basic_block.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/microinstruction.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/attribute.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/attribute.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/ir_emitter.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/ir_emitter.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/opcodes.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/opcodes.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/patch.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/patch.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/program.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/program.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/ir_passes.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/identity_removal_pass.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/post_order.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/post_order.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/type.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/type.h
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/value.cpp
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/value.h
${CMAKE_SOURCE_DIR}/src/video_core/amdgpu/pixel_format.cpp
${CMAKE_SOURCE_DIR}/src/video_core/amdgpu/pixel_format.h
${CMAKE_SOURCE_DIR}/src/video_core/amdgpu/resource.h
target_include_directories(shadps4_settings_test PRIVATE
${CMAKE_SOURCE_DIR}/src
${CMAKE_SOURCE_DIR}
# Minimal common support
${CMAKE_SOURCE_DIR}/src/common/path_util.cpp
${CMAKE_SOURCE_DIR}/src/common/assert.cpp
${CMAKE_SOURCE_DIR}/src/common/error.cpp
${CMAKE_SOURCE_DIR}/src/common/io_file.cpp
${CMAKE_SOURCE_DIR}/src/common/string_util.cpp
${CMAKE_SOURCE_DIR}/src/common/logging/log.cpp
${CMAKE_SOURCE_DIR}/src/common/ntapi.cpp
# Stubs that replace dependencies
stubs/common_stub.cpp
stubs/resource_tracking_pass_stub.cpp
stubs/scm_rev_stub.cpp
gcn/gcn_test_runner.hpp
gcn/gcn_test_runner.cpp
gcn/translator.hpp
gcn/translator.cpp
# Tests
gcn/test_gcn_instructions.cpp
)
add_executable(shadps4_settings_test ${SETTINGS_TEST_SOURCES})
add_executable(shadps4_gcn_test ${GCN_TEST_SOURCES})
foreach(t ${TEST_TARGETS})
target_include_directories(${t} PRIVATE
${CMAKE_SOURCE_DIR}/src
${CMAKE_SOURCE_DIR}
)
target_compile_features(${t} PRIVATE cxx_std_23)
target_compile_definitions(${t} PRIVATE BOOST_ASIO_STANDALONE)
endforeach()
target_link_libraries(shadps4_settings_test PRIVATE
GTest::gtest_main
fmt::fmt
@ -45,40 +145,56 @@ target_link_libraries(shadps4_settings_test PRIVATE
SDL3::SDL3
spdlog::spdlog
)
target_compile_features(shadps4_settings_test PRIVATE cxx_std_23)
target_compile_definitions(shadps4_settings_test PRIVATE BOOST_ASIO_STANDALONE)
target_link_libraries(shadps4_gcn_test PRIVATE
GTest::gtest_main
fmt::fmt
nlohmann_json::nlohmann_json
magic_enum::magic_enum
toml11::toml11
Boost::headers
Vulkan::Headers
sirit
SDL3::SDL3
spdlog::spdlog
)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR
CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
include(CheckCXXSymbolExists)
check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP)
if (LIBCPP)
target_compile_options(shadps4_settings_test PRIVATE -fexperimental-library)
foreach(t ${TEST_TARGETS})
target_compile_options(${t} PRIVATE -fexperimental-library)
endforeach()
endif()
endif()
if (WIN32)
target_compile_definitions(shadps4_settings_test PRIVATE
NOMINMAX
WIN32_LEAN_AND_MEAN
NTDDI_VERSION=0x0A000006
_WIN32_WINNT=0x0A00
WINVER=0x0A00
)
if (MSVC)
target_compile_definitions(shadps4_settings_test PRIVATE
_CRT_SECURE_NO_WARNINGS
_CRT_NONSTDC_NO_DEPRECATE
_SCL_SECURE_NO_WARNINGS
_TIMESPEC_DEFINED
foreach(t ${TEST_TARGETS})
if (WIN32)
target_link_libraries(${t} PRIVATE onecore)
target_compile_definitions(${t} PRIVATE
NOMINMAX
WIN32_LEAN_AND_MEAN
NTDDI_VERSION=0x0A000006
_WIN32_WINNT=0x0A00
WINVER=0x0A00
)
if (MSVC)
target_compile_definitions(${t} PRIVATE
_CRT_SECURE_NO_WARNINGS
_CRT_NONSTDC_NO_DEPRECATE
_SCL_SECURE_NO_WARNINGS
_TIMESPEC_DEFINED
)
endif()
endif()
endif()
endforeach()
include(GoogleTest)
gtest_discover_tests(shadps4_settings_test
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
PROPERTIES TIMEOUT 60
)
foreach(t ${TEST_TARGETS})
gtest_discover_tests(${t}
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
PROPERTIES TIMEOUT 60
)
endforeach()

View File

@ -0,0 +1,431 @@
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "gcn_test_runner.hpp"
#include <algorithm>
#include <array>
#include <format>
#include <memory>
#include <mutex>
#include <ranges>
#include <string_view>
#include <vector>
#include "shader_recompiler/resource.h"
// Exactly one TU must define the dynamic dispatcher storage.
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
namespace gcn_test {
namespace {
constexpr bool kEnableValidation =
#ifdef NDEBUG
false;
#else
true;
#endif
// Wrap an error category and context message into the std::unexpected value
// used by every std::expected return path in this file.
auto make_error(Error code, std::string message) {
    ErrorInfo info{code, std::move(message)};
    return std::unexpected{std::move(info)};
}
// Pick the index of a device memory type that is permitted by `type_filter`
// (a bitmask from VkMemoryRequirements) and carries every flag in `required`.
auto find_memory_type(vk::PhysicalDevice pd, std::uint32_t type_filter,
                      vk::MemoryPropertyFlags required) -> std::expected<std::uint32_t, ErrorInfo> {
    const auto mem_props = pd.getMemoryProperties();
    for (std::uint32_t index = 0; index < mem_props.memoryTypeCount; ++index) {
        const bool allowed = (type_filter & (1u << index)) != 0;
        const bool has_all_flags =
            (mem_props.memoryTypes[index].propertyFlags & required) == required;
        if (allowed && has_all_flags) {
            return index;
        }
    }
    return make_error(Error::BufferAllocationFailed, "no suitable memory type found");
}
// RAII wrapper for a host-visible Vulkan buffer, its backing memory, and a
// persistent mapping. Owned via unique_ptr by create_host_buffer's callers.
struct HostBuffer {
    vk::Device device;
    vk::Buffer buffer;
    vk::DeviceMemory memory;
    void* mapped = nullptr;
    ~HostBuffer() {
        // Handles may be null if create_host_buffer failed part-way; each
        // release is guarded so a partially-built instance is safe to drop.
        // Order: unmap before freeing the memory that backs the mapping.
        if (mapped)
            device.unmapMemory(memory);
        if (buffer)
            device.destroyBuffer(buffer);
        if (memory)
            device.freeMemory(memory);
    }
    HostBuffer() = default;
    // Non-copyable; deleting the copy operations also suppresses the implicit
    // moves, so instances only ever travel inside unique_ptr.
    HostBuffer(const HostBuffer&) = delete;
    HostBuffer& operator=(const HostBuffer&) = delete;
};
// Create a buffer in host-visible|host-coherent memory, bind it, and map it
// for the buffer's lifetime. On any failure an ErrorInfo is returned; handles
// acquired so far are released by ~HostBuffer, since `buf` owns them from the
// moment they are created.
auto create_host_buffer(vk::Device dev, vk::PhysicalDevice pd, vk::DeviceSize size,
                        vk::BufferUsageFlags usage)
    -> std::expected<std::unique_ptr<HostBuffer>, ErrorInfo> {
    auto buf = std::make_unique<HostBuffer>();
    buf->device = dev;
    auto [br, buffer] = dev.createBuffer(vk::BufferCreateInfo{
        .size = size,
        .usage = usage,
        .sharingMode = vk::SharingMode::eExclusive,
    });
    if (br != vk::Result::eSuccess)
        return make_error(Error::BufferAllocationFailed, "createBuffer");
    buf->buffer = buffer;
    // Choose a memory type the buffer fits in that the host can also map.
    auto req = dev.getBufferMemoryRequirements(buffer);
    auto mt = find_memory_type(pd, req.memoryTypeBits,
                               vk::MemoryPropertyFlagBits::eHostVisible |
                                   vk::MemoryPropertyFlagBits::eHostCoherent);
    if (!mt)
        return std::unexpected(mt.error());
    auto [mr, mem] = dev.allocateMemory({
        .allocationSize = req.size,
        .memoryTypeIndex = *mt,
    });
    if (mr != vk::Result::eSuccess)
        return make_error(Error::BufferAllocationFailed, "allocateMemory");
    buf->memory = mem;
    if (dev.bindBufferMemory(buffer, mem, 0) != vk::Result::eSuccess)
        return make_error(Error::BufferAllocationFailed, "bindBufferMemory");
    // Map once up front; eHostCoherent means no explicit flush/invalidate is
    // needed when the GPU and CPU exchange data through this buffer.
    auto [mapr, ptr] = dev.mapMemory(mem, 0, size);
    if (mapr != vk::Result::eSuccess)
        return make_error(Error::BufferAllocationFailed, "mapMemory");
    buf->mapped = ptr;
    return buf;
}
// Process-wide singleton storage; guarded by g_runner_mutex in
// Runner::instance(). Never torn down explicitly — released at process exit.
std::mutex g_runner_mutex;
std::unique_ptr<Runner> g_runner;
} // namespace
// Tear down all Vulkan objects. waitIdle runs first so the cached command
// buffer cannot still be executing when its pool and the fence are destroyed.
// Every handle is guarded because initialize() can fail part-way through,
// leaving later handles null.
Runner::~Runner() {
    if (device_) {
        // Under VULKAN_HPP_NO_EXCEPTIONS waitIdle returns a [[nodiscard]]
        // vk::Result; there is no useful recovery in a destructor, so the
        // result is discarded explicitly instead of silently.
        static_cast<void>(device_.waitIdle());
        if (fence_)
            device_.destroyFence(fence_);
        if (pipeline_layout_)
            device_.destroyPipelineLayout(pipeline_layout_);
        if (descriptor_set_layout_)
            device_.destroyDescriptorSetLayout(descriptor_set_layout_);
        // Destroying the pool frees command_buffer_ implicitly.
        if (command_pool_)
            device_.destroyCommandPool(command_pool_);
        device_.destroy();
    }
    if (instance_)
        instance_.destroy();
}
// Lazily create the process-wide Runner. Thread-safe via g_runner_mutex; the
// first successful initialize() wins and every later call returns the same
// pointer. Initialization failure is reported without caching anything.
std::expected<Runner*, ErrorInfo> Runner::instance() {
    const std::lock_guard lock{g_runner_mutex};
    if (!g_runner) {
        // The constructor is private, so unique_ptr is built from raw `new`.
        std::unique_ptr<Runner> fresh{new Runner{}};
        auto init = fresh->initialize();
        if (!init) {
            return std::unexpected(init.error());
        }
        g_runner = std::move(fresh);
    }
    return g_runner.get();
}
// One-time Vulkan bring-up, strictly ordered: instance -> physical device ->
// logical device + feature chain -> command pool/buffer -> fence ->
// push-descriptor set layout -> pipeline layout. Any failure returns an
// ErrorInfo; handles created before the failure are released by ~Runner.
std::expected<void, ErrorInfo> Runner::initialize() {
    VULKAN_HPP_DEFAULT_DISPATCHER.init();
    // ---- Instance ------------------------------------------------------
    vk::ApplicationInfo app_info{
        .pApplicationName = "gcn_test_runner",
        .applicationVersion = 1,
        .pEngineName = "gcn_test_runner",
        .engineVersion = 1,
        .apiVersion = vk::ApiVersion13,
    };
    // Validation layer only in debug builds (kEnableValidation).
    std::vector<const char*> layers;
    if (kEnableValidation)
        layers.push_back("VK_LAYER_KHRONOS_validation");
    auto [ir, inst] = vk::createInstance({
        .pApplicationInfo = &app_info,
        .enabledLayerCount = static_cast<std::uint32_t>(layers.size()),
        .ppEnabledLayerNames = layers.data(),
    });
    if (ir != vk::Result::eSuccess)
        return make_error(Error::InstanceCreationFailed,
                          std::format("createInstance: {}", vk::to_string(ir)));
    instance_ = inst;
    VULKAN_HPP_DEFAULT_DISPATCHER.init(instance_);
    // ---- Pick physical device with the extensions we need -------------
    auto [pr, devs] = instance_.enumeratePhysicalDevices();
    if (pr != vk::Result::eSuccess || devs.empty())
        return make_error(Error::NoSuitableDevice, "no Vulkan devices");
    constexpr std::array required_exts{
        VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
        VK_KHR_MAINTENANCE_6_EXTENSION_NAME,
        VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
    };
    // First device that has all required extensions and a compute-capable
    // queue family is taken; no further ranking is done.
    for (auto pd : devs) {
        auto [er, exts] = pd.enumerateDeviceExtensionProperties();
        if (er != vk::Result::eSuccess)
            continue;
        auto has_ext = [&](const char* name) {
            return std::ranges::any_of(
                exts, [&](auto& e) { return std::string_view{e.extensionName} == name; });
        };
        if (!std::ranges::all_of(required_exts, has_ext))
            continue;
        auto families = pd.getQueueFamilyProperties();
        for (std::uint32_t i = 0; i < families.size(); ++i) {
            if (families[i].queueFlags & vk::QueueFlagBits::eCompute) {
                physical_device_ = pd;
                queue_family_ = i;
                break;
            }
        }
        if (physical_device_)
            break;
    }
    if (!physical_device_)
        return make_error(Error::NoSuitableDevice,
                          "no device with compute + shader_object + maintenance6 + "
                          "push_descriptor");
    // NOTE(review): deliberately pinned to sizeof(Shader::PushData) rather
    // than the device limit (commented out below) — confirm this is intended.
    max_push_constant_size_ = sizeof(Shader::PushData);
    // physical_device_.getProperties().limits.maxPushConstantsSize;
    // ---- Device with feature chain ------------------------------------
    float priority = 1.0f;
    vk::DeviceQueueCreateInfo qci{
        .queueFamilyIndex = queue_family_,
        .queueCount = 1,
        .pQueuePriorities = &priority,
    };
    // pNext chain: v12 -> v11 -> maintenance6 -> shader_object.
    vk::PhysicalDeviceShaderObjectFeaturesEXT so_feat{.shaderObject = VK_TRUE};
    vk::PhysicalDeviceMaintenance6FeaturesKHR m6_feat{
        .pNext = &so_feat,
        .maintenance6 = VK_TRUE,
    };
    vk::PhysicalDeviceVulkan11Features v11_feat{
        .pNext = &m6_feat,
        .uniformAndStorageBuffer16BitAccess = VK_TRUE,
    };
    vk::PhysicalDeviceVulkan12Features v12_feat{
        .pNext = &v11_feat,
        .uniformAndStorageBuffer8BitAccess = VK_TRUE,
        .shaderInt8 = VK_TRUE,
    };
    vk::PhysicalDeviceFeatures phys_feat{
        .shaderInt64 = VK_TRUE,
        .shaderInt16 = VK_TRUE,
    };
    auto [dr, dev] = physical_device_.createDevice({
        .pNext = &v12_feat,
        .queueCreateInfoCount = 1,
        .pQueueCreateInfos = &qci,
        .enabledExtensionCount = required_exts.size(),
        .ppEnabledExtensionNames = required_exts.data(),
        .pEnabledFeatures = &phys_feat,
    });
    if (dr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed,
                          std::format("createDevice: {}", vk::to_string(dr)));
    device_ = dev;
    VULKAN_HPP_DEFAULT_DISPATCHER.init(device_);
    queue_ = device_.getQueue(queue_family_, 0);
    // ---- Command pool + cached command buffer -------------------------
    auto [cpr, pool] = device_.createCommandPool({
        .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
        .queueFamilyIndex = queue_family_,
    });
    if (cpr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createCommandPool");
    command_pool_ = pool;
    auto [cbr, cbs] = device_.allocateCommandBuffers({
        .commandPool = command_pool_,
        .level = vk::CommandBufferLevel::ePrimary,
        .commandBufferCount = 1,
    });
    if (cbr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "allocateCommandBuffers");
    command_buffer_ = cbs[0];
    // ---- Fence (cached, reset per call) --------------------------------
    auto [fr, fence] = device_.createFence({});
    if (fr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createFence");
    fence_ = fence;
    // ---- Descriptor set layout with push-descriptor flag --------------
    // Single storage buffer at binding 0. No descriptor sets are ever
    // allocated from this layout — the layout is just used to tell the
    // pipeline layout and shader what the push-descriptor shape is.
    vk::DescriptorSetLayoutBinding dsl_binding{
        .binding = 0,
        .descriptorType = vk::DescriptorType::eStorageBuffer,
        .descriptorCount = 1,
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
    };
    auto [dslr, dsl] = device_.createDescriptorSetLayout({
        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
        .bindingCount = 1,
        .pBindings = &dsl_binding,
    });
    if (dslr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createDescriptorSetLayout");
    descriptor_set_layout_ = dsl;
    // ---- Pipeline layout sized to device max push constants -----------
    vk::PushConstantRange pc{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .offset = 0,
        .size = max_push_constant_size_,
    };
    auto [plr, pl] = device_.createPipelineLayout({
        .setLayoutCount = 1,
        .pSetLayouts = &descriptor_set_layout_,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = &pc,
    });
    if (plr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createPipelineLayout");
    pipeline_layout_ = pl;
    return {};
}
// Execute a compute SPIR-V module once and copy the storage buffer at
// binding 0 back into `output`. Per call it creates a fresh host buffer and
// shader object; the command buffer and fence are cached members that are
// reset each invocation. Blocks until the GPU finishes.
std::expected<void, ErrorInfo> Runner::run_raw(std::span<const std::uint32_t> spirv,
                                               std::span<const std::byte> push_constants,
                                               std::span<std::byte> output, DispatchSize dispatch) {
    if (push_constants.size() > max_push_constant_size_)
        return make_error(Error::PushConstantTooLarge,
                          std::format("push constants {} exceed device max {}",
                                      push_constants.size(), max_push_constant_size_));
    if (output.empty())
        return make_error(Error::OutputTooLarge, "output buffer is empty");
    // Per-call: output buffer --------------------------------------------
    auto buf_r = create_host_buffer(device_, physical_device_, output.size(),
                                    vk::BufferUsageFlagBits::eStorageBuffer);
    if (!buf_r)
        return std::unexpected(buf_r.error());
    auto& output_buffer = *buf_r;
    // Zero-fill so bytes the shader never writes read back as 0.
    std::memset(output_buffer->mapped, 0, output.size());
    // Per-call: shader object --------------------------------------------
    vk::PushConstantRange shader_pc{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .offset = 0,
        // .size = static_cast<std::uint32_t>(push_constants.size()),
        // NOTE(review): range spans the whole PushData struct even when fewer
        // bytes are pushed — confirm this matches the emitted SPIR-V block.
        .size = sizeof(Shader::PushData),
    };
    vk::ShaderCreateInfoEXT sci{
        .stage = vk::ShaderStageFlagBits::eCompute,
        .codeType = vk::ShaderCodeTypeEXT::eSpirv,
        .codeSize = spirv.size() * sizeof(std::uint32_t),
        .pCode = spirv.data(),
        .pName = "main",
        .setLayoutCount = 1,
        .pSetLayouts = &descriptor_set_layout_,
        .pushConstantRangeCount = push_constants.empty() ? 0u : 1u,
        .pPushConstantRanges = push_constants.empty() ? nullptr : &shader_pc,
    };
    auto [sr, shaders] = device_.createShadersEXT(sci);
    if (sr != vk::Result::eSuccess)
        return make_error(Error::ShaderCreationFailed,
                          std::format("createShadersEXT: {}", vk::to_string(sr)));
    auto shader = shaders[0];
    // RAII guard so the shader object is destroyed on every return path.
    struct ShaderGuard {
        vk::Device d;
        vk::ShaderEXT s;
        ~ShaderGuard() {
            if (s)
                d.destroyShaderEXT(s);
        }
    } sg{device_, shader};
    // Reset cached command buffer + fence --------------------------------
    device_.resetFences(fence_);
    command_buffer_.reset();
    if (command_buffer_.begin({
            .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
        }) != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "cmd.begin");
    // Bind shader object -------------------------------------------------
    vk::ShaderStageFlagBits stage = vk::ShaderStageFlagBits::eCompute;
    command_buffer_.bindShadersEXT(1, &stage, &shader);
    // Push descriptor: binding 0 = output SSBO ---------------------------
    vk::DescriptorBufferInfo dbi{
        .buffer = output_buffer->buffer,
        .offset = 0,
        .range = VK_WHOLE_SIZE,
    };
    vk::WriteDescriptorSet write{
        .dstBinding = 0,
        .descriptorCount = 1,
        .descriptorType = vk::DescriptorType::eStorageBuffer,
        .pBufferInfo = &dbi,
    };
    vk::PushDescriptorSetInfoKHR push_desc{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .layout = pipeline_layout_,
        .set = 0,
        .descriptorWriteCount = 1,
        .pDescriptorWrites = &write,
    };
    command_buffer_.pushDescriptorSet2KHR(push_desc);
    // Push constants -----------------------------------------------------
    if (!push_constants.empty()) {
        vk::PushConstantsInfoKHR pci{
            .layout = pipeline_layout_,
            .stageFlags = vk::ShaderStageFlagBits::eCompute,
            .offset = 16, // fall onto ud_regs in PushData
            .size = static_cast<std::uint32_t>(push_constants.size()),
            .pValues = push_constants.data(),
        };
        // NOTE(review): the size check above permits payloads up to
        // max_push_constant_size_, but the write starts at offset 16 — a
        // near-max payload would overrun the declared range. Confirm callers
        // stay within max - 16 bytes.
        command_buffer_.pushConstants2KHR(pci);
    }
    command_buffer_.dispatch(dispatch.x, dispatch.y, dispatch.z);
    // Make shader writes visible to the host read-back below.
    vk::MemoryBarrier barrier{
        .srcAccessMask = vk::AccessFlagBits::eShaderWrite,
        .dstAccessMask = vk::AccessFlagBits::eHostRead,
    };
    command_buffer_.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
                                    vk::PipelineStageFlagBits::eHost, {}, barrier, {}, {});
    if (command_buffer_.end() != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "cmd.end");
    vk::SubmitInfo si{
        .commandBufferCount = 1,
        .pCommandBuffers = &command_buffer_,
    };
    if (queue_.submit(si, fence_) != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "queue.submit");
    // Synchronous wait: this runner has no overlapping submissions.
    if (device_.waitForFences(fence_, VK_TRUE, UINT64_MAX) != vk::Result::eSuccess)
        return make_error(Error::ExecutionFailed, "waitForFences");
    std::memcpy(output.data(), output_buffer->mapped, output.size());
    return {};
}
} // namespace gcn_test

View File

@ -0,0 +1,110 @@
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#define VULKAN_HPP_NO_EXCEPTIONS
#define VULKAN_HPP_NO_CONSTRUCTORS
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#include <vulkan/vulkan.hpp>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <expected>
#include <span>
#include <string>
#include <type_traits>
namespace gcn_test {
// Compute dispatch dimensions; defaults to a single workgroup (1, 1, 1).
struct DispatchSize {
    std::uint32_t x = 1;
    std::uint32_t y = 1;
    std::uint32_t z = 1;
};
// Failure categories reported by Runner through ErrorInfo.
enum class Error {
    NoSuitableDevice,        // no device with the required extensions + compute queue
    InstanceCreationFailed,
    DeviceCreationFailed,
    ShaderCreationFailed,
    BufferAllocationFailed,  // also covers bind/map and memory-type lookup failures
    CommandSubmissionFailed,
    PushConstantTooLarge,    // payload exceeds the pipeline-layout range
    OutputTooLarge,          // also reported for an empty output span
    ExecutionFailed,         // fence wait failed after submission
};
// Error category plus a short human-readable context string (typically the
// name of the Vulkan call that failed).
struct ErrorInfo {
    Error code;
    std::string message;
};
// Lazily-initialized singleton that executes compute-stage SPIR-V on a Vulkan
// device and reads results back from a storage buffer at binding 0.
class Runner {
public:
    // Returns the shared Runner, creating and initializing it on first use.
    // Thread-safe; initialization failure is reported as ErrorInfo.
    static std::expected<Runner*, ErrorInfo> instance();
    // Run `spirv` once with the given dispatch size, optionally supplying
    // `push_constants`, and copy the bound storage buffer into `output`.
    // Blocks until execution completes.
    std::expected<void, ErrorInfo> run_raw(
        std::span<const std::uint32_t> spirv,
        std::span<const std::byte> push_constants,
        std::span<std::byte> output,
        DispatchSize dispatch = {}
    );
    // Typed convenience wrapper: trivially-copyable push-constant in,
    // trivially-copyable result out (value-initialized before the run).
    template <typename OutputT, typename PushT>
    std::expected<OutputT, ErrorInfo> run(
        std::span<const std::uint32_t> spirv,
        const PushT& push,
        DispatchSize dispatch = {}
    ) {
        static_assert(std::is_trivially_copyable_v<PushT>);
        static_assert(std::is_trivially_copyable_v<OutputT>);
        OutputT result{};
        auto r = run_raw(
            spirv,
            {reinterpret_cast<const std::byte*>(&push), sizeof(PushT)},
            {reinterpret_cast<std::byte*>(&result), sizeof(OutputT)},
            dispatch
        );
        if (!r) return std::unexpected(r.error());
        return result;
    }
    // As above, but with no push-constant payload.
    template <typename OutputT>
    std::expected<OutputT, ErrorInfo> run(
        std::span<const std::uint32_t> spirv,
        DispatchSize dispatch = {}
    ) {
        static_assert(std::is_trivially_copyable_v<OutputT>);
        OutputT result{};
        auto r = run_raw(
            spirv, {},
            {reinterpret_cast<std::byte*>(&result), sizeof(OutputT)},
            dispatch
        );
        if (!r) return std::unexpected(r.error());
        return result;
    }
    ~Runner();
    Runner(const Runner&) = delete;
    Runner& operator=(const Runner&) = delete;
private:
    // Construction goes through instance(); initialize() performs the actual
    // Vulkan bring-up and may fail.
    Runner() = default;
    std::expected<void, ErrorInfo> initialize();
    vk::Instance instance_;
    vk::PhysicalDevice physical_device_;
    vk::Device device_;
    vk::Queue queue_;
    std::uint32_t queue_family_ = 0;
    vk::CommandPool command_pool_;
    vk::CommandBuffer command_buffer_; // cached, reset per call
    vk::Fence fence_; // cached, reset per call
    vk::DescriptorSetLayout descriptor_set_layout_; // push-descriptor
    vk::PipelineLayout pipeline_layout_;
    // Upper bound enforced by run_raw; overwritten during initialize().
    std::uint32_t max_push_constant_size_ = 128;
};
} // namespace gcn_test

View File

@ -0,0 +1,63 @@
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cmath>
#include <gtest/gtest.h>
#include "gcn_test_runner.hpp"
#include "translator.hpp"
// Shared fixture for GCN instruction tests. Currently stateless; the hooks
// are kept as extension points for future per-test setup.
class GcnTest : public ::testing::Test {
protected:
    void SetUp() override {}
    void TearDown() override {}
};
// Two-float payload handed to Runner::run as push-constant data (per the
// usage example below, the values become the instruction's register inputs —
// presumably v0/v1; confirm against the translator harness).
struct F32x2 {
    float a;
    float b;
};
// Example
// TEST_F(GcnTest, test_name) {
// // Runner sets the vulkan context
// auto runner = gcn_test::Runner::instance().value();
//
// // v_add_f32 v0, v0, v1
// auto spirv = TranslateToSpirv(0x06000300);
//
// // run<T> tells how to interpret the result (only 32bit as of now)
// // the second argument is templated, it can be at most 4 u32s
// // the data is accessible by the instruction in v0-4 and s0-4 (mirrored)
// // the result has to be placed in v0
// auto result = runner->run<float>(spirv, F32x2{1.5f, 6.0f});
//
// EXPECT_TRUE(result.has_value());
// EXPECT_EQ(*result, 7.5f);
// }
// 1.5 + 6.0: both operands and the sum are exactly representable in binary
// floating point, so an exact EXPECT_EQ against 7.5 is safe.
TEST_F(GcnTest, add_f32) {
    auto runner = gcn_test::Runner::instance().value();
    // v_add_f32 v0, v0, v1
    auto spirv = TranslateToSpirv(0x06000300);
    auto result = runner->run<float>(spirv, F32x2{1.5f, 6.0f});
    EXPECT_TRUE(result.has_value());
    EXPECT_EQ(*result, 7.5f);
}
// Adding a quiet NaN operand must propagate NaN through v_add_f32.
// std::nanf (declared in <cmath>, which this file already includes) is used
// instead of std::numeric_limits<float>::quiet_NaN(), because <limits> is not
// included here and must not be relied on transitively.
TEST_F(GcnTest, add_nan) {
    auto runner = gcn_test::Runner::instance().value();
    // v_add_f32 v0, v0, v1
    auto spirv = TranslateToSpirv(0x06000300);
    auto result = runner->run<float>(spirv, F32x2{1.0f, std::nanf("")});
    EXPECT_TRUE(result.has_value());
    EXPECT_TRUE(std::isnan(*result));
}

98
tests/gcn/translator.cpp Normal file
View File

@ -0,0 +1,98 @@
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "translator.hpp"
#include <iostream>
#include "common/io_file.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/passes/ir_passes.h"
#include "shader_recompiler/ir/post_order.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/recompiler.h"
using namespace Shader;
namespace Shader::Optimization {
void ResourceTrackingPassStub(IR::Program& program, const Profile& profile);
}
// Build a minimal compute program around the single instruction encoded in
// `raw_gcn_inst`: prologue -> copy s0..s3 into v0..v3 -> instruction under
// test -> buffer_store_dword of v0 into the bound buffer; then run the IR
// pass pipeline and emit SPIR-V.
std::vector<u32> TranslateToSpirv(u64 raw_gcn_inst) {
    // Split the 64-bit encoding into the two dwords the decoder consumes,
    // low dword first. Single-dword instructions only use the first word.
    std::array<u32, 2> provided_inst{static_cast<u32>(raw_gcn_inst & 0xFFFFFFFFU),
                                     static_cast<u32>(raw_gcn_inst >> 32)};
    std::array<u32, 2> store{
        0xe0700000,
        0x80000000 // buffer_store_dword v0, v0, s[0:3], 0
    };
    Gcn::GcnCodeSlice first(provided_inst.data(), provided_inst.data() + provided_inst.size());
    Gcn::GcnCodeSlice second(store.data(), store.data() + store.size());
    Gcn::GcnDecodeContext decoder;
    Gcn::GcnInst inst = decoder.decodeInstruction(first);
    Gcn::GcnInst store_inst = decoder.decodeInstruction(second);
    // Compute-stage shader info; user data holds a null buffer sharp so the
    // store has something to resolve against.
    Shader::Info info{};
    info.stage = Stage::Compute;
    info.l_stage = LogicalStage::Compute;
    info.flattened_ud_buf.resize(4);
    AmdGpu::Buffer buf = AmdGpu::Buffer::Null();
    std::memcpy(info.flattened_ud_buf.data(), &buf, sizeof(buf));
    // Single-block program shape: Block -> Return.
    IR::Program program{info};
    Pools pools{};
    IR::Block* block = pools.block_pool.Create(pools.inst_pool);
    program.blocks.push_back(block);
    program.syntax_list.emplace_back();
    program.syntax_list.back().type = IR::AbstractSyntaxNode::Type::Block;
    program.syntax_list.back().data.block = block;
    program.syntax_list.emplace_back();
    program.syntax_list.back().type = IR::AbstractSyntaxNode::Type::Return;
    program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
    Profile profile{};
    profile.supported_spirv = 0x00010600; // presumably SPIR-V 1.6 — confirm encoding
    profile.subgroup_size = 32;
    profile.supports_robust_buffer_access = true;
    RuntimeInfo runtime_info{};
    runtime_info.Initialize(Stage::Compute);
    runtime_info.num_user_data = 4;
    runtime_info.cs_info.workgroup_size = {1, 1, 1};
    Gcn::Translator translator(program.info, runtime_info, profile);
    translator.EmitPrologue(block);
    for (int i = 0; i < 4; ++i) {
        // copy user data from SGPR to VGPR as (most?) instructions cannot access
        // two SGPRs
        Shader::Gcn::GcnInst mov{};
        mov.src[0].field = Shader::Gcn::OperandField::ScalarGPR;
        mov.src[0].code = i;
        mov.dst[0].field = Shader::Gcn::OperandField::VectorGPR;
        mov.dst[0].code = i;
        translator.S_MOV(mov);
    }
    translator.TranslateInstruction(inst);
    translator.TranslateInstruction(store_inst);
    // Standard optimization pipeline, with resource tracking replaced by the
    // test stub (forward-declared above).
    Shader::Optimization::SsaRewritePass(program.post_order_blocks);
    Shader::Optimization::IdentityRemovalPass(program.blocks);
    Shader::Optimization::ResourceTrackingPassStub(program, profile);
    Shader::Optimization::ConstantPropagationPass(program.blocks);
    Shader::Optimization::DeadCodeEliminationPass(program);
    Shader::Optimization::CollectShaderInfoPass(program, profile);
    Backend::Bindings bindings{};
    const auto spirv = Backend::SPIRV::EmitSPIRV(profile, runtime_info, program, bindings);
    return spirv;
}

10
tests/gcn/translator.hpp Normal file
View File

@ -0,0 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "common/types.h"
// Translate a single raw 64-bit GCN instruction encoding into a complete
// SPIR-V compute module (the instruction is wrapped in a store-to-buffer
// harness; see translator.cpp).
std::vector<u32> TranslateToSpirv(u64 raw_gcn_inst);

View File

@ -0,0 +1,335 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/breadth_first_search.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/ir/operand_helper.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/profile.h"
#include "video_core/amdgpu/resource.h"
namespace Shader::Optimization {
namespace {
using SharpLocation = u32;
// True for every buffer atomic opcode. Atomics both read and write buffer
// memory, so IsBufferStore delegates here for its default case.
bool IsBufferAtomic(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::BufferAtomicIAdd32:
    case IR::Opcode::BufferAtomicIAdd64:
    case IR::Opcode::BufferAtomicISub32:
    case IR::Opcode::BufferAtomicSMin32:
    case IR::Opcode::BufferAtomicSMin64:
    case IR::Opcode::BufferAtomicUMin32:
    case IR::Opcode::BufferAtomicUMin64:
    case IR::Opcode::BufferAtomicFMin32:
    case IR::Opcode::BufferAtomicSMax32:
    case IR::Opcode::BufferAtomicSMax64:
    case IR::Opcode::BufferAtomicUMax32:
    case IR::Opcode::BufferAtomicUMax64:
    case IR::Opcode::BufferAtomicFMax32:
    case IR::Opcode::BufferAtomicInc32:
    case IR::Opcode::BufferAtomicDec32:
    case IR::Opcode::BufferAtomicAnd32:
    case IR::Opcode::BufferAtomicOr32:
    case IR::Opcode::BufferAtomicXor32:
    case IR::Opcode::BufferAtomicSwap32:
    case IR::Opcode::BufferAtomicCmpSwap32:
    case IR::Opcode::BufferAtomicFCmpSwap32:
        return true;
    default:
        return false;
    }
}
// True for any instruction that writes buffer memory: plain stores as well as
// the atomic ops (which also write).
bool IsBufferStore(const IR::Inst& inst) {
    if (IsBufferAtomic(inst)) {
        return true;
    }
    switch (inst.GetOpcode()) {
    case IR::Opcode::StoreBufferU8:
    case IR::Opcode::StoreBufferU16:
    case IR::Opcode::StoreBufferU32:
    case IR::Opcode::StoreBufferU32x2:
    case IR::Opcode::StoreBufferU32x3:
    case IR::Opcode::StoreBufferU32x4:
    case IR::Opcode::StoreBufferU64:
    case IR::Opcode::StoreBufferF32:
    case IR::Opcode::StoreBufferF32x2:
    case IR::Opcode::StoreBufferF32x3:
    case IR::Opcode::StoreBufferF32x4:
    case IR::Opcode::StoreBufferFormatF32:
        return true;
    default:
        return false;
    }
}
// True for every buffer access: loads, constant-buffer reads, stores, and
// atomics.
bool IsBufferInstruction(const IR::Inst& inst) {
    if (IsBufferStore(inst)) {
        return true;
    }
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadBufferU8:
    case IR::Opcode::LoadBufferU16:
    case IR::Opcode::LoadBufferU32:
    case IR::Opcode::LoadBufferU32x2:
    case IR::Opcode::LoadBufferU32x3:
    case IR::Opcode::LoadBufferU32x4:
    case IR::Opcode::LoadBufferU64:
    case IR::Opcode::LoadBufferF32:
    case IR::Opcode::LoadBufferF32x2:
    case IR::Opcode::LoadBufferF32x3:
    case IR::Opcode::LoadBufferF32x4:
    case IR::Opcode::LoadBufferFormatF32:
    case IR::Opcode::ReadConstBuffer:
        return true;
    default:
        return false;
    }
}
// Returns the shift (log2 of the access granularity) used to convert a byte
// offset into an element offset for the given buffer instruction.
// For typed (format) accesses the granularity comes from the data format; for
// plain loads/stores it is implied by the opcode. ReadConstBuffer addresses are
// already expressed in dwords, so no shift is applied.
// Fix: removed an unreachable `break;` that followed the inner format switch
// (every path of that switch returns, so the break was dead code).
u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadBufferU8:
    case IR::Opcode::StoreBufferU8:
        return 0;
    case IR::Opcode::LoadBufferU16:
    case IR::Opcode::StoreBufferU16:
        return 1;
    case IR::Opcode::LoadBufferU64:
    case IR::Opcode::StoreBufferU64:
    case IR::Opcode::BufferAtomicIAdd64:
    case IR::Opcode::BufferAtomicSMax64:
    case IR::Opcode::BufferAtomicSMin64:
    case IR::Opcode::BufferAtomicUMax64:
    case IR::Opcode::BufferAtomicUMin64:
        return 3;
    case IR::Opcode::LoadBufferFormatF32:
    case IR::Opcode::StoreBufferFormatF32:
        // Formatted accesses derive their granularity from the data format.
        switch (data_format) {
        case AmdGpu::DataFormat::Format8:
            return 0;
        case AmdGpu::DataFormat::Format8_8:
        case AmdGpu::DataFormat::Format16:
            return 1;
        case AmdGpu::DataFormat::Format8_8_8_8:
        case AmdGpu::DataFormat::Format16_16:
        case AmdGpu::DataFormat::Format10_11_11:
        case AmdGpu::DataFormat::Format2_10_10_10:
        case AmdGpu::DataFormat::Format16_16_16_16:
        case AmdGpu::DataFormat::Format32:
        case AmdGpu::DataFormat::Format32_32:
        case AmdGpu::DataFormat::Format32_32_32:
        case AmdGpu::DataFormat::Format32_32_32_32:
            return 2;
        default:
            return 0;
        }
    case IR::Opcode::ReadConstBuffer:
        // Provided address is already in dwords
        return 0;
    default:
        // Remaining buffer opcodes operate on 4-byte elements.
        return 2;
    }
}
// Deduplicating collector for the resource lists stored in Info. Adding a
// descriptor returns the binding index of an equivalent existing entry when one
// is found (merging per-occurrence usage flags into it), and appends a new
// entry otherwise.
class Descriptors {
public:
    explicit Descriptors(Info& info_)
        : info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images},
          sampler_resources{info_.samplers}, fmask_resources(info_.fmasks) {}

    u32 Add(const BufferResource& desc) {
        const auto matches = [&desc](const auto& existing) {
            return desc.sharp_idx == existing.sharp_idx &&
                   desc.inline_cbuf == existing.inline_cbuf &&
                   desc.buffer_type == existing.buffer_type;
        };
        const u32 index = FindOrInsert(buffer_resources, desc, matches);
        // Accumulate usage information from this occurrence into the shared entry.
        auto& buffer = buffer_resources[index];
        buffer.used_types |= desc.used_types;
        buffer.is_written |= desc.is_written;
        buffer.is_formatted |= desc.is_formatted;
        return index;
    }

    u32 Add(const ImageResource& desc) {
        const auto matches = [&desc](const auto& existing) {
            return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array &&
                   desc.mip_fallback_mode == existing.mip_fallback_mode &&
                   desc.constant_mip_index == existing.constant_mip_index;
        };
        const u32 index = FindOrInsert(image_resources, desc, matches);
        auto& image = image_resources[index];
        image.is_atomic |= desc.is_atomic;
        image.is_written |= desc.is_written;
        return index;
    }

    u32 Add(const SamplerResource& desc) {
        const auto matches = [&desc](const auto& existing) {
            return desc.sharp_idx == existing.sharp_idx &&
                   desc.is_inline_sampler == existing.is_inline_sampler &&
                   desc.inline_sampler == existing.inline_sampler;
        };
        return FindOrInsert(sampler_resources, desc, matches);
    }

    u32 Add(const FMaskResource& desc) {
        const auto matches = [&desc](const auto& existing) {
            return desc.sharp_idx == existing.sharp_idx;
        };
        return FindOrInsert(fmask_resources, desc, matches);
    }

private:
    // Returns the index of the first element satisfying `matches`; when no
    // element matches, appends `desc` and returns its index.
    template <typename List, typename Desc, typename Pred>
    static u32 FindOrInsert(List& list, const Desc& desc, Pred&& matches) {
        const auto it = std::ranges::find_if(list, matches);
        if (it != list.end()) {
            return static_cast<u32>(std::distance(list.begin(), it));
        }
        list.push_back(desc);
        return static_cast<u32>(list.size()) - 1;
    }

    // NOTE(review): `info` is not read in the visible code — kept for parity
    // with the upstream pass; confirm before removing.
    const Info& info;
    BufferResourceList& buffer_resources;
    ImageResourceList& image_resources;
    SamplerResourceList& sampler_resources;
    FMaskResourceList& fmask_resources;
};
} // Anonymous namespace
// Test stub: registers a fixed guest buffer resource and rewrites the
// instruction's handle operand to the resulting binding index, mirroring what
// the real resource-tracking pass does after locating the sharp.
void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors,
                      const Profile& profile) {
    const BufferResource resource{.sharp_idx = 0,
                                  .used_types = IR::Type::U32,
                                  .buffer_type = BufferType::Guest,
                                  .is_written = true,
                                  .is_formatted = false};
    const u32 binding = descriptors.Add(resource);
    // Replace handle with binding index in buffer resource list.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(0, ir.Imm32(binding));
}
// Emits IR computing the element-granular offset used to address a buffer,
// following the GCN buffer addressing rules (index/offset enables, thread-id
// addressing, swizzled buffers), with a fast path when the whole byte offset is
// known to be element-aligned at compile time. The byte offset is shifted right
// by the element size so the result indexes elements, not bytes.
IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
                               const AmdGpu::Buffer& buffer, u32 stride) {
    const auto inst_info = inst.Flags<IR::BufferInstInfo>();
    const u32 inst_offset = inst_info.inst_offset.Value();
    // Typed instructions carry their own data format, which takes precedence
    // over the format stored in the buffer sharp.
    const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid;
    const auto data_format = is_inst_typed
                                 ? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value())
                                 : buffer.GetDataFmt();
    // shift = log2(element size); mask selects the sub-element byte bits.
    const u32 shift = BufferAddressShift(inst, data_format);
    const u32 mask = (1 << shift) - 1;
    const IR::U32 soffset = IR::GetBufferSOffsetArg(&inst);
    // If address calculation is of the form "index * const_stride + offset" with
    // offset constant and both const_stride and offset are divisible with the
    // element size, apply shift directly.
    if (inst_info.index_enable && !inst_info.voffset_enable && soffset.IsImmediate() &&
        !buffer.swizzle_enable && !buffer.add_tid_enable && (stride & mask) == 0) {
        const u32 total_offset = soffset.U32() + inst_offset;
        if ((total_offset & mask) == 0) {
            // buffer_offset = index * (const_stride >> shift) + (offset >> shift)
            const IR::U32 index = IR::GetBufferIndexArg(&inst);
            return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)),
                           ir.Imm32(total_offset >> shift));
        }
    }
    // General path: build the byte offset dynamically.
    // index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ?
    // thread_id[5:0] : 0)
    IR::U32 index = ir.Imm32(0U);
    if (inst_info.index_enable) {
        const IR::U32 vgpr_index = IR::GetBufferIndexArg(&inst);
        index = ir.IAdd(index, vgpr_index);
    }
    if (buffer.add_tid_enable) {
        ASSERT_MSG(info.l_stage == LogicalStage::Compute,
                   "Thread ID buffer addressing is not supported outside of compute.");
        const IR::U32 thread_id{ir.LaneId()};
        index = ir.IAdd(index, thread_id);
    }
    // offset = (inst_offen ? vgpr_offset : 0) + inst_offset
    IR::U32 offset = ir.Imm32(inst_offset);
    offset = ir.IAdd(offset, soffset);
    if (inst_info.voffset_enable) {
        const IR::U32 voffset = IR::GetBufferVOffsetArg(&inst);
        offset = ir.IAdd(offset, voffset);
    }
    const IR::U32 const_stride = ir.Imm32(stride);
    IR::U32 buffer_offset;
    if (buffer.swizzle_enable) {
        const IR::U32 const_index_stride = ir.Imm32(buffer.GetIndexStride());
        const IR::U32 const_element_size = ir.Imm32(buffer.GetElementSize());
        // index_msb = index / const_index_stride
        const IR::U32 index_msb{ir.IDiv(index, const_index_stride)};
        // index_lsb = index % const_index_stride
        const IR::U32 index_lsb{ir.IMod(index, const_index_stride)};
        // offset_msb = offset / const_element_size
        const IR::U32 offset_msb{ir.IDiv(offset, const_element_size)};
        // offset_lsb = offset % const_element_size
        const IR::U32 offset_lsb{ir.IMod(offset, const_element_size)};
        // buffer_offset =
        //     (index_msb * const_stride + offset_msb * const_element_size) *
        //     const_index_stride
        //     + index_lsb * const_element_size + offset_lsb
        const IR::U32 buffer_offset_msb = ir.IMul(
            ir.IAdd(ir.IMul(index_msb, const_stride), ir.IMul(offset_msb, const_element_size)),
            const_index_stride);
        const IR::U32 buffer_offset_lsb =
            ir.IAdd(ir.IMul(index_lsb, const_element_size), offset_lsb);
        buffer_offset = ir.IAdd(buffer_offset_msb, buffer_offset_lsb);
    } else {
        // buffer_offset = index * const_stride + offset
        buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset);
    }
    // Convert the byte offset into element units for the given access size.
    if (shift != 0) {
        buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift));
    }
    return buffer_offset;
}
// Rewrites a buffer instruction's address argument with the flat element offset
// produced by CalculateBufferAddress. This test stub uses a null buffer sharp,
// so only the instruction-encoded operands influence the address.
// Fix: the ReadConstBuffer early-out now runs first, and the dead local
// `buffer_res` (a copy of info.buffers[handle] that was never read) is removed.
void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
    // Address of constant buffer reads can be calculated at IR emission time.
    if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
        return;
    }
    const auto buffer = AmdGpu::Buffer::Null();
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(IR::LoadBufferArgs::Address,
                CalculateBufferAddress(ir, inst, info, buffer, buffer.stride));
}
// Simplified resource-tracking pass for the test pipeline: first records a
// descriptor for every buffer instruction (rewriting handles to binding
// indices), then patches each instruction's address argument. Two full
// traversals keep the pass ordering identical to the real pass.
void ResourceTrackingPassStub(IR::Program& program, const Profile& profile) {
    auto& info = program.info;
    Descriptors descriptors{info};
    // Shared traversal: applies `action` to every buffer instruction.
    const auto for_each_buffer_inst = [&program](auto&& action) {
        for (IR::Block* const block : program.blocks) {
            for (IR::Inst& inst : block->Instructions()) {
                if (IsBufferInstruction(inst)) {
                    action(*block, inst);
                }
            }
        }
    };
    // Pass 1: Track resource sharps
    for_each_buffer_inst([&](IR::Block& block, IR::Inst& inst) {
        PatchBufferSharp(block, inst, info, descriptors, profile);
    });
    // Pass 2: Patch instruction args
    for_each_buffer_inst(
        [&](IR::Block& block, IR::Inst& inst) { PatchBufferArgs(block, inst, info); });
}
} // namespace Shader::Optimization