From 963d10f2203596ec6485e560f0b962298eaedc5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Sun, 19 Apr 2026 21:15:08 +0200 Subject: [PATCH] Test framework for GCN pipeline (#4272) * Test framework for GCN pipeline * Try fixing test compilation on CI --- .github/workflows/build.yml | 5 +- tests/CMakeLists.txt | 172 ++++++-- tests/gcn/gcn_test_runner.cpp | 431 ++++++++++++++++++++ tests/gcn/gcn_test_runner.hpp | 110 +++++ tests/gcn/test_gcn_instructions.cpp | 63 +++ tests/gcn/translator.cpp | 98 +++++ tests/gcn/translator.hpp | 10 + tests/stubs/resource_tracking_pass_stub.cpp | 335 +++++++++++++++ 8 files changed, 1194 insertions(+), 30 deletions(-) create mode 100644 tests/gcn/gcn_test_runner.cpp create mode 100644 tests/gcn/gcn_test_runner.hpp create mode 100644 tests/gcn/test_gcn_instructions.cpp create mode 100644 tests/gcn/translator.cpp create mode 100644 tests/gcn/translator.hpp create mode 100644 tests/stubs/resource_tracking_pass_stub.cpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 96f5e33d7..ba573a79f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -104,7 +104,7 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update - sudo apt-get install -y ninja-build libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev libxrandr-dev libxfixes-dev libudev-dev uuid-dev uuid-dev + sudo apt-get install -y libc++-dev libc++abi-dev ninja-build libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev libxrandr-dev libxfixes-dev libudev-dev uuid-dev - name: Install dependencies (macOS) if: runner.os == 'macOS' @@ -118,6 +118,7 @@ jobs: -DCMAKE_C_COMPILER="${{ matrix.compiler_c }}" \ -DCMAKE_BUILD_TYPE=Debug \ -DENABLE_TESTS=ON \ + ${{ runner.os == 'Linux' && '-DCMAKE_CXX_FLAGS=-stdlib=libc++' || '' }} \ ${{ runner.os == 'macOS' && 
'-DCMAKE_OSX_ARCHITECTURES=x86_64' || '' }} shell: bash @@ -139,7 +140,7 @@ jobs: shell: bash - name: Run tests with CTest - run: ctest --test-dir build --output-on-failure --progress + run: ctest --test-dir build --output-on-failure --progress -E 'GcnTest' shell: bash windows-sdl: diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cf5e02936..43fc3b62f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -9,6 +9,8 @@ include(FetchContent) ) FetchContent_MakeAvailable(googletest) +set(TEST_TARGETS shadps4_settings_test shadps4_gcn_test) + set(SETTINGS_TEST_SOURCES # Under test ${CMAKE_SOURCE_DIR}/src/core/emulator_settings.cpp @@ -30,13 +32,111 @@ set(SETTINGS_TEST_SOURCES test_emulator_settings.cpp ) -add_executable(shadps4_settings_test ${SETTINGS_TEST_SOURCES}) +set(GCN_TEST_SOURCES + # Under test + ${CMAKE_SOURCE_DIR}/src/core/emulator_settings.cpp + ${CMAKE_SOURCE_DIR}/src/core/emulator_state.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp + 
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/spirv_emit_context.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/format.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/decode.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/decode.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/fetch_shader.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/fetch_shader.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/data_share.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/export.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/translate.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/translate.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/scalar_alu.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/scalar_flow.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/scalar_memory.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/vector_alu.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/vector_interpolation.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/vector_memory.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/abstract_syntax_list.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/abstract_syntax_list.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/basic_block.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/basic_block.h + 
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/microinstruction.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/attribute.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/attribute.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/ir_emitter.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/ir_emitter.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/opcodes.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/opcodes.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/patch.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/patch.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/program.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/program.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/ir_passes.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/identity_removal_pass.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/post_order.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/post_order.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/type.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/type.h + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/value.cpp + ${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/value.h + ${CMAKE_SOURCE_DIR}/src/video_core/amdgpu/pixel_format.cpp + ${CMAKE_SOURCE_DIR}/src/video_core/amdgpu/pixel_format.h + ${CMAKE_SOURCE_DIR}/src/video_core/amdgpu/resource.h -target_include_directories(shadps4_settings_test PRIVATE - ${CMAKE_SOURCE_DIR}/src - ${CMAKE_SOURCE_DIR} + # Minimal common support + ${CMAKE_SOURCE_DIR}/src/common/path_util.cpp + ${CMAKE_SOURCE_DIR}/src/common/assert.cpp + ${CMAKE_SOURCE_DIR}/src/common/error.cpp + ${CMAKE_SOURCE_DIR}/src/common/io_file.cpp + 
${CMAKE_SOURCE_DIR}/src/common/string_util.cpp + ${CMAKE_SOURCE_DIR}/src/common/logging/log.cpp + ${CMAKE_SOURCE_DIR}/src/common/ntapi.cpp + + # Stubs that replace dependencies + stubs/common_stub.cpp + stubs/resource_tracking_pass_stub.cpp + stubs/scm_rev_stub.cpp + + gcn/gcn_test_runner.hpp + gcn/gcn_test_runner.cpp + gcn/translator.hpp + gcn/translator.cpp + + # Tests + gcn/test_gcn_instructions.cpp ) +add_executable(shadps4_settings_test ${SETTINGS_TEST_SOURCES}) +add_executable(shadps4_gcn_test ${GCN_TEST_SOURCES}) + +foreach(t ${TEST_TARGETS}) + target_include_directories(${t} PRIVATE + ${CMAKE_SOURCE_DIR}/src + ${CMAKE_SOURCE_DIR} + ) + target_compile_features(${t} PRIVATE cxx_std_23) + target_compile_definitions(${t} PRIVATE BOOST_ASIO_STANDALONE) +endforeach() + target_link_libraries(shadps4_settings_test PRIVATE GTest::gtest_main fmt::fmt @@ -45,40 +145,56 @@ target_link_libraries(shadps4_settings_test PRIVATE SDL3::SDL3 spdlog::spdlog ) - -target_compile_features(shadps4_settings_test PRIVATE cxx_std_23) - -target_compile_definitions(shadps4_settings_test PRIVATE BOOST_ASIO_STANDALONE) +target_link_libraries(shadps4_gcn_test PRIVATE + GTest::gtest_main + fmt::fmt + nlohmann_json::nlohmann_json + magic_enum::magic_enum + toml11::toml11 + Boost::headers + Vulkan::Headers + sirit + SDL3::SDL3 + spdlog::spdlog +) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") include(CheckCXXSymbolExists) check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP) if (LIBCPP) - target_compile_options(shadps4_settings_test PRIVATE -fexperimental-library) + foreach(t ${TEST_TARGETS}) + target_compile_options(${t} PRIVATE -fexperimental-library) + endforeach() endif() endif() -if (WIN32) - target_compile_definitions(shadps4_settings_test PRIVATE - NOMINMAX - WIN32_LEAN_AND_MEAN - NTDDI_VERSION=0x0A000006 - _WIN32_WINNT=0x0A00 - WINVER=0x0A00 - ) - if (MSVC) - target_compile_definitions(shadps4_settings_test PRIVATE - 
_CRT_SECURE_NO_WARNINGS - _CRT_NONSTDC_NO_DEPRECATE - _SCL_SECURE_NO_WARNINGS - _TIMESPEC_DEFINED +foreach(t ${TEST_TARGETS}) + if (WIN32) + target_link_libraries(${t} PRIVATE onecore) + target_compile_definitions(${t} PRIVATE + NOMINMAX + WIN32_LEAN_AND_MEAN + NTDDI_VERSION=0x0A000006 + _WIN32_WINNT=0x0A00 + WINVER=0x0A00 ) + if (MSVC) + target_compile_definitions(${t} PRIVATE + _CRT_SECURE_NO_WARNINGS + _CRT_NONSTDC_NO_DEPRECATE + _SCL_SECURE_NO_WARNINGS + _TIMESPEC_DEFINED + ) + endif() endif() -endif() +endforeach() include(GoogleTest) -gtest_discover_tests(shadps4_settings_test - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - PROPERTIES TIMEOUT 60 -) + +foreach(t ${TEST_TARGETS}) + gtest_discover_tests(${t} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + PROPERTIES TIMEOUT 60 + ) +endforeach() diff --git a/tests/gcn/gcn_test_runner.cpp b/tests/gcn/gcn_test_runner.cpp new file mode 100644 index 000000000..8daf168df --- /dev/null +++ b/tests/gcn/gcn_test_runner.cpp @@ -0,0 +1,431 @@ +// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "gcn_test_runner.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "shader_recompiler/resource.h" + +// Exactly one TU must define the dynamic dispatcher storage. 
+VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE + +namespace gcn_test { +namespace { + +constexpr bool kEnableValidation = +#ifdef NDEBUG + false; +#else + true; +#endif + +auto make_error(Error code, std::string message) { + return std::unexpected(ErrorInfo{code, std::move(message)}); +} + +auto find_memory_type(vk::PhysicalDevice pd, std::uint32_t type_filter, + vk::MemoryPropertyFlags required) -> std::expected { + auto props = pd.getMemoryProperties(); + for (std::uint32_t i = 0; i < props.memoryTypeCount; ++i) { + if ((type_filter & (1u << i)) && + (props.memoryTypes[i].propertyFlags & required) == required) { + return i; + } + } + return make_error(Error::BufferAllocationFailed, "no suitable memory type found"); +} + +struct HostBuffer { + vk::Device device; + vk::Buffer buffer; + vk::DeviceMemory memory; + void* mapped = nullptr; + + ~HostBuffer() { + if (mapped) + device.unmapMemory(memory); + if (buffer) + device.destroyBuffer(buffer); + if (memory) + device.freeMemory(memory); + } + HostBuffer() = default; + HostBuffer(const HostBuffer&) = delete; + HostBuffer& operator=(const HostBuffer&) = delete; +}; + +auto create_host_buffer(vk::Device dev, vk::PhysicalDevice pd, vk::DeviceSize size, + vk::BufferUsageFlags usage) + -> std::expected, ErrorInfo> { + auto buf = std::make_unique(); + buf->device = dev; + + auto [br, buffer] = dev.createBuffer(vk::BufferCreateInfo{ + .size = size, + .usage = usage, + .sharingMode = vk::SharingMode::eExclusive, + }); + if (br != vk::Result::eSuccess) + return make_error(Error::BufferAllocationFailed, "createBuffer"); + buf->buffer = buffer; + + auto req = dev.getBufferMemoryRequirements(buffer); + auto mt = find_memory_type(pd, req.memoryTypeBits, + vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent); + if (!mt) + return std::unexpected(mt.error()); + + auto [mr, mem] = dev.allocateMemory({ + .allocationSize = req.size, + .memoryTypeIndex = *mt, + }); + if (mr != 
vk::Result::eSuccess) + return make_error(Error::BufferAllocationFailed, "allocateMemory"); + buf->memory = mem; + + if (dev.bindBufferMemory(buffer, mem, 0) != vk::Result::eSuccess) + return make_error(Error::BufferAllocationFailed, "bindBufferMemory"); + + auto [mapr, ptr] = dev.mapMemory(mem, 0, size); + if (mapr != vk::Result::eSuccess) + return make_error(Error::BufferAllocationFailed, "mapMemory"); + buf->mapped = ptr; + + return buf; +} + +std::mutex g_runner_mutex; +std::unique_ptr g_runner; + +} // namespace + +Runner::~Runner() { + if (device_) { + device_.waitIdle(); + if (fence_) + device_.destroyFence(fence_); + if (pipeline_layout_) + device_.destroyPipelineLayout(pipeline_layout_); + if (descriptor_set_layout_) + device_.destroyDescriptorSetLayout(descriptor_set_layout_); + if (command_pool_) + device_.destroyCommandPool(command_pool_); + device_.destroy(); + } + if (instance_) + instance_.destroy(); +} + +std::expected Runner::instance() { + std::lock_guard lock{g_runner_mutex}; + if (g_runner) + return g_runner.get(); + auto r = std::unique_ptr(new Runner{}); + if (auto init = r->initialize(); !init) + return std::unexpected(init.error()); + g_runner = std::move(r); + return g_runner.get(); +} + +std::expected Runner::initialize() { + VULKAN_HPP_DEFAULT_DISPATCHER.init(); + + // ---- Instance ------------------------------------------------------ + vk::ApplicationInfo app_info{ + .pApplicationName = "gcn_test_runner", + .applicationVersion = 1, + .pEngineName = "gcn_test_runner", + .engineVersion = 1, + .apiVersion = vk::ApiVersion13, + }; + std::vector layers; + if (kEnableValidation) + layers.push_back("VK_LAYER_KHRONOS_validation"); + + auto [ir, inst] = vk::createInstance({ + .pApplicationInfo = &app_info, + .enabledLayerCount = static_cast(layers.size()), + .ppEnabledLayerNames = layers.data(), + }); + if (ir != vk::Result::eSuccess) + return make_error(Error::InstanceCreationFailed, + std::format("createInstance: {}", vk::to_string(ir))); + 
instance_ = inst; + VULKAN_HPP_DEFAULT_DISPATCHER.init(instance_); + + // ---- Pick physical device with the extensions we need ------------- + auto [pr, devs] = instance_.enumeratePhysicalDevices(); + if (pr != vk::Result::eSuccess || devs.empty()) + return make_error(Error::NoSuitableDevice, "no Vulkan devices"); + + constexpr std::array required_exts{ + VK_EXT_SHADER_OBJECT_EXTENSION_NAME, + VK_KHR_MAINTENANCE_6_EXTENSION_NAME, + VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, + }; + + for (auto pd : devs) { + auto [er, exts] = pd.enumerateDeviceExtensionProperties(); + if (er != vk::Result::eSuccess) + continue; + + auto has_ext = [&](const char* name) { + return std::ranges::any_of( + exts, [&](auto& e) { return std::string_view{e.extensionName} == name; }); + }; + if (!std::ranges::all_of(required_exts, has_ext)) + continue; + + auto families = pd.getQueueFamilyProperties(); + for (std::uint32_t i = 0; i < families.size(); ++i) { + if (families[i].queueFlags & vk::QueueFlagBits::eCompute) { + physical_device_ = pd; + queue_family_ = i; + break; + } + } + if (physical_device_) + break; + } + if (!physical_device_) + return make_error(Error::NoSuitableDevice, + "no device with compute + shader_object + maintenance6 + " + "push_descriptor"); + + max_push_constant_size_ = sizeof(Shader::PushData); + // physical_device_.getProperties().limits.maxPushConstantsSize; + + // ---- Device with feature chain ------------------------------------ + float priority = 1.0f; + vk::DeviceQueueCreateInfo qci{ + .queueFamilyIndex = queue_family_, + .queueCount = 1, + .pQueuePriorities = &priority, + }; + vk::PhysicalDeviceShaderObjectFeaturesEXT so_feat{.shaderObject = VK_TRUE}; + vk::PhysicalDeviceMaintenance6FeaturesKHR m6_feat{ + .pNext = &so_feat, + .maintenance6 = VK_TRUE, + }; + vk::PhysicalDeviceVulkan11Features v11_feat{ + .pNext = &m6_feat, + .uniformAndStorageBuffer16BitAccess = VK_TRUE, + }; + vk::PhysicalDeviceVulkan12Features v12_feat{ + .pNext = &v11_feat, + 
.uniformAndStorageBuffer8BitAccess = VK_TRUE, + .shaderInt8 = VK_TRUE, + }; + vk::PhysicalDeviceFeatures phys_feat{ + .shaderInt64 = VK_TRUE, + .shaderInt16 = VK_TRUE, + }; + + auto [dr, dev] = physical_device_.createDevice({ + .pNext = &v12_feat, + .queueCreateInfoCount = 1, + .pQueueCreateInfos = &qci, + .enabledExtensionCount = required_exts.size(), + .ppEnabledExtensionNames = required_exts.data(), + .pEnabledFeatures = &phys_feat, + }); + if (dr != vk::Result::eSuccess) + return make_error(Error::DeviceCreationFailed, + std::format("createDevice: {}", vk::to_string(dr))); + device_ = dev; + VULKAN_HPP_DEFAULT_DISPATCHER.init(device_); + queue_ = device_.getQueue(queue_family_, 0); + + // ---- Command pool + cached command buffer ------------------------- + auto [cpr, pool] = device_.createCommandPool({ + .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer, + .queueFamilyIndex = queue_family_, + }); + if (cpr != vk::Result::eSuccess) + return make_error(Error::DeviceCreationFailed, "createCommandPool"); + command_pool_ = pool; + + auto [cbr, cbs] = device_.allocateCommandBuffers({ + .commandPool = command_pool_, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = 1, + }); + if (cbr != vk::Result::eSuccess) + return make_error(Error::DeviceCreationFailed, "allocateCommandBuffers"); + command_buffer_ = cbs[0]; + + // ---- Fence (cached, reset per call) -------------------------------- + auto [fr, fence] = device_.createFence({}); + if (fr != vk::Result::eSuccess) + return make_error(Error::DeviceCreationFailed, "createFence"); + fence_ = fence; + + // ---- Descriptor set layout with push-descriptor flag -------------- + // Single storage buffer at binding 0. No descriptor sets are ever + // allocated from this layout — the layout is just used to tell the + // pipeline layout and shader what the push-descriptor shape is. 
+ vk::DescriptorSetLayoutBinding dsl_binding{ + .binding = 0, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute, + }; + auto [dslr, dsl] = device_.createDescriptorSetLayout({ + .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR, + .bindingCount = 1, + .pBindings = &dsl_binding, + }); + if (dslr != vk::Result::eSuccess) + return make_error(Error::DeviceCreationFailed, "createDescriptorSetLayout"); + descriptor_set_layout_ = dsl; + + // ---- Pipeline layout sized to device max push constants ----------- + vk::PushConstantRange pc{ + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .offset = 0, + .size = max_push_constant_size_, + }; + auto [plr, pl] = device_.createPipelineLayout({ + .setLayoutCount = 1, + .pSetLayouts = &descriptor_set_layout_, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &pc, + }); + if (plr != vk::Result::eSuccess) + return make_error(Error::DeviceCreationFailed, "createPipelineLayout"); + pipeline_layout_ = pl; + + return {}; +} + +std::expected Runner::run_raw(std::span spirv, + std::span push_constants, + std::span output, DispatchSize dispatch) { + if (push_constants.size() > max_push_constant_size_) + return make_error(Error::PushConstantTooLarge, + std::format("push constants {} exceed device max {}", + push_constants.size(), max_push_constant_size_)); + if (output.empty()) + return make_error(Error::OutputTooLarge, "output buffer is empty"); + + // Per-call: output buffer -------------------------------------------- + auto buf_r = create_host_buffer(device_, physical_device_, output.size(), + vk::BufferUsageFlagBits::eStorageBuffer); + if (!buf_r) + return std::unexpected(buf_r.error()); + auto& output_buffer = *buf_r; + std::memset(output_buffer->mapped, 0, output.size()); + + // Per-call: shader object -------------------------------------------- + vk::PushConstantRange shader_pc{ + .stageFlags = vk::ShaderStageFlagBits::eCompute, 
+ .offset = 0, + // .size = static_cast(push_constants.size()), + .size = sizeof(Shader::PushData), + }; + vk::ShaderCreateInfoEXT sci{ + .stage = vk::ShaderStageFlagBits::eCompute, + .codeType = vk::ShaderCodeTypeEXT::eSpirv, + .codeSize = spirv.size() * sizeof(std::uint32_t), + .pCode = spirv.data(), + .pName = "main", + .setLayoutCount = 1, + .pSetLayouts = &descriptor_set_layout_, + .pushConstantRangeCount = push_constants.empty() ? 0u : 1u, + .pPushConstantRanges = push_constants.empty() ? nullptr : &shader_pc, + }; + auto [sr, shaders] = device_.createShadersEXT(sci); + if (sr != vk::Result::eSuccess) + return make_error(Error::ShaderCreationFailed, + std::format("createShadersEXT: {}", vk::to_string(sr))); + auto shader = shaders[0]; + struct ShaderGuard { + vk::Device d; + vk::ShaderEXT s; + ~ShaderGuard() { + if (s) + d.destroyShaderEXT(s); + } + } sg{device_, shader}; + + // Reset cached command buffer + fence -------------------------------- + device_.resetFences(fence_); + command_buffer_.reset(); + + if (command_buffer_.begin({ + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, + }) != vk::Result::eSuccess) + return make_error(Error::CommandSubmissionFailed, "cmd.begin"); + + // Bind shader object ------------------------------------------------- + vk::ShaderStageFlagBits stage = vk::ShaderStageFlagBits::eCompute; + command_buffer_.bindShadersEXT(1, &stage, &shader); + + // Push descriptor: binding 0 = output SSBO --------------------------- + vk::DescriptorBufferInfo dbi{ + .buffer = output_buffer->buffer, + .offset = 0, + .range = VK_WHOLE_SIZE, + }; + vk::WriteDescriptorSet write{ + .dstBinding = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageBuffer, + .pBufferInfo = &dbi, + }; + vk::PushDescriptorSetInfoKHR push_desc{ + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .layout = pipeline_layout_, + .set = 0, + .descriptorWriteCount = 1, + .pDescriptorWrites = &write, + }; + 
command_buffer_.pushDescriptorSet2KHR(push_desc); + + // Push constants ----------------------------------------------------- + if (!push_constants.empty()) { + vk::PushConstantsInfoKHR pci{ + .layout = pipeline_layout_, + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .offset = 16, // fall onto ud_regs in PushData + .size = static_cast(push_constants.size()), + .pValues = push_constants.data(), + }; + command_buffer_.pushConstants2KHR(pci); + } + + command_buffer_.dispatch(dispatch.x, dispatch.y, dispatch.z); + + vk::MemoryBarrier barrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite, + .dstAccessMask = vk::AccessFlagBits::eHostRead, + }; + command_buffer_.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eHost, {}, barrier, {}, {}); + + if (command_buffer_.end() != vk::Result::eSuccess) + return make_error(Error::CommandSubmissionFailed, "cmd.end"); + + vk::SubmitInfo si{ + .commandBufferCount = 1, + .pCommandBuffers = &command_buffer_, + }; + if (queue_.submit(si, fence_) != vk::Result::eSuccess) + return make_error(Error::CommandSubmissionFailed, "queue.submit"); + if (device_.waitForFences(fence_, VK_TRUE, UINT64_MAX) != vk::Result::eSuccess) + return make_error(Error::ExecutionFailed, "waitForFences"); + + std::memcpy(output.data(), output_buffer->mapped, output.size()); + return {}; +} + +} // namespace gcn_test diff --git a/tests/gcn/gcn_test_runner.hpp b/tests/gcn/gcn_test_runner.hpp new file mode 100644 index 000000000..e15a97f52 --- /dev/null +++ b/tests/gcn/gcn_test_runner.hpp @@ -0,0 +1,110 @@ +// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#define VULKAN_HPP_NO_EXCEPTIONS +#define VULKAN_HPP_NO_CONSTRUCTORS +#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace gcn_test { + +struct DispatchSize { + std::uint32_t x = 1, y = 1, z = 1; 
+}; + +enum class Error { + NoSuitableDevice, + InstanceCreationFailed, + DeviceCreationFailed, + ShaderCreationFailed, + BufferAllocationFailed, + CommandSubmissionFailed, + PushConstantTooLarge, + OutputTooLarge, + ExecutionFailed, +}; + +struct ErrorInfo { + Error code; + std::string message; +}; + +class Runner { +public: + static std::expected instance(); + + std::expected run_raw( + std::span spirv, + std::span push_constants, + std::span output, + DispatchSize dispatch = {} + ); + + template + std::expected run( + std::span spirv, + const PushT& push, + DispatchSize dispatch = {} + ) { + static_assert(std::is_trivially_copyable_v); + static_assert(std::is_trivially_copyable_v); + OutputT result{}; + auto r = run_raw( + spirv, + {reinterpret_cast(&push), sizeof(PushT)}, + {reinterpret_cast(&result), sizeof(OutputT)}, + dispatch + ); + if (!r) return std::unexpected(r.error()); + return result; + } + + template + std::expected run( + std::span spirv, + DispatchSize dispatch = {} + ) { + static_assert(std::is_trivially_copyable_v); + OutputT result{}; + auto r = run_raw( + spirv, {}, + {reinterpret_cast(&result), sizeof(OutputT)}, + dispatch + ); + if (!r) return std::unexpected(r.error()); + return result; + } + + ~Runner(); + Runner(const Runner&) = delete; + Runner& operator=(const Runner&) = delete; + +private: + Runner() = default; + std::expected initialize(); + + vk::Instance instance_; + vk::PhysicalDevice physical_device_; + vk::Device device_; + vk::Queue queue_; + std::uint32_t queue_family_ = 0; + vk::CommandPool command_pool_; + vk::CommandBuffer command_buffer_; // cached, reset per call + vk::Fence fence_; // cached, reset per call + vk::DescriptorSetLayout descriptor_set_layout_; // push-descriptor + vk::PipelineLayout pipeline_layout_; + + std::uint32_t max_push_constant_size_ = 128; +}; + +} // namespace gcn_test diff --git a/tests/gcn/test_gcn_instructions.cpp b/tests/gcn/test_gcn_instructions.cpp new file mode 100644 index 
000000000..88f649681 --- /dev/null +++ b/tests/gcn/test_gcn_instructions.cpp @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include + +#include "gcn_test_runner.hpp" +#include "translator.hpp" + +class GcnTest : public ::testing::Test { +protected: + void SetUp() override {} + + void TearDown() override {} +}; + +struct F32x2 { + float a; + float b; +}; + +// Example +// TEST_F(GcnTest, test_name) { +// // Runner sets the vulkan context +// auto runner = gcn_test::Runner::instance().value(); +// +// // v_add_f32 v0, v0, v1 +// auto spirv = TranslateToSpirv(0x06000300); +// +// // run tells how to interpret the result (only 32bit as of now) +// // the second argument is templated, it can be at most 4 u32s +// // the data is accessible by the instruction in v0-4 and s0-4 (mirrored) +// // the result has to be placed in v0 +// auto result = runner->run(spirv, F32x2{1.5f, 6.0f}); +// +// EXPECT_TRUE(result.has_value()); +// EXPECT_EQ(*result, 7.5f); +// } + +TEST_F(GcnTest, add_f32) { + auto runner = gcn_test::Runner::instance().value(); + + // v_add_f32 v0, v0, v1 + auto spirv = TranslateToSpirv(0x06000300); + + auto result = runner->run(spirv, F32x2{1.5f, 6.0f}); + + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(*result, 7.5f); +} + +TEST_F(GcnTest, add_nan) { + auto runner = gcn_test::Runner::instance().value(); + + // v_add_f32 v0, v0, v1 + auto spirv = TranslateToSpirv(0x06000300); + + auto result = runner->run(spirv, F32x2{1.0f, std::numeric_limits::quiet_NaN()}); + + EXPECT_TRUE(result.has_value()); + EXPECT_TRUE(std::isnan(*result)); +} diff --git a/tests/gcn/translator.cpp b/tests/gcn/translator.cpp new file mode 100644 index 000000000..4e7aca798 --- /dev/null +++ b/tests/gcn/translator.cpp @@ -0,0 +1,98 @@ +// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "translator.hpp" + +#include + 
+#include "common/io_file.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/frontend/decode.h" +#include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/info.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/passes/ir_passes.h" +#include "shader_recompiler/ir/post_order.h" +#include "shader_recompiler/ir/program.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/recompiler.h" + +using namespace Shader; + +namespace Shader::Optimization { +void ResourceTrackingPassStub(IR::Program& program, const Profile& profile); +} + +std::vector TranslateToSpirv(u64 raw_gcn_inst) { + std::array provided_inst{static_cast(raw_gcn_inst & 0xFFFFFFFFU), + static_cast(raw_gcn_inst >> 32)}; + std::array store{ + 0xe0700000, + 0x80000000 // buffer_store_dword v0, v0, s[0:3], 0 + }; + Gcn::GcnCodeSlice first(provided_inst.data(), provided_inst.data() + provided_inst.size()); + Gcn::GcnCodeSlice second(store.data(), store.data() + store.size()); + + Gcn::GcnDecodeContext decoder; + Gcn::GcnInst inst = decoder.decodeInstruction(first); + Gcn::GcnInst store_inst = decoder.decodeInstruction(second); + + Shader::Info info{}; + info.stage = Stage::Compute; + info.l_stage = LogicalStage::Compute; + info.flattened_ud_buf.resize(4); + AmdGpu::Buffer buf = AmdGpu::Buffer::Null(); + std::memcpy(info.flattened_ud_buf.data(), &buf, sizeof(buf)); + + IR::Program program{info}; + Pools pools{}; + + IR::Block* block = pools.block_pool.Create(pools.inst_pool); + program.blocks.push_back(block); + + program.syntax_list.emplace_back(); + program.syntax_list.back().type = IR::AbstractSyntaxNode::Type::Block; + program.syntax_list.back().data.block = block; + program.syntax_list.emplace_back(); + program.syntax_list.back().type = IR::AbstractSyntaxNode::Type::Return; + program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front()); + + Profile profile{}; + 
profile.supported_spirv = 0x00010600; + profile.subgroup_size = 32; + profile.supports_robust_buffer_access = true; + + RuntimeInfo runtime_info{}; + runtime_info.Initialize(Stage::Compute); + runtime_info.num_user_data = 4; + runtime_info.cs_info.workgroup_size = {1, 1, 1}; + + Gcn::Translator translator(program.info, runtime_info, profile); + translator.EmitPrologue(block); + + for (int i = 0; i < 4; ++i) { + // copy user data from SGPR to VGPR as (most?) instructions cannot access + // two SGPRs + Shader::Gcn::GcnInst mov{}; + mov.src[0].field = Shader::Gcn::OperandField::ScalarGPR; + mov.src[0].code = i; + mov.dst[0].field = Shader::Gcn::OperandField::VectorGPR; + mov.dst[0].code = i; + translator.S_MOV(mov); + } + translator.TranslateInstruction(inst); + translator.TranslateInstruction(store_inst); + + Shader::Optimization::SsaRewritePass(program.post_order_blocks); + Shader::Optimization::IdentityRemovalPass(program.blocks); + Shader::Optimization::ResourceTrackingPassStub(program, profile); + Shader::Optimization::ConstantPropagationPass(program.blocks); + Shader::Optimization::DeadCodeEliminationPass(program); + Shader::Optimization::CollectShaderInfoPass(program, profile); + + Backend::Bindings bindings{}; + + const auto spirv = Backend::SPIRV::EmitSPIRV(profile, runtime_info, program, bindings); + + return spirv; +} diff --git a/tests/gcn/translator.hpp b/tests/gcn/translator.hpp new file mode 100644 index 000000000..6108d918f --- /dev/null +++ b/tests/gcn/translator.hpp @@ -0,0 +1,10 @@ +// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/types.h" + +std::vector TranslateToSpirv(u64 raw_gcn_inst); diff --git a/tests/stubs/resource_tracking_pass_stub.cpp b/tests/stubs/resource_tracking_pass_stub.cpp new file mode 100644 index 000000000..017b6cdda --- /dev/null +++ b/tests/stubs/resource_tracking_pass_stub.cpp @@ -0,0 +1,335 @@ +// 
SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "shader_recompiler/frontend/control_flow_graph.h" +#include "shader_recompiler/info.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/ir/breadth_first_search.h" +#include "shader_recompiler/ir/ir_emitter.h" +#include "shader_recompiler/ir/operand_helper.h" +#include "shader_recompiler/ir/program.h" +#include "shader_recompiler/ir/reinterpret.h" +#include "shader_recompiler/profile.h" +#include "video_core/amdgpu/resource.h" + +namespace Shader::Optimization { +namespace { + +using SharpLocation = u32; + +bool IsBufferAtomic(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::BufferAtomicIAdd32: + case IR::Opcode::BufferAtomicIAdd64: + case IR::Opcode::BufferAtomicISub32: + case IR::Opcode::BufferAtomicSMin32: + case IR::Opcode::BufferAtomicSMin64: + case IR::Opcode::BufferAtomicUMin32: + case IR::Opcode::BufferAtomicUMin64: + case IR::Opcode::BufferAtomicFMin32: + case IR::Opcode::BufferAtomicSMax32: + case IR::Opcode::BufferAtomicSMax64: + case IR::Opcode::BufferAtomicUMax32: + case IR::Opcode::BufferAtomicUMax64: + case IR::Opcode::BufferAtomicFMax32: + case IR::Opcode::BufferAtomicInc32: + case IR::Opcode::BufferAtomicDec32: + case IR::Opcode::BufferAtomicAnd32: + case IR::Opcode::BufferAtomicOr32: + case IR::Opcode::BufferAtomicXor32: + case IR::Opcode::BufferAtomicSwap32: + case IR::Opcode::BufferAtomicCmpSwap32: + case IR::Opcode::BufferAtomicFCmpSwap32: + return true; + default: + return false; + } +} + +bool IsBufferStore(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::StoreBufferU8: + case IR::Opcode::StoreBufferU16: + case IR::Opcode::StoreBufferU32: + case IR::Opcode::StoreBufferU32x2: + case IR::Opcode::StoreBufferU32x3: + case IR::Opcode::StoreBufferU32x4: + case IR::Opcode::StoreBufferU64: + case IR::Opcode::StoreBufferF32: + case 
IR::Opcode::StoreBufferF32x2: + case IR::Opcode::StoreBufferF32x3: + case IR::Opcode::StoreBufferF32x4: + case IR::Opcode::StoreBufferFormatF32: + return true; + default: + return IsBufferAtomic(inst); + } +} + +bool IsBufferInstruction(const IR::Inst& inst) { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadBufferU8: + case IR::Opcode::LoadBufferU16: + case IR::Opcode::LoadBufferU32: + case IR::Opcode::LoadBufferU32x2: + case IR::Opcode::LoadBufferU32x3: + case IR::Opcode::LoadBufferU32x4: + case IR::Opcode::LoadBufferU64: + case IR::Opcode::LoadBufferF32: + case IR::Opcode::LoadBufferF32x2: + case IR::Opcode::LoadBufferF32x3: + case IR::Opcode::LoadBufferF32x4: + case IR::Opcode::LoadBufferFormatF32: + case IR::Opcode::ReadConstBuffer: + return true; + default: + return IsBufferStore(inst); + } +} + +u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) { + switch (inst.GetOpcode()) { + case IR::Opcode::LoadBufferU8: + case IR::Opcode::StoreBufferU8: + return 0; + case IR::Opcode::LoadBufferU16: + case IR::Opcode::StoreBufferU16: + return 1; + case IR::Opcode::LoadBufferU64: + case IR::Opcode::StoreBufferU64: + case IR::Opcode::BufferAtomicIAdd64: + case IR::Opcode::BufferAtomicSMax64: + case IR::Opcode::BufferAtomicSMin64: + case IR::Opcode::BufferAtomicUMax64: + case IR::Opcode::BufferAtomicUMin64: + return 3; + case IR::Opcode::LoadBufferFormatF32: + case IR::Opcode::StoreBufferFormatF32: { + switch (data_format) { + case AmdGpu::DataFormat::Format8: + return 0; + case AmdGpu::DataFormat::Format8_8: + case AmdGpu::DataFormat::Format16: + return 1; + case AmdGpu::DataFormat::Format8_8_8_8: + case AmdGpu::DataFormat::Format16_16: + case AmdGpu::DataFormat::Format10_11_11: + case AmdGpu::DataFormat::Format2_10_10_10: + case AmdGpu::DataFormat::Format16_16_16_16: + case AmdGpu::DataFormat::Format32: + case AmdGpu::DataFormat::Format32_32: + case AmdGpu::DataFormat::Format32_32_32: + case AmdGpu::DataFormat::Format32_32_32_32: + return 2; 
+ default: + return 0; + } + break; + } + case IR::Opcode::ReadConstBuffer: + // Provided address is already in dwords + return 0; + default: + return 2; + } +} + +class Descriptors { +public: + explicit Descriptors(Info& info_) + : info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images}, + sampler_resources{info_.samplers}, fmask_resources(info_.fmasks) {} + + u32 Add(const BufferResource& desc) { + const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) { + return desc.sharp_idx == existing.sharp_idx && + desc.inline_cbuf == existing.inline_cbuf && + desc.buffer_type == existing.buffer_type; + })}; + auto& buffer = buffer_resources[index]; + buffer.used_types |= desc.used_types; + buffer.is_written |= desc.is_written; + buffer.is_formatted |= desc.is_formatted; + return index; + } + + u32 Add(const ImageResource& desc) { + const u32 index{Add(image_resources, desc, [&desc](const auto& existing) { + return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array && + desc.mip_fallback_mode == existing.mip_fallback_mode && + desc.constant_mip_index == existing.constant_mip_index; + })}; + auto& image = image_resources[index]; + image.is_atomic |= desc.is_atomic; + image.is_written |= desc.is_written; + return index; + } + + u32 Add(const SamplerResource& desc) { + const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) { + return desc.sharp_idx == existing.sharp_idx && + desc.is_inline_sampler == existing.is_inline_sampler && + desc.inline_sampler == existing.inline_sampler; + })}; + return index; + } + + u32 Add(const FMaskResource& desc) { + u32 index = Add(fmask_resources, desc, [&desc](const auto& existing) { + return desc.sharp_idx == existing.sharp_idx; + }); + return index; + } + +private: + template + static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { + const auto it{std::ranges::find_if(descriptors, pred)}; + if (it != descriptors.end()) { + return 
static_cast(std::distance(descriptors.begin(), it)); + } + descriptors.push_back(desc); + return static_cast(descriptors.size()) - 1; + } + + const Info& info; + BufferResourceList& buffer_resources; + ImageResourceList& image_resources; + SamplerResourceList& sampler_resources; + FMaskResourceList& fmask_resources; +}; + +} // Anonymous namespace + +void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors, + const Profile& profile) { + u32 buffer_binding = descriptors.Add(BufferResource{.sharp_idx = 0, + .used_types = IR::Type::U32, + .buffer_type = BufferType::Guest, + .is_written = true, + .is_formatted = false}); + + // Replace handle with binding index in buffer resource list. + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.SetArg(0, ir.Imm32(buffer_binding)); +} + +IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info, + const AmdGpu::Buffer& buffer, u32 stride) { + const auto inst_info = inst.Flags(); + const u32 inst_offset = inst_info.inst_offset.Value(); + const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid; + const auto data_format = is_inst_typed + ? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value()) + : buffer.GetDataFmt(); + const u32 shift = BufferAddressShift(inst, data_format); + const u32 mask = (1 << shift) - 1; + const IR::U32 soffset = IR::GetBufferSOffsetArg(&inst); + + // If address calculation is of the form "index * const_stride + offset" with + // offset constant and both const_stride and offset are divisible with the + // element size, apply shift directly. 
+ if (inst_info.index_enable && !inst_info.voffset_enable && soffset.IsImmediate() && + !buffer.swizzle_enable && !buffer.add_tid_enable && (stride & mask) == 0) { + const u32 total_offset = soffset.U32() + inst_offset; + if ((total_offset & mask) == 0) { + // buffer_offset = index * (const_stride >> shift) + (offset >> shift) + const IR::U32 index = IR::GetBufferIndexArg(&inst); + return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)), + ir.Imm32(total_offset >> shift)); + } + } + + // index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ? + // thread_id[5:0] : 0) + IR::U32 index = ir.Imm32(0U); + if (inst_info.index_enable) { + const IR::U32 vgpr_index = IR::GetBufferIndexArg(&inst); + index = ir.IAdd(index, vgpr_index); + } + if (buffer.add_tid_enable) { + ASSERT_MSG(info.l_stage == LogicalStage::Compute, + "Thread ID buffer addressing is not supported outside of compute."); + const IR::U32 thread_id{ir.LaneId()}; + index = ir.IAdd(index, thread_id); + } + // offset = (inst_offen ? 
vgpr_offset : 0) + inst_offset + IR::U32 offset = ir.Imm32(inst_offset); + offset = ir.IAdd(offset, soffset); + if (inst_info.voffset_enable) { + const IR::U32 voffset = IR::GetBufferVOffsetArg(&inst); + offset = ir.IAdd(offset, voffset); + } + const IR::U32 const_stride = ir.Imm32(stride); + IR::U32 buffer_offset; + if (buffer.swizzle_enable) { + const IR::U32 const_index_stride = ir.Imm32(buffer.GetIndexStride()); + const IR::U32 const_element_size = ir.Imm32(buffer.GetElementSize()); + // index_msb = index / const_index_stride + const IR::U32 index_msb{ir.IDiv(index, const_index_stride)}; + // index_lsb = index % const_index_stride + const IR::U32 index_lsb{ir.IMod(index, const_index_stride)}; + // offset_msb = offset / const_element_size + const IR::U32 offset_msb{ir.IDiv(offset, const_element_size)}; + // offset_lsb = offset % const_element_size + const IR::U32 offset_lsb{ir.IMod(offset, const_element_size)}; + // buffer_offset = + // (index_msb * const_stride + offset_msb * const_element_size) * + // const_index_stride + // + index_lsb * const_element_size + offset_lsb + const IR::U32 buffer_offset_msb = ir.IMul( + ir.IAdd(ir.IMul(index_msb, const_stride), ir.IMul(offset_msb, const_element_size)), + const_index_stride); + const IR::U32 buffer_offset_lsb = + ir.IAdd(ir.IMul(index_lsb, const_element_size), offset_lsb); + buffer_offset = ir.IAdd(buffer_offset_msb, buffer_offset_lsb); + } else { + // buffer_offset = index * const_stride + offset + buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset); + } + if (shift != 0) { + buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift)); + } + return buffer_offset; +} + +void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) { + const auto handle = inst.Arg(0); + const auto buffer_res = info.buffers[handle.U32()]; + const auto buffer = AmdGpu::Buffer::Null(); + + // Address of constant buffer reads can be calculated at IR emission time. 
+ if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) { + return; + } + + IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; + inst.SetArg(IR::LoadBufferArgs::Address, + CalculateBufferAddress(ir, inst, info, buffer, buffer.stride)); +} + +void ResourceTrackingPassStub(IR::Program& program, const Profile& profile) { + // Iterate resource instructions and patch them after finding the sharp. + auto& info = program.info; + + // Pass 1: Track resource sharps + Descriptors descriptors{info}; + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (IsBufferInstruction(inst)) { + PatchBufferSharp(*block, inst, info, descriptors, profile); + } + } + } + + // Pass 2: Patch instruction args + for (IR::Block* const block : program.blocks) { + for (IR::Inst& inst : block->Instructions()) { + if (IsBufferInstruction(inst)) { + PatchBufferArgs(*block, inst, info); + } + } + } +} + +} // namespace Shader::Optimization