mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2026-04-29 23:41:19 -06:00
Test framework for GCN pipeline (#4272)
* Test framework for GCN pipeline * Try fixing test compilation on CI
This commit is contained in:
parent
a95e71e156
commit
963d10f220
5
.github/workflows/build.yml
vendored
5
.github/workflows/build.yml
vendored
@ -104,7 +104,7 @@ jobs:
|
||||
if: runner.os == 'Linux'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ninja-build libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev libxrandr-dev libxfixes-dev libudev-dev uuid-dev uuid-dev
|
||||
sudo apt-get install -y libc++-dev libc++abi-dev ninja-build libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev libxrandr-dev libxfixes-dev libudev-dev uuid-dev
|
||||
|
||||
- name: Install dependencies (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
@ -118,6 +118,7 @@ jobs:
|
||||
-DCMAKE_C_COMPILER="${{ matrix.compiler_c }}" \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DENABLE_TESTS=ON \
|
||||
${{ runner.os == 'Linux' && '-DCMAKE_CXX_FLAGS=-stdlib=libc++' || '' }} \
|
||||
${{ runner.os == 'macOS' && '-DCMAKE_OSX_ARCHITECTURES=x86_64' || '' }}
|
||||
shell: bash
|
||||
|
||||
@ -139,7 +140,7 @@ jobs:
|
||||
shell: bash
|
||||
|
||||
- name: Run tests with CTest
|
||||
run: ctest --test-dir build --output-on-failure --progress
|
||||
run: ctest --test-dir build --output-on-failure --progress -E 'GcnTest'
|
||||
shell: bash
|
||||
|
||||
windows-sdl:
|
||||
|
||||
@ -9,6 +9,8 @@ include(FetchContent)
|
||||
)
|
||||
FetchContent_MakeAvailable(googletest)
|
||||
|
||||
set(TEST_TARGETS shadps4_settings_test shadps4_gcn_test)
|
||||
|
||||
set(SETTINGS_TEST_SOURCES
|
||||
# Under test
|
||||
${CMAKE_SOURCE_DIR}/src/core/emulator_settings.cpp
|
||||
@ -30,13 +32,111 @@ set(SETTINGS_TEST_SOURCES
|
||||
test_emulator_settings.cpp
|
||||
)
|
||||
|
||||
add_executable(shadps4_settings_test ${SETTINGS_TEST_SOURCES})
|
||||
set(GCN_TEST_SOURCES
|
||||
# Under test
|
||||
${CMAKE_SOURCE_DIR}/src/core/emulator_settings.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/core/emulator_state.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_barriers.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_logical.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_undefined.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/backend/spirv/spirv_emit_context.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/format.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/decode.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/decode.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/fetch_shader.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/fetch_shader.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/data_share.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/export.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/translate.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/translate.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/scalar_alu.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/scalar_flow.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/scalar_memory.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/vector_alu.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/vector_interpolation.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/frontend/translate/vector_memory.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/abstract_syntax_list.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/abstract_syntax_list.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/basic_block.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/basic_block.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/microinstruction.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/attribute.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/attribute.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/ir_emitter.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/ir_emitter.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/opcodes.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/opcodes.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/patch.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/patch.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/program.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/program.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/ir_passes.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/identity_removal_pass.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/post_order.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/post_order.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/type.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/type.h
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/value.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/shader_recompiler/ir/value.h
|
||||
${CMAKE_SOURCE_DIR}/src/video_core/amdgpu/pixel_format.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/video_core/amdgpu/pixel_format.h
|
||||
${CMAKE_SOURCE_DIR}/src/video_core/amdgpu/resource.h
|
||||
|
||||
target_include_directories(shadps4_settings_test PRIVATE
|
||||
${CMAKE_SOURCE_DIR}/src
|
||||
${CMAKE_SOURCE_DIR}
|
||||
# Minimal common support
|
||||
${CMAKE_SOURCE_DIR}/src/common/path_util.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/common/assert.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/common/error.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/common/io_file.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/common/string_util.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/common/logging/log.cpp
|
||||
${CMAKE_SOURCE_DIR}/src/common/ntapi.cpp
|
||||
|
||||
# Stubs that replace dependencies
|
||||
stubs/common_stub.cpp
|
||||
stubs/resource_tracking_pass_stub.cpp
|
||||
stubs/scm_rev_stub.cpp
|
||||
|
||||
gcn/gcn_test_runner.hpp
|
||||
gcn/gcn_test_runner.cpp
|
||||
gcn/translator.hpp
|
||||
gcn/translator.cpp
|
||||
|
||||
# Tests
|
||||
gcn/test_gcn_instructions.cpp
|
||||
)
|
||||
|
||||
add_executable(shadps4_settings_test ${SETTINGS_TEST_SOURCES})
|
||||
add_executable(shadps4_gcn_test ${GCN_TEST_SOURCES})
|
||||
|
||||
foreach(t ${TEST_TARGETS})
|
||||
target_include_directories(${t} PRIVATE
|
||||
${CMAKE_SOURCE_DIR}/src
|
||||
${CMAKE_SOURCE_DIR}
|
||||
)
|
||||
target_compile_features(${t} PRIVATE cxx_std_23)
|
||||
target_compile_definitions(${t} PRIVATE BOOST_ASIO_STANDALONE)
|
||||
endforeach()
|
||||
|
||||
target_link_libraries(shadps4_settings_test PRIVATE
|
||||
GTest::gtest_main
|
||||
fmt::fmt
|
||||
@ -45,40 +145,56 @@ target_link_libraries(shadps4_settings_test PRIVATE
|
||||
SDL3::SDL3
|
||||
spdlog::spdlog
|
||||
)
|
||||
|
||||
target_compile_features(shadps4_settings_test PRIVATE cxx_std_23)
|
||||
|
||||
target_compile_definitions(shadps4_settings_test PRIVATE BOOST_ASIO_STANDALONE)
|
||||
target_link_libraries(shadps4_gcn_test PRIVATE
|
||||
GTest::gtest_main
|
||||
fmt::fmt
|
||||
nlohmann_json::nlohmann_json
|
||||
magic_enum::magic_enum
|
||||
toml11::toml11
|
||||
Boost::headers
|
||||
Vulkan::Headers
|
||||
sirit
|
||||
SDL3::SDL3
|
||||
spdlog::spdlog
|
||||
)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR
|
||||
CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
include(CheckCXXSymbolExists)
|
||||
check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP)
|
||||
if (LIBCPP)
|
||||
target_compile_options(shadps4_settings_test PRIVATE -fexperimental-library)
|
||||
foreach(t ${TEST_TARGETS})
|
||||
target_compile_options(${t} PRIVATE -fexperimental-library)
|
||||
endforeach()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
target_compile_definitions(shadps4_settings_test PRIVATE
|
||||
NOMINMAX
|
||||
WIN32_LEAN_AND_MEAN
|
||||
NTDDI_VERSION=0x0A000006
|
||||
_WIN32_WINNT=0x0A00
|
||||
WINVER=0x0A00
|
||||
)
|
||||
if (MSVC)
|
||||
target_compile_definitions(shadps4_settings_test PRIVATE
|
||||
_CRT_SECURE_NO_WARNINGS
|
||||
_CRT_NONSTDC_NO_DEPRECATE
|
||||
_SCL_SECURE_NO_WARNINGS
|
||||
_TIMESPEC_DEFINED
|
||||
foreach(t ${TEST_TARGETS})
|
||||
if (WIN32)
|
||||
target_link_libraries(${t} PRIVATE onecore)
|
||||
target_compile_definitions(${t} PRIVATE
|
||||
NOMINMAX
|
||||
WIN32_LEAN_AND_MEAN
|
||||
NTDDI_VERSION=0x0A000006
|
||||
_WIN32_WINNT=0x0A00
|
||||
WINVER=0x0A00
|
||||
)
|
||||
if (MSVC)
|
||||
target_compile_definitions(${t} PRIVATE
|
||||
_CRT_SECURE_NO_WARNINGS
|
||||
_CRT_NONSTDC_NO_DEPRECATE
|
||||
_SCL_SECURE_NO_WARNINGS
|
||||
_TIMESPEC_DEFINED
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
include(GoogleTest)
|
||||
gtest_discover_tests(shadps4_settings_test
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
PROPERTIES TIMEOUT 60
|
||||
)
|
||||
|
||||
foreach(t ${TEST_TARGETS})
|
||||
gtest_discover_tests(${t}
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
PROPERTIES TIMEOUT 60
|
||||
)
|
||||
endforeach()
|
||||
|
||||
431
tests/gcn/gcn_test_runner.cpp
Normal file
431
tests/gcn/gcn_test_runner.cpp
Normal file
@ -0,0 +1,431 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "gcn_test_runner.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <format>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <ranges>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "shader_recompiler/resource.h"
|
||||
|
||||
// Exactly one TU must define the dynamic dispatcher storage.
|
||||
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
|
||||
|
||||
namespace gcn_test {
|
||||
namespace {
|
||||
|
||||
// Enable the Khronos validation layer only in debug builds; release test
// runs skip it to keep CI fast.
#ifdef NDEBUG
constexpr bool kEnableValidation = false;
#else
constexpr bool kEnableValidation = true;
#endif
||||
|
||||
// Builds the std::unexpected payload used by every failure path below.
auto make_error(Error code, std::string message) {
    ErrorInfo info{code, std::move(message)};
    return std::unexpected(std::move(info));
}
|
||||
|
||||
// Scans the physical device's memory types for an index that is both allowed
// by `type_filter` (the bitmask from vkGetBufferMemoryRequirements) and
// carries every property flag in `required`.
auto find_memory_type(vk::PhysicalDevice pd, std::uint32_t type_filter,
                      vk::MemoryPropertyFlags required) -> std::expected<std::uint32_t, ErrorInfo> {
    const auto mem_props = pd.getMemoryProperties();
    for (std::uint32_t index = 0; index < mem_props.memoryTypeCount; ++index) {
        const bool allowed = (type_filter & (1u << index)) != 0;
        const bool has_flags =
            (mem_props.memoryTypes[index].propertyFlags & required) == required;
        if (allowed && has_flags) {
            return index;
        }
    }
    return make_error(Error::BufferAllocationFailed, "no suitable memory type found");
}
|
||||
|
||||
struct HostBuffer {
|
||||
vk::Device device;
|
||||
vk::Buffer buffer;
|
||||
vk::DeviceMemory memory;
|
||||
void* mapped = nullptr;
|
||||
|
||||
~HostBuffer() {
|
||||
if (mapped)
|
||||
device.unmapMemory(memory);
|
||||
if (buffer)
|
||||
device.destroyBuffer(buffer);
|
||||
if (memory)
|
||||
device.freeMemory(memory);
|
||||
}
|
||||
HostBuffer() = default;
|
||||
HostBuffer(const HostBuffer&) = delete;
|
||||
HostBuffer& operator=(const HostBuffer&) = delete;
|
||||
};
|
||||
|
||||
// Allocates a host-visible, host-coherent buffer of `size` bytes, binds its
// memory and leaves it persistently mapped. The result is returned through
// a unique_ptr so ~HostBuffer releases whatever was created on error paths.
auto create_host_buffer(vk::Device dev, vk::PhysicalDevice pd, vk::DeviceSize size,
                        vk::BufferUsageFlags usage)
    -> std::expected<std::unique_ptr<HostBuffer>, ErrorInfo> {
    auto result = std::make_unique<HostBuffer>();
    result->device = dev;

    auto [buffer_res, buffer] = dev.createBuffer(vk::BufferCreateInfo{
        .size = size,
        .usage = usage,
        .sharingMode = vk::SharingMode::eExclusive,
    });
    if (buffer_res != vk::Result::eSuccess) {
        return make_error(Error::BufferAllocationFailed, "createBuffer");
    }
    result->buffer = buffer;

    const auto requirements = dev.getBufferMemoryRequirements(buffer);
    const auto type_index =
        find_memory_type(pd, requirements.memoryTypeBits,
                         vk::MemoryPropertyFlagBits::eHostVisible |
                             vk::MemoryPropertyFlagBits::eHostCoherent);
    if (!type_index) {
        return std::unexpected(type_index.error());
    }

    auto [alloc_res, memory] = dev.allocateMemory({
        .allocationSize = requirements.size,
        .memoryTypeIndex = *type_index,
    });
    if (alloc_res != vk::Result::eSuccess) {
        return make_error(Error::BufferAllocationFailed, "allocateMemory");
    }
    result->memory = memory;

    if (dev.bindBufferMemory(buffer, memory, 0) != vk::Result::eSuccess) {
        return make_error(Error::BufferAllocationFailed, "bindBufferMemory");
    }

    auto [map_res, pointer] = dev.mapMemory(memory, 0, size);
    if (map_res != vk::Result::eSuccess) {
        return make_error(Error::BufferAllocationFailed, "mapMemory");
    }
    result->mapped = pointer;

    return result;
}
|
||||
|
||||
std::mutex g_runner_mutex;
|
||||
std::unique_ptr<Runner> g_runner;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Destroys all owned Vulkan objects in reverse creation order. Safe on a
// partially-initialized Runner: every handle is checked before destruction.
Runner::~Runner() {
    if (device_) {
        // Ensure no submitted work still references the objects below.
        device_.waitIdle();
        if (fence_) {
            device_.destroyFence(fence_);
        }
        if (pipeline_layout_) {
            device_.destroyPipelineLayout(pipeline_layout_);
        }
        if (descriptor_set_layout_) {
            device_.destroyDescriptorSetLayout(descriptor_set_layout_);
        }
        if (command_pool_) {
            device_.destroyCommandPool(command_pool_);
        }
        device_.destroy();
    }
    if (instance_) {
        instance_.destroy();
    }
}
|
||||
|
||||
// Returns the process-wide Runner, creating and initializing it on first
// use. The mutex is held across initialization so concurrent first callers
// cannot observe a half-built singleton.
std::expected<Runner*, ErrorInfo> Runner::instance() {
    std::lock_guard lock{g_runner_mutex};
    if (!g_runner) {
        std::unique_ptr<Runner> candidate{new Runner{}};
        if (auto init_result = candidate->initialize(); !init_result) {
            return std::unexpected(init_result.error());
        }
        g_runner = std::move(candidate);
    }
    return g_runner.get();
}
|
||||
|
||||
// One-time Vulkan bring-up: instance, a compute-capable device exposing the
// shader-object / maintenance6 / push-descriptor extensions, a reusable
// command buffer + fence, and a push-descriptor pipeline layout with a
// single storage buffer at binding 0.
std::expected<void, ErrorInfo> Runner::initialize() {
    VULKAN_HPP_DEFAULT_DISPATCHER.init();

    // ---- Instance ------------------------------------------------------
    vk::ApplicationInfo app_info{
        .pApplicationName = "gcn_test_runner",
        .applicationVersion = 1,
        .pEngineName = "gcn_test_runner",
        .engineVersion = 1,
        .apiVersion = vk::ApiVersion13,
    };
    std::vector<const char*> instance_layers;
    if (kEnableValidation) {
        instance_layers.push_back("VK_LAYER_KHRONOS_validation");
    }

    auto [instance_res, created_instance] = vk::createInstance({
        .pApplicationInfo = &app_info,
        .enabledLayerCount = static_cast<std::uint32_t>(instance_layers.size()),
        .ppEnabledLayerNames = instance_layers.data(),
    });
    if (instance_res != vk::Result::eSuccess) {
        return make_error(Error::InstanceCreationFailed,
                          std::format("createInstance: {}", vk::to_string(instance_res)));
    }
    instance_ = created_instance;
    VULKAN_HPP_DEFAULT_DISPATCHER.init(instance_);

    // ---- Pick a physical device with the extensions we need ------------
    auto [enum_res, candidates] = instance_.enumeratePhysicalDevices();
    if (enum_res != vk::Result::eSuccess || candidates.empty()) {
        return make_error(Error::NoSuitableDevice, "no Vulkan devices");
    }

    constexpr std::array required_exts{
        VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
        VK_KHR_MAINTENANCE_6_EXTENSION_NAME,
        VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
    };

    for (auto candidate : candidates) {
        auto [ext_res, available_exts] = candidate.enumerateDeviceExtensionProperties();
        if (ext_res != vk::Result::eSuccess) {
            continue;
        }

        auto supports = [&](const char* wanted) {
            return std::ranges::any_of(available_exts, [&](auto& ext) {
                return std::string_view{ext.extensionName} == wanted;
            });
        };
        if (!std::ranges::all_of(required_exts, supports)) {
            continue;
        }

        // First compute-capable queue family on the first matching device.
        auto families = candidate.getQueueFamilyProperties();
        for (std::uint32_t family = 0; family < families.size(); ++family) {
            if (families[family].queueFlags & vk::QueueFlagBits::eCompute) {
                physical_device_ = candidate;
                queue_family_ = family;
                break;
            }
        }
        if (physical_device_) {
            break;
        }
    }
    if (!physical_device_) {
        return make_error(Error::NoSuitableDevice,
                          "no device with compute + shader_object + maintenance6 + "
                          "push_descriptor");
    }

    // Only Shader::PushData is ever pushed, so size the range to it rather
    // than the (much larger) device maximum.
    max_push_constant_size_ = sizeof(Shader::PushData);
    // physical_device_.getProperties().limits.maxPushConstantsSize;

    // ---- Device with feature chain ------------------------------------
    float priority = 1.0f;
    vk::DeviceQueueCreateInfo queue_ci{
        .queueFamilyIndex = queue_family_,
        .queueCount = 1,
        .pQueuePriorities = &priority,
    };
    // pNext chain: Vulkan12 -> Vulkan11 -> maintenance6 -> shader object.
    vk::PhysicalDeviceShaderObjectFeaturesEXT shader_object_features{.shaderObject = VK_TRUE};
    vk::PhysicalDeviceMaintenance6FeaturesKHR maintenance6_features{
        .pNext = &shader_object_features,
        .maintenance6 = VK_TRUE,
    };
    vk::PhysicalDeviceVulkan11Features vk11_features{
        .pNext = &maintenance6_features,
        .uniformAndStorageBuffer16BitAccess = VK_TRUE,
    };
    vk::PhysicalDeviceVulkan12Features vk12_features{
        .pNext = &vk11_features,
        .uniformAndStorageBuffer8BitAccess = VK_TRUE,
        .shaderInt8 = VK_TRUE,
    };
    vk::PhysicalDeviceFeatures core_features{
        .shaderInt64 = VK_TRUE,
        .shaderInt16 = VK_TRUE,
    };

    auto [device_res, created_device] = physical_device_.createDevice({
        .pNext = &vk12_features,
        .queueCreateInfoCount = 1,
        .pQueueCreateInfos = &queue_ci,
        .enabledExtensionCount = required_exts.size(),
        .ppEnabledExtensionNames = required_exts.data(),
        .pEnabledFeatures = &core_features,
    });
    if (device_res != vk::Result::eSuccess) {
        return make_error(Error::DeviceCreationFailed,
                          std::format("createDevice: {}", vk::to_string(device_res)));
    }
    device_ = created_device;
    VULKAN_HPP_DEFAULT_DISPATCHER.init(device_);
    queue_ = device_.getQueue(queue_family_, 0);

    // ---- Command pool + cached command buffer -------------------------
    auto [pool_res, pool] = device_.createCommandPool({
        .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
        .queueFamilyIndex = queue_family_,
    });
    if (pool_res != vk::Result::eSuccess) {
        return make_error(Error::DeviceCreationFailed, "createCommandPool");
    }
    command_pool_ = pool;

    auto [cb_res, command_buffers] = device_.allocateCommandBuffers({
        .commandPool = command_pool_,
        .level = vk::CommandBufferLevel::ePrimary,
        .commandBufferCount = 1,
    });
    if (cb_res != vk::Result::eSuccess) {
        return make_error(Error::DeviceCreationFailed, "allocateCommandBuffers");
    }
    command_buffer_ = command_buffers[0];

    // ---- Fence (cached, reset per call) --------------------------------
    auto [fence_res, created_fence] = device_.createFence({});
    if (fence_res != vk::Result::eSuccess) {
        return make_error(Error::DeviceCreationFailed, "createFence");
    }
    fence_ = created_fence;

    // ---- Descriptor set layout with push-descriptor flag --------------
    // Single storage buffer at binding 0. No descriptor sets are ever
    // allocated from this layout — it only describes the push-descriptor
    // shape to the pipeline layout and the shader.
    vk::DescriptorSetLayoutBinding dsl_binding{
        .binding = 0,
        .descriptorType = vk::DescriptorType::eStorageBuffer,
        .descriptorCount = 1,
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
    };
    auto [dsl_res, dsl] = device_.createDescriptorSetLayout({
        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
        .bindingCount = 1,
        .pBindings = &dsl_binding,
    });
    if (dsl_res != vk::Result::eSuccess) {
        return make_error(Error::DeviceCreationFailed, "createDescriptorSetLayout");
    }
    descriptor_set_layout_ = dsl;

    // ---- Pipeline layout with the compute push-constant range ----------
    vk::PushConstantRange pc_range{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .offset = 0,
        .size = max_push_constant_size_,
    };
    auto [pl_res, layout] = device_.createPipelineLayout({
        .setLayoutCount = 1,
        .pSetLayouts = &descriptor_set_layout_,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = &pc_range,
    });
    if (pl_res != vk::Result::eSuccess) {
        return make_error(Error::DeviceCreationFailed, "createPipelineLayout");
    }
    pipeline_layout_ = layout;

    return {};
}
|
||||
|
||||
// Executes `spirv` once as a compute shader: the output storage buffer
// (binding 0) is zeroed, bound via push descriptor, the dispatch is
// submitted and fenced, and the buffer contents are copied into `output`.
std::expected<void, ErrorInfo> Runner::run_raw(std::span<const std::uint32_t> spirv,
                                               std::span<const std::byte> push_constants,
                                               std::span<std::byte> output, DispatchSize dispatch) {
    if (push_constants.size() > max_push_constant_size_) {
        return make_error(Error::PushConstantTooLarge,
                          std::format("push constants {} exceed device max {}",
                                      push_constants.size(), max_push_constant_size_));
    }
    if (output.empty()) {
        return make_error(Error::OutputTooLarge, "output buffer is empty");
    }

    // Per-call: zero-filled host-visible output buffer --------------------
    auto out_buf = create_host_buffer(device_, physical_device_, output.size(),
                                      vk::BufferUsageFlagBits::eStorageBuffer);
    if (!out_buf) {
        return std::unexpected(out_buf.error());
    }
    auto& output_buffer = *out_buf;
    std::memset(output_buffer->mapped, 0, output.size());

    // Per-call: shader object ---------------------------------------------
    vk::PushConstantRange shader_pc{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .offset = 0,
        // Always declared over the full PushData range; the actual bytes
        // are written at offset 16 by pushConstants2KHR below.
        .size = sizeof(Shader::PushData),
    };
    vk::ShaderCreateInfoEXT shader_ci{
        .stage = vk::ShaderStageFlagBits::eCompute,
        .codeType = vk::ShaderCodeTypeEXT::eSpirv,
        .codeSize = spirv.size() * sizeof(std::uint32_t),
        .pCode = spirv.data(),
        .pName = "main",
        .setLayoutCount = 1,
        .pSetLayouts = &descriptor_set_layout_,
        .pushConstantRangeCount = push_constants.empty() ? 0u : 1u,
        .pPushConstantRanges = push_constants.empty() ? nullptr : &shader_pc,
    };
    auto [shader_res, shaders] = device_.createShadersEXT(shader_ci);
    if (shader_res != vk::Result::eSuccess) {
        return make_error(Error::ShaderCreationFailed,
                          std::format("createShadersEXT: {}", vk::to_string(shader_res)));
    }
    auto shader = shaders[0];
    // Destroys the shader object on every exit path below.
    struct ShaderGuard {
        vk::Device d;
        vk::ShaderEXT s;
        ~ShaderGuard() {
            if (s) {
                d.destroyShaderEXT(s);
            }
        }
    } shader_guard{device_, shader};

    // Reset cached command buffer + fence --------------------------------
    device_.resetFences(fence_);
    command_buffer_.reset();

    if (command_buffer_.begin({
            .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
        }) != vk::Result::eSuccess) {
        return make_error(Error::CommandSubmissionFailed, "cmd.begin");
    }

    // Bind shader object -------------------------------------------------
    vk::ShaderStageFlagBits stage = vk::ShaderStageFlagBits::eCompute;
    command_buffer_.bindShadersEXT(1, &stage, &shader);

    // Push descriptor: binding 0 = output SSBO ---------------------------
    vk::DescriptorBufferInfo buffer_info{
        .buffer = output_buffer->buffer,
        .offset = 0,
        .range = VK_WHOLE_SIZE,
    };
    vk::WriteDescriptorSet descriptor_write{
        .dstBinding = 0,
        .descriptorCount = 1,
        .descriptorType = vk::DescriptorType::eStorageBuffer,
        .pBufferInfo = &buffer_info,
    };
    vk::PushDescriptorSetInfoKHR push_desc_info{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .layout = pipeline_layout_,
        .set = 0,
        .descriptorWriteCount = 1,
        .pDescriptorWrites = &descriptor_write,
    };
    command_buffer_.pushDescriptorSet2KHR(push_desc_info);

    // Push constants -----------------------------------------------------
    if (!push_constants.empty()) {
        vk::PushConstantsInfoKHR push_info{
            .layout = pipeline_layout_,
            .stageFlags = vk::ShaderStageFlagBits::eCompute,
            .offset = 16, // fall onto ud_regs in PushData
            .size = static_cast<std::uint32_t>(push_constants.size()),
            .pValues = push_constants.data(),
        };
        command_buffer_.pushConstants2KHR(push_info);
    }

    command_buffer_.dispatch(dispatch.x, dispatch.y, dispatch.z);

    // Make shader writes visible to the host read-back below.
    vk::MemoryBarrier barrier{
        .srcAccessMask = vk::AccessFlagBits::eShaderWrite,
        .dstAccessMask = vk::AccessFlagBits::eHostRead,
    };
    command_buffer_.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
                                    vk::PipelineStageFlagBits::eHost, {}, barrier, {}, {});

    if (command_buffer_.end() != vk::Result::eSuccess) {
        return make_error(Error::CommandSubmissionFailed, "cmd.end");
    }

    vk::SubmitInfo submit_info{
        .commandBufferCount = 1,
        .pCommandBuffers = &command_buffer_,
    };
    if (queue_.submit(submit_info, fence_) != vk::Result::eSuccess) {
        return make_error(Error::CommandSubmissionFailed, "queue.submit");
    }
    if (device_.waitForFences(fence_, VK_TRUE, UINT64_MAX) != vk::Result::eSuccess) {
        return make_error(Error::ExecutionFailed, "waitForFences");
    }

    std::memcpy(output.data(), output_buffer->mapped, output.size());
    return {};
}
|
||||
|
||||
} // namespace gcn_test
|
||||
110
tests/gcn/gcn_test_runner.hpp
Normal file
110
tests/gcn/gcn_test_runner.hpp
Normal file
@ -0,0 +1,110 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#define VULKAN_HPP_NO_EXCEPTIONS
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#include <vulkan/vulkan.hpp>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <expected>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
namespace gcn_test {
|
||||
|
||||
// Compute dispatch dimensions; defaults to a single 1x1x1 workgroup.
struct DispatchSize {
    std::uint32_t x = 1;
    std::uint32_t y = 1;
    std::uint32_t z = 1;
};
|
||||
|
||||
// Failure categories reported by the test runner. Only append new values;
// existing order is kept stable.
enum class Error {
    NoSuitableDevice,        // no Vulkan device with compute + required extensions
    InstanceCreationFailed,  // vkCreateInstance failed
    DeviceCreationFailed,    // device / pool / fence / layout setup failed
    ShaderCreationFailed,    // createShadersEXT rejected the SPIR-V
    BufferAllocationFailed,  // output buffer create/allocate/bind/map failed
    CommandSubmissionFailed, // command recording or queue submission failed
    PushConstantTooLarge,    // push data exceeds the pipeline-layout range
    OutputTooLarge,          // output span unusable (currently: empty)
    ExecutionFailed,         // waiting on the completion fence failed
};
|
||||
|
||||
struct ErrorInfo {
|
||||
Error code;
|
||||
std::string message;
|
||||
};
|
||||
|
||||
// Owns a minimal compute-only Vulkan context and executes arbitrary SPIR-V
// compute shaders, reading the result back to the host. Access it through
// the process-wide singleton returned by instance(). Non-copyable.
class Runner {
public:
    /// Returns the lazily-created singleton, or the initialization error.
    static std::expected<Runner*, ErrorInfo> instance();

    /// Runs `spirv` once, feeding `push_constants` to the shader and copying
    /// the storage-buffer contents back into `output`.
    std::expected<void, ErrorInfo> run_raw(
        std::span<const std::uint32_t> spirv,
        std::span<const std::byte> push_constants,
        std::span<std::byte> output,
        DispatchSize dispatch = {}
    );

    /// Typed wrapper: sends `push` as raw bytes, reads back one OutputT.
    template <typename OutputT, typename PushT>
    std::expected<OutputT, ErrorInfo> run(
        std::span<const std::uint32_t> spirv,
        const PushT& push,
        DispatchSize dispatch = {}
    ) {
        static_assert(std::is_trivially_copyable_v<PushT>);
        static_assert(std::is_trivially_copyable_v<OutputT>);
        OutputT result{};
        const std::span<const std::byte> push_bytes{
            reinterpret_cast<const std::byte*>(&push), sizeof(PushT)};
        const std::span<std::byte> out_bytes{
            reinterpret_cast<std::byte*>(&result), sizeof(OutputT)};
        auto status = run_raw(spirv, push_bytes, out_bytes, dispatch);
        if (!status) {
            return std::unexpected(status.error());
        }
        return result;
    }

    /// Typed wrapper without push constants.
    template <typename OutputT>
    std::expected<OutputT, ErrorInfo> run(
        std::span<const std::uint32_t> spirv,
        DispatchSize dispatch = {}
    ) {
        static_assert(std::is_trivially_copyable_v<OutputT>);
        OutputT result{};
        const std::span<std::byte> out_bytes{
            reinterpret_cast<std::byte*>(&result), sizeof(OutputT)};
        auto status = run_raw(spirv, {}, out_bytes, dispatch);
        if (!status) {
            return std::unexpected(status.error());
        }
        return result;
    }

    ~Runner();
    Runner(const Runner&) = delete;
    Runner& operator=(const Runner&) = delete;

private:
    Runner() = default;
    std::expected<void, ErrorInfo> initialize();

    vk::Instance instance_;
    vk::PhysicalDevice physical_device_;
    vk::Device device_;
    vk::Queue queue_;
    std::uint32_t queue_family_ = 0;
    vk::CommandPool command_pool_;
    vk::CommandBuffer command_buffer_;              // cached, reset per call
    vk::Fence fence_;                               // cached, reset per call
    vk::DescriptorSetLayout descriptor_set_layout_; // push-descriptor
    vk::PipelineLayout pipeline_layout_;

    std::uint32_t max_push_constant_size_ = 128;
};
|
||||
|
||||
} // namespace gcn_test
|
||||
63
tests/gcn/test_gcn_instructions.cpp
Normal file
63
tests/gcn/test_gcn_instructions.cpp
Normal file
@ -0,0 +1,63 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include <cmath>
#include <limits>

#include <gtest/gtest.h>

#include "gcn_test_runner.hpp"
#include "translator.hpp"
|
||||
|
||||
// Shared fixture for GCN instruction tests. The Vulkan context lives in the
// process-wide gcn_test::Runner singleton, so no per-test setup or teardown
// is needed (the previous empty SetUp/TearDown overrides were no-ops).
class GcnTest : public ::testing::Test {};
|
||||
|
||||
// Two packed 32-bit floats handed to the shader as push data; the harness
// mirrors them into the shader's input registers.
struct F32x2 {
    float a; // first operand
    float b; // second operand
};
|
||||
|
||||
// Example
|
||||
// TEST_F(GcnTest, test_name) {
|
||||
// // Runner sets the vulkan context
|
||||
// auto runner = gcn_test::Runner::instance().value();
|
||||
//
|
||||
// // v_add_f32 v0, v0, v1
|
||||
// auto spirv = TranslateToSpirv(0x06000300);
|
||||
//
|
||||
// // run<T> tells how to interpret the result (only 32bit as of now)
|
||||
// // the second argument is templated, it can be at most 4 u32s
|
||||
// // the data is accessible by the instruction in v0-4 and s0-4 (mirrored)
|
||||
// // the result has to be placed in v0
|
||||
// auto result = runner->run<float>(spirv, F32x2{1.5f, 6.0f});
|
||||
//
|
||||
// EXPECT_TRUE(result.has_value());
|
||||
// EXPECT_EQ(*result, 7.5f);
|
||||
// }
|
||||
|
||||
// 1.5 + 6.0 is exactly representable, so the sum must be exactly 7.5.
TEST_F(GcnTest, add_f32) {
    auto runner = gcn_test::Runner::instance().value();

    // v_add_f32 v0, v0, v1
    auto spirv = TranslateToSpirv(0x06000300);

    auto result = runner->run<float>(spirv, F32x2{1.5f, 6.0f});

    // ASSERT (not EXPECT): a non-fatal EXPECT would fall through and
    // dereference an empty std::expected below, which is undefined behavior.
    ASSERT_TRUE(result.has_value());
    EXPECT_FLOAT_EQ(*result, 7.5f);
}
|
||||
|
||||
// v_add_f32 where one source is quiet NaN: the addition must propagate NaN
// into the result written to v0.
TEST_F(GcnTest, add_nan) {
    auto runner = gcn_test::Runner::instance().value();

    // v_add_f32 v0, v0, v1
    auto spirv = TranslateToSpirv(0x06000300);

    auto result = runner->run<float>(spirv, F32x2{1.0f, std::numeric_limits<float>::quiet_NaN()});

    // ASSERT (not EXPECT): *result on an empty optional is undefined behavior,
    // so stop the test immediately when no value came back.
    ASSERT_TRUE(result.has_value());
    EXPECT_TRUE(std::isnan(*result));
}
|
||||
98
tests/gcn/translator.cpp
Normal file
98
tests/gcn/translator.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "translator.hpp"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "common/io_file.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/frontend/decode.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/passes/ir_passes.h"
|
||||
#include "shader_recompiler/ir/post_order.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
|
||||
using namespace Shader;
|
||||
|
||||
namespace Shader::Optimization {
|
||||
void ResourceTrackingPassStub(IR::Program& program, const Profile& profile);
|
||||
}
|
||||
|
||||
// Wraps a single encoded GCN instruction in a minimal one-block compute
// program and compiles it to SPIR-V words.
//
// The generated program:
//   1. runs the translator prologue, then mirrors user-data SGPRs s0-s3
//      into VGPRs v0-v3 via S_MOV,
//   2. executes the provided instruction,
//   3. appends "buffer_store_dword v0, v0, s[0:3], 0" so the result in v0
//      is written out to the buffer bound by the test runner.
// A reduced optimization pipeline is then run, with the real resource
// tracking pass replaced by ResourceTrackingPassStub.
//
// raw_gcn_inst: low dword is the first encoding word; the high dword (if
// non-zero) is the second word of a 64-bit encoding.
std::vector<u32> TranslateToSpirv(u64 raw_gcn_inst) {
    // Split the 64-bit value into the (up to) two dwords of the encoding.
    std::array<u32, 2> provided_inst{static_cast<u32>(raw_gcn_inst & 0xFFFFFFFFU),
                                     static_cast<u32>(raw_gcn_inst >> 32)};
    std::array<u32, 2> store{
        0xe0700000,
        0x80000000 // buffer_store_dword v0, v0, s[0:3], 0
    };
    Gcn::GcnCodeSlice first(provided_inst.data(), provided_inst.data() + provided_inst.size());
    Gcn::GcnCodeSlice second(store.data(), store.data() + store.size());

    Gcn::GcnDecodeContext decoder;
    Gcn::GcnInst inst = decoder.decodeInstruction(first);
    Gcn::GcnInst store_inst = decoder.decodeInstruction(second);

    // Minimal compute-stage shader info. The flattened user-data buffer is
    // seeded with a null V# — presumably so sharp reads during translation
    // see valid data (the stub pass ignores the actual sharp contents).
    Shader::Info info{};
    info.stage = Stage::Compute;
    info.l_stage = LogicalStage::Compute;
    info.flattened_ud_buf.resize(4);
    AmdGpu::Buffer buf = AmdGpu::Buffer::Null();
    std::memcpy(info.flattened_ud_buf.data(), &buf, sizeof(buf));

    IR::Program program{info};
    Pools pools{};

    // One basic block plus a Return node — the smallest valid syntax list.
    IR::Block* block = pools.block_pool.Create(pools.inst_pool);
    program.blocks.push_back(block);

    program.syntax_list.emplace_back();
    program.syntax_list.back().type = IR::AbstractSyntaxNode::Type::Block;
    program.syntax_list.back().data.block = block;
    program.syntax_list.emplace_back();
    program.syntax_list.back().type = IR::AbstractSyntaxNode::Type::Return;
    program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());

    // SPIR-V 1.6, wave size 32. NOTE(review): assumed to match the test
    // runner's Vulkan device capabilities — confirm against the runner setup.
    Profile profile{};
    profile.supported_spirv = 0x00010600;
    profile.subgroup_size = 32;
    profile.supports_robust_buffer_access = true;

    RuntimeInfo runtime_info{};
    runtime_info.Initialize(Stage::Compute);
    runtime_info.num_user_data = 4;
    runtime_info.cs_info.workgroup_size = {1, 1, 1};

    Gcn::Translator translator(program.info, runtime_info, profile);
    translator.EmitPrologue(block);

    for (int i = 0; i < 4; ++i) {
        // copy user data from SGPR to VGPR as (most?) instructions cannot access
        // two SGPRs
        Shader::Gcn::GcnInst mov{};
        mov.src[0].field = Shader::Gcn::OperandField::ScalarGPR;
        mov.src[0].code = i;
        mov.dst[0].field = Shader::Gcn::OperandField::VectorGPR;
        mov.dst[0].code = i;
        translator.S_MOV(mov);
    }
    translator.TranslateInstruction(inst);
    translator.TranslateInstruction(store_inst);

    // Reduced optimization pipeline; ResourceTrackingPassStub stands in for
    // the full resource tracking pass.
    Shader::Optimization::SsaRewritePass(program.post_order_blocks);
    Shader::Optimization::IdentityRemovalPass(program.blocks);
    Shader::Optimization::ResourceTrackingPassStub(program, profile);
    Shader::Optimization::ConstantPropagationPass(program.blocks);
    Shader::Optimization::DeadCodeEliminationPass(program);
    Shader::Optimization::CollectShaderInfoPass(program, profile);

    Backend::Bindings bindings{};

    const auto spirv = Backend::SPIRV::EmitSPIRV(profile, runtime_info, program, bindings);

    return spirv;
}
|
||||
10
tests/gcn/translator.hpp
Normal file
10
tests/gcn/translator.hpp
Normal file
@ -0,0 +1,10 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once

#include <vector>

#include "common/types.h"

/// Compiles a single encoded GCN instruction (low dword = first encoding
/// word, high dword = optional second word) into a minimal compute shader
/// and returns the resulting SPIR-V words. The wrapper program mirrors user
/// data into v0-v3/s0-s3 and stores the result from v0 via a buffer store.
std::vector<u32> TranslateToSpirv(u64 raw_gcn_inst);
|
||||
335
tests/stubs/resource_tracking_pass_stub.cpp
Normal file
335
tests/stubs/resource_tracking_pass_stub.cpp
Normal file
@ -0,0 +1,335 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/control_flow_graph.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/operand_helper.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/ir/reinterpret.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
namespace {
|
||||
|
||||
using SharpLocation = u32;
|
||||
|
||||
// Returns true when the opcode is an atomic read-modify-write on a buffer.
bool IsBufferAtomic(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    // 32-bit integer arithmetic atomics
    case IR::Opcode::BufferAtomicIAdd32:
    case IR::Opcode::BufferAtomicISub32:
    case IR::Opcode::BufferAtomicSMin32:
    case IR::Opcode::BufferAtomicSMax32:
    case IR::Opcode::BufferAtomicUMin32:
    case IR::Opcode::BufferAtomicUMax32:
    case IR::Opcode::BufferAtomicInc32:
    case IR::Opcode::BufferAtomicDec32:
    // 64-bit integer atomics
    case IR::Opcode::BufferAtomicIAdd64:
    case IR::Opcode::BufferAtomicSMin64:
    case IR::Opcode::BufferAtomicSMax64:
    case IR::Opcode::BufferAtomicUMin64:
    case IR::Opcode::BufferAtomicUMax64:
    // Floating-point atomics
    case IR::Opcode::BufferAtomicFMin32:
    case IR::Opcode::BufferAtomicFMax32:
    case IR::Opcode::BufferAtomicFCmpSwap32:
    // Bitwise and exchange atomics
    case IR::Opcode::BufferAtomicAnd32:
    case IR::Opcode::BufferAtomicOr32:
    case IR::Opcode::BufferAtomicXor32:
    case IR::Opcode::BufferAtomicSwap32:
    case IR::Opcode::BufferAtomicCmpSwap32:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
// Returns true when the opcode writes to a buffer: any store opcode, or any
// atomic (atomics both read and write; delegated to IsBufferAtomic).
bool IsBufferStore(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    // Untyped integer stores
    case IR::Opcode::StoreBufferU8:
    case IR::Opcode::StoreBufferU16:
    case IR::Opcode::StoreBufferU32:
    case IR::Opcode::StoreBufferU64:
    case IR::Opcode::StoreBufferU32x2:
    case IR::Opcode::StoreBufferU32x3:
    case IR::Opcode::StoreBufferU32x4:
    // Floating-point and typed (format) stores
    case IR::Opcode::StoreBufferF32:
    case IR::Opcode::StoreBufferF32x2:
    case IR::Opcode::StoreBufferF32x3:
    case IR::Opcode::StoreBufferF32x4:
    case IR::Opcode::StoreBufferFormatF32:
        return true;
    default:
        return IsBufferAtomic(inst);
    }
}
|
||||
|
||||
// Returns true for any opcode that accesses a buffer resource: loads (listed
// here), plus stores and atomics (delegated to IsBufferStore).
bool IsBufferInstruction(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    // Untyped integer loads
    case IR::Opcode::LoadBufferU8:
    case IR::Opcode::LoadBufferU16:
    case IR::Opcode::LoadBufferU32:
    case IR::Opcode::LoadBufferU64:
    case IR::Opcode::LoadBufferU32x2:
    case IR::Opcode::LoadBufferU32x3:
    case IR::Opcode::LoadBufferU32x4:
    // Floating-point and typed (format) loads
    case IR::Opcode::LoadBufferF32:
    case IR::Opcode::LoadBufferF32x2:
    case IR::Opcode::LoadBufferF32x3:
    case IR::Opcode::LoadBufferF32x4:
    case IR::Opcode::LoadBufferFormatF32:
    // Scalar constant-buffer reads
    case IR::Opcode::ReadConstBuffer:
        return true;
    default:
        return IsBufferStore(inst);
    }
}
|
||||
|
||||
// Returns the right-shift (log2 of the addressing granule in bytes) to apply
// to a byte offset for the given buffer instruction. Typed (format) accesses
// take their granule from the data format; everything else is fixed by opcode.
// Fix: removed an unreachable `break;` that followed the inner switch, where
// every path already returns.
u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadBufferU8:
    case IR::Opcode::StoreBufferU8:
        return 0; // byte addressed
    case IR::Opcode::LoadBufferU16:
    case IR::Opcode::StoreBufferU16:
        return 1; // 16-bit granule
    case IR::Opcode::LoadBufferU64:
    case IR::Opcode::StoreBufferU64:
    case IR::Opcode::BufferAtomicIAdd64:
    case IR::Opcode::BufferAtomicSMax64:
    case IR::Opcode::BufferAtomicSMin64:
    case IR::Opcode::BufferAtomicUMax64:
    case IR::Opcode::BufferAtomicUMin64:
        return 3; // 64-bit granule
    case IR::Opcode::LoadBufferFormatF32:
    case IR::Opcode::StoreBufferFormatF32: {
        // Typed access: granule depends on the (possibly remapped) format.
        switch (data_format) {
        case AmdGpu::DataFormat::Format8:
            return 0;
        case AmdGpu::DataFormat::Format8_8:
        case AmdGpu::DataFormat::Format16:
            return 1;
        case AmdGpu::DataFormat::Format8_8_8_8:
        case AmdGpu::DataFormat::Format16_16:
        case AmdGpu::DataFormat::Format10_11_11:
        case AmdGpu::DataFormat::Format2_10_10_10:
        case AmdGpu::DataFormat::Format16_16_16_16:
        case AmdGpu::DataFormat::Format32:
        case AmdGpu::DataFormat::Format32_32:
        case AmdGpu::DataFormat::Format32_32_32:
        case AmdGpu::DataFormat::Format32_32_32_32:
            return 2; // dword-granular formats
        default:
            return 0;
        }
    }
    case IR::Opcode::ReadConstBuffer:
        // Provided address is already in dwords
        return 0;
    default:
        return 2; // remaining buffer accesses are dword granular
    }
}
|
||||
|
||||
class Descriptors {
|
||||
public:
|
||||
explicit Descriptors(Info& info_)
|
||||
: info{info_}, buffer_resources{info_.buffers}, image_resources{info_.images},
|
||||
sampler_resources{info_.samplers}, fmask_resources(info_.fmasks) {}
|
||||
|
||||
u32 Add(const BufferResource& desc) {
|
||||
const u32 index{Add(buffer_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sharp_idx == existing.sharp_idx &&
|
||||
desc.inline_cbuf == existing.inline_cbuf &&
|
||||
desc.buffer_type == existing.buffer_type;
|
||||
})};
|
||||
auto& buffer = buffer_resources[index];
|
||||
buffer.used_types |= desc.used_types;
|
||||
buffer.is_written |= desc.is_written;
|
||||
buffer.is_formatted |= desc.is_formatted;
|
||||
return index;
|
||||
}
|
||||
|
||||
u32 Add(const ImageResource& desc) {
|
||||
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array &&
|
||||
desc.mip_fallback_mode == existing.mip_fallback_mode &&
|
||||
desc.constant_mip_index == existing.constant_mip_index;
|
||||
})};
|
||||
auto& image = image_resources[index];
|
||||
image.is_atomic |= desc.is_atomic;
|
||||
image.is_written |= desc.is_written;
|
||||
return index;
|
||||
}
|
||||
|
||||
u32 Add(const SamplerResource& desc) {
|
||||
const u32 index{Add(sampler_resources, desc, [this, &desc](const auto& existing) {
|
||||
return desc.sharp_idx == existing.sharp_idx &&
|
||||
desc.is_inline_sampler == existing.is_inline_sampler &&
|
||||
desc.inline_sampler == existing.inline_sampler;
|
||||
})};
|
||||
return index;
|
||||
}
|
||||
|
||||
u32 Add(const FMaskResource& desc) {
|
||||
u32 index = Add(fmask_resources, desc, [&desc](const auto& existing) {
|
||||
return desc.sharp_idx == existing.sharp_idx;
|
||||
});
|
||||
return index;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename Descriptors, typename Descriptor, typename Func>
|
||||
static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) {
|
||||
const auto it{std::ranges::find_if(descriptors, pred)};
|
||||
if (it != descriptors.end()) {
|
||||
return static_cast<u32>(std::distance(descriptors.begin(), it));
|
||||
}
|
||||
descriptors.push_back(desc);
|
||||
return static_cast<u32>(descriptors.size()) - 1;
|
||||
}
|
||||
|
||||
const Info& info;
|
||||
BufferResourceList& buffer_resources;
|
||||
ImageResourceList& image_resources;
|
||||
SamplerResourceList& sampler_resources;
|
||||
FMaskResourceList& fmask_resources;
|
||||
};
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
// Stub version of sharp patching: rather than resolving the real V# from user
// data, every buffer instruction is bound to one synthetic guest-buffer
// descriptor (sharp_idx 0), and the instruction's handle argument is replaced
// by that binding index. `info` and `profile` are kept for signature parity
// with the real pass.
void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors,
                      const Profile& profile) {
    const BufferResource stub_resource{.sharp_idx = 0,
                                       .used_types = IR::Type::U32,
                                       .buffer_type = BufferType::Guest,
                                       .is_written = true,
                                       .is_formatted = false};
    const u32 buffer_binding = descriptors.Add(stub_resource);

    // Replace handle with binding index in buffer resource list.
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(0, ir.Imm32(buffer_binding));
}
|
||||
|
||||
// Emits IR computing the element address for a buffer instruction from its
// index/offset operands and the buffer sharp's stride/swizzle parameters.
// The returned value is in element units: the byte address is shifted right
// by BufferAddressShift(...) for the access width. A constant-foldable fast
// path is emitted when the address reduces to index * stride + const_offset
// with everything aligned to the element size.
IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
                               const AmdGpu::Buffer& buffer, u32 stride) {
    const auto inst_info = inst.Flags<IR::BufferInstInfo>();
    const u32 inst_offset = inst_info.inst_offset.Value();
    // Typed instructions carry their own data format; otherwise use the sharp's.
    const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid;
    const auto data_format = is_inst_typed
                                 ? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value())
                                 : buffer.GetDataFmt();
    const u32 shift = BufferAddressShift(inst, data_format);
    const u32 mask = (1 << shift) - 1;
    const IR::U32 soffset = IR::GetBufferSOffsetArg(&inst);

    // If address calculation is of the form "index * const_stride + offset" with
    // offset constant and both const_stride and offset are divisible with the
    // element size, apply shift directly.
    if (inst_info.index_enable && !inst_info.voffset_enable && soffset.IsImmediate() &&
        !buffer.swizzle_enable && !buffer.add_tid_enable && (stride & mask) == 0) {
        const u32 total_offset = soffset.U32() + inst_offset;
        if ((total_offset & mask) == 0) {
            // buffer_offset = index * (const_stride >> shift) + (offset >> shift)
            const IR::U32 index = IR::GetBufferIndexArg(&inst);
            return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)),
                           ir.Imm32(total_offset >> shift));
        }
    }

    // index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ?
    // thread_id[5:0] : 0)
    IR::U32 index = ir.Imm32(0U);
    if (inst_info.index_enable) {
        const IR::U32 vgpr_index = IR::GetBufferIndexArg(&inst);
        index = ir.IAdd(index, vgpr_index);
    }
    if (buffer.add_tid_enable) {
        ASSERT_MSG(info.l_stage == LogicalStage::Compute,
                   "Thread ID buffer addressing is not supported outside of compute.");
        const IR::U32 thread_id{ir.LaneId()};
        index = ir.IAdd(index, thread_id);
    }
    // offset = (inst_offen ? vgpr_offset : 0) + inst_offset
    IR::U32 offset = ir.Imm32(inst_offset);
    offset = ir.IAdd(offset, soffset);
    if (inst_info.voffset_enable) {
        const IR::U32 voffset = IR::GetBufferVOffsetArg(&inst);
        offset = ir.IAdd(offset, voffset);
    }
    const IR::U32 const_stride = ir.Imm32(stride);
    IR::U32 buffer_offset;
    if (buffer.swizzle_enable) {
        // Swizzled layout: split index/offset into MSB/LSB parts around the
        // sharp's index stride and element size, per the GCN swizzle formula.
        const IR::U32 const_index_stride = ir.Imm32(buffer.GetIndexStride());
        const IR::U32 const_element_size = ir.Imm32(buffer.GetElementSize());
        // index_msb = index / const_index_stride
        const IR::U32 index_msb{ir.IDiv(index, const_index_stride)};
        // index_lsb = index % const_index_stride
        const IR::U32 index_lsb{ir.IMod(index, const_index_stride)};
        // offset_msb = offset / const_element_size
        const IR::U32 offset_msb{ir.IDiv(offset, const_element_size)};
        // offset_lsb = offset % const_element_size
        const IR::U32 offset_lsb{ir.IMod(offset, const_element_size)};
        // buffer_offset =
        //   (index_msb * const_stride + offset_msb * const_element_size) *
        //   const_index_stride
        //   + index_lsb * const_element_size + offset_lsb
        const IR::U32 buffer_offset_msb = ir.IMul(
            ir.IAdd(ir.IMul(index_msb, const_stride), ir.IMul(offset_msb, const_element_size)),
            const_index_stride);
        const IR::U32 buffer_offset_lsb =
            ir.IAdd(ir.IMul(index_lsb, const_element_size), offset_lsb);
        buffer_offset = ir.IAdd(buffer_offset_msb, buffer_offset_lsb);
    } else {
        // buffer_offset = index * const_stride + offset
        buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset);
    }
    // Convert the byte offset to element units for the access width.
    if (shift != 0) {
        buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift));
    }
    return buffer_offset;
}
|
||||
|
||||
// Rewrites a buffer instruction's address operand using a null sharp (the
// stub never consults the tracked resource, so a null V# supplies the default
// stride/swizzle parameters).
// Fix: removed unused locals `handle`/`buffer_res` — they indexed
// info.buffers for nothing, and did so even on the ReadConstBuffer
// early-return path; the early return now comes first.
void PatchBufferArgs(IR::Block& block, IR::Inst& inst, Info& info) {
    // Address of constant buffer reads can be calculated at IR emission time.
    if (inst.GetOpcode() == IR::Opcode::ReadConstBuffer) {
        return;
    }

    const auto buffer = AmdGpu::Buffer::Null();

    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    inst.SetArg(IR::LoadBufferArgs::Address,
                CalculateBufferAddress(ir, inst, info, buffer, buffer.stride));
}
|
||||
|
||||
// Stub replacement for the resource tracking pass. Two passes over every
// buffer instruction in the program: first bind each one to the synthetic
// descriptor (sharp patching), then rewrite its address argument. The
// traversal is shared via a local helper.
void ResourceTrackingPassStub(IR::Program& program, const Profile& profile) {
    auto& info = program.info;
    Descriptors descriptors{info};

    // Applies `action(block, inst)` to every buffer instruction.
    const auto for_each_buffer_inst = [&program](auto&& action) {
        for (IR::Block* const blk : program.blocks) {
            for (IR::Inst& inst : blk->Instructions()) {
                if (IsBufferInstruction(inst)) {
                    action(*blk, inst);
                }
            }
        }
    };

    // Pass 1: Track resource sharps
    for_each_buffer_inst([&](IR::Block& blk, IR::Inst& inst) {
        PatchBufferSharp(blk, inst, info, descriptors, profile);
    });

    // Pass 2: Patch instruction args
    for_each_buffer_inst([&](IR::Block& blk, IR::Inst& inst) {
        PatchBufferArgs(blk, inst, info);
    });
}
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
Loading…
Reference in New Issue
Block a user