shadPS4/tests/gcn/gcn_test_runner.cpp
Marcin Mikołajczyk 963d10f220
Test framework for GCN pipeline (#4272)
* Test framework for GCN pipeline

* Try fixing test compilation on CI
2026-04-19 22:15:08 +03:00

432 lines
15 KiB
C++

// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "gcn_test_runner.hpp"
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <expected>
#include <format>
#include <memory>
#include <mutex>
#include <ranges>
#include <span>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "shader_recompiler/resource.h"
// Exactly one TU must define the dynamic dispatcher storage.
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
namespace gcn_test {
namespace {
// Validation is requested in builds without NDEBUG and skipped otherwise.
#ifdef NDEBUG
constexpr bool kEnableValidation = false;
#else
constexpr bool kEnableValidation = true;
#endif
// Wraps an error code and a human-readable message into the std::unexpected
// value that every fallible function in this file returns on failure.
auto make_error(Error code, std::string message) {
    ErrorInfo info{code, std::move(message)};
    return std::unexpected(std::move(info));
}
// Returns the index of the first memory type of `pd` that is permitted by the
// `type_filter` bitmask and carries every flag in `required`.
// Fails with Error::BufferAllocationFailed when no memory type qualifies.
auto find_memory_type(vk::PhysicalDevice pd, std::uint32_t type_filter,
                      vk::MemoryPropertyFlags required) -> std::expected<std::uint32_t, ErrorInfo> {
    const auto memory_props = pd.getMemoryProperties();
    for (std::uint32_t index = 0; index < memory_props.memoryTypeCount; ++index) {
        // Bit `index` of the filter says whether this memory type is allowed at all.
        const bool allowed = (type_filter & (1u << index)) != 0;
        if (!allowed) {
            continue;
        }
        const auto flags = memory_props.memoryTypes[index].propertyFlags;
        if ((flags & required) == required) {
            return index;
        }
    }
    return make_error(Error::BufferAllocationFailed, "no suitable memory type found");
}
// Bundle of a buffer, its backing device memory and the host mapping of that
// memory. The destructor releases whichever of the three have been set, so a
// partially filled instance can be dropped safely. Copying is disabled.
struct HostBuffer {
    HostBuffer() = default;
    HostBuffer(const HostBuffer&) = delete;
    HostBuffer& operator=(const HostBuffer&) = delete;

    ~HostBuffer() {
        // Tear down in reverse order of creation: mapping, buffer, memory.
        if (mapped) {
            device.unmapMemory(memory);
        }
        if (buffer) {
            device.destroyBuffer(buffer);
        }
        if (memory) {
            device.freeMemory(memory);
        }
    }

    vk::Device device;
    vk::Buffer buffer;
    vk::DeviceMemory memory;
    void* mapped = nullptr;
};
// Creates a buffer of `size` bytes with the given `usage`, backs it with
// host-visible, host-coherent memory and maps the first `size` bytes.
// Every failure is reported as Error::BufferAllocationFailed with the name of
// the failing step; handles created before the failure are released by the
// HostBuffer destructor.
auto create_host_buffer(vk::Device dev, vk::PhysicalDevice pd, vk::DeviceSize size,
                        vk::BufferUsageFlags usage)
    -> std::expected<std::unique_ptr<HostBuffer>, ErrorInfo> {
    const auto host_flags =
        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;

    auto host = std::make_unique<HostBuffer>();
    host->device = dev;

    // 1. Buffer object.
    const vk::BufferCreateInfo buffer_info{
        .size = size,
        .usage = usage,
        .sharingMode = vk::SharingMode::eExclusive,
    };
    auto [buffer_status, buffer_handle] = dev.createBuffer(buffer_info);
    if (buffer_status != vk::Result::eSuccess) {
        return make_error(Error::BufferAllocationFailed, "createBuffer");
    }
    host->buffer = buffer_handle;

    // 2. Backing memory of a type the CPU can read without explicit flushes.
    const auto requirements = dev.getBufferMemoryRequirements(buffer_handle);
    const auto type_index = find_memory_type(pd, requirements.memoryTypeBits, host_flags);
    if (!type_index.has_value()) {
        return std::unexpected(type_index.error());
    }
    auto [alloc_status, memory_handle] = dev.allocateMemory({
        .allocationSize = requirements.size,
        .memoryTypeIndex = *type_index,
    });
    if (alloc_status != vk::Result::eSuccess) {
        return make_error(Error::BufferAllocationFailed, "allocateMemory");
    }
    host->memory = memory_handle;

    // 3. Bind and map.
    const auto bind_status = dev.bindBufferMemory(buffer_handle, memory_handle, 0);
    if (bind_status != vk::Result::eSuccess) {
        return make_error(Error::BufferAllocationFailed, "bindBufferMemory");
    }
    auto [map_status, host_ptr] = dev.mapMemory(memory_handle, 0, size);
    if (map_status != vk::Result::eSuccess) {
        return make_error(Error::BufferAllocationFailed, "mapMemory");
    }
    host->mapped = host_ptr;
    return host;
}
// Held by Runner::instance() while it inspects or creates g_runner.
std::mutex g_runner_mutex;
// Shared Runner; stays null until Runner::instance() has initialized one
// successfully.
std::unique_ptr<Runner> g_runner;
} // namespace
// Releases every Vulkan object the runner created. Each handle is checked
// before destruction so a runner whose initialize() stopped half-way is torn
// down correctly as well.
Runner::~Runner() {
    if (device_) {
        // Let any outstanding work finish before destroying what it uses.
        device_.waitIdle();

        if (fence_) {
            device_.destroyFence(fence_);
        }
        if (pipeline_layout_) {
            device_.destroyPipelineLayout(pipeline_layout_);
        }
        if (descriptor_set_layout_) {
            device_.destroyDescriptorSetLayout(descriptor_set_layout_);
        }
        if (command_pool_) {
            device_.destroyCommandPool(command_pool_);
        }

        // Device-level children are gone; the device itself goes last.
        device_.destroy();
    }

    if (instance_) {
        instance_.destroy();
    }
}
// Returns the shared Runner, creating and initializing it on first use.
// The whole lookup/creation runs under g_runner_mutex. When initialization
// fails the error is returned and nothing is cached, so a later call tries
// again from scratch.
std::expected<Runner*, ErrorInfo> Runner::instance() {
    const std::lock_guard guard{g_runner_mutex};

    if (!g_runner) {
        std::unique_ptr<Runner> candidate{new Runner{}};
        const auto status = candidate->initialize();
        if (!status.has_value()) {
            return std::unexpected(status.error());
        }
        g_runner = std::move(candidate);
    }
    return g_runner.get();
}
// Creates every long-lived Vulkan object the runner needs: instance, a physical
// device that exposes a compute queue plus the required extensions, the logical
// device and its queue, a resettable command pool with one primary command
// buffer, a fence, the push-descriptor set layout and the pipeline layout.
//
// Returns an empty expected on success. On failure returns an ErrorInfo whose
// code is InstanceCreationFailed, NoSuitableDevice or DeviceCreationFailed.
// Handles created before the failure stay in the members and are released by
// the destructor.
std::expected<void, ErrorInfo> Runner::initialize() {
    // Bootstrap the dynamic dispatcher before the first Vulkan call.
    VULKAN_HPP_DEFAULT_DISPATCHER.init();

    // ---- Instance ------------------------------------------------------
    vk::ApplicationInfo app_info{
        .pApplicationName = "gcn_test_runner",
        .applicationVersion = 1,
        .pEngineName = "gcn_test_runner",
        .engineVersion = 1,
        .apiVersion = vk::ApiVersion13,
    };

    // Request the validation layer only when the loader reports it. Asking for
    // a layer that is not installed makes instance creation fail outright,
    // which would break every test on machines without the Vulkan SDK.
    constexpr const char* kValidationLayer = "VK_LAYER_KHRONOS_validation";
    std::vector<const char*> layers;
    if (kEnableValidation) {
        auto [lr, available_layers] = vk::enumerateInstanceLayerProperties();
        const bool layer_present =
            lr == vk::Result::eSuccess &&
            std::ranges::any_of(available_layers, [&](auto& l) {
                return std::string_view{l.layerName} == kValidationLayer;
            });
        if (layer_present)
            layers.push_back(kValidationLayer);
    }

    auto [ir, inst] = vk::createInstance({
        .pApplicationInfo = &app_info,
        .enabledLayerCount = static_cast<std::uint32_t>(layers.size()),
        .ppEnabledLayerNames = layers.data(),
    });
    if (ir != vk::Result::eSuccess)
        return make_error(Error::InstanceCreationFailed,
                          std::format("createInstance: {}", vk::to_string(ir)));
    instance_ = inst;
    VULKAN_HPP_DEFAULT_DISPATCHER.init(instance_);

    // ---- Pick physical device with the extensions we need -------------
    auto [pr, devs] = instance_.enumeratePhysicalDevices();
    if (pr != vk::Result::eSuccess || devs.empty())
        return make_error(Error::NoSuitableDevice, "no Vulkan devices");
    constexpr std::array required_exts{
        VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
        VK_KHR_MAINTENANCE_6_EXTENSION_NAME,
        VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
    };
    // First device that has all required extensions and a compute-capable
    // queue family wins; the first such family on that device is used.
    for (auto pd : devs) {
        auto [er, exts] = pd.enumerateDeviceExtensionProperties();
        if (er != vk::Result::eSuccess)
            continue;
        auto has_ext = [&](const char* name) {
            return std::ranges::any_of(
                exts, [&](auto& e) { return std::string_view{e.extensionName} == name; });
        };
        if (!std::ranges::all_of(required_exts, has_ext))
            continue;
        auto families = pd.getQueueFamilyProperties();
        for (std::uint32_t i = 0; i < families.size(); ++i) {
            if (families[i].queueFlags & vk::QueueFlagBits::eCompute) {
                physical_device_ = pd;
                queue_family_ = i;
                break;
            }
        }
        if (physical_device_)
            break;
    }
    if (!physical_device_)
        return make_error(Error::NoSuitableDevice,
                          "no device with compute + shader_object + maintenance6 + "
                          "push_descriptor");

    // The push-constant block is fixed at the size of Shader::PushData, not at
    // the device's maxPushConstantsSize limit.
    max_push_constant_size_ = sizeof(Shader::PushData);

    // ---- Device with feature chain ------------------------------------
    // NOTE(review): the features below are enabled without querying support
    // first; a device lacking one of them fails in createDevice.
    float priority = 1.0f;
    vk::DeviceQueueCreateInfo qci{
        .queueFamilyIndex = queue_family_,
        .queueCount = 1,
        .pQueuePriorities = &priority,
    };
    vk::PhysicalDeviceShaderObjectFeaturesEXT so_feat{.shaderObject = VK_TRUE};
    vk::PhysicalDeviceMaintenance6FeaturesKHR m6_feat{
        .pNext = &so_feat,
        .maintenance6 = VK_TRUE,
    };
    vk::PhysicalDeviceVulkan11Features v11_feat{
        .pNext = &m6_feat,
        .uniformAndStorageBuffer16BitAccess = VK_TRUE,
    };
    vk::PhysicalDeviceVulkan12Features v12_feat{
        .pNext = &v11_feat,
        .uniformAndStorageBuffer8BitAccess = VK_TRUE,
        .shaderInt8 = VK_TRUE,
    };
    vk::PhysicalDeviceFeatures phys_feat{
        .shaderInt64 = VK_TRUE,
        .shaderInt16 = VK_TRUE,
    };
    auto [dr, dev] = physical_device_.createDevice({
        .pNext = &v12_feat,
        .queueCreateInfoCount = 1,
        .pQueueCreateInfos = &qci,
        .enabledExtensionCount = required_exts.size(),
        .ppEnabledExtensionNames = required_exts.data(),
        .pEnabledFeatures = &phys_feat,
    });
    if (dr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed,
                          std::format("createDevice: {}", vk::to_string(dr)));
    device_ = dev;
    VULKAN_HPP_DEFAULT_DISPATCHER.init(device_);
    queue_ = device_.getQueue(queue_family_, 0);

    // ---- Command pool + cached command buffer -------------------------
    // eResetCommandBuffer lets run_raw() reset and re-record the single
    // command buffer on every call.
    auto [cpr, pool] = device_.createCommandPool({
        .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
        .queueFamilyIndex = queue_family_,
    });
    if (cpr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createCommandPool");
    command_pool_ = pool;
    auto [cbr, cbs] = device_.allocateCommandBuffers({
        .commandPool = command_pool_,
        .level = vk::CommandBufferLevel::ePrimary,
        .commandBufferCount = 1,
    });
    if (cbr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "allocateCommandBuffers");
    command_buffer_ = cbs[0];

    // ---- Fence (cached, reset per call) --------------------------------
    auto [fr, fence] = device_.createFence({});
    if (fr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createFence");
    fence_ = fence;

    // ---- Descriptor set layout with push-descriptor flag --------------
    // Single storage buffer at binding 0. No descriptor sets are ever
    // allocated from this layout — the layout is just used to tell the
    // pipeline layout and shader what the push-descriptor shape is.
    vk::DescriptorSetLayoutBinding dsl_binding{
        .binding = 0,
        .descriptorType = vk::DescriptorType::eStorageBuffer,
        .descriptorCount = 1,
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
    };
    auto [dslr, dsl] = device_.createDescriptorSetLayout({
        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
        .bindingCount = 1,
        .pBindings = &dsl_binding,
    });
    if (dslr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createDescriptorSetLayout");
    descriptor_set_layout_ = dsl;

    // ---- Pipeline layout with one push-constant range -----------------
    // The range spans max_push_constant_size_ bytes, i.e. sizeof(Shader::PushData).
    vk::PushConstantRange pc{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .offset = 0,
        .size = max_push_constant_size_,
    };
    auto [plr, pl] = device_.createPipelineLayout({
        .setLayoutCount = 1,
        .pSetLayouts = &descriptor_set_layout_,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = &pc,
    });
    if (plr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createPipelineLayout");
    pipeline_layout_ = pl;
    return {};
}
// Runs one compute shader and copies what it wrote back to the caller.
//
// spirv          - SPIR-V words of a compute shader with entry point "main".
// push_constants - bytes written into the push-constant block at byte offset
//                  16 (onto ud_regs in Shader::PushData); may be empty.
// output         - receives the contents of the storage buffer bound at
//                  binding 0; the buffer is zero-filled before the dispatch.
//                  Must not be empty.
// dispatch       - workgroup counts passed to the dispatch command.
//
// Returns an empty expected on success, otherwise an ErrorInfo with code
// PushConstantTooLarge, OutputTooLarge, ShaderCreationFailed,
// BufferAllocationFailed, CommandSubmissionFailed or ExecutionFailed.
//
// The call records into the runner's single cached command buffer and waits
// on its single fence, so it blocks until the GPU has finished.
std::expected<void, ErrorInfo> Runner::run_raw(std::span<const std::uint32_t> spirv,
                                               std::span<const std::byte> push_constants,
                                               std::span<std::byte> output, DispatchSize dispatch) {
    // Caller data starts here so that it falls onto ud_regs in PushData.
    constexpr std::uint32_t kUserDataOffset = 16;

    // The data is written at kUserDataOffset, so only the remainder of the
    // block is usable; offset + size must stay inside the declared range.
    const std::size_t block_size = max_push_constant_size_;
    const std::size_t capacity = block_size > kUserDataOffset ? block_size - kUserDataOffset : 0;
    if (push_constants.size() > capacity)
        return make_error(Error::PushConstantTooLarge,
                          std::format("push constants {} exceed available {} bytes at offset {}",
                                      push_constants.size(), capacity, kUserDataOffset));
    // NOTE(review): Vulkan also requires the push-constant size to be a
    // multiple of 4; callers are presumed to pass whole dwords — confirm.
    if (output.empty())
        return make_error(Error::OutputTooLarge, "output buffer is empty");
    // A zero-sized code blob is invalid input for shader creation; reject it
    // here instead of handing it to the driver.
    if (spirv.empty())
        return make_error(Error::ShaderCreationFailed, "SPIR-V module is empty");

    // Per-call: output buffer --------------------------------------------
    auto buf_r = create_host_buffer(device_, physical_device_, output.size(),
                                    vk::BufferUsageFlagBits::eStorageBuffer);
    if (!buf_r)
        return std::unexpected(buf_r.error());
    auto& output_buffer = *buf_r;
    std::memset(output_buffer->mapped, 0, output.size());

    // Per-call: shader object --------------------------------------------
    // The shader declares the full PushData range (not just the bytes the
    // caller supplied) so it matches the range in pipeline_layout_.
    vk::PushConstantRange shader_pc{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .offset = 0,
        .size = sizeof(Shader::PushData),
    };
    vk::ShaderCreateInfoEXT sci{
        .stage = vk::ShaderStageFlagBits::eCompute,
        .codeType = vk::ShaderCodeTypeEXT::eSpirv,
        .codeSize = spirv.size() * sizeof(std::uint32_t),
        .pCode = spirv.data(),
        .pName = "main",
        .setLayoutCount = 1,
        .pSetLayouts = &descriptor_set_layout_,
        .pushConstantRangeCount = push_constants.empty() ? 0u : 1u,
        .pPushConstantRanges = push_constants.empty() ? nullptr : &shader_pc,
    };
    auto [sr, shaders] = device_.createShadersEXT(sci);
    if (sr != vk::Result::eSuccess)
        return make_error(Error::ShaderCreationFailed,
                          std::format("createShadersEXT: {}", vk::to_string(sr)));
    auto shader = shaders[0];
    // Destroys the shader object on every exit path below.
    struct ShaderGuard {
        vk::Device d;
        vk::ShaderEXT s;
        ~ShaderGuard() {
            if (s)
                d.destroyShaderEXT(s);
        }
    } sg{device_, shader};

    // Reset cached command buffer + fence --------------------------------
    if (device_.resetFences(fence_) != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "resetFences");
    if (command_buffer_.reset() != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "cmd.reset");
    if (command_buffer_.begin({
            .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
        }) != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "cmd.begin");

    // Bind shader object -------------------------------------------------
    vk::ShaderStageFlagBits stage = vk::ShaderStageFlagBits::eCompute;
    command_buffer_.bindShadersEXT(1, &stage, &shader);

    // Push descriptor: binding 0 = output SSBO ---------------------------
    vk::DescriptorBufferInfo dbi{
        .buffer = output_buffer->buffer,
        .offset = 0,
        .range = VK_WHOLE_SIZE,
    };
    vk::WriteDescriptorSet write{
        .dstBinding = 0,
        .descriptorCount = 1,
        .descriptorType = vk::DescriptorType::eStorageBuffer,
        .pBufferInfo = &dbi,
    };
    vk::PushDescriptorSetInfoKHR push_desc{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .layout = pipeline_layout_,
        .set = 0,
        .descriptorWriteCount = 1,
        .pDescriptorWrites = &write,
    };
    command_buffer_.pushDescriptorSet2KHR(push_desc);

    // Push constants -----------------------------------------------------
    if (!push_constants.empty()) {
        vk::PushConstantsInfoKHR pci{
            .layout = pipeline_layout_,
            .stageFlags = vk::ShaderStageFlagBits::eCompute,
            .offset = kUserDataOffset,
            .size = static_cast<std::uint32_t>(push_constants.size()),
            .pValues = push_constants.data(),
        };
        command_buffer_.pushConstants2KHR(pci);
    }

    command_buffer_.dispatch(dispatch.x, dispatch.y, dispatch.z);

    // Make the shader's writes available to the host read-back below.
    vk::MemoryBarrier barrier{
        .srcAccessMask = vk::AccessFlagBits::eShaderWrite,
        .dstAccessMask = vk::AccessFlagBits::eHostRead,
    };
    command_buffer_.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
                                    vk::PipelineStageFlagBits::eHost, {}, barrier, {}, {});
    if (command_buffer_.end() != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "cmd.end");

    // Submit and block until the GPU signals the fence -------------------
    vk::SubmitInfo si{
        .commandBufferCount = 1,
        .pCommandBuffers = &command_buffer_,
    };
    if (queue_.submit(si, fence_) != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "queue.submit");
    if (device_.waitForFences(fence_, VK_TRUE, UINT64_MAX) != vk::Result::eSuccess)
        return make_error(Error::ExecutionFailed, "waitForFences");

    std::memcpy(output.data(), output_buffer->mapped, output.size());
    return {};
}
} // namespace gcn_test