mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2026-04-29 23:41:19 -06:00
432 lines
15 KiB
C++
432 lines
15 KiB
C++
// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include "gcn_test_runner.hpp"
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <format>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <ranges>
|
|
#include <string_view>
|
|
#include <vector>
|
|
|
|
#include "shader_recompiler/resource.h"
|
|
|
|
// Exactly one TU must define the dynamic dispatcher storage.
|
|
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE
|
|
|
|
namespace gcn_test {
|
|
namespace {
|
|
|
|
constexpr bool kEnableValidation =
|
|
#ifdef NDEBUG
|
|
false;
|
|
#else
|
|
true;
|
|
#endif
|
|
|
|
auto make_error(Error code, std::string message) {
|
|
return std::unexpected(ErrorInfo{code, std::move(message)});
|
|
}
|
|
|
|
auto find_memory_type(vk::PhysicalDevice pd, std::uint32_t type_filter,
|
|
vk::MemoryPropertyFlags required) -> std::expected<std::uint32_t, ErrorInfo> {
|
|
auto props = pd.getMemoryProperties();
|
|
for (std::uint32_t i = 0; i < props.memoryTypeCount; ++i) {
|
|
if ((type_filter & (1u << i)) &&
|
|
(props.memoryTypes[i].propertyFlags & required) == required) {
|
|
return i;
|
|
}
|
|
}
|
|
return make_error(Error::BufferAllocationFailed, "no suitable memory type found");
|
|
}
|
|
|
|
struct HostBuffer {
|
|
vk::Device device;
|
|
vk::Buffer buffer;
|
|
vk::DeviceMemory memory;
|
|
void* mapped = nullptr;
|
|
|
|
~HostBuffer() {
|
|
if (mapped)
|
|
device.unmapMemory(memory);
|
|
if (buffer)
|
|
device.destroyBuffer(buffer);
|
|
if (memory)
|
|
device.freeMemory(memory);
|
|
}
|
|
HostBuffer() = default;
|
|
HostBuffer(const HostBuffer&) = delete;
|
|
HostBuffer& operator=(const HostBuffer&) = delete;
|
|
};
|
|
|
|
auto create_host_buffer(vk::Device dev, vk::PhysicalDevice pd, vk::DeviceSize size,
|
|
vk::BufferUsageFlags usage)
|
|
-> std::expected<std::unique_ptr<HostBuffer>, ErrorInfo> {
|
|
auto buf = std::make_unique<HostBuffer>();
|
|
buf->device = dev;
|
|
|
|
auto [br, buffer] = dev.createBuffer(vk::BufferCreateInfo{
|
|
.size = size,
|
|
.usage = usage,
|
|
.sharingMode = vk::SharingMode::eExclusive,
|
|
});
|
|
if (br != vk::Result::eSuccess)
|
|
return make_error(Error::BufferAllocationFailed, "createBuffer");
|
|
buf->buffer = buffer;
|
|
|
|
auto req = dev.getBufferMemoryRequirements(buffer);
|
|
auto mt = find_memory_type(pd, req.memoryTypeBits,
|
|
vk::MemoryPropertyFlagBits::eHostVisible |
|
|
vk::MemoryPropertyFlagBits::eHostCoherent);
|
|
if (!mt)
|
|
return std::unexpected(mt.error());
|
|
|
|
auto [mr, mem] = dev.allocateMemory({
|
|
.allocationSize = req.size,
|
|
.memoryTypeIndex = *mt,
|
|
});
|
|
if (mr != vk::Result::eSuccess)
|
|
return make_error(Error::BufferAllocationFailed, "allocateMemory");
|
|
buf->memory = mem;
|
|
|
|
if (dev.bindBufferMemory(buffer, mem, 0) != vk::Result::eSuccess)
|
|
return make_error(Error::BufferAllocationFailed, "bindBufferMemory");
|
|
|
|
auto [mapr, ptr] = dev.mapMemory(mem, 0, size);
|
|
if (mapr != vk::Result::eSuccess)
|
|
return make_error(Error::BufferAllocationFailed, "mapMemory");
|
|
buf->mapped = ptr;
|
|
|
|
return buf;
|
|
}
|
|
|
|
std::mutex g_runner_mutex;
|
|
std::unique_ptr<Runner> g_runner;
|
|
|
|
} // namespace
|
|
|
|
Runner::~Runner() {
|
|
if (device_) {
|
|
device_.waitIdle();
|
|
if (fence_)
|
|
device_.destroyFence(fence_);
|
|
if (pipeline_layout_)
|
|
device_.destroyPipelineLayout(pipeline_layout_);
|
|
if (descriptor_set_layout_)
|
|
device_.destroyDescriptorSetLayout(descriptor_set_layout_);
|
|
if (command_pool_)
|
|
device_.destroyCommandPool(command_pool_);
|
|
device_.destroy();
|
|
}
|
|
if (instance_)
|
|
instance_.destroy();
|
|
}
|
|
|
|
std::expected<Runner*, ErrorInfo> Runner::instance() {
|
|
std::lock_guard lock{g_runner_mutex};
|
|
if (g_runner)
|
|
return g_runner.get();
|
|
auto r = std::unique_ptr<Runner>(new Runner{});
|
|
if (auto init = r->initialize(); !init)
|
|
return std::unexpected(init.error());
|
|
g_runner = std::move(r);
|
|
return g_runner.get();
|
|
}
|
|
|
|
std::expected<void, ErrorInfo> Runner::initialize() {
|
|
VULKAN_HPP_DEFAULT_DISPATCHER.init();
|
|
|
|
// ---- Instance ------------------------------------------------------
|
|
vk::ApplicationInfo app_info{
|
|
.pApplicationName = "gcn_test_runner",
|
|
.applicationVersion = 1,
|
|
.pEngineName = "gcn_test_runner",
|
|
.engineVersion = 1,
|
|
.apiVersion = vk::ApiVersion13,
|
|
};
|
|
std::vector<const char*> layers;
|
|
if (kEnableValidation)
|
|
layers.push_back("VK_LAYER_KHRONOS_validation");
|
|
|
|
auto [ir, inst] = vk::createInstance({
|
|
.pApplicationInfo = &app_info,
|
|
.enabledLayerCount = static_cast<std::uint32_t>(layers.size()),
|
|
.ppEnabledLayerNames = layers.data(),
|
|
});
|
|
if (ir != vk::Result::eSuccess)
|
|
return make_error(Error::InstanceCreationFailed,
|
|
std::format("createInstance: {}", vk::to_string(ir)));
|
|
instance_ = inst;
|
|
VULKAN_HPP_DEFAULT_DISPATCHER.init(instance_);
|
|
|
|
// ---- Pick physical device with the extensions we need -------------
|
|
auto [pr, devs] = instance_.enumeratePhysicalDevices();
|
|
if (pr != vk::Result::eSuccess || devs.empty())
|
|
return make_error(Error::NoSuitableDevice, "no Vulkan devices");
|
|
|
|
constexpr std::array required_exts{
|
|
VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
|
|
VK_KHR_MAINTENANCE_6_EXTENSION_NAME,
|
|
VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
|
|
};
|
|
|
|
for (auto pd : devs) {
|
|
auto [er, exts] = pd.enumerateDeviceExtensionProperties();
|
|
if (er != vk::Result::eSuccess)
|
|
continue;
|
|
|
|
auto has_ext = [&](const char* name) {
|
|
return std::ranges::any_of(
|
|
exts, [&](auto& e) { return std::string_view{e.extensionName} == name; });
|
|
};
|
|
if (!std::ranges::all_of(required_exts, has_ext))
|
|
continue;
|
|
|
|
auto families = pd.getQueueFamilyProperties();
|
|
for (std::uint32_t i = 0; i < families.size(); ++i) {
|
|
if (families[i].queueFlags & vk::QueueFlagBits::eCompute) {
|
|
physical_device_ = pd;
|
|
queue_family_ = i;
|
|
break;
|
|
}
|
|
}
|
|
if (physical_device_)
|
|
break;
|
|
}
|
|
if (!physical_device_)
|
|
return make_error(Error::NoSuitableDevice,
|
|
"no device with compute + shader_object + maintenance6 + "
|
|
"push_descriptor");
|
|
|
|
max_push_constant_size_ = sizeof(Shader::PushData);
|
|
// physical_device_.getProperties().limits.maxPushConstantsSize;
|
|
|
|
// ---- Device with feature chain ------------------------------------
|
|
float priority = 1.0f;
|
|
vk::DeviceQueueCreateInfo qci{
|
|
.queueFamilyIndex = queue_family_,
|
|
.queueCount = 1,
|
|
.pQueuePriorities = &priority,
|
|
};
|
|
vk::PhysicalDeviceShaderObjectFeaturesEXT so_feat{.shaderObject = VK_TRUE};
|
|
vk::PhysicalDeviceMaintenance6FeaturesKHR m6_feat{
|
|
.pNext = &so_feat,
|
|
.maintenance6 = VK_TRUE,
|
|
};
|
|
vk::PhysicalDeviceVulkan11Features v11_feat{
|
|
.pNext = &m6_feat,
|
|
.uniformAndStorageBuffer16BitAccess = VK_TRUE,
|
|
};
|
|
vk::PhysicalDeviceVulkan12Features v12_feat{
|
|
.pNext = &v11_feat,
|
|
.uniformAndStorageBuffer8BitAccess = VK_TRUE,
|
|
.shaderInt8 = VK_TRUE,
|
|
};
|
|
vk::PhysicalDeviceFeatures phys_feat{
|
|
.shaderInt64 = VK_TRUE,
|
|
.shaderInt16 = VK_TRUE,
|
|
};
|
|
|
|
auto [dr, dev] = physical_device_.createDevice({
|
|
.pNext = &v12_feat,
|
|
.queueCreateInfoCount = 1,
|
|
.pQueueCreateInfos = &qci,
|
|
.enabledExtensionCount = required_exts.size(),
|
|
.ppEnabledExtensionNames = required_exts.data(),
|
|
.pEnabledFeatures = &phys_feat,
|
|
});
|
|
if (dr != vk::Result::eSuccess)
|
|
return make_error(Error::DeviceCreationFailed,
|
|
std::format("createDevice: {}", vk::to_string(dr)));
|
|
device_ = dev;
|
|
VULKAN_HPP_DEFAULT_DISPATCHER.init(device_);
|
|
queue_ = device_.getQueue(queue_family_, 0);
|
|
|
|
// ---- Command pool + cached command buffer -------------------------
|
|
auto [cpr, pool] = device_.createCommandPool({
|
|
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
|
|
.queueFamilyIndex = queue_family_,
|
|
});
|
|
if (cpr != vk::Result::eSuccess)
|
|
return make_error(Error::DeviceCreationFailed, "createCommandPool");
|
|
command_pool_ = pool;
|
|
|
|
auto [cbr, cbs] = device_.allocateCommandBuffers({
|
|
.commandPool = command_pool_,
|
|
.level = vk::CommandBufferLevel::ePrimary,
|
|
.commandBufferCount = 1,
|
|
});
|
|
if (cbr != vk::Result::eSuccess)
|
|
return make_error(Error::DeviceCreationFailed, "allocateCommandBuffers");
|
|
command_buffer_ = cbs[0];
|
|
|
|
// ---- Fence (cached, reset per call) --------------------------------
|
|
auto [fr, fence] = device_.createFence({});
|
|
if (fr != vk::Result::eSuccess)
|
|
return make_error(Error::DeviceCreationFailed, "createFence");
|
|
fence_ = fence;
|
|
|
|
// ---- Descriptor set layout with push-descriptor flag --------------
|
|
// Single storage buffer at binding 0. No descriptor sets are ever
|
|
// allocated from this layout — the layout is just used to tell the
|
|
// pipeline layout and shader what the push-descriptor shape is.
|
|
vk::DescriptorSetLayoutBinding dsl_binding{
|
|
.binding = 0,
|
|
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
|
.descriptorCount = 1,
|
|
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
|
};
|
|
auto [dslr, dsl] = device_.createDescriptorSetLayout({
|
|
.flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
|
|
.bindingCount = 1,
|
|
.pBindings = &dsl_binding,
|
|
});
|
|
if (dslr != vk::Result::eSuccess)
|
|
return make_error(Error::DeviceCreationFailed, "createDescriptorSetLayout");
|
|
descriptor_set_layout_ = dsl;
|
|
|
|
// ---- Pipeline layout sized to device max push constants -----------
|
|
vk::PushConstantRange pc{
|
|
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
|
.offset = 0,
|
|
.size = max_push_constant_size_,
|
|
};
|
|
auto [plr, pl] = device_.createPipelineLayout({
|
|
.setLayoutCount = 1,
|
|
.pSetLayouts = &descriptor_set_layout_,
|
|
.pushConstantRangeCount = 1,
|
|
.pPushConstantRanges = &pc,
|
|
});
|
|
if (plr != vk::Result::eSuccess)
|
|
return make_error(Error::DeviceCreationFailed, "createPipelineLayout");
|
|
pipeline_layout_ = pl;
|
|
|
|
return {};
|
|
}
|
|
|
|
std::expected<void, ErrorInfo> Runner::run_raw(std::span<const std::uint32_t> spirv,
|
|
std::span<const std::byte> push_constants,
|
|
std::span<std::byte> output, DispatchSize dispatch) {
|
|
if (push_constants.size() > max_push_constant_size_)
|
|
return make_error(Error::PushConstantTooLarge,
|
|
std::format("push constants {} exceed device max {}",
|
|
push_constants.size(), max_push_constant_size_));
|
|
if (output.empty())
|
|
return make_error(Error::OutputTooLarge, "output buffer is empty");
|
|
|
|
// Per-call: output buffer --------------------------------------------
|
|
auto buf_r = create_host_buffer(device_, physical_device_, output.size(),
|
|
vk::BufferUsageFlagBits::eStorageBuffer);
|
|
if (!buf_r)
|
|
return std::unexpected(buf_r.error());
|
|
auto& output_buffer = *buf_r;
|
|
std::memset(output_buffer->mapped, 0, output.size());
|
|
|
|
// Per-call: shader object --------------------------------------------
|
|
vk::PushConstantRange shader_pc{
|
|
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
|
.offset = 0,
|
|
// .size = static_cast<std::uint32_t>(push_constants.size()),
|
|
.size = sizeof(Shader::PushData),
|
|
};
|
|
vk::ShaderCreateInfoEXT sci{
|
|
.stage = vk::ShaderStageFlagBits::eCompute,
|
|
.codeType = vk::ShaderCodeTypeEXT::eSpirv,
|
|
.codeSize = spirv.size() * sizeof(std::uint32_t),
|
|
.pCode = spirv.data(),
|
|
.pName = "main",
|
|
.setLayoutCount = 1,
|
|
.pSetLayouts = &descriptor_set_layout_,
|
|
.pushConstantRangeCount = push_constants.empty() ? 0u : 1u,
|
|
.pPushConstantRanges = push_constants.empty() ? nullptr : &shader_pc,
|
|
};
|
|
auto [sr, shaders] = device_.createShadersEXT(sci);
|
|
if (sr != vk::Result::eSuccess)
|
|
return make_error(Error::ShaderCreationFailed,
|
|
std::format("createShadersEXT: {}", vk::to_string(sr)));
|
|
auto shader = shaders[0];
|
|
struct ShaderGuard {
|
|
vk::Device d;
|
|
vk::ShaderEXT s;
|
|
~ShaderGuard() {
|
|
if (s)
|
|
d.destroyShaderEXT(s);
|
|
}
|
|
} sg{device_, shader};
|
|
|
|
// Reset cached command buffer + fence --------------------------------
|
|
device_.resetFences(fence_);
|
|
command_buffer_.reset();
|
|
|
|
if (command_buffer_.begin({
|
|
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
|
|
}) != vk::Result::eSuccess)
|
|
return make_error(Error::CommandSubmissionFailed, "cmd.begin");
|
|
|
|
// Bind shader object -------------------------------------------------
|
|
vk::ShaderStageFlagBits stage = vk::ShaderStageFlagBits::eCompute;
|
|
command_buffer_.bindShadersEXT(1, &stage, &shader);
|
|
|
|
// Push descriptor: binding 0 = output SSBO ---------------------------
|
|
vk::DescriptorBufferInfo dbi{
|
|
.buffer = output_buffer->buffer,
|
|
.offset = 0,
|
|
.range = VK_WHOLE_SIZE,
|
|
};
|
|
vk::WriteDescriptorSet write{
|
|
.dstBinding = 0,
|
|
.descriptorCount = 1,
|
|
.descriptorType = vk::DescriptorType::eStorageBuffer,
|
|
.pBufferInfo = &dbi,
|
|
};
|
|
vk::PushDescriptorSetInfoKHR push_desc{
|
|
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
|
.layout = pipeline_layout_,
|
|
.set = 0,
|
|
.descriptorWriteCount = 1,
|
|
.pDescriptorWrites = &write,
|
|
};
|
|
command_buffer_.pushDescriptorSet2KHR(push_desc);
|
|
|
|
// Push constants -----------------------------------------------------
|
|
if (!push_constants.empty()) {
|
|
vk::PushConstantsInfoKHR pci{
|
|
.layout = pipeline_layout_,
|
|
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
|
.offset = 16, // fall onto ud_regs in PushData
|
|
.size = static_cast<std::uint32_t>(push_constants.size()),
|
|
.pValues = push_constants.data(),
|
|
};
|
|
command_buffer_.pushConstants2KHR(pci);
|
|
}
|
|
|
|
command_buffer_.dispatch(dispatch.x, dispatch.y, dispatch.z);
|
|
|
|
vk::MemoryBarrier barrier{
|
|
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
|
|
.dstAccessMask = vk::AccessFlagBits::eHostRead,
|
|
};
|
|
command_buffer_.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
|
|
vk::PipelineStageFlagBits::eHost, {}, barrier, {}, {});
|
|
|
|
if (command_buffer_.end() != vk::Result::eSuccess)
|
|
return make_error(Error::CommandSubmissionFailed, "cmd.end");
|
|
|
|
vk::SubmitInfo si{
|
|
.commandBufferCount = 1,
|
|
.pCommandBuffers = &command_buffer_,
|
|
};
|
|
if (queue_.submit(si, fence_) != vk::Result::eSuccess)
|
|
return make_error(Error::CommandSubmissionFailed, "queue.submit");
|
|
if (device_.waitForFences(fence_, VK_TRUE, UINT64_MAX) != vk::Result::eSuccess)
|
|
return make_error(Error::ExecutionFailed, "waitForFences");
|
|
|
|
std::memcpy(output.data(), output_buffer->mapped, output.size());
|
|
return {};
|
|
}
|
|
|
|
} // namespace gcn_test
|