// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "gcn_test_runner.hpp"

// NOTE(review): the names of the standard headers below, and every template
// argument list in this file, were missing from the text this file was
// restored from. They have been reconstructed from how each name is used;
// confirm them against gcn_test_runner.hpp and the original revision.
#include <algorithm>
#include <array>
#include <cstdint>
#include <cstring>
#include <expected>
#include <format>
#include <memory>
#include <mutex>
#include <span>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "shader_recompiler/resource.h"

// Exactly one TU must define the dynamic dispatcher storage.
VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE

namespace gcn_test {

namespace {

// Validation is requested in builds without NDEBUG only.
constexpr bool kEnableValidation =
#ifdef NDEBUG
    false;
#else
    true;
#endif

constexpr const char* kValidationLayer = "VK_LAYER_KHRONOS_validation";

// Byte offset at which caller-supplied push constants are written so that
// they fall onto ud_regs in PushData.
constexpr std::uint32_t kUserDataPushOffset = 16;

/// Wraps an error code and message in the `std::unexpected` value that every
/// fallible function in this file returns on failure.
auto make_error(Error code, std::string message) {
    return std::unexpected(ErrorInfo{code, std::move(message)});
}

/// Returns the index of the first memory type that is allowed by
/// `type_filter` (one bit per memory type index) and has every flag in
/// `required`, or Error::BufferAllocationFailed if there is none.
auto find_memory_type(vk::PhysicalDevice pd, std::uint32_t type_filter,
                      vk::MemoryPropertyFlags required)
    -> std::expected<std::uint32_t, ErrorInfo> {
    const auto props = pd.getMemoryProperties();
    for (std::uint32_t i = 0; i < props.memoryTypeCount; ++i) {
        const bool allowed = (type_filter & (1u << i)) != 0;
        if (allowed && (props.memoryTypes[i].propertyFlags & required) == required) {
            return i;
        }
    }
    return make_error(Error::BufferAllocationFailed, "no suitable memory type found");
}

/// Owns a buffer, its backing memory and the mapping of that memory.
/// The destructor releases whichever of the three have been set, so a
/// partially constructed HostBuffer is cleaned up correctly.
struct HostBuffer {
    vk::Device device;
    vk::Buffer buffer;
    vk::DeviceMemory memory;
    void* mapped = nullptr;

    ~HostBuffer() {
        if (mapped)
            device.unmapMemory(memory);
        if (buffer)
            device.destroyBuffer(buffer);
        if (memory)
            device.freeMemory(memory);
    }

    HostBuffer() = default;
    HostBuffer(const HostBuffer&) = delete;
    HostBuffer& operator=(const HostBuffer&) = delete;
};

/// Creates a buffer of `size` bytes with the given usage, backs it with
/// host-visible, host-coherent memory and maps the first `size` bytes.
///
/// Every failure is reported as Error::BufferAllocationFailed with the name
/// of the failing call; whatever was already created is released by the
/// HostBuffer destructor when `buf` goes out of scope.
auto create_host_buffer(vk::Device dev, vk::PhysicalDevice pd, vk::DeviceSize size,
                        vk::BufferUsageFlags usage)
    -> std::expected<std::unique_ptr<HostBuffer>, ErrorInfo> {
    auto buf = std::make_unique<HostBuffer>();
    buf->device = dev;

    auto [br, buffer] = dev.createBuffer(vk::BufferCreateInfo{
        .size = size,
        .usage = usage,
        .sharingMode = vk::SharingMode::eExclusive,
    });
    if (br != vk::Result::eSuccess)
        return make_error(Error::BufferAllocationFailed, "createBuffer");
    buf->buffer = buffer;

    const auto req = dev.getBufferMemoryRequirements(buffer);
    const auto mt = find_memory_type(pd, req.memoryTypeBits,
                                     vk::MemoryPropertyFlagBits::eHostVisible |
                                         vk::MemoryPropertyFlagBits::eHostCoherent);
    if (!mt)
        return std::unexpected(mt.error());

    auto [mr, mem] = dev.allocateMemory({
        .allocationSize = req.size,
        .memoryTypeIndex = *mt,
    });
    if (mr != vk::Result::eSuccess)
        return make_error(Error::BufferAllocationFailed, "allocateMemory");
    buf->memory = mem;

    if (dev.bindBufferMemory(buffer, mem, 0) != vk::Result::eSuccess)
        return make_error(Error::BufferAllocationFailed, "bindBufferMemory");

    auto [mapr, ptr] = dev.mapMemory(mem, 0, size);
    if (mapr != vk::Result::eSuccess)
        return make_error(Error::BufferAllocationFailed, "mapMemory");
    buf->mapped = ptr;

    return buf;
}

// Singleton storage; g_runner_mutex guards creation in Runner::instance().
std::mutex g_runner_mutex;
std::unique_ptr<Runner> g_runner;

} // namespace

/// Waits for the device to go idle, then destroys every object that
/// initialize() managed to create. Null handles are skipped, so this is safe
/// to run on a Runner whose initialize() failed part-way.
Runner::~Runner() {
    if (device_) {
        // Nothing useful can be done with a failure here; tear down regardless.
        static_cast<void>(device_.waitIdle());
        if (fence_)
            device_.destroyFence(fence_);
        if (pipeline_layout_)
            device_.destroyPipelineLayout(pipeline_layout_);
        if (descriptor_set_layout_)
            device_.destroyDescriptorSetLayout(descriptor_set_layout_);
        if (command_pool_)
            device_.destroyCommandPool(command_pool_);
        device_.destroy();
    }
    if (instance_)
        instance_.destroy();
}

/// Returns the shared Runner, creating and initializing it on first use.
///
/// Creation is serialized by g_runner_mutex. If initialize() fails its error
/// is returned and nothing is cached, so a later call tries again.
std::expected<Runner*, ErrorInfo> Runner::instance() {
    std::lock_guard lock{g_runner_mutex};
    if (g_runner)
        return g_runner.get();

    auto r = std::unique_ptr<Runner>(new Runner{});
    if (auto init = r->initialize(); !init)
        return std::unexpected(init.error());

    g_runner = std::move(r);
    return g_runner.get();
}

/// Creates the instance, picks a physical device, and creates the logical
/// device plus the objects that run_raw() reuses on every call: command
/// pool/buffer, fence, push-descriptor set layout and pipeline layout.
///
/// Returns the first failure as an ErrorInfo. Objects created before the
/// failure stay in the members and are released by the destructor.
std::expected<void, ErrorInfo> Runner::initialize() {
    VULKAN_HPP_DEFAULT_DISPATCHER.init();

    // ---- Instance ------------------------------------------------------
    vk::ApplicationInfo app_info{
        .pApplicationName = "gcn_test_runner",
        .applicationVersion = 1,
        .pEngineName = "gcn_test_runner",
        .engineVersion = 1,
        .apiVersion = vk::ApiVersion13,
    };

    std::vector<const char*> layers;
    if constexpr (kEnableValidation) {
        // Only request the validation layer when it is actually installed;
        // asking for a missing layer makes createInstance fail outright.
        auto [lr, available] = vk::enumerateInstanceLayerProperties();
        const bool has_validation =
            lr == vk::Result::eSuccess &&
            std::ranges::any_of(available, [](const auto& layer) {
                return std::string_view{layer.layerName} == kValidationLayer;
            });
        if (has_validation)
            layers.push_back(kValidationLayer);
    }

    auto [ir, inst] = vk::createInstance({
        .pApplicationInfo = &app_info,
        .enabledLayerCount = static_cast<std::uint32_t>(layers.size()),
        .ppEnabledLayerNames = layers.data(),
    });
    if (ir != vk::Result::eSuccess)
        return make_error(Error::InstanceCreationFailed,
                          std::format("createInstance: {}", vk::to_string(ir)));
    instance_ = inst;
    VULKAN_HPP_DEFAULT_DISPATCHER.init(instance_);

    // ---- Pick physical device with the extensions we need -------------
    auto [pr, devs] = instance_.enumeratePhysicalDevices();
    if (pr != vk::Result::eSuccess || devs.empty())
        return make_error(Error::NoSuitableDevice, "no Vulkan devices");

    constexpr std::array required_exts{
        VK_EXT_SHADER_OBJECT_EXTENSION_NAME,
        VK_KHR_MAINTENANCE_6_EXTENSION_NAME,
        VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME,
    };

    // First device that exposes every required extension and has a
    // compute-capable queue family wins; the first such family is used.
    for (auto pd : devs) {
        auto [er, exts] = pd.enumerateDeviceExtensionProperties();
        if (er != vk::Result::eSuccess)
            continue;

        auto has_ext = [&](const char* name) {
            return std::ranges::any_of(
                exts, [&](auto& e) { return std::string_view{e.extensionName} == name; });
        };
        if (!std::ranges::all_of(required_exts, has_ext))
            continue;

        const auto families = pd.getQueueFamilyProperties();
        for (std::uint32_t i = 0; i < families.size(); ++i) {
            if (families[i].queueFlags & vk::QueueFlagBits::eCompute) {
                physical_device_ = pd;
                queue_family_ = i;
                break;
            }
        }
        if (physical_device_)
            break;
    }
    if (!physical_device_)
        return make_error(Error::NoSuitableDevice,
                          "no device with compute + shader_object + maintenance6 + "
                          "push_descriptor");

    // The push-constant range always spans the whole Shader::PushData block,
    // not the device maximum, so the device limit has to be checked by hand:
    // a pipeline layout with a range larger than the limit is invalid.
    max_push_constant_size_ = sizeof(Shader::PushData);
    const auto device_pc_limit = physical_device_.getProperties().limits.maxPushConstantsSize;
    if (device_pc_limit < sizeof(Shader::PushData))
        return make_error(Error::NoSuitableDevice,
                          std::format("device maxPushConstantsSize {} is smaller than "
                                      "Shader::PushData ({} bytes)",
                                      device_pc_limit, sizeof(Shader::PushData)));

    // ---- Device with feature chain ------------------------------------
    float priority = 1.0f;
    vk::DeviceQueueCreateInfo qci{
        .queueFamilyIndex = queue_family_,
        .queueCount = 1,
        .pQueuePriorities = &priority,
    };

    // pNext chain handed to createDevice: v12 -> v11 -> maintenance6 -> shader_object.
    vk::PhysicalDeviceShaderObjectFeaturesEXT so_feat{.shaderObject = VK_TRUE};
    vk::PhysicalDeviceMaintenance6FeaturesKHR m6_feat{
        .pNext = &so_feat,
        .maintenance6 = VK_TRUE,
    };
    vk::PhysicalDeviceVulkan11Features v11_feat{
        .pNext = &m6_feat,
        .uniformAndStorageBuffer16BitAccess = VK_TRUE,
    };
    vk::PhysicalDeviceVulkan12Features v12_feat{
        .pNext = &v11_feat,
        .uniformAndStorageBuffer8BitAccess = VK_TRUE,
        .shaderInt8 = VK_TRUE,
    };
    vk::PhysicalDeviceFeatures phys_feat{
        .shaderInt64 = VK_TRUE,
        .shaderInt16 = VK_TRUE,
    };

    auto [dr, dev] = physical_device_.createDevice({
        .pNext = &v12_feat,
        .queueCreateInfoCount = 1,
        .pQueueCreateInfos = &qci,
        .enabledExtensionCount = static_cast<std::uint32_t>(required_exts.size()),
        .ppEnabledExtensionNames = required_exts.data(),
        .pEnabledFeatures = &phys_feat,
    });
    if (dr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed,
                          std::format("createDevice: {}", vk::to_string(dr)));
    device_ = dev;
    VULKAN_HPP_DEFAULT_DISPATCHER.init(device_);

    queue_ = device_.getQueue(queue_family_, 0);

    // ---- Command pool + cached command buffer -------------------------
    auto [cpr, pool] = device_.createCommandPool({
        .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
        .queueFamilyIndex = queue_family_,
    });
    if (cpr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createCommandPool");
    command_pool_ = pool;

    auto [cbr, cbs] = device_.allocateCommandBuffers({
        .commandPool = command_pool_,
        .level = vk::CommandBufferLevel::ePrimary,
        .commandBufferCount = 1,
    });
    if (cbr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "allocateCommandBuffers");
    command_buffer_ = cbs[0];

    // ---- Fence (cached, reset per call) --------------------------------
    auto [fr, fence] = device_.createFence({});
    if (fr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createFence");
    fence_ = fence;

    // ---- Descriptor set layout with push-descriptor flag --------------
    // Single storage buffer at binding 0. No descriptor sets are ever
    // allocated from this layout — the layout is just used to tell the
    // pipeline layout and shader what the push-descriptor shape is.
    vk::DescriptorSetLayoutBinding dsl_binding{
        .binding = 0,
        .descriptorType = vk::DescriptorType::eStorageBuffer,
        .descriptorCount = 1,
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
    };
    auto [dslr, dsl] = device_.createDescriptorSetLayout({
        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
        .bindingCount = 1,
        .pBindings = &dsl_binding,
    });
    if (dslr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createDescriptorSetLayout");
    descriptor_set_layout_ = dsl;

    // ---- Pipeline layout with a push-constant range of sizeof(PushData) --
    vk::PushConstantRange pc{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .offset = 0,
        .size = max_push_constant_size_,
    };
    auto [plr, pl] = device_.createPipelineLayout({
        .setLayoutCount = 1,
        .pSetLayouts = &descriptor_set_layout_,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = &pc,
    });
    if (plr != vk::Result::eSuccess)
        return make_error(Error::DeviceCreationFailed, "createPipelineLayout");
    pipeline_layout_ = pl;

    return {};
}

/// Runs one compute shader and copies its storage-buffer output back.
///
/// @param spirv           SPIR-V words of a compute shader with entry point "main".
/// @param push_constants  Bytes written at offset kUserDataPushOffset of the
///                        push-constant block; may be empty.
/// @param output          Receives the contents of the storage buffer at
///                        binding 0. The buffer is output.size() bytes and is
///                        zero-filled before the dispatch. Must not be empty.
/// @param dispatch        Workgroup counts passed to the dispatch command.
/// @return Nothing on success, otherwise the ErrorInfo of the first failure.
///
/// The cached command buffer and fence are reused without locking, so calls
/// on the same Runner must not overlap.
///
/// NOTE(review): the span element types are reconstructed from usage (32-bit
/// words for spirv, byte-sized elements for the other two) — confirm they
/// match the declaration in gcn_test_runner.hpp.
std::expected<void, ErrorInfo> Runner::run_raw(std::span<const std::uint32_t> spirv,
                                               std::span<const std::byte> push_constants,
                                               std::span<std::byte> output,
                                               DispatchSize dispatch) {
    // The data is pushed at kUserDataPushOffset, so offset + size (not just
    // size) has to fit inside the range declared in the pipeline layout.
    // NOTE(review): Vulkan also requires the size to be a multiple of 4;
    // callers are presumed to pass whole dwords.
    if (!push_constants.empty() &&
        (max_push_constant_size_ < kUserDataPushOffset ||
         push_constants.size() > max_push_constant_size_ - kUserDataPushOffset))
        return make_error(Error::PushConstantTooLarge,
                          std::format("push constants {} at offset {} exceed device max {}",
                                      push_constants.size(), kUserDataPushOffset,
                                      max_push_constant_size_));
    if (output.empty())
        return make_error(Error::OutputTooLarge, "output buffer is empty");

    // Per-call: output buffer --------------------------------------------
    auto buf_r = create_host_buffer(device_, physical_device_, output.size(),
                                    vk::BufferUsageFlagBits::eStorageBuffer);
    if (!buf_r)
        return std::unexpected(buf_r.error());
    auto& output_buffer = *buf_r;
    std::memset(output_buffer->mapped, 0, output.size());

    // Per-call: shader object --------------------------------------------
    vk::PushConstantRange shader_pc{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .offset = 0,
        // .size = static_cast<std::uint32_t>(push_constants.size()),
        .size = sizeof(Shader::PushData),
    };
    vk::ShaderCreateInfoEXT sci{
        .stage = vk::ShaderStageFlagBits::eCompute,
        .codeType = vk::ShaderCodeTypeEXT::eSpirv,
        .codeSize = spirv.size() * sizeof(std::uint32_t),
        .pCode = spirv.data(),
        .pName = "main",
        .setLayoutCount = 1,
        .pSetLayouts = &descriptor_set_layout_,
        .pushConstantRangeCount = push_constants.empty() ? 0u : 1u,
        .pPushConstantRanges = push_constants.empty() ? nullptr : &shader_pc,
    };
    auto [sr, shaders] = device_.createShadersEXT(sci);
    if (sr != vk::Result::eSuccess)
        return make_error(Error::ShaderCreationFailed,
                          std::format("createShadersEXT: {}", vk::to_string(sr)));
    auto shader = shaders[0];

    // Destroys the shader object on every exit path below.
    struct ShaderGuard {
        vk::Device d;
        vk::ShaderEXT s;
        ~ShaderGuard() {
            if (s)
                d.destroyShaderEXT(s);
        }
    } sg{device_, shader};

    // Reset cached command buffer + fence --------------------------------
    if (device_.resetFences(fence_) != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "resetFences");
    if (command_buffer_.reset() != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "cmd.reset");
    if (command_buffer_.begin({
            .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
        }) != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "cmd.begin");

    // Bind shader object -------------------------------------------------
    vk::ShaderStageFlagBits stage = vk::ShaderStageFlagBits::eCompute;
    command_buffer_.bindShadersEXT(1, &stage, &shader);

    // Push descriptor: binding 0 = output SSBO ---------------------------
    vk::DescriptorBufferInfo dbi{
        .buffer = output_buffer->buffer,
        .offset = 0,
        .range = VK_WHOLE_SIZE,
    };
    vk::WriteDescriptorSet write{
        .dstBinding = 0,
        .descriptorCount = 1,
        .descriptorType = vk::DescriptorType::eStorageBuffer,
        .pBufferInfo = &dbi,
    };
    vk::PushDescriptorSetInfoKHR push_desc{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .layout = pipeline_layout_,
        .set = 0,
        .descriptorWriteCount = 1,
        .pDescriptorWrites = &write,
    };
    command_buffer_.pushDescriptorSet2KHR(push_desc);

    // Push constants -----------------------------------------------------
    if (!push_constants.empty()) {
        vk::PushConstantsInfoKHR pci{
            .layout = pipeline_layout_,
            .stageFlags = vk::ShaderStageFlagBits::eCompute,
            .offset = kUserDataPushOffset, // fall onto ud_regs in PushData
            .size = static_cast<std::uint32_t>(push_constants.size()),
            .pValues = push_constants.data(),
        };
        command_buffer_.pushConstants2KHR(pci);
    }

    command_buffer_.dispatch(dispatch.x, dispatch.y, dispatch.z);

    // Make the shader's writes visible to the host read-back below.
    vk::MemoryBarrier barrier{
        .srcAccessMask = vk::AccessFlagBits::eShaderWrite,
        .dstAccessMask = vk::AccessFlagBits::eHostRead,
    };
    command_buffer_.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
                                    vk::PipelineStageFlagBits::eHost, {}, barrier, {}, {});

    if (command_buffer_.end() != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "cmd.end");

    vk::SubmitInfo si{
        .commandBufferCount = 1,
        .pCommandBuffers = &command_buffer_,
    };
    if (queue_.submit(si, fence_) != vk::Result::eSuccess)
        return make_error(Error::CommandSubmissionFailed, "queue.submit");

    // Block until the dispatch has finished before touching the mapping.
    if (device_.waitForFences(fence_, VK_TRUE, UINT64_MAX) != vk::Result::eSuccess)
        return make_error(Error::ExecutionFailed, "waitForFences");

    std::memcpy(output.data(), output_buffer->mapped, output.size());
    return {};
}

} // namespace gcn_test