From 526c346f907a6e068fe905e185bc5a97a0d36f9a Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Mon, 20 Apr 2026 09:20:07 +0300 Subject: [PATCH] renderer_vulkan: Renderer microoptimizations (#4290) * renderer_vulkan: Microoptimizations from profiling * buffer_cache: Only use streaming path on cpu modified memory If the pages aren't cpu modified, then a gpu buffer with valid data exists that can be used instead * clang format * fix max num_layers --- src/core/libraries/kernel/process.cpp | 8 +- src/video_core/buffer_cache/buffer_cache.cpp | 3 +- .../renderer_vulkan/vk_pipeline_common.h | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 111 +++++++++--------- .../renderer_vulkan/vk_rasterizer.h | 1 + .../renderer_vulkan/vk_scheduler.cpp | 39 +++++- src/video_core/renderer_vulkan/vk_scheduler.h | 40 ++++--- src/video_core/texture_cache/blit_helper.cpp | 24 ++-- .../texture_cache/texture_cache.cpp | 8 +- src/video_core/texture_cache/texture_cache.h | 1 + 10 files changed, 137 insertions(+), 100 deletions(-) diff --git a/src/core/libraries/kernel/process.cpp b/src/core/libraries/kernel/process.cpp index 2af5aa1bf..16b7159e9 100644 --- a/src/core/libraries/kernel/process.cpp +++ b/src/core/libraries/kernel/process.cpp @@ -17,8 +17,12 @@ s32 PS4_SYSV_ABI sceKernelIsInSandbox() { } s32 PS4_SYSV_ABI sceKernelIsNeoMode() { - return EmulatorSettings.IsNeo() && - Common::ElfInfo::Instance().GetPSFAttributes().support_neo_mode; + static s32 IsNeoMode = -1; + if (IsNeoMode == -1) { + IsNeoMode = EmulatorSettings.IsNeo() && + Common::ElfInfo::Instance().GetPSFAttributes().support_neo_mode; + } + return IsNeoMode; } s32 PS4_SYSV_ABI sceKernelHasNeoMode() { diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 7347e99a2..c886e37a3 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -375,7 +375,8 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool
dst_gds, std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written, bool is_texel_buffer, BufferId buffer_id) { // For read-only buffers use device local stream buffer to reduce renderpass breaks. - if (!is_written && size <= CACHING_PAGESIZE && !IsRegionGpuModified(device_addr, size)) { + if (!is_written && size <= CACHING_PAGESIZE && !IsRegionGpuModified(device_addr, size) && + IsRegionCpuModified(device_addr, size)) { const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment()); return {&stream_buffer, offset}; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_common.h b/src/video_core/renderer_vulkan/vk_pipeline_common.h index eb4e64c80..11fd8bbf3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_common.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_common.h @@ -57,7 +57,7 @@ public: return is_compute; } - using DescriptorWrites = boost::container::small_vector; + using DescriptorWrites = std::vector; using BufferBarriers = boost::container::small_vector; void BindResources(DescriptorWrites& set_writes, const BufferBarriers& buffer_barriers, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 800941fe3..427468bea 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -385,6 +385,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { return false; } + set_write_index = 0; set_writes.clear(); buffer_barriers.clear(); buffer_infos.clear(); @@ -399,6 +400,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { if (!stage) { continue; } + set_writes.resize(set_writes.size() + stage->buffers.size() + stage->images.size() + + stage->samplers.size()); stage->PushUd(binding, push_data); BindBuffers(*stage, binding, push_data); BindTextures(*stage, binding); @@ -647,15 +650,14 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, 
Shader::Backend::Binding } } - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer - : vk::DescriptorType::eUniformBuffer, - .pBufferInfo = &buffer_infos.back(), - }); + auto& set_write = set_writes[set_write_index++]; + set_write.dstSet = VK_NULL_HANDLE; + set_write.dstBinding = binding.unified++; + set_write.dstArrayElement = 0; + set_write.descriptorCount = 1; + set_write.descriptorType = + is_storage ? vk::DescriptorType::eStorageBuffer : vk::DescriptorType::eUniformBuffer; + set_write.pBufferInfo = &buffer_infos.back(); ++binding.buffer; } } @@ -781,15 +783,14 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin for (u32 array_size : image_descriptor_array_sizes) { const auto& [_, desc] = image_bindings[image_binding_idx]; const bool is_storage = desc.type == VideoCore::TextureCache::BindingType::Storage; - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified, - .dstArrayElement = 0, - .descriptorCount = array_size, - .descriptorType = - is_storage ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage, - .pImageInfo = &image_infos[image_info_idx], - }); + auto& set_write = set_writes[set_write_index++]; + set_write.dstSet = VK_NULL_HANDLE; + set_write.dstBinding = binding.unified; + set_write.dstArrayElement = 0; + set_write.descriptorCount = array_size; + set_write.descriptorType = + is_storage ? 
vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage; + set_write.pImageInfo = &image_infos[image_info_idx]; image_info_idx += array_size; image_binding_idx += array_size; @@ -806,14 +807,13 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin } const auto vk_sampler = texture_cache.GetSampler(ssharp, liverpool->regs.ta_bc_base); image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral); - set_writes.push_back({ - .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eSampler, - .pImageInfo = &image_infos.back(), - }); + auto& set_write = set_writes[set_write_index++]; + set_write.dstSet = VK_NULL_HANDLE; + set_write.dstBinding = binding.unified++; + set_write.dstArrayElement = 0; + set_write.descriptorCount = 1; + set_write.descriptorType = vk::DescriptorType::eSampler; + set_write.pImageInfo = &image_infos.back(); } } @@ -824,7 +824,7 @@ RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) { RenderState state; state.width = instance.GetMaxFramebufferWidth(); state.height = instance.GetMaxFramebufferHeight(); - state.num_layers = std::numeric_limits::max(); + state.num_layers = std::numeric_limits::max(); state.num_color_attachments = std::bit_width(key.mrt_mask); for (auto cb = 0u; cb < state.num_color_attachments; ++cb) { auto& [image_id, desc] = cb_descs[cb]; @@ -864,16 +864,20 @@ RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) { state.width = std::min(state.width, std::max(image->info.size.width >> mip, 1u)); state.height = std::min(state.height, std::max(image->info.size.height >> mip, 1u)); state.num_layers = std::min(state.num_layers, image_view.info.range.extent.layers); - state.color_attachments[cb] = { - .imageView = *image_view.image_view, - .imageLayout = image->backing->state.layout, - .loadOp = is_clear ? 
vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = - is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{}, - }; + + const auto clear_value = + is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{}; + auto& attachment = state.color_attachments[cb]; + attachment.image_view = *image_view.image_view; + attachment.image_layout = image->backing->state.layout; + attachment.clear_value = clear_value.color.uint32; + attachment.is_clear = is_clear; + image->usage.render_target = 1u; } + for (u32 cb = state.num_color_attachments; cb < state.color_attachments.size(); ++cb) { + state.color_attachments[cb] = {}; + } if (auto image_id = db_desc.first; image_id) { auto& desc = db_desc.second; @@ -901,34 +905,29 @@ RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) { state.width = std::min(state.width, image.info.size.width); state.height = std::min(state.height, image.info.size.height); - state.has_depth = regs.depth_buffer.DepthValid(); - state.has_stencil = regs.depth_buffer.StencilValid(); state.num_layers = std::min(state.num_layers, image_view.info.range.extent.layers); - if (state.has_depth) { - state.depth_attachment = { - .imageView = *image_view.image_view, - .imageLayout = image.backing->state.layout, - .loadOp = - is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}}, - }; + + auto& attachment = state.depth_stencil_attachment; + attachment.image_view = *image_view.image_view; + attachment.image_layout = image.backing->state.layout; + + if (regs.depth_buffer.DepthValid()) { + attachment.clear_value[0] = is_depth_clear ? 
std::bit_cast(regs.depth_clear) : 0u; + attachment.has_depth = true; + attachment.depth_clear = is_depth_clear; } - if (state.has_stencil) { - state.stencil_attachment = { - .imageView = *image_view.image_view, - .imageLayout = image.backing->state.layout, - .loadOp = - is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}}, - }; + if (regs.depth_buffer.StencilValid()) { + attachment.clear_value[1] = is_stencil_clear ? regs.stencil_clear : 0u; + attachment.has_stencil = true; + attachment.stencil_clear = is_stencil_clear; } image.usage.depth_target = true; + } else { + state.depth_stencil_attachment = {}; } - if (state.num_layers == std::numeric_limits::max()) { + if (state.num_layers == std::numeric_limits::max()) { state.num_layers = 1; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c73626f3f..1820da417 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -135,6 +135,7 @@ private: boost::container::static_vector buffer_infos; boost::container::static_vector bound_images; + u32 set_write_index{}; Pipeline::DescriptorWrites set_writes; Pipeline::BufferBarriers buffer_barriers; Shader::PushData push_data; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index fee0b408e..2745363b4 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -36,6 +36,37 @@ void Scheduler::BeginRendering(const RenderState& new_state) { is_rendering = true; render_state = new_state; + std::array color_attachments; + for (u32 i = 0; i < render_state.num_color_attachments; ++i) { + const auto& cb = render_state.color_attachments[i]; + color_attachments[i] = vk::RenderingAttachmentInfo{ + 
.imageView = cb.image_view, + .imageLayout = cb.image_layout, + .loadOp = cb.is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{.color = vk::ClearColorValue{.uint32 = cb.clear_value}}, + }; + } + + const auto& db = render_state.depth_stencil_attachment; + const vk::RenderingAttachmentInfo depth_attachment = { + .imageView = db.image_view, + .imageLayout = db.image_layout, + .loadOp = db.depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = + vk::ClearValue{.depthStencil = vk::ClearDepthStencilValue{.depth = std::bit_cast( + db.clear_value[0])}}, + }; + const vk::RenderingAttachmentInfo stencil_attachment = { + .imageView = db.image_view, + .imageLayout = db.image_layout, + .loadOp = db.stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{.depthStencil = + vk::ClearDepthStencilValue{.stencil = db.clear_value[1]}}, + }; + const vk::RenderingInfo rendering_info = { .renderArea = { @@ -44,11 +75,9 @@ void Scheduler::BeginRendering(const RenderState& new_state) { }, .layerCount = render_state.num_layers, .colorAttachmentCount = render_state.num_color_attachments, - .pColorAttachments = render_state.num_color_attachments > 0 - ? render_state.color_attachments.data() - : nullptr, - .pDepthAttachment = render_state.has_depth ? &render_state.depth_attachment : nullptr, - .pStencilAttachment = render_state.has_stencil ? &render_state.stencil_attachment : nullptr, + .pColorAttachments = color_attachments.data(), + .pDepthAttachment = db.has_depth ? &depth_attachment : nullptr, + .pStencilAttachment = db.has_stencil ? 
&stencil_attachment : nullptr, }; current_cmdbuf.beginRendering(rendering_info); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index aff299e54..f381ca4e9 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -22,29 +22,35 @@ namespace Vulkan { class Instance; -struct RenderState { - std::array color_attachments; - vk::RenderingAttachmentInfo depth_attachment; - vk::RenderingAttachmentInfo stencil_attachment; - u32 num_color_attachments; - u32 num_layers; - bool has_depth; - bool has_stencil; - u32 width; - u32 height; +struct RenderAttachment { + vk::ImageView image_view; + vk::ImageLayout image_layout; + std::array clear_value; + union { + u32 is_clear; + struct { + bool has_depth; + bool depth_clear; + bool has_stencil; + bool stencil_clear; + }; + }; +}; +static_assert(std::has_unique_object_representations_v); - RenderState() { - std::memset(this, 0, sizeof(*this)); - color_attachments.fill(vk::RenderingAttachmentInfo{}); - depth_attachment = vk::RenderingAttachmentInfo{}; - stencil_attachment = vk::RenderingAttachmentInfo{}; - num_layers = 1; - } +struct RenderState { + std::array color_attachments; + RenderAttachment depth_stencil_attachment; + u16 width; + u16 height; + u16 num_layers; + u16 num_color_attachments; bool operator==(const RenderState& other) const noexcept { return std::memcmp(this, &other, sizeof(RenderState)) == 0; } }; +static_assert(std::has_unique_object_representations_v); struct SubmitInfo { std::array wait_semas; diff --git a/src/video_core/texture_cache/blit_helper.cpp b/src/video_core/texture_cache/blit_helper.cpp index f1b79e925..16696aa66 100644 --- a/src/video_core/texture_cache/blit_helper.cpp +++ b/src/video_core/texture_cache/blit_helper.cpp @@ -90,16 +90,13 @@ void BlitHelper::ReinterpretColorAsMsDepth(u32 width, u32 height, u32 num_sample }); Vulkan::RenderState state{}; - state.has_depth = true; state.width 
= width; state.height = height; - state.depth_attachment = vk::RenderingAttachmentInfo{ - .imageView = depth_view, - .imageLayout = vk::ImageLayout::eDepthAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eDontCare, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{.depthStencil = {.depth = 0.f}}, - }; + state.num_layers = 1; + state.depth_stencil_attachment.image_view = depth_view; + state.depth_stencil_attachment.image_layout = vk::ImageLayout::eDepthAttachmentOptimal; + state.depth_stencil_attachment.has_depth = true; + state.depth_stencil_attachment.depth_clear = true; scheduler.BeginRendering(state); const auto cmdbuf = scheduler.CommandBuffer(); @@ -196,12 +193,11 @@ void BlitHelper::CopyBetweenMsImages(u32 width, u32 height, u32 num_samples, Vulkan::RenderState state{}; state.width = width; state.height = height; - state.color_attachments[state.num_color_attachments++] = vk::RenderingAttachmentInfo{ - .imageView = dst_view, - .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eDontCare, - .storeOp = vk::AttachmentStoreOp::eStore, - }; + state.num_layers = 1; + state.num_color_attachments = 1; + state.color_attachments[0].image_view = dst_view; + state.color_attachments[0].image_layout = vk::ImageLayout::eColorAttachmentOptimal; + state.color_attachments[0].is_clear = true; scheduler.BeginRendering(state); const auto cmdbuf = scheduler.CommandBuffer(); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 507238d7c..ca63f60ed 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -26,7 +26,8 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& PageManager& tracker_) : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, buffer_cache{buffer_cache_}, tracker{tracker_}, blit_helper{instance, scheduler}, - tile_manager{instance, 
scheduler, buffer_cache.GetUtilityBuffer(MemoryUsage::Stream)} { + tile_manager{instance, scheduler, buffer_cache.GetUtilityBuffer(MemoryUsage::Stream)}, + readback_linear_images{EmulatorSettings.IsReadbackLinearImagesEnabled()} { // Create basic null image at fixed image ID. const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm); ASSERT(null_id.index == NULL_IMAGE_ID.index); @@ -641,8 +642,7 @@ ImageView& TextureCache::FindTexture(ImageId image_id, const ImageDesc& desc) { Image& image = slot_images[image_id]; if (desc.type == BindingType::Storage) { image.flags |= ImageFlagBits::GpuModified; - if (EmulatorSettings.IsReadbackLinearImagesEnabled() && !image.info.props.is_tiled && - image.info.guest_address != 0) { + if (readback_linear_images && !image.info.props.is_tiled && image.info.guest_address != 0) { download_images.emplace(image_id); } } @@ -653,7 +653,7 @@ ImageView& TextureCache::FindTexture(ImageId image_id, const ImageDesc& desc) { ImageView& TextureCache::FindRenderTarget(ImageId image_id, const ImageDesc& desc) { Image& image = slot_images[image_id]; image.flags |= ImageFlagBits::GpuModified; - if (EmulatorSettings.IsReadbackLinearImagesEnabled() && !image.info.props.is_tiled) { + if (readback_linear_images && !image.info.props.is_tiled) { download_images.emplace(image_id); } image.usage.render_target = 1u; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 141ac938f..3741e6af7 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -312,6 +312,7 @@ private: u64 critical_gc_memory = 0; u64 gc_tick = 0; Common::LeastRecentlyUsedCache lru_cache; + bool readback_linear_images; PageTable page_table; std::mutex mutex; struct MetaDataInfo {