renderer_vulkan: Renderer microoptimizations (#4290)

* renderer_vulkan: Microoptimizations from profiling

* buffer_cache: Only use streaming path on CPU-modified memory

If the pages aren't CPU-modified, a GPU buffer with valid data already exists and can be used instead.

* clang format

* fix max num_layers
This commit is contained in:
TheTurtle 2026-04-20 09:20:07 +03:00 committed by GitHub
parent 012b4a6ef5
commit 526c346f90
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 137 additions and 100 deletions

View File

@ -17,8 +17,12 @@ s32 PS4_SYSV_ABI sceKernelIsInSandbox() {
}
s32 PS4_SYSV_ABI sceKernelIsNeoMode() {
return EmulatorSettings.IsNeo() &&
Common::ElfInfo::Instance().GetPSFAttributes().support_neo_mode;
static s32 IsNeoMode = -1;
if (IsNeoMode == -1) {
IsNeoMode = EmulatorSettings.IsNeo() &&
Common::ElfInfo::Instance().GetPSFAttributes().support_neo_mode;
}
return IsNeoMode;
}
s32 PS4_SYSV_ABI sceKernelHasNeoMode() {

View File

@ -375,7 +375,8 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds,
std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written,
bool is_texel_buffer, BufferId buffer_id) {
// For read-only buffers use device local stream buffer to reduce renderpass breaks.
if (!is_written && size <= CACHING_PAGESIZE && !IsRegionGpuModified(device_addr, size)) {
if (!is_written && size <= CACHING_PAGESIZE && !IsRegionGpuModified(device_addr, size) &&
IsRegionCpuModified(device_addr, size)) {
const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
return {&stream_buffer, offset};
}

View File

@ -57,7 +57,7 @@ public:
return is_compute;
}
using DescriptorWrites = boost::container::small_vector<vk::WriteDescriptorSet, 16>;
using DescriptorWrites = std::vector<vk::WriteDescriptorSet>;
using BufferBarriers = boost::container::small_vector<vk::BufferMemoryBarrier2, 16>;
void BindResources(DescriptorWrites& set_writes, const BufferBarriers& buffer_barriers,

View File

@ -385,6 +385,7 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
return false;
}
set_write_index = 0;
set_writes.clear();
buffer_barriers.clear();
buffer_infos.clear();
@ -399,6 +400,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
if (!stage) {
continue;
}
set_writes.resize(set_writes.size() + stage->buffers.size() + stage->images.size() +
stage->samplers.size());
stage->PushUd(binding, push_data);
BindBuffers(*stage, binding, push_data);
BindTextures(*stage, binding);
@ -647,15 +650,14 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
}
}
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = is_storage ? vk::DescriptorType::eStorageBuffer
: vk::DescriptorType::eUniformBuffer,
.pBufferInfo = &buffer_infos.back(),
});
auto& set_write = set_writes[set_write_index++];
set_write.dstSet = VK_NULL_HANDLE;
set_write.dstBinding = binding.unified++;
set_write.dstArrayElement = 0;
set_write.descriptorCount = 1;
set_write.descriptorType =
is_storage ? vk::DescriptorType::eStorageBuffer : vk::DescriptorType::eUniformBuffer;
set_write.pBufferInfo = &buffer_infos.back();
++binding.buffer;
}
}
@ -781,15 +783,14 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
for (u32 array_size : image_descriptor_array_sizes) {
const auto& [_, desc] = image_bindings[image_binding_idx];
const bool is_storage = desc.type == VideoCore::TextureCache::BindingType::Storage;
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified,
.dstArrayElement = 0,
.descriptorCount = array_size,
.descriptorType =
is_storage ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos[image_info_idx],
});
auto& set_write = set_writes[set_write_index++];
set_write.dstSet = VK_NULL_HANDLE;
set_write.dstBinding = binding.unified;
set_write.dstArrayElement = 0;
set_write.descriptorCount = array_size;
set_write.descriptorType =
is_storage ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage;
set_write.pImageInfo = &image_infos[image_info_idx];
image_info_idx += array_size;
image_binding_idx += array_size;
@ -806,14 +807,13 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
}
const auto vk_sampler = texture_cache.GetSampler(ssharp, liverpool->regs.ta_bc_base);
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampler,
.pImageInfo = &image_infos.back(),
});
auto& set_write = set_writes[set_write_index++];
set_write.dstSet = VK_NULL_HANDLE;
set_write.dstBinding = binding.unified++;
set_write.dstArrayElement = 0;
set_write.descriptorCount = 1;
set_write.descriptorType = vk::DescriptorType::eSampler;
set_write.pImageInfo = &image_infos.back();
}
}
@ -824,7 +824,7 @@ RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) {
RenderState state;
state.width = instance.GetMaxFramebufferWidth();
state.height = instance.GetMaxFramebufferHeight();
state.num_layers = std::numeric_limits<u32>::max();
state.num_layers = std::numeric_limits<u16>::max();
state.num_color_attachments = std::bit_width(key.mrt_mask);
for (auto cb = 0u; cb < state.num_color_attachments; ++cb) {
auto& [image_id, desc] = cb_descs[cb];
@ -864,16 +864,20 @@ RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) {
state.width = std::min<u32>(state.width, std::max(image->info.size.width >> mip, 1u));
state.height = std::min<u32>(state.height, std::max(image->info.size.height >> mip, 1u));
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
state.color_attachments[cb] = {
.imageView = *image_view.image_view,
.imageLayout = image->backing->state.layout,
.loadOp = is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue =
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{},
};
const auto clear_value =
is_clear ? LiverpoolToVK::ColorBufferClearValue(col_buf) : vk::ClearValue{};
auto& attachment = state.color_attachments[cb];
attachment.image_view = *image_view.image_view;
attachment.image_layout = image->backing->state.layout;
attachment.clear_value = clear_value.color.uint32;
attachment.is_clear = is_clear;
image->usage.render_target = 1u;
}
for (u32 cb = state.num_color_attachments; cb < state.color_attachments.size(); ++cb) {
state.color_attachments[cb] = {};
}
if (auto image_id = db_desc.first; image_id) {
auto& desc = db_desc.second;
@ -901,34 +905,29 @@ RenderState Rasterizer::BeginRendering(const GraphicsPipeline* pipeline) {
state.width = std::min<u32>(state.width, image.info.size.width);
state.height = std::min<u32>(state.height, image.info.size.height);
state.has_depth = regs.depth_buffer.DepthValid();
state.has_stencil = regs.depth_buffer.StencilValid();
state.num_layers = std::min<u32>(state.num_layers, image_view.info.range.extent.layers);
if (state.has_depth) {
state.depth_attachment = {
.imageView = *image_view.image_view,
.imageLayout = image.backing->state.layout,
.loadOp =
is_depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.depth = regs.depth_clear}},
};
auto& attachment = state.depth_stencil_attachment;
attachment.image_view = *image_view.image_view;
attachment.image_layout = image.backing->state.layout;
if (regs.depth_buffer.DepthValid()) {
attachment.clear_value[0] = is_depth_clear ? std::bit_cast<u32>(regs.depth_clear) : 0u;
attachment.has_depth = true;
attachment.depth_clear = is_depth_clear;
}
if (state.has_stencil) {
state.stencil_attachment = {
.imageView = *image_view.image_view,
.imageLayout = image.backing->state.layout,
.loadOp =
is_stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.stencil = regs.stencil_clear}},
};
if (regs.depth_buffer.StencilValid()) {
attachment.clear_value[1] = is_stencil_clear ? regs.stencil_clear : 0u;
attachment.has_stencil = true;
attachment.stencil_clear = is_stencil_clear;
}
image.usage.depth_target = true;
} else {
state.depth_stencil_attachment = {};
}
if (state.num_layers == std::numeric_limits<u32>::max()) {
if (state.num_layers == std::numeric_limits<u16>::max()) {
state.num_layers = 1;
}

View File

@ -135,6 +135,7 @@ private:
boost::container::static_vector<vk::DescriptorBufferInfo, Shader::NUM_BUFFERS> buffer_infos;
boost::container::static_vector<VideoCore::ImageId, Shader::NUM_IMAGES> bound_images;
u32 set_write_index{};
Pipeline::DescriptorWrites set_writes;
Pipeline::BufferBarriers buffer_barriers;
Shader::PushData push_data;

View File

@ -36,6 +36,37 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
is_rendering = true;
render_state = new_state;
std::array<vk::RenderingAttachmentInfo, 8> color_attachments;
for (u32 i = 0; i < render_state.num_color_attachments; ++i) {
const auto& cb = render_state.color_attachments[i];
color_attachments[i] = vk::RenderingAttachmentInfo{
.imageView = cb.image_view,
.imageLayout = cb.image_layout,
.loadOp = cb.is_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.color = vk::ClearColorValue{.uint32 = cb.clear_value}},
};
}
const auto& db = render_state.depth_stencil_attachment;
const vk::RenderingAttachmentInfo depth_attachment = {
.imageView = db.image_view,
.imageLayout = db.image_layout,
.loadOp = db.depth_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue =
vk::ClearValue{.depthStencil = vk::ClearDepthStencilValue{.depth = std::bit_cast<float>(
db.clear_value[0])}},
};
const vk::RenderingAttachmentInfo stencil_attachment = {
.imageView = db.image_view,
.imageLayout = db.image_layout,
.loadOp = db.stencil_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil =
vk::ClearDepthStencilValue{.stencil = db.clear_value[1]}},
};
const vk::RenderingInfo rendering_info = {
.renderArea =
{
@ -44,11 +75,9 @@ void Scheduler::BeginRendering(const RenderState& new_state) {
},
.layerCount = render_state.num_layers,
.colorAttachmentCount = render_state.num_color_attachments,
.pColorAttachments = render_state.num_color_attachments > 0
? render_state.color_attachments.data()
: nullptr,
.pDepthAttachment = render_state.has_depth ? &render_state.depth_attachment : nullptr,
.pStencilAttachment = render_state.has_stencil ? &render_state.stencil_attachment : nullptr,
.pColorAttachments = color_attachments.data(),
.pDepthAttachment = db.has_depth ? &depth_attachment : nullptr,
.pStencilAttachment = db.has_stencil ? &stencil_attachment : nullptr,
};
current_cmdbuf.beginRendering(rendering_info);

View File

@ -22,29 +22,35 @@ namespace Vulkan {
class Instance;
struct RenderState {
std::array<vk::RenderingAttachmentInfo, 8> color_attachments;
vk::RenderingAttachmentInfo depth_attachment;
vk::RenderingAttachmentInfo stencil_attachment;
u32 num_color_attachments;
u32 num_layers;
bool has_depth;
bool has_stencil;
u32 width;
u32 height;
/// Compact description of a single rendering attachment. The same layout is used for
/// color attachments and for the combined depth/stencil attachment; the scheduler
/// expands it into vk::RenderingAttachmentInfo when rendering begins.
struct RenderAttachment {
// Image view bound as the attachment.
vk::ImageView image_view;
// Layout the image is in while used as an attachment.
vk::ImageLayout image_layout;
// Raw clear value. Color attachments store the four uint32 color components.
// The depth/stencil attachment stores the depth clear value (float bits) in [0]
// and the stencil clear value in [1].
std::array<u32, 4> clear_value;
// The flags below share storage: color attachments use `is_clear`, the
// depth/stencil attachment uses the four bool flags.
union {
// Color: non-zero when the attachment is cleared on load instead of loaded.
u32 is_clear;
struct {
// Depth aspect is present and should be bound.
bool has_depth;
// Depth aspect is cleared on load.
bool depth_clear;
// Stencil aspect is present and should be bound.
bool has_stencil;
// Stencil aspect is cleared on load.
bool stencil_clear;
};
};
};
static_assert(std::has_unique_object_representations_v<RenderAttachment>);
RenderState() {
std::memset(this, 0, sizeof(*this));
color_attachments.fill(vk::RenderingAttachmentInfo{});
depth_attachment = vk::RenderingAttachmentInfo{};
stencil_attachment = vk::RenderingAttachmentInfo{};
num_layers = 1;
}
struct RenderState {
std::array<RenderAttachment, 8> color_attachments;
RenderAttachment depth_stencil_attachment;
u16 width;
u16 height;
u16 num_layers;
u16 num_color_attachments;
bool operator==(const RenderState& other) const noexcept {
return std::memcmp(this, &other, sizeof(RenderState)) == 0;
}
};
static_assert(std::has_unique_object_representations_v<RenderState>);
struct SubmitInfo {
std::array<vk::Semaphore, 3> wait_semas;

View File

@ -90,16 +90,13 @@ void BlitHelper::ReinterpretColorAsMsDepth(u32 width, u32 height, u32 num_sample
});
Vulkan::RenderState state{};
state.has_depth = true;
state.width = width;
state.height = height;
state.depth_attachment = vk::RenderingAttachmentInfo{
.imageView = depth_view,
.imageLayout = vk::ImageLayout::eDepthAttachmentOptimal,
.loadOp = vk::AttachmentLoadOp::eDontCare,
.storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = vk::ClearValue{.depthStencil = {.depth = 0.f}},
};
state.num_layers = 1;
state.depth_stencil_attachment.image_view = depth_view;
state.depth_stencil_attachment.image_layout = vk::ImageLayout::eDepthAttachmentOptimal;
state.depth_stencil_attachment.has_depth = true;
state.depth_stencil_attachment.depth_clear = true;
scheduler.BeginRendering(state);
const auto cmdbuf = scheduler.CommandBuffer();
@ -196,12 +193,11 @@ void BlitHelper::CopyBetweenMsImages(u32 width, u32 height, u32 num_samples,
Vulkan::RenderState state{};
state.width = width;
state.height = height;
state.color_attachments[state.num_color_attachments++] = vk::RenderingAttachmentInfo{
.imageView = dst_view,
.imageLayout = vk::ImageLayout::eColorAttachmentOptimal,
.loadOp = vk::AttachmentLoadOp::eDontCare,
.storeOp = vk::AttachmentStoreOp::eStore,
};
state.num_layers = 1;
state.num_color_attachments = 1;
state.color_attachments[0].image_view = dst_view;
state.color_attachments[0].image_layout = vk::ImageLayout::eColorAttachmentOptimal;
state.color_attachments[0].is_clear = true;
scheduler.BeginRendering(state);
const auto cmdbuf = scheduler.CommandBuffer();

View File

@ -26,7 +26,8 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
PageManager& tracker_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
buffer_cache{buffer_cache_}, tracker{tracker_}, blit_helper{instance, scheduler},
tile_manager{instance, scheduler, buffer_cache.GetUtilityBuffer(MemoryUsage::Stream)} {
tile_manager{instance, scheduler, buffer_cache.GetUtilityBuffer(MemoryUsage::Stream)},
readback_linear_images{EmulatorSettings.IsReadbackLinearImagesEnabled()} {
// Create basic null image at fixed image ID.
const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm);
ASSERT(null_id.index == NULL_IMAGE_ID.index);
@ -641,8 +642,7 @@ ImageView& TextureCache::FindTexture(ImageId image_id, const ImageDesc& desc) {
Image& image = slot_images[image_id];
if (desc.type == BindingType::Storage) {
image.flags |= ImageFlagBits::GpuModified;
if (EmulatorSettings.IsReadbackLinearImagesEnabled() && !image.info.props.is_tiled &&
image.info.guest_address != 0) {
if (readback_linear_images && !image.info.props.is_tiled && image.info.guest_address != 0) {
download_images.emplace(image_id);
}
}
@ -653,7 +653,7 @@ ImageView& TextureCache::FindTexture(ImageId image_id, const ImageDesc& desc) {
ImageView& TextureCache::FindRenderTarget(ImageId image_id, const ImageDesc& desc) {
Image& image = slot_images[image_id];
image.flags |= ImageFlagBits::GpuModified;
if (EmulatorSettings.IsReadbackLinearImagesEnabled() && !image.info.props.is_tiled) {
if (readback_linear_images && !image.info.props.is_tiled) {
download_images.emplace(image_id);
}
image.usage.render_target = 1u;

View File

@ -312,6 +312,7 @@ private:
u64 critical_gc_memory = 0;
u64 gc_tick = 0;
Common::LeastRecentlyUsedCache<ImageId, u64> lru_cache;
bool readback_linear_images;
PageTable page_table;
std::mutex mutex;
struct MetaDataInfo {