From f3dbc86f9b041dc006d2ab3d93207a31c841e46c Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Fri, 26 Dec 2025 16:15:59 +0100 Subject: [PATCH 01/15] fix render pass barrier logic --- .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 8 +- .../Renderer/Vulkan/VulkanRendererCore.cpp | 96 +++++-------------- 2 files changed, 32 insertions(+), 72 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index 2c7af53b..f6780bae 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -8,6 +8,7 @@ #include "Cafe/HW/Latte/Renderer/Vulkan/CachedFBOVk.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h" #include "Cafe/HW/Latte/Renderer/Vulkan/SwapchainInfoVk.h" +#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h" #include "util/math/vector2.h" #include "util/helpers/Semaphore.h" #include "util/containers/flat_hash_map.hpp" @@ -546,7 +547,7 @@ private: void draw_handleSpecialState5(); // draw synchronization helper - void sync_inputTexturesChanged(); + bool sync_isInputTexturesSyncRequired(); void sync_RenderPassLoadTextures(CachedFBOVk* fboVk); void sync_RenderPassStoreTextures(CachedFBOVk* fboVk); @@ -825,6 +826,7 @@ private: bufMemBarrier.offset = offset; bufMemBarrier.size = size; vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStages, dstStages, 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr); + performanceMonitor.vk.numDrawBarriersPerFrame.increment(); } template @@ -863,6 +865,7 @@ private: bufMemBarrier[1].size = sizeB; vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStagesA|srcStagesB, dstStagesA|dstStagesB, 0, 0, nullptr, 2, bufMemBarrier, 0, nullptr); + performanceMonitor.vk.numDrawBarriersPerFrame.increment(); } void barrier_sequentializeTransfer() @@ -881,6 +884,7 @@ private: memBarrier.dstAccessMask |= (VK_ACCESS_MEMORY_READ_BIT | 
VK_ACCESS_MEMORY_WRITE_BIT); vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStages, dstStages, 0, 1, &memBarrier, 0, nullptr, 0, nullptr); + performanceMonitor.vk.numDrawBarriersPerFrame.increment(); } void barrier_sequentializeCommand() @@ -889,6 +893,7 @@ private: VkPipelineStageFlags dstStages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStages, dstStages, 0, 0, nullptr, 0, nullptr, 0, nullptr); + performanceMonitor.vk.numDrawBarriersPerFrame.increment(); } template @@ -916,6 +921,7 @@ private: 0, NULL, 0, NULL, 1, &imageMemBarrier); + performanceMonitor.vk.numDrawBarriersPerFrame.increment(); } template diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index a6814186..0e990c9a 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -975,68 +975,21 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo* return dsInfo; } -void VulkanRenderer::sync_inputTexturesChanged() +bool VulkanRenderer::sync_isInputTexturesSyncRequired() { - bool writeFlushRequired = false; - - if (m_state.activeVertexDS) - { - for (auto& tex : m_state.activeVertexDS->list_fboCandidates) + auto checkSync = [&](const VkDescriptorSetInfo* info) { + if (info) { - tex->m_vkFlushIndex_read = m_state.currentFlushIndex; - if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex) - writeFlushRequired = true; + for (auto& tex : m_state.activeVertexDS->list_fboCandidates) + { + tex->m_vkFlushIndex_read = m_state.currentFlushIndex; + if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex) + return true; + } } - } - if (m_state.activeGeometryDS) - { - for (auto& tex : m_state.activeGeometryDS->list_fboCandidates) - { - tex->m_vkFlushIndex_read = m_state.currentFlushIndex; - if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex) - writeFlushRequired = true; 
- } - } - if (m_state.activePixelDS) - { - for (auto& tex : m_state.activePixelDS->list_fboCandidates) - { - tex->m_vkFlushIndex_read = m_state.currentFlushIndex; - if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex) - writeFlushRequired = true; - } - } - // barrier here - if (writeFlushRequired) - { - VkMemoryBarrier memoryBarrier{}; - memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - memoryBarrier.srcAccessMask = 0; - memoryBarrier.dstAccessMask = 0; - - VkPipelineStageFlags srcStage = 0; - VkPipelineStageFlags dstStage = 0; - - // src - srcStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - memoryBarrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - - srcStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - memoryBarrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - - // dst - dstStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; - - dstStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; - - vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStage, dstStage, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); - - performanceMonitor.vk.numDrawBarriersPerFrame.increment(); - - m_state.currentFlushIndex++; - } + return false; + }; + return checkSync(m_state.activeVertexDS) || checkSync(m_state.activeGeometryDS) || checkSync(m_state.activePixelDS); } void 
VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) @@ -1051,7 +1004,7 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) readFlushRequired = true; - texVk->m_vkFlushIndex_write = m_state.currentFlushIndex; + texVk->m_vkFlushIndex_read = m_state.currentFlushIndex; // todo - also check for write-before-write ? if (texVk->m_vkFlushIndex_read == m_state.currentFlushIndex) readFlushRequired = true; @@ -1091,11 +1044,10 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) void VulkanRenderer::sync_RenderPassStoreTextures(CachedFBOVk* fboVk) { - uint32 flushIndex = m_state.currentFlushIndex; for (auto& tex : fboVk->GetTextures()) { LatteTextureVk* texVk = (LatteTextureVk*)tex; - texVk->m_vkFlushIndex_write = flushIndex; + texVk->m_vkFlushIndex_write = m_state.currentFlushIndex; } } @@ -1164,20 +1116,22 @@ void VulkanRenderer::draw_setRenderPass() auto vkObjRenderPass = fboVk->GetRenderPassObj(); auto vkObjFramebuffer = fboVk->GetFramebufferObj(); - bool overridePassReuse = m_state.hasRenderSelfDependency && (GetConfig().vk_accurate_barriers || m_state.activePipelineInfo->neverSkipAccurateBarrier); + const bool syncSkipAllowed = !(GetConfig().vk_accurate_barriers || m_state.activePipelineInfo->neverSkipAccurateBarrier); - if (!overridePassReuse && m_state.activeRenderpassFBO == fboVk) + const bool FBOChanged = m_state.activeRenderpassFBO != fboVk; + + bool inputSyncNecessary = false; + if (m_state.descriptorSetsChanged) + inputSyncNecessary = sync_isInputTexturesSyncRequired(); + + const bool passReusable = !FBOChanged && !inputSyncNecessary; + + if (passReusable) { - if (m_state.descriptorSetsChanged) - sync_inputTexturesChanged(); + // reuse previous render pass return; } draw_endRenderPass(); - if (m_state.descriptorSetsChanged) - sync_inputTexturesChanged(); - - // assume that FBO changed, update self-dependency state - m_state.hasRenderSelfDependency = fboVk->CheckForCollision(m_state.activeVertexDS, 
m_state.activeGeometryDS, m_state.activePixelDS); sync_RenderPassLoadTextures(fboVk); From 4b42b508a1c1776ae291977b7b4a7c2942cfcd26 Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Sat, 27 Dec 2025 14:01:20 +0100 Subject: [PATCH 02/15] backport rework from feedback loop branch --- .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 1 + .../Renderer/Vulkan/VulkanRendererCore.cpp | 110 ++++++++++-------- 2 files changed, 62 insertions(+), 49 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index f6780bae..0995c5af 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -547,6 +547,7 @@ private: void draw_handleSpecialState5(); // draw synchronization helper + void sync_performFlushBarrier(); bool sync_isInputTexturesSyncRequired(); void sync_RenderPassLoadTextures(CachedFBOVk* fboVk); void sync_RenderPassStoreTextures(CachedFBOVk* fboVk); diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 0e990c9a..18358784 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -975,8 +975,40 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo* return dsInfo; } +void VulkanRenderer::sync_performFlushBarrier() +{ + VkMemoryBarrier memoryBarrier{}; + memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + memoryBarrier.srcAccessMask = 0; + memoryBarrier.dstAccessMask = 0; + + VkPipelineStageFlags srcStage = 0; + VkPipelineStageFlags dstStage = 0; + + // src + srcStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + memoryBarrier.srcAccessMask |= 
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + + srcStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + memoryBarrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + + // dst + dstStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + + dstStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + + vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStage, dstStage, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); + + performanceMonitor.vk.numDrawBarriersPerFrame.increment(); + + m_state.currentFlushIndex++; +} + bool VulkanRenderer::sync_isInputTexturesSyncRequired() { + bool required = false; auto checkSync = [&](const VkDescriptorSetInfo* info) { if (info) { @@ -984,71 +1016,50 @@ bool VulkanRenderer::sync_isInputTexturesSyncRequired() { tex->m_vkFlushIndex_read = m_state.currentFlushIndex; if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex) - return true; + required = true; } } - return false; }; - return checkSync(m_state.activeVertexDS) || checkSync(m_state.activeGeometryDS) || checkSync(m_state.activePixelDS); + checkSync(m_state.activeVertexDS); + 
checkSync(m_state.activeGeometryDS); + checkSync(m_state.activePixelDS); + return required; } void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) { - bool readFlushRequired = false; - // always called after draw_inputTexturesChanged() + bool flushRequired = false; for (auto& tex : fboVk->GetTextures()) { LatteTextureVk* texVk = (LatteTextureVk*)tex; - // write-before-write + + //RAW if (texVk->m_vkFlushIndex_write == m_state.currentFlushIndex) - readFlushRequired = true; + flushRequired = true; - - texVk->m_vkFlushIndex_read = m_state.currentFlushIndex; - // todo - also check for write-before-write ? - if (texVk->m_vkFlushIndex_read == m_state.currentFlushIndex) - readFlushRequired = true; - } - // barrier here - if (readFlushRequired) - { - VkMemoryBarrier memoryBarrier{}; - memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - memoryBarrier.srcAccessMask = 0; - memoryBarrier.dstAccessMask = 0; - - VkPipelineStageFlags srcStage = 0; - VkPipelineStageFlags dstStage = 0; - - // src - srcStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - memoryBarrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - - srcStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - memoryBarrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - - // dst - dstStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; - - dstStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; - - vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStage, dstStage, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); - - performanceMonitor.vk.numDrawBarriersPerFrame.increment(); - - m_state.currentFlushIndex++; + if ((texVk->GetImageAspect() | VK_IMAGE_ASPECT_DEPTH_BIT) != 0) + texVk->m_vkFlushIndex_read = m_state.currentFlushIndex; } + if (flushRequired) + sync_performFlushBarrier(); } void VulkanRenderer::sync_RenderPassStoreTextures(CachedFBOVk* fboVk) { + bool flushRequired = false; for (auto& tex : fboVk->GetTextures()) { LatteTextureVk* texVk = (LatteTextureVk*)tex; + //WAR + if (texVk->m_vkFlushIndex_read == m_state.currentFlushIndex) + flushRequired = true; + //WAW + if (texVk->m_vkFlushIndex_write == m_state.currentFlushIndex) + flushRequired = true; texVk->m_vkFlushIndex_write = m_state.currentFlushIndex; } + if (flushRequired) + sync_performFlushBarrier(); } void VulkanRenderer::draw_prepareDescriptorSets(PipelineInfo* pipeline_info, VkDescriptorSetInfo*& vertexDS, VkDescriptorSetInfo*& pixelDS, VkDescriptorSetInfo*& geometryDS) @@ -1116,23 +1127,23 @@ void VulkanRenderer::draw_setRenderPass() auto vkObjRenderPass = fboVk->GetRenderPassObj(); auto vkObjFramebuffer = fboVk->GetFramebufferObj(); - const bool syncSkipAllowed = !(GetConfig().vk_accurate_barriers || m_state.activePipelineInfo->neverSkipAccurateBarrier); + const bool syncSkipAllowed = !m_state.hasRenderSelfDependency || !(GetConfig().vk_accurate_barriers || m_state.activePipelineInfo->neverSkipAccurateBarrier); const bool FBOChanged = m_state.activeRenderpassFBO != fboVk; - bool inputSyncNecessary = false; - if (m_state.descriptorSetsChanged) - inputSyncNecessary = sync_isInputTexturesSyncRequired(); - - const bool passReusable = !FBOChanged && !inputSyncNecessary; + const bool passReusable = !FBOChanged && syncSkipAllowed; if (passReusable) { + if (sync_isInputTexturesSyncRequired()) + sync_performFlushBarrier(); 
// reuse previous render pass return; } draw_endRenderPass(); + if (sync_isInputTexturesSyncRequired()) + sync_performFlushBarrier(); sync_RenderPassLoadTextures(fboVk); if (m_featureControl.deviceExtensions.dynamic_rendering) @@ -1167,6 +1178,7 @@ void VulkanRenderer::draw_endRenderPass() { if (!m_state.activeRenderpassFBO) return; + if (m_featureControl.deviceExtensions.dynamic_rendering) vkCmdEndRenderingKHR(m_state.currentCommandBuffer); else From 9fd42a60b2d45b08bcde86a467035b38ec23415b Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Sat, 27 Dec 2025 14:10:19 +0100 Subject: [PATCH 03/15] do not put barrier in render pass --- src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 18358784..a13428e3 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -1129,14 +1129,14 @@ void VulkanRenderer::draw_setRenderPass() const bool syncSkipAllowed = !m_state.hasRenderSelfDependency || !(GetConfig().vk_accurate_barriers || m_state.activePipelineInfo->neverSkipAccurateBarrier); + const bool inputSyncNecessary = sync_isInputTexturesSyncRequired(); + const bool FBOChanged = m_state.activeRenderpassFBO != fboVk; - const bool passReusable = !FBOChanged && syncSkipAllowed; + const bool passReusable = !FBOChanged && !inputSyncNecessary && syncSkipAllowed; if (passReusable) { - if (sync_isInputTexturesSyncRequired()) - sync_performFlushBarrier(); // reuse previous render pass return; } From 853b7fcb709c288808334407199be573c71aeb22 Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Sat, 27 Dec 2025 14:31:40 +0100 Subject: [PATCH 04/15] non-depth attachments also perform render pass loads --- 
src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index a13428e3..fba264fd 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -1037,8 +1037,7 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) if (texVk->m_vkFlushIndex_write == m_state.currentFlushIndex) flushRequired = true; - if ((texVk->GetImageAspect() | VK_IMAGE_ASPECT_DEPTH_BIT) != 0) - texVk->m_vkFlushIndex_read = m_state.currentFlushIndex; + texVk->m_vkFlushIndex_read = m_state.currentFlushIndex; } if (flushRequired) sync_performFlushBarrier(); From 55bc72fb34771103bd2cfbb6cf3a10b23395499f Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Sat, 27 Dec 2025 14:40:22 +0100 Subject: [PATCH 05/15] Remove duplicate WAW detection. 
--- src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index fba264fd..39dec18c 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -1033,7 +1033,7 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) { LatteTextureVk* texVk = (LatteTextureVk*)tex; - //RAW + //RAW / WAW if (texVk->m_vkFlushIndex_write == m_state.currentFlushIndex) flushRequired = true; @@ -1052,9 +1052,6 @@ void VulkanRenderer::sync_RenderPassStoreTextures(CachedFBOVk* fboVk) //WAR if (texVk->m_vkFlushIndex_read == m_state.currentFlushIndex) flushRequired = true; - //WAW - if (texVk->m_vkFlushIndex_write == m_state.currentFlushIndex) - flushRequired = true; texVk->m_vkFlushIndex_write = m_state.currentFlushIndex; } if (flushRequired) From c0810797e143c7ea5e633ad5c5ec816966185c7a Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Sun, 28 Dec 2025 20:41:49 +0100 Subject: [PATCH 06/15] copy changes from feedback_loop_fewer_barriers, again more validations errors but seems to make more sense this way --- .../Renderer/Vulkan/VulkanRendererCore.cpp | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 39dec18c..f919c66f 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -1010,14 +1010,13 @@ bool VulkanRenderer::sync_isInputTexturesSyncRequired() { bool required = false; auto checkSync = [&](const VkDescriptorSetInfo* info) { - if (info) + if (!info) + return; + for (auto& tex : info->list_fboCandidates) { - for (auto& tex : 
m_state.activeVertexDS->list_fboCandidates) - { - tex->m_vkFlushIndex_read = m_state.currentFlushIndex; - if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex) - required = true; - } + tex->m_vkFlushIndex_read = m_state.currentFlushIndex; + if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex) + required = true; } }; checkSync(m_state.activeVertexDS); @@ -1029,6 +1028,7 @@ bool VulkanRenderer::sync_isInputTexturesSyncRequired() void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) { bool flushRequired = false; + for (auto& tex : fboVk->GetTextures()) { LatteTextureVk* texVk = (LatteTextureVk*)tex; @@ -1036,26 +1036,28 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) //RAW / WAW if (texVk->m_vkFlushIndex_write == m_state.currentFlushIndex) flushRequired = true; + //WAR + if (texVk->m_vkFlushIndex_read == m_state.currentFlushIndex) + flushRequired = true; - texVk->m_vkFlushIndex_read = m_state.currentFlushIndex; } if (flushRequired) sync_performFlushBarrier(); + + for (auto& tex : fboVk->GetTextures()) + { + LatteTextureVk* texVk = (LatteTextureVk*)tex; + texVk->m_vkFlushIndex_read = m_state.currentFlushIndex; + } } void VulkanRenderer::sync_RenderPassStoreTextures(CachedFBOVk* fboVk) { - bool flushRequired = false; for (auto& tex : fboVk->GetTextures()) { LatteTextureVk* texVk = (LatteTextureVk*)tex; - //WAR - if (texVk->m_vkFlushIndex_read == m_state.currentFlushIndex) - flushRequired = true; texVk->m_vkFlushIndex_write = m_state.currentFlushIndex; } - if (flushRequired) - sync_performFlushBarrier(); } void VulkanRenderer::draw_prepareDescriptorSets(PipelineInfo* pipeline_info, VkDescriptorSetInfo*& vertexDS, VkDescriptorSetInfo*& pixelDS, VkDescriptorSetInfo*& geometryDS) @@ -1138,9 +1140,9 @@ void VulkanRenderer::draw_setRenderPass() } draw_endRenderPass(); + sync_RenderPassLoadTextures(fboVk); if (sync_isInputTexturesSyncRequired()) sync_performFlushBarrier(); - sync_RenderPassLoadTextures(fboVk); if 
(m_featureControl.deviceExtensions.dynamic_rendering) { From 1c9381653ea22ef2fd6159f0feef66faece094b5 Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Mon, 29 Dec 2025 00:20:50 +0100 Subject: [PATCH 07/15] remove unused flush index --- src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h b/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h index 612e2e70..475ed00e 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h @@ -78,8 +78,6 @@ protected: LatteTextureView* CreateView(Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount) override; public: - uint64 m_vkFlushIndex{}; // used to track read-write dependencies within the same renderpass - uint64 m_vkFlushIndex_read{}; uint64 m_vkFlushIndex_write{}; From 782d53eee680f836ec0796772c375f20829bf69f Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Mon, 29 Dec 2025 00:21:23 +0100 Subject: [PATCH 08/15] only traverse inputs when changed. 
--- src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index f919c66f..0af7b2f9 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -1127,7 +1127,7 @@ void VulkanRenderer::draw_setRenderPass() const bool syncSkipAllowed = !m_state.hasRenderSelfDependency || !(GetConfig().vk_accurate_barriers || m_state.activePipelineInfo->neverSkipAccurateBarrier); - const bool inputSyncNecessary = sync_isInputTexturesSyncRequired(); + const bool inputSyncNecessary = m_state.descriptorSetsChanged && sync_isInputTexturesSyncRequired(); const bool FBOChanged = m_state.activeRenderpassFBO != fboVk; From 66d007e7d3aa69a79dc819ce0fcfc5002e80aa3b Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Mon, 29 Dec 2025 14:30:02 +0100 Subject: [PATCH 09/15] simplyify mask assignment and remove read from source scope, WAR can be resolved with only execution dep. 
--- .../Latte/Renderer/Vulkan/VulkanRendererCore.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 0af7b2f9..f8f62dc0 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -986,17 +986,19 @@ void VulkanRenderer::sync_performFlushBarrier() VkPipelineStageFlags dstStage = 0; // src - srcStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - memoryBarrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + srcStage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + srcStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + memoryBarrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - srcStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - memoryBarrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + srcStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + memoryBarrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; // dst - dstStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + dstStage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + dstStage |= 
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; - dstStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + dstStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStage, dstStage, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); From a8ddc1008d1809fe52a97a0b0170580199118e49 Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Wed, 31 Dec 2025 13:13:52 +0100 Subject: [PATCH 10/15] formatting --- src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index f8f62dc0..706ee259 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -987,6 +987,7 @@ void VulkanRenderer::sync_performFlushBarrier() // src srcStage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + srcStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; memoryBarrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; @@ -995,11 +996,13 @@ void VulkanRenderer::sync_performFlushBarrier() // dst dstStage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + memoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + 
dstStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; dstStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStage, dstStage, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); From 6e4d311ffc5a61c00df8e78a8faf1b55567852ab Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Mon, 5 Jan 2026 22:34:25 +0100 Subject: [PATCH 11/15] Also check RAW hazards on descriptor images and mark them as read at the start of render pass --- .../Latte/Renderer/Vulkan/VulkanRenderer.cpp | 8 +++++ .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 2 ++ .../Renderer/Vulkan/VulkanRendererCore.cpp | 36 +++++++++++++++---- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp index 18fd1000..4dfc763f 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp @@ -3222,6 +3222,14 @@ void VulkanRenderer::ProcessDestructionQueue() m_spinlockDestructionQueue.unlock(); } +void VkDescriptorSetInfo::ForEachView(const std::function& fun) +{ + for (auto& view : list_referencedViews) + { + fun(view); + } +} + VkDescriptorSetInfo::~VkDescriptorSetInfo() { for (auto& it : list_referencedViews) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h 
b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index 0995c5af..351d89c6 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -27,6 +27,8 @@ struct VkDescriptorSetInfo { VKRObjectDescriptorSet* m_vkObjDescriptorSet{}; + void ForEachView(const std::function& fun); + ~VkDescriptorSetInfo(); std::vector list_referencedViews; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 706ee259..df235d66 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -1034,18 +1034,29 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) { bool flushRequired = false; - for (auto& tex : fboVk->GetTextures()) - { - LatteTextureVk* texVk = (LatteTextureVk*)tex; - + auto checkImageSyncHazard = [&](LatteTextureVk* texVk, bool isWrite = false) { //RAW / WAW if (texVk->m_vkFlushIndex_write == m_state.currentFlushIndex) flushRequired = true; //WAR - if (texVk->m_vkFlushIndex_read == m_state.currentFlushIndex) + if (isWrite && texVk->m_vkFlushIndex_read == m_state.currentFlushIndex) flushRequired = true; + }; + + for (auto& tex : fboVk->GetTextures()) + checkImageSyncHazard((LatteTextureVk*)tex, true); + + auto checkViewSync = [&](LatteTextureViewVk* view) { + checkImageSyncHazard(view->GetBaseImage()); + }; + + if (m_state.activeVertexDS) + m_state.activeVertexDS->ForEachView(checkViewSync); + if (m_state.activeGeometryDS) + m_state.activeGeometryDS->ForEachView(checkViewSync); + if (m_state.activePixelDS) + m_state.activePixelDS->ForEachView(checkViewSync); - } if (flushRequired) sync_performFlushBarrier(); @@ -1054,6 +1065,19 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) LatteTextureVk* texVk = (LatteTextureVk*)tex; texVk->m_vkFlushIndex_read = m_state.currentFlushIndex; } + + auto updateViewSync = 
[&](LatteTextureViewVk* view) { + view->GetBaseImage()->m_vkFlushIndex_read = m_state.currentFlushIndex; + }; + + if (m_state.activeVertexDS) + m_state.activeVertexDS->ForEachView(updateViewSync); + if (m_state.activeGeometryDS) + m_state.activeGeometryDS->ForEachView(updateViewSync); + if (m_state.activePixelDS) + m_state.activePixelDS->ForEachView(updateViewSync); + + } void VulkanRenderer::sync_RenderPassStoreTextures(CachedFBOVk* fboVk) From cea40e9835781ec3c87efbb0bd825b84759b4765 Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Mon, 5 Jan 2026 22:56:01 +0100 Subject: [PATCH 12/15] simplify stage mask assignment --- .../Renderer/Vulkan/VulkanRendererCore.cpp | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index df235d66..53a7ea01 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -982,29 +982,23 @@ void VulkanRenderer::sync_performFlushBarrier() memoryBarrier.srcAccessMask = 0; memoryBarrier.dstAccessMask = 0; - VkPipelineStageFlags srcStage = 0; - VkPipelineStageFlags dstStage = 0; + VkPipelineStageFlags stages = 0; + + // src & dst + stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; // src - srcStage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - - srcStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; memoryBarrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - - srcStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | 
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; memoryBarrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; // dst - dstStage |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; memoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - dstStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - - dstStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStage, dstStage, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); + vkCmdPipelineBarrier(m_state.currentCommandBuffer, stages, stages, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); performanceMonitor.vk.numDrawBarriersPerFrame.increment(); From cc01133d0642a13274d7bf46536bc8de8482d759 Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Tue, 6 Jan 2026 00:09:33 +0100 Subject: [PATCH 13/15] Remove global memory barrier and only sync images that would actually race --- .../HW/Latte/Renderer/Vulkan/LatteTextureVk.h | 2 + .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 4 +- .../Renderer/Vulkan/VulkanRendererCore.cpp | 74 +++++++++++++------ 3 files changed, 57 insertions(+), 23 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h b/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h index 475ed00e..612e2e70 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h @@ -78,6 +78,8 @@ protected: LatteTextureView* CreateView(Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount) override; public: + uint64 m_vkFlushIndex{}; // 
used to track read-write dependencies within the same renderpass + uint64 m_vkFlushIndex_read{}; uint64 m_vkFlushIndex_write{}; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index 351d89c6..0a329c28 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -412,7 +412,7 @@ private: } // invalidation / flushing - uint64 currentFlushIndex{0}; + uint64 currentFlushIndex{1}; bool requestFlush{ false }; // flush after every draw operation. The renderpass dependencies dont handle dependencies across multiple drawcalls inside a single renderpass // draw sequence @@ -549,7 +549,7 @@ private: void draw_handleSpecialState5(); // draw synchronization helper - void sync_performFlushBarrier(); + void sync_performFlushBarrier(CachedFBOVk* fboVk); bool sync_isInputTexturesSyncRequired(); void sync_RenderPassLoadTextures(CachedFBOVk* fboVk); void sync_RenderPassStoreTextures(CachedFBOVk* fboVk); diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 53a7ea01..ad947865 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -975,30 +975,62 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo* return dsInfo; } -void VulkanRenderer::sync_performFlushBarrier() +void VulkanRenderer::sync_performFlushBarrier(CachedFBOVk* fboVk) { - VkMemoryBarrier memoryBarrier{}; - memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - memoryBarrier.srcAccessMask = 0; - memoryBarrier.dstAccessMask = 0; + size_t barrierCount = 0; + VkImageMemoryBarrier imageMemBarriers[8 + 2 + LATTE_NUM_MAX_TEX_UNITS]{}; + + auto addImgMemBarrierForTexView = [&](LatteTextureViewVk* view) { + VkImageSubresourceRange range = { + view->GetBaseImage()->GetImageAspect(), + (uint32_t)view->firstMip, + 
(uint32_t)view->numMip, + (uint32_t)view->firstSlice, + (uint32_t)view->numSlice}; + auto baseTex = (LatteTextureVk*)view->baseTexture; + const auto idx = barrierCount++; + imageMemBarriers[idx].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemBarriers[idx].image = baseTex->GetImageObj()->m_image; + imageMemBarriers[idx].subresourceRange = range; + imageMemBarriers[idx].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemBarriers[idx].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemBarriers[idx].oldLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemBarriers[idx].newLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemBarriers[idx].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imageMemBarriers[idx].srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + imageMemBarriers[idx].dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemBarriers[idx].dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imageMemBarriers[idx].dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + + view->GetBaseImage()->m_vkFlushIndex = m_state.currentFlushIndex; + }; + + for (auto& i : fboVk->colorBuffer) + { + if (!i.texture) + continue; + addImgMemBarrierForTexView(static_cast(i.texture)); + } + + if (auto i = fboVk->depthBuffer.texture) + { + addImgMemBarrierForTexView(static_cast(i)); + } + + if (m_state.activeVertexDS) + m_state.activeVertexDS->ForEachView(addImgMemBarrierForTexView); + if (m_state.activeGeometryDS) + m_state.activeGeometryDS->ForEachView(addImgMemBarrierForTexView); + if (m_state.activePixelDS) + m_state.activePixelDS->ForEachView(addImgMemBarrierForTexView); VkPipelineStageFlags stages = 0; - // src & dst stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | 
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - // src - memoryBarrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - memoryBarrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - - // dst - memoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - - vkCmdPipelineBarrier(m_state.currentCommandBuffer, stages, stages, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); + vkCmdPipelineBarrier(m_state.currentCommandBuffer, stages, stages, 0, 0, nullptr, 0, nullptr, barrierCount, imageMemBarriers); performanceMonitor.vk.numDrawBarriersPerFrame.increment(); @@ -1014,7 +1046,7 @@ bool VulkanRenderer::sync_isInputTexturesSyncRequired() for (auto& tex : info->list_fboCandidates) { tex->m_vkFlushIndex_read = m_state.currentFlushIndex; - if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex) + if (tex->m_vkFlushIndex < tex->m_vkFlushIndex_write) required = true; } }; @@ -1030,10 +1062,10 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) auto checkImageSyncHazard = [&](LatteTextureVk* texVk, bool isWrite = false) { //RAW / WAW - if (texVk->m_vkFlushIndex_write == m_state.currentFlushIndex) + if (texVk->m_vkFlushIndex < texVk->m_vkFlushIndex_write) flushRequired = true; //WAR - if (isWrite && texVk->m_vkFlushIndex_read == m_state.currentFlushIndex) + if (isWrite && texVk->m_vkFlushIndex < texVk->m_vkFlushIndex_read) flushRequired = true; }; @@ -1052,7 +1084,7 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) m_state.activePixelDS->ForEachView(checkViewSync); if (flushRequired) - sync_performFlushBarrier(); + sync_performFlushBarrier(fboVk); for (auto& tex : fboVk->GetTextures()) { @@ -1165,7 +1197,7 @@ void VulkanRenderer::draw_setRenderPass() sync_RenderPassLoadTextures(fboVk); 
if (sync_isInputTexturesSyncRequired()) - sync_performFlushBarrier(); + sync_performFlushBarrier(fboVk); if (m_featureControl.deviceExtensions.dynamic_rendering) { From 753a667cd0060a9de4d40de10161850845e9b421 Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Tue, 6 Jan 2026 01:01:48 +0100 Subject: [PATCH 14/15] Remove all except one global memory barrier --- .../Renderer/Vulkan/TextureReadbackVk.cpp | 4 --- .../Latte/Renderer/Vulkan/VulkanRenderer.cpp | 27 +++++++++--------- .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 28 ------------------- 3 files changed, 14 insertions(+), 45 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/TextureReadbackVk.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/TextureReadbackVk.cpp index bce23b59..1fe90abe 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/TextureReadbackVk.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/TextureReadbackVk.cpp @@ -124,12 +124,8 @@ void LatteTextureReadbackInfoVk::StartTransfer() renderer->barrier_image(baseTexture, region.imageSubresource, VK_IMAGE_LAYOUT_GENERAL); - renderer->barrier_sequentializeTransfer(); - vkCmdCopyImageToBuffer(renderer->getCurrentCommandBuffer(), baseTexture->GetImageObj()->m_image, VK_IMAGE_LAYOUT_GENERAL, m_buffer, 1, &region); - renderer->barrier_sequentializeTransfer(); - renderer->barrier_image(baseTexture, region.imageSubresource, VK_IMAGE_LAYOUT_GENERAL); // make sure transfer is finished before image is modified renderer->barrier_bufferRange(m_buffer, m_buffer_offset, m_image_size); // make sure transfer is finished before result is read diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp index 4dfc763f..42571aa5 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp @@ -3709,9 +3709,9 @@ void VulkanRenderer::bufferCache_upload(uint8* buffer, sint32 size, uint32 buffe
vkMemAllocator.FlushReservation(uploadResv); - barrier_bufferRange( - uploadResv.vkBuffer, uploadResv.bufferOffset, uploadResv.size, // make sure any in-flight transfers are completed + barrier_bufferRange( + uploadResv.vkBuffer, uploadResv.bufferOffset, uploadResv.size, // make sure source data is visible m_bufferCache, bufferOffset, size); // make sure all reads are completed before we overwrite the data VkBufferCopy region; @@ -3720,7 +3720,7 @@ void VulkanRenderer::bufferCache_upload(uint8* buffer, sint32 size, uint32 buffe region.size = size; vkCmdCopyBuffer(m_state.currentCommandBuffer, uploadResv.vkBuffer, m_bufferCache, 1, &region); - barrier_sequentializeTransfer(); + barrier_bufferRange(m_bufferCache, bufferOffset, size); } void VulkanRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size) @@ -3728,7 +3728,10 @@ void VulkanRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 cemu_assert_debug(!m_useHostMemoryForCache); draw_endRenderPass(); - barrier_sequentializeTransfer(); + barrier_bufferRange( + m_bufferCache, srcOffset, size, // make sure source data is visible + m_bufferCache, dstOffset, size); // make sure all reads are completed before we overwrite the data bool isOverlapping = (srcOffset + size) > dstOffset && (srcOffset) < (dstOffset + size); cemu_assert_debug(!isOverlapping); @@ -3739,7 +3742,7 @@ void VulkanRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 bufferCopy.size = size; vkCmdCopyBuffer(m_state.currentCommandBuffer, m_bufferCache, m_bufferCache, 1, &bufferCopy); - barrier_sequentializeTransfer(); + barrier_bufferRange(m_bufferCache, dstOffset, size); } void VulkanRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size) @@ -3756,12 +3759,10 @@ void VulkanRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uin else dstBuffer = m_bufferCache; - barrier_bufferRange( - m_xfbRingBuffer, srcOffset, size, // wait for all writes to
finish - dstBuffer, dstOffset, size); // wait for all reads to finish - - barrier_sequentializeTransfer(); + barrier_bufferRange( + m_xfbRingBuffer, srcOffset, size, // make sure source data is visible + dstBuffer, dstOffset, size); // make sure all reads are completed before we overwrite the data VkBufferCopy bufferCopy{}; bufferCopy.srcOffset = srcOffset; @@ -3769,7 +3770,7 @@ void VulkanRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uin bufferCopy.size = size; vkCmdCopyBuffer(m_state.currentCommandBuffer, m_xfbRingBuffer, dstBuffer, 1, &bufferCopy); - barrier_sequentializeTransfer(); + barrier_bufferRange(dstBuffer, dstOffset, size); // make sure writes are visible to host } void VulkanRenderer::AppendOverlayDebugInfo() diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index 0a329c28..c819134f 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -871,34 +871,6 @@ private: performanceMonitor.vk.numDrawBarriersPerFrame.increment(); } - void barrier_sequentializeTransfer() - { - VkMemoryBarrier memBarrier{}; - memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - memBarrier.pNext = nullptr; - - VkPipelineStageFlags srcStages = VK_PIPELINE_STAGE_TRANSFER_BIT; - VkPipelineStageFlags dstStages = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - - memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT; - memBarrier.dstAccessMask = 0; - - memBarrier.srcAccessMask |= (VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT); - memBarrier.dstAccessMask |= (VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT); - - vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStages, dstStages, 0, 1, &memBarrier, 0, nullptr, 0, nullptr); - performanceMonitor.vk.numDrawBarriersPerFrame.increment(); - } - - void barrier_sequentializeCommand() - { - VkPipelineStageFlags srcStages = 
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - VkPipelineStageFlags dstStages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - - vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStages, dstStages, 0, 0, nullptr, 0, nullptr, 0, nullptr); - performanceMonitor.vk.numDrawBarriersPerFrame.increment(); - } - template void barrier_image(VkImage imageVk, VkImageSubresourceRange& subresourceRange, VkImageLayout oldLayout, VkImageLayout newLayout) { From 3481c93efc3ab851658206ab8612d43a1587ba1a Mon Sep 17 00:00:00 2001 From: goeiecool9999 <7033575+goeiecool9999@users.noreply.github.com> Date: Tue, 6 Jan 2026 01:37:40 +0100 Subject: [PATCH 15/15] Don't just check FBO candidates for RAW, but all referenced DS views --- .../Renderer/Vulkan/VulkanRendererCore.cpp | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index ad947865..26df6c7a 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -1040,19 +1040,18 @@ void VulkanRenderer::sync_performFlushBarrier(CachedFBOVk* fboVk) bool VulkanRenderer::sync_isInputTexturesSyncRequired() { bool required = false; - auto checkSync = [&](const VkDescriptorSetInfo* info) { - if (!info) - return; - for (auto& tex : info->list_fboCandidates) - { - tex->m_vkFlushIndex_read = m_state.currentFlushIndex; - if (tex->m_vkFlushIndex < tex->m_vkFlushIndex_write) - required = true; - } + auto checkSync = [&](LatteTextureViewVk* texViewVk) { + LatteTextureVk* texVk = texViewVk->GetBaseImage(); + texVk->m_vkFlushIndex_read = m_state.currentFlushIndex; + if (texVk->m_vkFlushIndex < texVk->m_vkFlushIndex_write) + required = true; }; - checkSync(m_state.activeVertexDS); - checkSync(m_state.activeGeometryDS); - checkSync(m_state.activePixelDS); + if (m_state.activeVertexDS) + m_state.activeVertexDS->ForEachView(checkSync); + if 
(m_state.activeGeometryDS) + m_state.activeGeometryDS->ForEachView(checkSync); + if (m_state.activePixelDS) + m_state.activePixelDS->ForEachView(checkSync); return required; }