From 8cd5ce102fceb244cb5c3e9ed46d375e542ab167 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Mon, 9 Feb 2026 04:11:10 +0100 Subject: [PATCH] Vulkan: Properly shutdown pipeline compile threads + code cleanup This fixes an issue where the Vulkan renderer would hang up on shutdown due to resources still being in use --- src/Cafe/HW/Latte/Core/LatteBufferCache.cpp | 3 +- .../LatteDecompilerInternal.h | 4 - .../Vulkan/VulkanPipelineCompiler.cpp | 76 ++++++++++++++++++- .../Renderer/Vulkan/VulkanPipelineCompiler.h | 7 ++ .../Latte/Renderer/Vulkan/VulkanRenderer.cpp | 10 ++- .../Renderer/Vulkan/VulkanRendererCore.cpp | 67 +--------------- 6 files changed, 90 insertions(+), 77 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp b/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp index 6c36ddd3..e466bf3a 100644 --- a/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp +++ b/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp @@ -290,7 +290,6 @@ public: { if (m_hasCacheAlloc) { - cemu_assert_debug(isInUse() == false); g_gpuBufferHeap->freeOffset(m_cacheOffset); m_hasCacheAlloc = false; } @@ -836,6 +835,8 @@ public: continue; } // delete range + if (node->m_hasCacheAlloc) + cemu_assert_debug(!node->isInUse()); node->ReleaseCacheMemoryImmediately(); LatteBufferCache_removeSingleNodeFromTree(node); delete node; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h index e756ce17..4c6b158a 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h @@ -115,11 +115,7 @@ struct LatteDecompilerCFInstruction cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we haven't accidentally added the wrong instruction type } -#if BOOST_OS_WINDOWS - LatteDecompilerCFInstruction(LatteDecompilerCFInstruction& mE) = default; -#else LatteDecompilerCFInstruction(const LatteDecompilerCFInstruction& mE) = default; -#endif LatteDecompilerCFInstruction(LatteDecompilerCFInstruction&& mE) = default; LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp index eb455887..795d11c3 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp @@ -6,11 +6,10 @@ #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h" #include "Cafe/OS/libs/gx2/GX2.h" #include "config/ActiveSettings.h" +#include "util/helpers/helpers.h" #include "util/helpers/Serializer.h" #include "Cafe/HW/Latte/Common/RegisterSerializer.h" -std::mutex s_nvidiaWorkaround; - /* rects emulation */ void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister) @@ -923,7 +922,6 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const if (result != VK_SUCCESS) { cemuLog_log(LogType::Force, "Failed to create pipeline layout: {}", result); - s_nvidiaWorkaround.unlock(); return false; } @@ -941,7 +939,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const // increment ref counter for vkrObjPipeline and renderpass object to make sure they dont get released while we are using them m_vkrObjPipeline->incRef(); - renderPassObj->incRef(); + m_renderPassObj->incRef(); return true; } @@ -1121,3 +1119,73 @@ bool PipelineCompiler::CalcRobustBufferAccessRequirement(LatteDecompilerShader* } return requiresRobustBufferAcces; } + +static std::vector s_compileThreads; +static std::atomic_bool s_compileThreadsShutdownSignal{}; +static ConcurrentQueue s_pipelineCompileRequests; + +static void compilePipeline_thread(sint32 threadIndex) +{ + SetThreadName("compilePl"); +#ifdef _WIN32 + // to avoid starving the main cpu and render threads the pipeline compile threads run at lower priority + // except for one thread which we always run at normal priority to prevent the opposite scenario where all compile threads are starved + if(threadIndex != 0) + SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL); +#endif + while (!s_compileThreadsShutdownSignal) + { + PipelineCompiler* request = s_pipelineCompileRequests.pop(); + if (!request) + continue; + request->Compile(true, false, true); + delete request; + } +} + +void PipelineCompiler::CompileThreadPool_Start() +{ + cemu_assert_debug(s_compileThreads.empty()); + s_compileThreadsShutdownSignal = false; + uint32 numCompileThreads; + + uint32 cpuCoreCount = GetPhysicalCoreCount(); + if (cpuCoreCount <= 2) + numCompileThreads = 1; + else + numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3 + + numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8 + + for (uint32_t i = 0; i < numCompileThreads; i++) + { + s_compileThreads.emplace_back(compilePipeline_thread, i); + } +} + +void PipelineCompiler::CompileThreadPool_Stop() +{ + s_compileThreadsShutdownSignal = true; + { + // push one empty workload for each thread + // this way we can make sure that each waiting thread is woken up to see the shutdown signal + for (auto& thread : s_compileThreads) + s_pipelineCompileRequests.push(nullptr); + } + for (auto& thread : s_compileThreads) + thread.join(); + while (!s_pipelineCompileRequests.empty()) + { + PipelineCompiler* pipelineCompiler = s_pipelineCompileRequests.pop(); + if (!pipelineCompiler) + break; + if (pipelineCompiler) + delete pipelineCompiler; + } + s_compileThreads.clear(); +} + +void PipelineCompiler::CompileThreadPool_QueueCompilation(PipelineCompiler* v) +{ + s_pipelineCompileRequests.push(v); +} \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h index 7297049e..f4240a53 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h @@ -1,4 +1,6 @@ #pragma once +#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h" +#include "VKRBase.h" class PipelineCompiler : public VKRMoveableRefCounter { @@ -43,6 +45,11 @@ public: static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader); + // API for thread pool + static void CompileThreadPool_Start(); + static void CompileThreadPool_Stop(); + static void CompileThreadPool_QueueCompilation(PipelineCompiler* v); + VkPipelineLayout m_pipelineLayout; VKRObjectRenderPass* m_renderPassObj{}; bool m_requestRobustBufferAccess{false}; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp index 251fd69c..18fd1000 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp @@ -4,6 +4,7 @@ #include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanTextureReadback.h" #include "Cafe/HW/Latte/Renderer/Vulkan/CocoaSurface.h" +#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h" #include "Cafe/HW/Latte/Core/LatteBufferCache.h" #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h" @@ -653,7 +654,8 @@ VulkanRenderer::VulkanRenderer() m_occlusionQueries.list_availableQueryIndices.emplace_back(i); // start compilation threads - RendererShaderVk::Init(); + RendererShaderVk::Init(); // shaders + PipelineCompiler::CompileThreadPool_Start(); // pipelines } VulkanRenderer::~VulkanRenderer() @@ -661,8 +663,6 @@ VulkanRenderer::~VulkanRenderer() SubmitCommandBuffer(); WaitDeviceIdle(); WaitCommandBufferFinished(GetCurrentCommandBufferId()); - // make sure compilation threads have been shut down - RendererShaderVk::Shutdown(); // shut down pipeline save thread m_destructionRequested = true; m_pipeline_cache_semaphore.notify(); @@ -1666,6 +1666,10 @@ void VulkanRenderer::Shutdown() { SubmitCommandBuffer(); WaitDeviceIdle(); + // stop compilation threads + RendererShaderVk::Shutdown(); + PipelineCompiler::CompileThreadPool_Stop(); + DeleteFontTextures(); Renderer::Shutdown(); if (m_imguiRenderPass != VK_NULL_HANDLE) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 23fb910c..a6814186 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -183,63 +183,6 @@ void VulkanRenderer::unregisterGraphicsPipeline(PipelineInfo* pipelineInfo) } } -bool g_compilePipelineThreadInit{false}; -std::mutex g_compilePipelineMutex; -std::condition_variable g_compilePipelineCondVar; -std::queue g_compilePipelineRequests; - -void compilePipeline_thread(sint32 threadIndex) -{ - SetThreadName("compilePl"); -#ifdef _WIN32 - // one thread runs at normal priority while the others run at lower priority - if(threadIndex != 0) - SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL); -#endif - while (true) - { - std::unique_lock lock(g_compilePipelineMutex); - while (g_compilePipelineRequests.empty()) - g_compilePipelineCondVar.wait(lock); - - PipelineCompiler* request = g_compilePipelineRequests.front(); - - g_compilePipelineRequests.pop(); - - lock.unlock(); - - request->Compile(true, false, true); - delete request; - } -} - -void compilePipelineThread_init() -{ - uint32 numCompileThreads; - - uint32 cpuCoreCount = GetPhysicalCoreCount(); - if (cpuCoreCount <= 2) - numCompileThreads = 1; - else - numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3 - - numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8 - - for (uint32_t i = 0; i < numCompileThreads; i++) - { - std::thread compileThread(compilePipeline_thread, i); - compileThread.detach(); - } -} - -void compilePipelineThread_queue(PipelineCompiler* v) -{ - std::unique_lock lock(g_compilePipelineMutex); - g_compilePipelineRequests.push(std::move(v)); - lock.unlock(); - g_compilePipelineCondVar.notify_one(); -} - // make a guess if a pipeline is not essential // non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices) @@ -270,12 +213,6 @@ bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices) // create graphics pipeline for current state PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount) { - if (!g_compilePipelineThreadInit) - { - compilePipelineThread_init(); - g_compilePipelineThreadInit = true; - } - const auto fetchShader = LatteSHRC_GetActiveFetchShader(); const auto vertexShader = LatteSHRC_GetActiveVertexShader(); const auto geometryShader = LatteSHRC_GetActiveGeometryShader(); @@ -313,7 +250,7 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount) if (pipelineCompiler->Compile(false, true, true) == false) { // shaders or pipeline not cached -> asynchronous compilation - compilePipelineThread_queue(pipelineCompiler); + PipelineCompiler::CompileThreadPool_QueueCompilation(pipelineCompiler); } else { @@ -379,7 +316,7 @@ float s_vkUniformData[512 * 4]; uint32 VulkanRenderer::uniformData_uploadUniformDataBufferGetOffset(std::span data) { const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1; - const uint32 uniformSize = (data.size() + bufferAlignmentM1) & ~bufferAlignmentM1; + const uint32 uniformSize = ((uint32)data.size() + bufferAlignmentM1) & ~bufferAlignmentM1; auto waitWhileCondition = [&](std::function condition) { while (condition())