From 8cd5ce102fceb244cb5c3e9ed46d375e542ab167 Mon Sep 17 00:00:00 2001
From: Exzap <13877693+Exzap@users.noreply.github.com>
Date: Mon, 9 Feb 2026 04:11:10 +0100
Subject: [PATCH] Vulkan: Properly shut down pipeline compile threads + code
 cleanup

This fixes an issue where the Vulkan renderer would hang on shutdown because resources were still in use.
---
 src/Cafe/HW/Latte/Core/LatteBufferCache.cpp   |  3 +-
 .../LatteDecompilerInternal.h                 |  4 -
 .../Vulkan/VulkanPipelineCompiler.cpp         | 76 ++++++++++++++++++-
 .../Renderer/Vulkan/VulkanPipelineCompiler.h  |  7 ++
 .../Latte/Renderer/Vulkan/VulkanRenderer.cpp  | 10 ++-
 .../Renderer/Vulkan/VulkanRendererCore.cpp    | 67 +---------------
 6 files changed, 90 insertions(+), 77 deletions(-)

diff --git a/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp b/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp
index 6c36ddd3..e466bf3a 100644
--- a/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp
+++ b/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp
@@ -290,7 +290,6 @@ public:
 	{
 		if (m_hasCacheAlloc)
 		{
-			cemu_assert_debug(isInUse() == false);
 			g_gpuBufferHeap->freeOffset(m_cacheOffset);
 			m_hasCacheAlloc = false;
 		}
@@ -836,6 +835,8 @@ public:
 				continue;
 			}
 			// delete range
+			if (node->m_hasCacheAlloc)
+				cemu_assert_debug(!node->isInUse());
 			node->ReleaseCacheMemoryImmediately();
 			LatteBufferCache_removeSingleNodeFromTree(node);
 			delete node;
diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h
index e756ce17..4c6b158a 100644
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h
@@ -115,11 +115,7 @@ struct LatteDecompilerCFInstruction
 		cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we haven't accidentally added the wrong instruction type
 	}
 
-#if BOOST_OS_WINDOWS
-	LatteDecompilerCFInstruction(LatteDecompilerCFInstruction& mE) = default;
-#else
 	LatteDecompilerCFInstruction(const LatteDecompilerCFInstruction& mE) = default;
-#endif
 	LatteDecompilerCFInstruction(LatteDecompilerCFInstruction&& mE) = default;
 
 	LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default;
diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp
index eb455887..795d11c3 100644
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp
@@ -6,11 +6,10 @@
 #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
 #include "Cafe/OS/libs/gx2/GX2.h"
 #include "config/ActiveSettings.h"
+#include "util/helpers/helpers.h"
 #include "util/helpers/Serializer.h"
 #include "Cafe/HW/Latte/Common/RegisterSerializer.h"
 
-std::mutex s_nvidiaWorkaround;
-
 /* rects emulation */
 
 void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
@@ -923,7 +922,6 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
 	if (result != VK_SUCCESS)
 	{
 		cemuLog_log(LogType::Force, "Failed to create pipeline layout: {}", result);
-		s_nvidiaWorkaround.unlock();
 		return false;
 	}
 
@@ -941,7 +939,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
 
 	// increment ref counter for vkrObjPipeline and renderpass object to make sure they dont get released while we are using them
 	m_vkrObjPipeline->incRef();
-	renderPassObj->incRef();
+	m_renderPassObj->incRef();
 	return true;
 }
 
@@ -1121,3 +1119,73 @@ bool PipelineCompiler::CalcRobustBufferAccessRequirement(LatteDecompilerShader*
 	}
 	return requiresRobustBufferAcces;
 }
+
+static std::vector<std::thread> s_compileThreads; // worker threads, filled by CompileThreadPool_Start() and joined + cleared by CompileThreadPool_Stop()
+static std::atomic_bool s_compileThreadsShutdownSignal{}; // set by CompileThreadPool_Stop(), checked by every worker before it pops the next request
+static ConcurrentQueue<PipelineCompiler*> s_pipelineCompileRequests; // pending compile requests, nullptr entries are pushed during shutdown to wake up waiting workers
+
+// worker thread entry point, compiles queued pipelines until the shutdown signal is set
+static void compilePipeline_thread(sint32 threadIndex)
+{
+	SetThreadName("compilePl");
+#ifdef _WIN32
+	// worker 0 stays at normal priority so compilation is never starved, the rest run lower to not starve the main cpu and render threads
+	if (threadIndex != 0)
+		SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
+#endif
+	while (!s_compileThreadsShutdownSignal)
+	{
+		if (PipelineCompiler* job = s_pipelineCompileRequests.pop()) // nullptr is only a wake-up, nothing to compile
+		{
+			job->Compile(true, false, true);
+			delete job; // the worker deletes the request after compiling it
+		}
+	}
+}
+
+// spawns the pipeline compile workers, expects that no workers exist yet (asserted in debug builds)
+void PipelineCompiler::CompileThreadPool_Start()
+{
+	cemu_assert_debug(s_compileThreads.empty());
+	s_compileThreadsShutdownSignal = false;
+
+	// one worker for up to 2 physical cores, otherwise 2 workers plus one additionally for every extra core above 3
+	const uint32 physicalCores = GetPhysicalCoreCount();
+	uint32 workerCount = 1;
+	if (physicalCores > 2)
+		workerCount = 2 + (physicalCores - 3);
+	// cap at 8
+	if (workerCount > 8u)
+		workerCount = 8u;
+
+	// the loop counter doubles as thread index, index 0 is the worker that keeps normal priority on Windows
+	for (uint32_t workerIndex = 0; workerIndex != workerCount; ++workerIndex)
+		s_compileThreads.emplace_back(compilePipeline_thread, workerIndex);
+}
+
+// signals all compile workers to exit, joins them and discards whatever is left in the request queue
+void PipelineCompiler::CompileThreadPool_Stop()
+{
+	s_compileThreadsShutdownSignal = true;
+	// push one empty workload for each thread
+	// this way we can make sure that each waiting thread is woken up to see the shutdown signal
+	for (size_t i = 0; i < s_compileThreads.size(); i++)
+		s_pipelineCompileRequests.push(nullptr);
+	for (auto& thread : s_compileThreads)
+		thread.join();
+	s_compileThreads.clear();
+	// workers that were busy compiling exit without consuming their wake-up entry, so the queue can
+	// still hold nullptr entries in addition to requests that were never started
+	// drain it completely, otherwise stale entries would survive into the next CompileThreadPool_Start()
+	while (!s_pipelineCompileRequests.empty())
+	{
+		// nullptr entries need no special casing since deleting a null pointer is a no-op
+		PipelineCompiler* pendingRequest = s_pipelineCompileRequests.pop();
+		delete pendingRequest;
+	}
+}
+
+void PipelineCompiler::CompileThreadPool_QueueCompilation(PipelineCompiler* v)
+{
+	s_pipelineCompileRequests.push(v); // picked up by a compilePipeline_thread worker, which compiles the request and then deletes it
+}
\ No newline at end of file
diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h
index 7297049e..f4240a53 100644
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h
@@ -1,4 +1,6 @@
 #pragma once
+#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
+#include "VKRBase.h"
 
 class PipelineCompiler : public VKRMoveableRefCounter
 {
@@ -43,6 +45,11 @@ public:
 
 	static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader);
 
+	// API for thread pool
+	static void CompileThreadPool_Start();
+	static void CompileThreadPool_Stop();
+	static void CompileThreadPool_QueueCompilation(PipelineCompiler* v);
+
 	VkPipelineLayout m_pipelineLayout;
 	VKRObjectRenderPass* m_renderPassObj{};
 	bool m_requestRobustBufferAccess{false};
diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp
index 251fd69c..18fd1000 100644
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp
@@ -4,6 +4,7 @@
 #include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
 #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanTextureReadback.h"
 #include "Cafe/HW/Latte/Renderer/Vulkan/CocoaSurface.h"
+#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h"
 
 #include "Cafe/HW/Latte/Core/LatteBufferCache.h"
 #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
@@ -653,7 +654,8 @@ VulkanRenderer::VulkanRenderer()
 		m_occlusionQueries.list_availableQueryIndices.emplace_back(i);
 
 	// start compilation threads
-	RendererShaderVk::Init();
+	RendererShaderVk::Init(); // shaders
+	PipelineCompiler::CompileThreadPool_Start(); // pipelines
 }
 
 VulkanRenderer::~VulkanRenderer()
@@ -661,8 +663,6 @@ VulkanRenderer::~VulkanRenderer()
 	SubmitCommandBuffer();
 	WaitDeviceIdle();
 	WaitCommandBufferFinished(GetCurrentCommandBufferId());
-	// make sure compilation threads have been shut down
-	RendererShaderVk::Shutdown();
 	// shut down pipeline save thread
 	m_destructionRequested = true;
 	m_pipeline_cache_semaphore.notify();
@@ -1666,6 +1666,10 @@ void VulkanRenderer::Shutdown()
 {
 	SubmitCommandBuffer();
 	WaitDeviceIdle();
+	// stop compilation threads
+	RendererShaderVk::Shutdown();
+	PipelineCompiler::CompileThreadPool_Stop();
+
 	DeleteFontTextures();
 	Renderer::Shutdown();
 	if (m_imguiRenderPass != VK_NULL_HANDLE)
diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp
index 23fb910c..a6814186 100644
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp
@@ -183,63 +183,6 @@ void VulkanRenderer::unregisterGraphicsPipeline(PipelineInfo* pipelineInfo)
 	}
 }
 
-bool g_compilePipelineThreadInit{false};
-std::mutex g_compilePipelineMutex;
-std::condition_variable g_compilePipelineCondVar;
-std::queue<PipelineCompiler*> g_compilePipelineRequests;
-
-void compilePipeline_thread(sint32 threadIndex)
-{
-	SetThreadName("compilePl");
-#ifdef _WIN32
-	// one thread runs at normal priority while the others run at lower priority
-	if(threadIndex != 0)
-		SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
-#endif
-	while (true)
-	{
-		std::unique_lock lock(g_compilePipelineMutex);
-		while (g_compilePipelineRequests.empty())
-			g_compilePipelineCondVar.wait(lock);
-
-		PipelineCompiler* request = g_compilePipelineRequests.front();
-
-		g_compilePipelineRequests.pop();
-
-		lock.unlock();
-
-		request->Compile(true, false, true);
-		delete request;
-	}
-}
-
-void compilePipelineThread_init()
-{
-	uint32 numCompileThreads;
-
-	uint32 cpuCoreCount = GetPhysicalCoreCount();
-	if (cpuCoreCount <= 2)
-		numCompileThreads = 1;
-	else
-		numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
-
-	numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
-
-	for (uint32_t i = 0; i < numCompileThreads; i++)
-	{
-		std::thread compileThread(compilePipeline_thread, i);
-		compileThread.detach();
-	}
-}
-
-void compilePipelineThread_queue(PipelineCompiler* v)
-{
-	std::unique_lock lock(g_compilePipelineMutex);
-	g_compilePipelineRequests.push(std::move(v));
-	lock.unlock();
-	g_compilePipelineCondVar.notify_one();
-}
-
 // make a guess if a pipeline is not essential
 // non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics
 bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
@@ -270,12 +213,6 @@ bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
 // create graphics pipeline for current state
 PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
 {
-	if (!g_compilePipelineThreadInit)
-	{
-		compilePipelineThread_init();
-		g_compilePipelineThreadInit = true;
-	}
-
 	const auto fetchShader = LatteSHRC_GetActiveFetchShader();
 	const auto vertexShader = LatteSHRC_GetActiveVertexShader();
 	const auto geometryShader = LatteSHRC_GetActiveGeometryShader();
@@ -313,7 +250,7 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
 		if (pipelineCompiler->Compile(false, true, true) == false)
 		{
 			// shaders or pipeline not cached -> asynchronous compilation
-			compilePipelineThread_queue(pipelineCompiler);
+			PipelineCompiler::CompileThreadPool_QueueCompilation(pipelineCompiler);
 		}
 		else
 		{
@@ -379,7 +316,7 @@ float s_vkUniformData[512 * 4];
 uint32 VulkanRenderer::uniformData_uploadUniformDataBufferGetOffset(std::span<uint8> data)
 {
 	const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1;
-	const uint32 uniformSize = (data.size() + bufferAlignmentM1) & ~bufferAlignmentM1;
+	const uint32 uniformSize = ((uint32)data.size() + bufferAlignmentM1) & ~bufferAlignmentM1;
 
 	auto waitWhileCondition = [&](std::function<bool()> condition) {
 		while (condition())