Vulkan: Properly shut down pipeline compile threads + code cleanup

This fixes an issue where the Vulkan renderer would hang on shutdown because resources were still in use.
2026-06-06 22:54:59 -06:00 · 2026-02-09 04:11:10 +01:00 · 2026-02-09 04:11:10 +01:00 · 8cd5ce102f
commit 8cd5ce102f
parent 2c03ac3217
6 changed files with 90 additions and 77 deletions
--- a/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp
+++ b/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp
@ -290,7 +290,6 @@ public:
 	{
 		if (m_hasCacheAlloc)
 		{
 			cemu_assert_debug(isInUse() == false);
 			g_gpuBufferHeap->freeOffset(m_cacheOffset);
 			m_hasCacheAlloc = false;
 		}
@ -836,6 +835,8 @@ public:
 				continue;
 			}
 			// delete range
 			if (node->m_hasCacheAlloc)
 				cemu_assert_debug(!node->isInUse());
 			node->ReleaseCacheMemoryImmediately();
 			LatteBufferCache_removeSingleNodeFromTree(node);
 			delete node;
--- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h
+++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h
@ -115,11 +115,7 @@ struct LatteDecompilerCFInstruction
 		cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we haven't accidentally added the wrong instruction type
 	}
 #if BOOST_OS_WINDOWS
 	LatteDecompilerCFInstruction(LatteDecompilerCFInstruction& mE) = default;
 #else
 	LatteDecompilerCFInstruction(const LatteDecompilerCFInstruction& mE) = default;
 #endif
 	LatteDecompilerCFInstruction(LatteDecompilerCFInstruction&& mE) = default;
 	LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default;
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp
@ -6,11 +6,10 @@
 #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
 #include "Cafe/OS/libs/gx2/GX2.h"
 #include "config/ActiveSettings.h"
 #include "util/helpers/helpers.h"
 #include "util/helpers/Serializer.h"
 #include "Cafe/HW/Latte/Common/RegisterSerializer.h"
 std::mutex s_nvidiaWorkaround;
 /* rects emulation */
 void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
@ -923,7 +922,6 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
 	if (result != VK_SUCCESS)
 	{
 		cemuLog_log(LogType::Force, "Failed to create pipeline layout: {}", result);
 		s_nvidiaWorkaround.unlock();
 		return false;
 	}
@ -941,7 +939,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
 	// increment ref counter for vkrObjPipeline and renderpass object to make sure they dont get released while we are using them
 	m_vkrObjPipeline->incRef();
-	renderPassObj->incRef();
+	m_renderPassObj->incRef();
 	return true;
 }
@ -1121,3 +1119,73 @@ bool PipelineCompiler::CalcRobustBufferAccessRequirement(LatteDecompilerShader*
 	}
 	return requiresRobustBufferAcces;
 }
 static std::vector<std::thread> s_compileThreads;
 static std::atomic_bool s_compileThreadsShutdownSignal{};
 static ConcurrentQueue<PipelineCompiler*> s_pipelineCompileRequests;
 // Entry point of a pipeline compile worker thread.
 // threadIndex is the index of this worker within the pool; on Windows it selects the thread priority.
 // The worker keeps taking requests from s_pipelineCompileRequests until the shutdown signal is observed.
 static void compilePipeline_thread(sint32 threadIndex)
 {
 	SetThreadName("compilePl");
 #ifdef _WIN32
 	// worker 0 stays at normal priority so that compilation can never be starved completely
 	// all other workers are lowered so they do not starve the main cpu and render threads
 	const bool keepsNormalPriority = (threadIndex == 0);
 	if (!keepsNormalPriority)
 		SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
 #endif
 	for (;;)
 	{
 		// the signal is only checked between requests, a running compilation is always finished
 		if (s_compileThreadsShutdownSignal)
 			break;
 		// NOTE(review): pop() presumably blocks while the queue is empty - confirm against ConcurrentQueue
 		PipelineCompiler* const job = s_pipelineCompileRequests.pop();
 		if (job != nullptr)
 		{
 			job->Compile(true, false, true);
 			delete job; // the worker owns the request once it has been dequeued
 		}
 		// a nullptr entry is only a wake-up marker, fall through and re-check the shutdown signal
 	}
 }
 void PipelineCompiler::CompileThreadPool_Start()
 {
 	cemu_assert_debug(s_compileThreads.empty());
 	s_compileThreadsShutdownSignal = false;
 	uint32 numCompileThreads;
 	uint32 cpuCoreCount = GetPhysicalCoreCount();
 	if (cpuCoreCount <= 2)
 		numCompileThreads = 1;
 	else
 		numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
 	numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
 	for (uint32_t i = 0; i < numCompileThreads; i++)
 	{
 		s_compileThreads.emplace_back(compilePipeline_thread, i);
 	}
 }
 // Stops all pipeline compile worker threads and discards every request that was not processed.
 // Blocks until each worker has finished its current compilation and exited.
 // After this returns the request queue is empty, so a later CompileThreadPool_Start() begins from a clean state.
 void PipelineCompiler::CompileThreadPool_Stop()
 {
 	s_compileThreadsShutdownSignal = true;
 	// push one empty workload for each thread
 	// this way we can make sure that each waiting thread is woken up to see the shutdown signal
 	for (size_t i = 0; i < s_compileThreads.size(); i++)
 		s_pipelineCompileRequests.push(nullptr);
 	for (auto& thread : s_compileThreads)
 		thread.join();
 	// all workers are gone, so this thread is the only consumer left and pop() cannot find the queue empty here
 	// drain the whole queue: a worker which was busy compiling exits without consuming its wake-up marker,
 	// so nullptr entries can be left over and may sit in front of real requests that were queued late
 	// stopping at the first nullptr would leak those requests and leave stale markers for the next pool start
 	while (!s_pipelineCompileRequests.empty())
 	{
 		PipelineCompiler* pipelineCompiler = s_pipelineCompileRequests.pop();
 		delete pipelineCompiler; // deleting a nullptr marker is a no-op
 	}
 	s_compileThreads.clear();
 }
 // Hands a pipeline compiler over to the compile thread pool for asynchronous compilation.
 // The request is appended to s_pipelineCompileRequests; the worker thread that dequeues it
 // calls Compile() on it and deletes it afterwards, so the caller gives up ownership of v.
 void PipelineCompiler::CompileThreadPool_QueueCompilation(PipelineCompiler* v)
 {
 	s_pipelineCompileRequests.push(v);
 }
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h
@ -1,4 +1,6 @@
 #pragma once
 #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
 #include "VKRBase.h"
 class PipelineCompiler : public VKRMoveableRefCounter
 {
@ -43,6 +45,11 @@ public:
 	static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader);
 	// API for thread pool
 	static void CompileThreadPool_Start();
 	static void CompileThreadPool_Stop();
 	static void CompileThreadPool_QueueCompilation(PipelineCompiler* v);
 	VkPipelineLayout m_pipelineLayout;
 	VKRObjectRenderPass* m_renderPassObj{};
 	bool m_requestRobustBufferAccess{false};
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp
@ -4,6 +4,7 @@
 #include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
 #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanTextureReadback.h"
 #include "Cafe/HW/Latte/Renderer/Vulkan/CocoaSurface.h"
 #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h"
 #include "Cafe/HW/Latte/Core/LatteBufferCache.h"
 #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
@ -653,7 +654,8 @@ VulkanRenderer::VulkanRenderer()
 		m_occlusionQueries.list_availableQueryIndices.emplace_back(i);
 	// start compilation threads
-	RendererShaderVk::Init();
+	RendererShaderVk::Init(); // shaders
 	PipelineCompiler::CompileThreadPool_Start(); // pipelines
 }
 VulkanRenderer::~VulkanRenderer()
@ -661,8 +663,6 @@ VulkanRenderer::~VulkanRenderer()
 	SubmitCommandBuffer();
 	WaitDeviceIdle();
 	WaitCommandBufferFinished(GetCurrentCommandBufferId());
 	// make sure compilation threads have been shut down
 	RendererShaderVk::Shutdown();
 	// shut down pipeline save thread
 	m_destructionRequested = true;
 	m_pipeline_cache_semaphore.notify();
@ -1666,6 +1666,10 @@ void VulkanRenderer::Shutdown()
 {
 	SubmitCommandBuffer();
 	WaitDeviceIdle();
 	// stop compilation threads
 	RendererShaderVk::Shutdown();
 	PipelineCompiler::CompileThreadPool_Stop();
 	DeleteFontTextures();
 	Renderer::Shutdown();
 	if (m_imguiRenderPass != VK_NULL_HANDLE)
--- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp
@ -183,63 +183,6 @@ void VulkanRenderer::unregisterGraphicsPipeline(PipelineInfo* pipelineInfo)
 	}
 }
 bool g_compilePipelineThreadInit{false};
 std::mutex g_compilePipelineMutex;
 std::condition_variable g_compilePipelineCondVar;
 std::queue<PipelineCompiler*> g_compilePipelineRequests;
 // Previous worker thread implementation (removed by this commit).
 // Waits on g_compilePipelineCondVar for entries in g_compilePipelineRequests, compiles each request and deletes it.
 // The loop has no exit condition, so a thread running this function never terminates on its own.
 void compilePipeline_thread(sint32 threadIndex)
 {
 	SetThreadName("compilePl");
 #ifdef _WIN32
 	// one thread runs at normal priority while the others run at lower priority
 	if(threadIndex != 0)
 		SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
 #endif
 	while (true)
 	{
 		std::unique_lock lock(g_compilePipelineMutex);
 		// sleep until at least one request is queued
 		while (g_compilePipelineRequests.empty())
 			g_compilePipelineCondVar.wait(lock);
 		PipelineCompiler* request = g_compilePipelineRequests.front();
 		g_compilePipelineRequests.pop();
 		// release the queue lock before compiling so other threads can access the queue in the meantime
 		lock.unlock();
 		request->Compile(true, false, true);
 		delete request;
 	}
 }
 // Previous thread startup routine (removed by this commit).
 // Picks the number of compile threads from the physical core count (capped at 8) and launches them.
 // The threads are detached, so no handle is kept that could be used to join them later.
 void compilePipelineThread_init()
 {
 	uint32 numCompileThreads;
 	uint32 cpuCoreCount = GetPhysicalCoreCount();
 	if (cpuCoreCount <= 2)
 		numCompileThreads = 1;
 	else
 		numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
 	numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
 	for (uint32_t i = 0; i < numCompileThreads; i++)
 	{
 		std::thread compileThread(compilePipeline_thread, i);
 		compileThread.detach();
 	}
 }
 // Previous queue submission routine (removed by this commit).
 // Appends v to g_compilePipelineRequests under g_compilePipelineMutex and then wakes one waiting compile thread.
 void compilePipelineThread_queue(PipelineCompiler* v)
 {
 	std::unique_lock lock(g_compilePipelineMutex);
 	g_compilePipelineRequests.push(std::move(v));
 	// unlock before notifying so the woken thread does not immediately block on the mutex
 	lock.unlock();
 	g_compilePipelineCondVar.notify_one();
 }
 // make a guess if a pipeline is not essential
 // non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics
 bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
@ -270,12 +213,6 @@ bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
 // create graphics pipeline for current state
 PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
 {
 	if (!g_compilePipelineThreadInit)
 	{
 		compilePipelineThread_init();
 		g_compilePipelineThreadInit = true;
 	}
 	const auto fetchShader = LatteSHRC_GetActiveFetchShader();
 	const auto vertexShader = LatteSHRC_GetActiveVertexShader();
 	const auto geometryShader = LatteSHRC_GetActiveGeometryShader();
@ -313,7 +250,7 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
 		if (pipelineCompiler->Compile(false, true, true) == false)
 		{
 			// shaders or pipeline not cached -> asynchronous compilation
-			compilePipelineThread_queue(pipelineCompiler);
+			PipelineCompiler::CompileThreadPool_QueueCompilation(pipelineCompiler);
 		}
 		else
 		{
@ -379,7 +316,7 @@ float s_vkUniformData[512 * 4];
 uint32 VulkanRenderer::uniformData_uploadUniformDataBufferGetOffset(std::span<uint8> data)
 {
 	const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1;
-	const uint32 uniformSize = (data.size() + bufferAlignmentM1) & ~bufferAlignmentM1;
+	const uint32 uniformSize = ((uint32)data.size() + bufferAlignmentM1) & ~bufferAlignmentM1;
 	auto waitWhileCondition = [&](std::function<bool()> condition) {
 		while (condition())