Vulkan: Properly shutdown pipeline compile threads + code cleanup

This fixes an issue where the Vulkan renderer would hang on shutdown due to resources still being in use
This commit is contained in:
Exzap 2026-02-09 04:11:10 +01:00
parent 2c03ac3217
commit 8cd5ce102f
6 changed files with 90 additions and 77 deletions

View File

@ -290,7 +290,6 @@ public:
{
if (m_hasCacheAlloc)
{
cemu_assert_debug(isInUse() == false);
g_gpuBufferHeap->freeOffset(m_cacheOffset);
m_hasCacheAlloc = false;
}
@ -836,6 +835,8 @@ public:
continue;
}
// delete range
if (node->m_hasCacheAlloc)
cemu_assert_debug(!node->isInUse());
node->ReleaseCacheMemoryImmediately();
LatteBufferCache_removeSingleNodeFromTree(node);
delete node;

View File

@ -115,11 +115,7 @@ struct LatteDecompilerCFInstruction
cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we haven't accidentally added the wrong instruction type
}
#if BOOST_OS_WINDOWS
LatteDecompilerCFInstruction(LatteDecompilerCFInstruction& mE) = default;
#else
LatteDecompilerCFInstruction(const LatteDecompilerCFInstruction& mE) = default;
#endif
LatteDecompilerCFInstruction(LatteDecompilerCFInstruction&& mE) = default;
LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default;

View File

@ -6,11 +6,10 @@
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
#include "Cafe/OS/libs/gx2/GX2.h"
#include "config/ActiveSettings.h"
#include "util/helpers/helpers.h"
#include "util/helpers/Serializer.h"
#include "Cafe/HW/Latte/Common/RegisterSerializer.h"
std::mutex s_nvidiaWorkaround;
/* rects emulation */
void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
@ -923,7 +922,6 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
if (result != VK_SUCCESS)
{
cemuLog_log(LogType::Force, "Failed to create pipeline layout: {}", result);
s_nvidiaWorkaround.unlock();
return false;
}
@ -941,7 +939,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
// increment ref counter for vkrObjPipeline and renderpass object to make sure they dont get released while we are using them
m_vkrObjPipeline->incRef();
renderPassObj->incRef();
m_renderPassObj->incRef();
return true;
}
@ -1121,3 +1119,73 @@ bool PipelineCompiler::CalcRobustBufferAccessRequirement(LatteDecompilerShader*
}
return requiresRobustBufferAcces;
}
// worker threads for asynchronous pipeline compilation
static std::vector<std::thread> s_compileThreads;
// set to true to request all compile threads to exit their work loop
static std::atomic_bool s_compileThreadsShutdownSignal{};
// pending compile jobs; a nullptr entry acts as a wake-up sentinel during shutdown
static ConcurrentQueue<PipelineCompiler*> s_pipelineCompileRequests;
// worker loop for the pipeline compile thread pool
// pops queued PipelineCompiler jobs, compiles them and frees them; exits when the shutdown signal is set
static void compilePipeline_thread(sint32 threadIndex)
{
	SetThreadName("compilePl");
#ifdef _WIN32
	// all workers except the first run below normal priority so the main cpu and render
	// threads are not starved, while one normal-priority worker guarantees that pipeline
	// compilation itself can never be fully starved either
	if (threadIndex != 0)
		SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
#endif
	for (;;)
	{
		if (s_compileThreadsShutdownSignal)
			break;
		PipelineCompiler* job = s_pipelineCompileRequests.pop();
		if (job)
		{
			job->Compile(true, false, true);
			delete job; // the pool owns queued compilers and frees them after compiling
		}
		// a nullptr job is only a wake-up sentinel; loop around to re-check the shutdown signal
	}
}
void PipelineCompiler::CompileThreadPool_Start()
{
cemu_assert_debug(s_compileThreads.empty());
s_compileThreadsShutdownSignal = false;
uint32 numCompileThreads;
uint32 cpuCoreCount = GetPhysicalCoreCount();
if (cpuCoreCount <= 2)
numCompileThreads = 1;
else
numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
for (uint32_t i = 0; i < numCompileThreads; i++)
{
s_compileThreads.emplace_back(compilePipeline_thread, i);
}
}
// shut down the pipeline compile worker threads and release any pending compile requests
void PipelineCompiler::CompileThreadPool_Stop()
{
	s_compileThreadsShutdownSignal = true;
	// push one null sentinel per thread so every worker blocked in pop()
	// is woken up and gets to observe the shutdown signal
	for (size_t i = 0; i < s_compileThreads.size(); i++)
		s_pipelineCompileRequests.push(nullptr);
	for (auto& thread : s_compileThreads)
		thread.join();
	// drain the queue. Sentinels and real requests may be interleaved, so we must not
	// stop at the first nullptr — doing so would leak any request queued after a sentinel
	while (!s_pipelineCompileRequests.empty())
	{
		PipelineCompiler* pipelineCompiler = s_pipelineCompileRequests.pop();
		delete pipelineCompiler; // deleting nullptr is a harmless no-op
	}
	s_compileThreads.clear();
}
// hand a pipeline compiler to the worker pool for asynchronous compilation
// ownership of v transfers to the pool; the worker thread deletes it after compiling
void PipelineCompiler::CompileThreadPool_QueueCompilation(PipelineCompiler* v)
{
s_pipelineCompileRequests.push(v);
}

View File

@ -1,4 +1,6 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
#include "VKRBase.h"
class PipelineCompiler : public VKRMoveableRefCounter
{
@ -43,6 +45,11 @@ public:
static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader);
// API for thread pool
static void CompileThreadPool_Start();
static void CompileThreadPool_Stop();
static void CompileThreadPool_QueueCompilation(PipelineCompiler* v);
VkPipelineLayout m_pipelineLayout;
VKRObjectRenderPass* m_renderPassObj{};
bool m_requestRobustBufferAccess{false};

View File

@ -4,6 +4,7 @@
#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanTextureReadback.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/CocoaSurface.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h"
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
@ -653,7 +654,8 @@ VulkanRenderer::VulkanRenderer()
m_occlusionQueries.list_availableQueryIndices.emplace_back(i);
// start compilation threads
RendererShaderVk::Init();
RendererShaderVk::Init(); // shaders
PipelineCompiler::CompileThreadPool_Start(); // pipelines
}
VulkanRenderer::~VulkanRenderer()
@ -661,8 +663,6 @@ VulkanRenderer::~VulkanRenderer()
SubmitCommandBuffer();
WaitDeviceIdle();
WaitCommandBufferFinished(GetCurrentCommandBufferId());
// make sure compilation threads have been shut down
RendererShaderVk::Shutdown();
// shut down pipeline save thread
m_destructionRequested = true;
m_pipeline_cache_semaphore.notify();
@ -1666,6 +1666,10 @@ void VulkanRenderer::Shutdown()
{
SubmitCommandBuffer();
WaitDeviceIdle();
// stop compilation threads
RendererShaderVk::Shutdown();
PipelineCompiler::CompileThreadPool_Stop();
DeleteFontTextures();
Renderer::Shutdown();
if (m_imguiRenderPass != VK_NULL_HANDLE)

View File

@ -183,63 +183,6 @@ void VulkanRenderer::unregisterGraphicsPipeline(PipelineInfo* pipelineInfo)
}
}
// state for the pipeline compile worker threads
bool g_compilePipelineThreadInit{false}; // set once the worker threads have been spawned
std::mutex g_compilePipelineMutex; // guards g_compilePipelineRequests
std::condition_variable g_compilePipelineCondVar; // signaled when a new request is queued
std::queue<PipelineCompiler*> g_compilePipelineRequests; // pending compile requests
// worker loop for asynchronous pipeline compilation
// note: loops forever — there is no shutdown/exit path out of this function
void compilePipeline_thread(sint32 threadIndex)
{
SetThreadName("compilePl");
#ifdef _WIN32
// one thread runs at normal priority while the others run at lower priority
if(threadIndex != 0)
SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
#endif
while (true)
{
// block until at least one compile request has been queued
std::unique_lock lock(g_compilePipelineMutex);
while (g_compilePipelineRequests.empty())
g_compilePipelineCondVar.wait(lock);
// dequeue the request, then drop the lock before the (slow) compile call
PipelineCompiler* request = g_compilePipelineRequests.front();
g_compilePipelineRequests.pop();
lock.unlock();
request->Compile(true, false, true);
// queued compilers are owned by this worker once dequeued; free after compiling
delete request;
}
}
void compilePipelineThread_init()
{
uint32 numCompileThreads;
uint32 cpuCoreCount = GetPhysicalCoreCount();
if (cpuCoreCount <= 2)
numCompileThreads = 1;
else
numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
for (uint32_t i = 0; i < numCompileThreads; i++)
{
std::thread compileThread(compilePipeline_thread, i);
compileThread.detach();
}
}
// enqueue a compile request and wake one worker thread
void compilePipelineThread_queue(PipelineCompiler* v)
{
	{
		std::scoped_lock lock(g_compilePipelineMutex);
		g_compilePipelineRequests.push(v);
	}
	// notify outside the critical section to avoid waking a thread that immediately blocks on the mutex
	g_compilePipelineCondVar.notify_one();
}
// make a guess if a pipeline is not essential
// non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics
bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
@ -270,12 +213,6 @@ bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
// create graphics pipeline for current state
PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
{
if (!g_compilePipelineThreadInit)
{
compilePipelineThread_init();
g_compilePipelineThreadInit = true;
}
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
const auto vertexShader = LatteSHRC_GetActiveVertexShader();
const auto geometryShader = LatteSHRC_GetActiveGeometryShader();
@ -313,7 +250,7 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
if (pipelineCompiler->Compile(false, true, true) == false)
{
// shaders or pipeline not cached -> asynchronous compilation
compilePipelineThread_queue(pipelineCompiler);
PipelineCompiler::CompileThreadPool_QueueCompilation(pipelineCompiler);
}
else
{
@ -379,7 +316,7 @@ float s_vkUniformData[512 * 4];
uint32 VulkanRenderer::uniformData_uploadUniformDataBufferGetOffset(std::span<uint8> data)
{
const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1;
const uint32 uniformSize = (data.size() + bufferAlignmentM1) & ~bufferAlignmentM1;
const uint32 uniformSize = ((uint32)data.size() + bufferAlignmentM1) & ~bufferAlignmentM1;
auto waitWhileCondition = [&](std::function<bool()> condition) {
while (condition())