mirror of
https://github.com/cemu-project/Cemu.git
synced 2026-02-20 07:13:19 -07:00
Vulkan: Properly shut down pipeline compile threads + code cleanup
This fixes an issue where the Vulkan renderer would hang on shutdown due to resources still being in use.
This commit is contained in:
parent
2c03ac3217
commit
8cd5ce102f
@ -290,7 +290,6 @@ public:
|
||||
{
|
||||
if (m_hasCacheAlloc)
|
||||
{
|
||||
cemu_assert_debug(isInUse() == false);
|
||||
g_gpuBufferHeap->freeOffset(m_cacheOffset);
|
||||
m_hasCacheAlloc = false;
|
||||
}
|
||||
@ -836,6 +835,8 @@ public:
|
||||
continue;
|
||||
}
|
||||
// delete range
|
||||
if (node->m_hasCacheAlloc)
|
||||
cemu_assert_debug(!node->isInUse());
|
||||
node->ReleaseCacheMemoryImmediately();
|
||||
LatteBufferCache_removeSingleNodeFromTree(node);
|
||||
delete node;
|
||||
|
||||
@ -115,11 +115,7 @@ struct LatteDecompilerCFInstruction
|
||||
cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we haven't accidentally added the wrong instruction type
|
||||
}
|
||||
|
||||
#if BOOST_OS_WINDOWS
|
||||
LatteDecompilerCFInstruction(LatteDecompilerCFInstruction& mE) = default;
|
||||
#else
|
||||
LatteDecompilerCFInstruction(const LatteDecompilerCFInstruction& mE) = default;
|
||||
#endif
|
||||
LatteDecompilerCFInstruction(LatteDecompilerCFInstruction&& mE) = default;
|
||||
|
||||
LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default;
|
||||
|
||||
@ -6,11 +6,10 @@
|
||||
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
||||
#include "Cafe/OS/libs/gx2/GX2.h"
|
||||
#include "config/ActiveSettings.h"
|
||||
#include "util/helpers/helpers.h"
|
||||
#include "util/helpers/Serializer.h"
|
||||
#include "Cafe/HW/Latte/Common/RegisterSerializer.h"
|
||||
|
||||
std::mutex s_nvidiaWorkaround;
|
||||
|
||||
/* rects emulation */
|
||||
|
||||
void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
|
||||
@ -923,7 +922,6 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
|
||||
if (result != VK_SUCCESS)
|
||||
{
|
||||
cemuLog_log(LogType::Force, "Failed to create pipeline layout: {}", result);
|
||||
s_nvidiaWorkaround.unlock();
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -941,7 +939,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
|
||||
|
||||
// increment ref counter for vkrObjPipeline and renderpass object to make sure they don't get released while we are using them
|
||||
m_vkrObjPipeline->incRef();
|
||||
renderPassObj->incRef();
|
||||
m_renderPassObj->incRef();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1121,3 +1119,73 @@ bool PipelineCompiler::CalcRobustBufferAccessRequirement(LatteDecompilerShader*
|
||||
}
|
||||
return requiresRobustBufferAcces;
|
||||
}
|
||||
|
||||
static std::vector<std::thread> s_compileThreads;
|
||||
static std::atomic_bool s_compileThreadsShutdownSignal{};
|
||||
static ConcurrentQueue<PipelineCompiler*> s_pipelineCompileRequests;
|
||||
|
||||
// Entry point of a pipeline compile worker thread.
// Repeatedly takes a request from s_pipelineCompileRequests, compiles it and deletes it,
// until s_compileThreadsShutdownSignal is observed at the top of the loop.
// threadIndex: index of this worker within the pool; only used to pick the thread priority on Windows.
static void compilePipeline_thread(sint32 threadIndex)
{
	SetThreadName("compilePl");
#ifdef _WIN32
	// Worker 0 keeps normal priority so the compile pool as a whole cannot be starved.
	// All other workers are demoted so they do not starve the main CPU and render threads.
	const bool keepNormalPriority = (threadIndex == 0);
	if (!keepNormalPriority)
		SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
#endif
	for (;;)
	{
		if (s_compileThreadsShutdownSignal)
			break;
		PipelineCompiler* job = s_pipelineCompileRequests.pop();
		// nullptr is an empty workload (pushed on shutdown to wake us up); go back and re-check the signal
		if (job != nullptr)
		{
			job->Compile(true, false, true);
			delete job;
		}
	}
}
|
||||
|
||||
void PipelineCompiler::CompileThreadPool_Start()
|
||||
{
|
||||
cemu_assert_debug(s_compileThreads.empty());
|
||||
s_compileThreadsShutdownSignal = false;
|
||||
uint32 numCompileThreads;
|
||||
|
||||
uint32 cpuCoreCount = GetPhysicalCoreCount();
|
||||
if (cpuCoreCount <= 2)
|
||||
numCompileThreads = 1;
|
||||
else
|
||||
numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
|
||||
|
||||
numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
|
||||
|
||||
for (uint32_t i = 0; i < numCompileThreads; i++)
|
||||
{
|
||||
s_compileThreads.emplace_back(compilePipeline_thread, i);
|
||||
}
|
||||
}
|
||||
|
||||
void PipelineCompiler::CompileThreadPool_Stop()
|
||||
{
|
||||
s_compileThreadsShutdownSignal = true;
|
||||
{
|
||||
// push one empty workload for each thread
|
||||
// this way we can make sure that each waiting thread is woken up to see the shutdown signal
|
||||
for (auto& thread : s_compileThreads)
|
||||
s_pipelineCompileRequests.push(nullptr);
|
||||
}
|
||||
for (auto& thread : s_compileThreads)
|
||||
thread.join();
|
||||
while (!s_pipelineCompileRequests.empty())
|
||||
{
|
||||
PipelineCompiler* pipelineCompiler = s_pipelineCompileRequests.pop();
|
||||
if (!pipelineCompiler)
|
||||
break;
|
||||
if (pipelineCompiler)
|
||||
delete pipelineCompiler;
|
||||
}
|
||||
s_compileThreads.clear();
|
||||
}
|
||||
|
||||
// Hands a pipeline compile request over to the compile thread pool.
// v: request to enqueue on s_pipelineCompileRequests. The worker thread that picks it up calls
//    Compile() on it and then deletes it, so the caller must not free it afterwards.
void PipelineCompiler::CompileThreadPool_QueueCompilation(PipelineCompiler* v)
{
	s_pipelineCompileRequests.push(v);
}
|
||||
@ -1,4 +1,6 @@
|
||||
#pragma once
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
||||
#include "VKRBase.h"
|
||||
|
||||
class PipelineCompiler : public VKRMoveableRefCounter
|
||||
{
|
||||
@ -43,6 +45,11 @@ public:
|
||||
|
||||
static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader);
|
||||
|
||||
// API for thread pool
|
||||
static void CompileThreadPool_Start();
|
||||
static void CompileThreadPool_Stop();
|
||||
static void CompileThreadPool_QueueCompilation(PipelineCompiler* v);
|
||||
|
||||
VkPipelineLayout m_pipelineLayout;
|
||||
VKRObjectRenderPass* m_renderPassObj{};
|
||||
bool m_requestRobustBufferAccess{false};
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanTextureReadback.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/CocoaSurface.h"
|
||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h"
|
||||
|
||||
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
|
||||
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
||||
@ -653,7 +654,8 @@ VulkanRenderer::VulkanRenderer()
|
||||
m_occlusionQueries.list_availableQueryIndices.emplace_back(i);
|
||||
|
||||
// start compilation threads
|
||||
RendererShaderVk::Init();
|
||||
RendererShaderVk::Init(); // shaders
|
||||
PipelineCompiler::CompileThreadPool_Start(); // pipelines
|
||||
}
|
||||
|
||||
VulkanRenderer::~VulkanRenderer()
|
||||
@ -661,8 +663,6 @@ VulkanRenderer::~VulkanRenderer()
|
||||
SubmitCommandBuffer();
|
||||
WaitDeviceIdle();
|
||||
WaitCommandBufferFinished(GetCurrentCommandBufferId());
|
||||
// make sure compilation threads have been shut down
|
||||
RendererShaderVk::Shutdown();
|
||||
// shut down pipeline save thread
|
||||
m_destructionRequested = true;
|
||||
m_pipeline_cache_semaphore.notify();
|
||||
@ -1666,6 +1666,10 @@ void VulkanRenderer::Shutdown()
|
||||
{
|
||||
SubmitCommandBuffer();
|
||||
WaitDeviceIdle();
|
||||
// stop compilation threads
|
||||
RendererShaderVk::Shutdown();
|
||||
PipelineCompiler::CompileThreadPool_Stop();
|
||||
|
||||
DeleteFontTextures();
|
||||
Renderer::Shutdown();
|
||||
if (m_imguiRenderPass != VK_NULL_HANDLE)
|
||||
|
||||
@ -183,63 +183,6 @@ void VulkanRenderer::unregisterGraphicsPipeline(PipelineInfo* pipelineInfo)
|
||||
}
|
||||
}
|
||||
|
||||
bool g_compilePipelineThreadInit{false};
|
||||
std::mutex g_compilePipelineMutex;
|
||||
std::condition_variable g_compilePipelineCondVar;
|
||||
std::queue<PipelineCompiler*> g_compilePipelineRequests;
|
||||
|
||||
// Entry point of a pipeline compile thread.
// Loops forever: waits until g_compilePipelineRequests is non-empty, takes the front request,
// compiles it outside the lock and deletes it. There is no exit condition.
// threadIndex: index of this thread; only used to pick the thread priority on Windows.
void compilePipeline_thread(sint32 threadIndex)
{
	SetThreadName("compilePl");
#ifdef _WIN32
	// thread 0 stays at normal priority, every other thread is lowered
	const bool keepNormalPriority = (threadIndex == 0);
	if (!keepNormalPriority)
		SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
#endif
	for (;;)
	{
		PipelineCompiler* job = nullptr;
		{
			// hold the mutex only while waiting for and dequeuing a request
			std::unique_lock queueLock(g_compilePipelineMutex);
			g_compilePipelineCondVar.wait(queueLock, [] { return !g_compilePipelineRequests.empty(); });
			job = g_compilePipelineRequests.front();
			g_compilePipelineRequests.pop();
		}
		job->Compile(true, false, true);
		delete job;
	}
}
|
||||
|
||||
void compilePipelineThread_init()
|
||||
{
|
||||
uint32 numCompileThreads;
|
||||
|
||||
uint32 cpuCoreCount = GetPhysicalCoreCount();
|
||||
if (cpuCoreCount <= 2)
|
||||
numCompileThreads = 1;
|
||||
else
|
||||
numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
|
||||
|
||||
numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
|
||||
|
||||
for (uint32_t i = 0; i < numCompileThreads; i++)
|
||||
{
|
||||
std::thread compileThread(compilePipeline_thread, i);
|
||||
compileThread.detach();
|
||||
}
|
||||
}
|
||||
|
||||
// Enqueues a pipeline compile request and wakes up one compile thread.
// v: request to append to g_compilePipelineRequests; the compile thread that dequeues it deletes it
//    after compiling.
void compilePipelineThread_queue(PipelineCompiler* v)
{
	{
		// the mutex is released before notifying
		std::unique_lock queueLock(g_compilePipelineMutex);
		g_compilePipelineRequests.push(v);
	}
	g_compilePipelineCondVar.notify_one();
}
|
||||
|
||||
// make a guess if a pipeline is not essential
|
||||
// non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics
|
||||
bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
|
||||
@ -270,12 +213,6 @@ bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
|
||||
// create graphics pipeline for current state
|
||||
PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
|
||||
{
|
||||
if (!g_compilePipelineThreadInit)
|
||||
{
|
||||
compilePipelineThread_init();
|
||||
g_compilePipelineThreadInit = true;
|
||||
}
|
||||
|
||||
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
||||
const auto vertexShader = LatteSHRC_GetActiveVertexShader();
|
||||
const auto geometryShader = LatteSHRC_GetActiveGeometryShader();
|
||||
@ -313,7 +250,7 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
|
||||
if (pipelineCompiler->Compile(false, true, true) == false)
|
||||
{
|
||||
// shaders or pipeline not cached -> asynchronous compilation
|
||||
compilePipelineThread_queue(pipelineCompiler);
|
||||
PipelineCompiler::CompileThreadPool_QueueCompilation(pipelineCompiler);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -379,7 +316,7 @@ float s_vkUniformData[512 * 4];
|
||||
uint32 VulkanRenderer::uniformData_uploadUniformDataBufferGetOffset(std::span<uint8> data)
|
||||
{
|
||||
const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1;
|
||||
const uint32 uniformSize = (data.size() + bufferAlignmentM1) & ~bufferAlignmentM1;
|
||||
const uint32 uniformSize = ((uint32)data.size() + bufferAlignmentM1) & ~bufferAlignmentM1;
|
||||
|
||||
auto waitWhileCondition = [&](std::function<bool()> condition) {
|
||||
while (condition())
|
||||
|
||||
Loading…
Reference in New Issue
Block a user