mirror of
https://github.com/cemu-project/Cemu.git
synced 2026-06-06 22:54:59 -06:00
Vulkan: Properly shutdown pipeline compile threads + code cleanup
This fixes an issue where the Vulkan renderer would hang on shutdown because resources were still in use.
This commit is contained in:
parent
2c03ac3217
commit
8cd5ce102f
@ -290,7 +290,6 @@ public:
|
|||||||
{
|
{
|
||||||
if (m_hasCacheAlloc)
|
if (m_hasCacheAlloc)
|
||||||
{
|
{
|
||||||
cemu_assert_debug(isInUse() == false);
|
|
||||||
g_gpuBufferHeap->freeOffset(m_cacheOffset);
|
g_gpuBufferHeap->freeOffset(m_cacheOffset);
|
||||||
m_hasCacheAlloc = false;
|
m_hasCacheAlloc = false;
|
||||||
}
|
}
|
||||||
@ -836,6 +835,8 @@ public:
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// delete range
|
// delete range
|
||||||
|
if (node->m_hasCacheAlloc)
|
||||||
|
cemu_assert_debug(!node->isInUse());
|
||||||
node->ReleaseCacheMemoryImmediately();
|
node->ReleaseCacheMemoryImmediately();
|
||||||
LatteBufferCache_removeSingleNodeFromTree(node);
|
LatteBufferCache_removeSingleNodeFromTree(node);
|
||||||
delete node;
|
delete node;
|
||||||
|
|||||||
@ -115,11 +115,7 @@ struct LatteDecompilerCFInstruction
|
|||||||
cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we haven't accidentally added the wrong instruction type
|
cemu_assert_debug(!(instructionsALU.size() != 0 && instructionsTEX.size() != 0)); // make sure we haven't accidentally added the wrong instruction type
|
||||||
}
|
}
|
||||||
|
|
||||||
#if BOOST_OS_WINDOWS
|
|
||||||
LatteDecompilerCFInstruction(LatteDecompilerCFInstruction& mE) = default;
|
|
||||||
#else
|
|
||||||
LatteDecompilerCFInstruction(const LatteDecompilerCFInstruction& mE) = default;
|
LatteDecompilerCFInstruction(const LatteDecompilerCFInstruction& mE) = default;
|
||||||
#endif
|
|
||||||
LatteDecompilerCFInstruction(LatteDecompilerCFInstruction&& mE) = default;
|
LatteDecompilerCFInstruction(LatteDecompilerCFInstruction&& mE) = default;
|
||||||
|
|
||||||
LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default;
|
LatteDecompilerCFInstruction& operator=(LatteDecompilerCFInstruction&& mE) = default;
|
||||||
|
|||||||
@ -6,11 +6,10 @@
|
|||||||
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
||||||
#include "Cafe/OS/libs/gx2/GX2.h"
|
#include "Cafe/OS/libs/gx2/GX2.h"
|
||||||
#include "config/ActiveSettings.h"
|
#include "config/ActiveSettings.h"
|
||||||
|
#include "util/helpers/helpers.h"
|
||||||
#include "util/helpers/Serializer.h"
|
#include "util/helpers/Serializer.h"
|
||||||
#include "Cafe/HW/Latte/Common/RegisterSerializer.h"
|
#include "Cafe/HW/Latte/Common/RegisterSerializer.h"
|
||||||
|
|
||||||
std::mutex s_nvidiaWorkaround;
|
|
||||||
|
|
||||||
/* rects emulation */
|
/* rects emulation */
|
||||||
|
|
||||||
void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
|
void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
|
||||||
@ -923,7 +922,6 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
|
|||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
{
|
{
|
||||||
cemuLog_log(LogType::Force, "Failed to create pipeline layout: {}", result);
|
cemuLog_log(LogType::Force, "Failed to create pipeline layout: {}", result);
|
||||||
s_nvidiaWorkaround.unlock();
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -941,7 +939,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
|
|||||||
|
|
||||||
// increment ref counter for vkrObjPipeline and renderpass object to make sure they dont get released while we are using them
|
// increment ref counter for vkrObjPipeline and renderpass object to make sure they dont get released while we are using them
|
||||||
m_vkrObjPipeline->incRef();
|
m_vkrObjPipeline->incRef();
|
||||||
renderPassObj->incRef();
|
m_renderPassObj->incRef();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1121,3 +1119,73 @@ bool PipelineCompiler::CalcRobustBufferAccessRequirement(LatteDecompilerShader*
|
|||||||
}
|
}
|
||||||
return requiresRobustBufferAcces;
|
return requiresRobustBufferAcces;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static std::vector<std::thread> s_compileThreads;
|
||||||
|
static std::atomic_bool s_compileThreadsShutdownSignal{};
|
||||||
|
static ConcurrentQueue<PipelineCompiler*> s_pipelineCompileRequests;
|
||||||
|
|
||||||
|
// Entry point of a pipeline compile worker thread.
// Pops requests from s_pipelineCompileRequests, compiles each one and deletes it afterwards.
// The loop ends once s_compileThreadsShutdownSignal is observed as set. A null request is
// skipped without any work; CompileThreadPool_Stop() pushes those to wake up waiting workers
// so that they re-check the shutdown signal.
static void compilePipeline_thread(sint32 threadIndex)
{
	SetThreadName("compilePl");
#ifdef _WIN32
	// worker 0 keeps normal priority so that pipeline compilation as a whole cannot be starved
	// all other workers are demoted so that they do not starve the main cpu and render threads
	const bool runAtLowerPriority = (threadIndex != 0);
	if (runAtLowerPriority)
		SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
#endif
	for (;;)
	{
		if (s_compileThreadsShutdownSignal)
			break;
		// NOTE(review): pop() presumably blocks while the queue is empty - confirm against ConcurrentQueue
		PipelineCompiler* const job = s_pipelineCompileRequests.pop();
		if (job != nullptr)
		{
			job->Compile(true, false, true);
			delete job; // the worker owns the request once it has been popped
		}
	}
}
|
||||||
|
|
||||||
|
void PipelineCompiler::CompileThreadPool_Start()
|
||||||
|
{
|
||||||
|
cemu_assert_debug(s_compileThreads.empty());
|
||||||
|
s_compileThreadsShutdownSignal = false;
|
||||||
|
uint32 numCompileThreads;
|
||||||
|
|
||||||
|
uint32 cpuCoreCount = GetPhysicalCoreCount();
|
||||||
|
if (cpuCoreCount <= 2)
|
||||||
|
numCompileThreads = 1;
|
||||||
|
else
|
||||||
|
numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
|
||||||
|
|
||||||
|
numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < numCompileThreads; i++)
|
||||||
|
{
|
||||||
|
s_compileThreads.emplace_back(compilePipeline_thread, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void PipelineCompiler::CompileThreadPool_Stop()
|
||||||
|
{
|
||||||
|
s_compileThreadsShutdownSignal = true;
|
||||||
|
{
|
||||||
|
// push one empty workload for each thread
|
||||||
|
// this way we can make sure that each waiting thread is woken up to see the shutdown signal
|
||||||
|
for (auto& thread : s_compileThreads)
|
||||||
|
s_pipelineCompileRequests.push(nullptr);
|
||||||
|
}
|
||||||
|
for (auto& thread : s_compileThreads)
|
||||||
|
thread.join();
|
||||||
|
while (!s_pipelineCompileRequests.empty())
|
||||||
|
{
|
||||||
|
PipelineCompiler* pipelineCompiler = s_pipelineCompileRequests.pop();
|
||||||
|
if (!pipelineCompiler)
|
||||||
|
break;
|
||||||
|
if (pipelineCompiler)
|
||||||
|
delete pipelineCompiler;
|
||||||
|
}
|
||||||
|
s_compileThreads.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Queues a pipeline for asynchronous compilation by appending it to s_pipelineCompileRequests.
// The request is deleted by whichever worker thread compiles it, or by CompileThreadPool_Stop()
// if it is still queued at shutdown, so the caller must not delete it after this call.
void PipelineCompiler::CompileThreadPool_QueueCompilation(PipelineCompiler* v)
{
	s_pipelineCompileRequests.push(v);
}
|
||||||
@ -1,4 +1,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
||||||
|
#include "VKRBase.h"
|
||||||
|
|
||||||
class PipelineCompiler : public VKRMoveableRefCounter
|
class PipelineCompiler : public VKRMoveableRefCounter
|
||||||
{
|
{
|
||||||
@ -43,6 +45,11 @@ public:
|
|||||||
|
|
||||||
static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader);
|
static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader);
|
||||||
|
|
||||||
|
// API for thread pool
|
||||||
|
static void CompileThreadPool_Start();
|
||||||
|
static void CompileThreadPool_Stop();
|
||||||
|
static void CompileThreadPool_QueueCompilation(PipelineCompiler* v);
|
||||||
|
|
||||||
VkPipelineLayout m_pipelineLayout;
|
VkPipelineLayout m_pipelineLayout;
|
||||||
VKRObjectRenderPass* m_renderPassObj{};
|
VKRObjectRenderPass* m_renderPassObj{};
|
||||||
bool m_requestRobustBufferAccess{false};
|
bool m_requestRobustBufferAccess{false};
|
||||||
|
|||||||
@ -4,6 +4,7 @@
|
|||||||
#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
|
#include "Cafe/HW/Latte/Renderer/Vulkan/RendererShaderVk.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanTextureReadback.h"
|
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanTextureReadback.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Vulkan/CocoaSurface.h"
|
#include "Cafe/HW/Latte/Renderer/Vulkan/CocoaSurface.h"
|
||||||
|
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h"
|
||||||
|
|
||||||
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
|
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
|
||||||
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
||||||
@ -653,7 +654,8 @@ VulkanRenderer::VulkanRenderer()
|
|||||||
m_occlusionQueries.list_availableQueryIndices.emplace_back(i);
|
m_occlusionQueries.list_availableQueryIndices.emplace_back(i);
|
||||||
|
|
||||||
// start compilation threads
|
// start compilation threads
|
||||||
RendererShaderVk::Init();
|
RendererShaderVk::Init(); // shaders
|
||||||
|
PipelineCompiler::CompileThreadPool_Start(); // pipelines
|
||||||
}
|
}
|
||||||
|
|
||||||
VulkanRenderer::~VulkanRenderer()
|
VulkanRenderer::~VulkanRenderer()
|
||||||
@ -661,8 +663,6 @@ VulkanRenderer::~VulkanRenderer()
|
|||||||
SubmitCommandBuffer();
|
SubmitCommandBuffer();
|
||||||
WaitDeviceIdle();
|
WaitDeviceIdle();
|
||||||
WaitCommandBufferFinished(GetCurrentCommandBufferId());
|
WaitCommandBufferFinished(GetCurrentCommandBufferId());
|
||||||
// make sure compilation threads have been shut down
|
|
||||||
RendererShaderVk::Shutdown();
|
|
||||||
// shut down pipeline save thread
|
// shut down pipeline save thread
|
||||||
m_destructionRequested = true;
|
m_destructionRequested = true;
|
||||||
m_pipeline_cache_semaphore.notify();
|
m_pipeline_cache_semaphore.notify();
|
||||||
@ -1666,6 +1666,10 @@ void VulkanRenderer::Shutdown()
|
|||||||
{
|
{
|
||||||
SubmitCommandBuffer();
|
SubmitCommandBuffer();
|
||||||
WaitDeviceIdle();
|
WaitDeviceIdle();
|
||||||
|
// stop compilation threads
|
||||||
|
RendererShaderVk::Shutdown();
|
||||||
|
PipelineCompiler::CompileThreadPool_Stop();
|
||||||
|
|
||||||
DeleteFontTextures();
|
DeleteFontTextures();
|
||||||
Renderer::Shutdown();
|
Renderer::Shutdown();
|
||||||
if (m_imguiRenderPass != VK_NULL_HANDLE)
|
if (m_imguiRenderPass != VK_NULL_HANDLE)
|
||||||
|
|||||||
@ -183,63 +183,6 @@ void VulkanRenderer::unregisterGraphicsPipeline(PipelineInfo* pipelineInfo)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool g_compilePipelineThreadInit{false};
|
|
||||||
std::mutex g_compilePipelineMutex;
|
|
||||||
std::condition_variable g_compilePipelineCondVar;
|
|
||||||
std::queue<PipelineCompiler*> g_compilePipelineRequests;
|
|
||||||
|
|
||||||
void compilePipeline_thread(sint32 threadIndex)
|
|
||||||
{
|
|
||||||
SetThreadName("compilePl");
|
|
||||||
#ifdef _WIN32
|
|
||||||
// one thread runs at normal priority while the others run at lower priority
|
|
||||||
if(threadIndex != 0)
|
|
||||||
SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL);
|
|
||||||
#endif
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
std::unique_lock lock(g_compilePipelineMutex);
|
|
||||||
while (g_compilePipelineRequests.empty())
|
|
||||||
g_compilePipelineCondVar.wait(lock);
|
|
||||||
|
|
||||||
PipelineCompiler* request = g_compilePipelineRequests.front();
|
|
||||||
|
|
||||||
g_compilePipelineRequests.pop();
|
|
||||||
|
|
||||||
lock.unlock();
|
|
||||||
|
|
||||||
request->Compile(true, false, true);
|
|
||||||
delete request;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void compilePipelineThread_init()
|
|
||||||
{
|
|
||||||
uint32 numCompileThreads;
|
|
||||||
|
|
||||||
uint32 cpuCoreCount = GetPhysicalCoreCount();
|
|
||||||
if (cpuCoreCount <= 2)
|
|
||||||
numCompileThreads = 1;
|
|
||||||
else
|
|
||||||
numCompileThreads = 2 + (cpuCoreCount - 3); // 2 plus one additionally for every extra core above 3
|
|
||||||
|
|
||||||
numCompileThreads = std::min(numCompileThreads, 8u); // cap at 8
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < numCompileThreads; i++)
|
|
||||||
{
|
|
||||||
std::thread compileThread(compilePipeline_thread, i);
|
|
||||||
compileThread.detach();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void compilePipelineThread_queue(PipelineCompiler* v)
|
|
||||||
{
|
|
||||||
std::unique_lock lock(g_compilePipelineMutex);
|
|
||||||
g_compilePipelineRequests.push(std::move(v));
|
|
||||||
lock.unlock();
|
|
||||||
g_compilePipelineCondVar.notify_one();
|
|
||||||
}
|
|
||||||
|
|
||||||
// make a guess if a pipeline is not essential
|
// make a guess if a pipeline is not essential
|
||||||
// non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics
|
// non-essential means that skipping these drawcalls shouldn't lead to permanently corrupted graphics
|
||||||
bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
|
bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
|
||||||
@ -270,12 +213,6 @@ bool VulkanRenderer::IsAsyncPipelineAllowed(uint32 numIndices)
|
|||||||
// create graphics pipeline for current state
|
// create graphics pipeline for current state
|
||||||
PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
|
PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
|
||||||
{
|
{
|
||||||
if (!g_compilePipelineThreadInit)
|
|
||||||
{
|
|
||||||
compilePipelineThread_init();
|
|
||||||
g_compilePipelineThreadInit = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
||||||
const auto vertexShader = LatteSHRC_GetActiveVertexShader();
|
const auto vertexShader = LatteSHRC_GetActiveVertexShader();
|
||||||
const auto geometryShader = LatteSHRC_GetActiveGeometryShader();
|
const auto geometryShader = LatteSHRC_GetActiveGeometryShader();
|
||||||
@ -313,7 +250,7 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
|
|||||||
if (pipelineCompiler->Compile(false, true, true) == false)
|
if (pipelineCompiler->Compile(false, true, true) == false)
|
||||||
{
|
{
|
||||||
// shaders or pipeline not cached -> asynchronous compilation
|
// shaders or pipeline not cached -> asynchronous compilation
|
||||||
compilePipelineThread_queue(pipelineCompiler);
|
PipelineCompiler::CompileThreadPool_QueueCompilation(pipelineCompiler);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -379,7 +316,7 @@ float s_vkUniformData[512 * 4];
|
|||||||
uint32 VulkanRenderer::uniformData_uploadUniformDataBufferGetOffset(std::span<uint8> data)
|
uint32 VulkanRenderer::uniformData_uploadUniformDataBufferGetOffset(std::span<uint8> data)
|
||||||
{
|
{
|
||||||
const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1;
|
const uint32 bufferAlignmentM1 = std::max(m_featureControl.limits.minUniformBufferOffsetAlignment, m_featureControl.limits.nonCoherentAtomSize) - 1;
|
||||||
const uint32 uniformSize = (data.size() + bufferAlignmentM1) & ~bufferAlignmentM1;
|
const uint32 uniformSize = ((uint32)data.size() + bufferAlignmentM1) & ~bufferAlignmentM1;
|
||||||
|
|
||||||
auto waitWhileCondition = [&](std::function<bool()> condition) {
|
auto waitWhileCondition = [&](std::function<bool()> condition) {
|
||||||
while (condition())
|
while (condition())
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user