OPENGL WORKS (with async presentation and async shader compilation disabled)

This commit is contained in:
jbm11208 2026-01-17 22:29:22 -05:00
parent 788ec56c8b
commit f1411a19ad
6 changed files with 190 additions and 49 deletions

View File

@ -163,8 +163,8 @@ void ConfigureGraphics::ApplyConfiguration() {
ui->toggle_async_shaders, async_shader_compilation); ui->toggle_async_shaders, async_shader_compilation);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation, ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation,
ui->toggle_async_present, async_presentation); ui->toggle_async_present, async_presentation);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_gpu, ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_gpu, ui->toggle_async_gpu,
ui->toggle_async_gpu, async_gpu); async_gpu);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.spirv_shader_gen, ConfigurationShared::ApplyPerGameSetting(&Settings::values.spirv_shader_gen,
ui->spirv_shader_gen, spirv_shader_gen); ui->spirv_shader_gen, spirv_shader_gen);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.disable_spirv_optimizer, ConfigurationShared::ApplyPerGameSetting(&Settings::values.disable_spirv_optimizer,

View File

@ -8,6 +8,7 @@
#include "common/settings.h" #include "common/settings.h"
#include "core/core.h" #include "core/core.h"
#include "core/core_timing.h" #include "core/core_timing.h"
#include "core/frontend/emu_window.h"
#include "core/hle/service/gsp/gsp_gpu.h" #include "core/hle/service/gsp/gsp_gpu.h"
#include "core/hle/service/plgldr/plgldr.h" #include "core/hle/service/plgldr/plgldr.h"
#include "video_core/debug_utils/debug_utils.h" #include "video_core/debug_utils/debug_utils.h"
@ -32,8 +33,8 @@ MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(1
GPU::GPU(Core::System& system, Frontend::EmuWindow& emu_window, GPU::GPU(Core::System& system, Frontend::EmuWindow& emu_window,
Frontend::EmuWindow* secondary_window) Frontend::EmuWindow* secondary_window)
: right_eye_disabler{std::make_unique<RightEyeDisabler>(*this)}, : impl{std::make_unique<Impl>(system, emu_window, secondary_window)},
impl{std::make_unique<Impl>(system, emu_window, secondary_window)} { right_eye_disabler{std::make_unique<RightEyeDisabler>(*this)} {
impl->vblank_event = impl->timing.RegisterEvent( impl->vblank_event = impl->timing.RegisterEvent(
"GPU::VBlankCallback", "GPU::VBlankCallback",
[this](uintptr_t user_data, s64 cycles_late) { VBlankCallback(user_data, cycles_late); }); [this](uintptr_t user_data, s64 cycles_late) { VBlankCallback(user_data, cycles_late); });
@ -43,11 +44,14 @@ GPU::GPU(Core::System& system, Frontend::EmuWindow& emu_window,
impl->pica.BindRasterizer(impl->rasterizer); impl->pica.BindRasterizer(impl->rasterizer);
// Initialize GPU command queue if async GPU is enabled. // Initialize GPU command queue if async GPU is enabled.
// Note: Async GPU is disabled for Vulkan as it causes threading issues with command buffer // Note: Async GPU is disabled for Vulkan as it causes threading issues.
// recording.
if (Settings::values.async_gpu.GetValue() && if (Settings::values.async_gpu.GetValue() &&
Settings::values.graphics_api.GetValue() != Settings::GraphicsAPI::Vulkan) { Settings::values.graphics_api.GetValue() != Settings::GraphicsAPI::Vulkan) {
impl->command_queue = std::make_unique<GPUCommandQueue>(*this); auto shared_context = emu_window.CreateSharedContext();
if (shared_context) {
impl->command_queue =
std::make_unique<GPUCommandQueue>(*this, std::move(shared_context));
}
} }
} }
@ -95,18 +99,12 @@ void GPU::ClearAll(bool flush) {
} }
void GPU::Execute(const Service::GSP::Command& command) { void GPU::Execute(const Service::GSP::Command& command) {
// If async GPU is enabled, queue the command; otherwise execute it directly QueueInternalCommand(command);
if (impl->command_queue) {
impl->command_queue->QueueCommand(command);
} else {
ExecuteCommand(command);
}
} }
void GPU::ExecuteCommand(const Service::GSP::Command& command) { void GPU::ExecuteCommand(const Service::GSP::Command& command) {
using Service::GSP::CommandId; using Service::GSP::CommandId;
auto& regs = impl->pica.regs; auto& regs = impl->pica.regs;
switch (command.id) { switch (command.id) {
case CommandId::RequestDma: { case CommandId::RequestDma: {
impl->system.Memory().RasterizerFlushVirtualRegion( impl->system.Memory().RasterizerFlushVirtualRegion(
@ -257,7 +255,14 @@ u32 GPU::ReadReg(VAddr addr) {
const u32 index = offset / sizeof(u32); const u32 index = offset / sizeof(u32);
ASSERT(addr % sizeof(u32) == 0); ASSERT(addr % sizeof(u32) == 0);
ASSERT(index < Pica::PicaCore::Regs::NUM_REGS); ASSERT(index < Pica::PicaCore::Regs::NUM_REGS);
return impl->pica.regs.reg_array[index];
// Protect GPU register reads with mutex only when async GPU is enabled
if (impl->command_queue) {
std::lock_guard<std::mutex> lock(impl->rasterizer_mutex);
return impl->pica.regs.reg_array[index];
} else {
return impl->pica.regs.reg_array[index];
}
} }
default: default:
UNREACHABLE_MSG("Read from unknown GPU address {:#08X}", addr); UNREACHABLE_MSG("Read from unknown GPU address {:#08X}", addr);
@ -281,27 +286,66 @@ void GPU::WriteReg(VAddr addr, u32 data) {
ASSERT(addr % sizeof(u32) == 0); ASSERT(addr % sizeof(u32) == 0);
ASSERT(index < Pica::PicaCore::Regs::NUM_REGS); ASSERT(index < Pica::PicaCore::Regs::NUM_REGS);
impl->pica.regs.reg_array[index] = data;
// Handle registers that trigger GPU actions if (impl->command_queue) {
switch (index) { // Async GPU path: protect with mutex and execute operations
case GPU_REG_INDEX(memory_fill_config[0].trigger): std::lock_guard<std::mutex> lock(impl->rasterizer_mutex);
MemoryFill(0, 0); impl->pica.regs.reg_array[index] = data;
break;
case GPU_REG_INDEX(memory_fill_config[1].trigger): // Handle registers that trigger GPU actions asynchronously
MemoryFill(1, 1); switch (index) {
break; case GPU_REG_INDEX(memory_fill_config[0].trigger):
case GPU_REG_INDEX(display_transfer_config.trigger): if (data)
MemoryTransfer(); MemoryFill(0, 0);
break; break;
case GPU_REG_INDEX(internal.pipeline.command_buffer.trigger[0]): case GPU_REG_INDEX(memory_fill_config[1].trigger):
SubmitCmdList(0); if (data)
break; MemoryFill(1, 1);
case GPU_REG_INDEX(internal.pipeline.command_buffer.trigger[1]): break;
SubmitCmdList(1); case GPU_REG_INDEX(display_transfer_config.trigger):
break; if (data)
default: MemoryTransfer();
break; break;
case GPU_REG_INDEX(internal.pipeline.command_buffer.trigger[0]):
if (data)
SubmitCmdList(0);
break;
case GPU_REG_INDEX(internal.pipeline.command_buffer.trigger[1]):
if (data)
SubmitCmdList(1);
break;
default:
break;
}
} else {
// Synchronous GPU path: no mutex needed, execute directly
impl->pica.regs.reg_array[index] = data;
// Handle registers that trigger GPU actions synchronously
switch (index) {
case GPU_REG_INDEX(memory_fill_config[0].trigger):
if (data)
MemoryFill(0, 0);
break;
case GPU_REG_INDEX(memory_fill_config[1].trigger):
if (data)
MemoryFill(1, 1);
break;
case GPU_REG_INDEX(display_transfer_config.trigger):
if (data)
MemoryTransfer();
break;
case GPU_REG_INDEX(internal.pipeline.command_buffer.trigger[0]):
if (data)
SubmitCmdList(0);
break;
case GPU_REG_INDEX(internal.pipeline.command_buffer.trigger[1]):
if (data)
SubmitCmdList(1);
break;
default:
break;
}
} }
break; break;
} }
@ -350,6 +394,33 @@ void GPU::ReportLoadingProgramID(u64 program_ID) {
impl->rasterizer->SetAccurateMul(use_accurate_mul); impl->rasterizer->SetAccurateMul(use_accurate_mul);
} }
/// Blocks the caller until the command queue reports idle.
/// Does nothing when no command queue was created.
void GPU::WaitForGPUCompletion() {
    if (!impl->command_queue) {
        return;
    }
    impl->command_queue->WaitForIdle();
}
/// Reports whether the command queue is idle.
/// Returns true when no command queue exists, since nothing can be pending then.
bool GPU::IsGPUCommandQueueIdle() const {
    return !impl->command_queue || impl->command_queue->IsIdle();
}
/// Forwards a flush signal to the command queue without waiting for completion.
/// Does nothing when no command queue was created.
void GPU::SignalGPUFlush() {
    if (!impl->command_queue) {
        return;
    }
    impl->command_queue->SignalFlush();
}
/// Dispatches a GSP command: executed immediately on the calling thread when no
/// command queue exists, otherwise handed to the command queue.
void GPU::QueueInternalCommand(const Service::GSP::Command& command) {
    if (!impl->command_queue) {
        // Synchronous path: no queue was created, run the command right away.
        ExecuteCommand(command);
        return;
    }
    impl->command_queue->QueueCommand(command);
}
void GPU::SubmitCmdList(u32 index) { void GPU::SubmitCmdList(u32 index) {
// Check if a command list was triggered. // Check if a command list was triggered.
auto& config = impl->pica.regs.internal.pipeline.command_buffer; auto& config = impl->pica.regs.internal.pipeline.command_buffer;
@ -429,7 +500,15 @@ void GPU::MemoryTransfer() {
} }
void GPU::VBlankCallback(std::uintptr_t user_data, s64 cycles_late) { void GPU::VBlankCallback(std::uintptr_t user_data, s64 cycles_late) {
// Present renderered frame. // Signal GPU to flush any pending work before presenting.
// Use non-blocking signal because VBlank happens on timing thread and cannot block.
// The GPU worker thread will process queued commands and complete them before
// the next frame if there's time, enabling proper async operation.
if (impl->command_queue) {
impl->command_queue->SignalFlush();
}
// Present rendered frame.
impl->renderer->SwapBuffers(); impl->renderer->SwapBuffers();
// Signal to GSP that GPU interrupt has occurred // Signal to GSP that GPU interrupt has occurred

View File

@ -38,6 +38,7 @@ constexpr u64 FRAME_TICKS = 4481136ull;
class GraphicsDebugger; class GraphicsDebugger;
class RendererBase; class RendererBase;
class RightEyeDisabler; class RightEyeDisabler;
class GPUCommandQueue;
/** /**
* The GPU class is the high level interface to the video_core for core services. * The GPU class is the high level interface to the video_core for core services.
@ -78,6 +79,10 @@ public:
/// Writes the provided value to the GPU virtual address. /// Writes the provided value to the GPU virtual address.
void WriteReg(VAddr addr, u32 data); void WriteReg(VAddr addr, u32 data);
/// Submits a GPU command for processing.
/// When the async command queue exists the command is queued on it; otherwise the
/// command is executed immediately via ExecuteCommand on the calling thread.
void QueueInternalCommand(const Service::GSP::Command& command);
/// Returns a mutable reference to the renderer. /// Returns a mutable reference to the renderer.
[[nodiscard]] VideoCore::RendererBase& Renderer(); [[nodiscard]] VideoCore::RendererBase& Renderer();
@ -99,6 +104,23 @@ public:
void ReportLoadingProgramID(u64 program_ID); void ReportLoadingProgramID(u64 program_ID);
/// Waits for all pending GPU commands to complete.
/// This should ONLY be called in critical sections where game logic depends on GPU results.
/// Normal rendering does not require this call - it happens asynchronously.
/// Examples: Memory reads after transfers, register reads that reflect GPU state
void WaitForGPUCompletion();
/// Check if GPU command queue is idle (non-blocking check)
[[nodiscard]] bool IsGPUCommandQueueIdle() const;
/// Signal GPU to flush pending work (non-blocking).
/// Used at frame boundaries where timing thread cannot block.
void SignalGPUFlush();
// Allow GPUCommandQueue to access implementation details
struct Impl;
std::unique_ptr<Impl> impl;
private: private:
void SubmitCmdList(u32 index); void SubmitCmdList(u32 index);
@ -113,12 +135,12 @@ private:
template <class Archive> template <class Archive>
void serialize(Archive& ar, const u32 file_version); void serialize(Archive& ar, const u32 file_version);
friend class GPUCommandQueue; // Allow access to impl for rasterizer mutex
std::unique_ptr<RightEyeDisabler> right_eye_disabler; std::unique_ptr<RightEyeDisabler> right_eye_disabler;
private: private:
friend class RightEyeDisabler; friend class RightEyeDisabler;
struct Impl;
std::unique_ptr<Impl> impl;
PAddr VirtualToPhysicalAddress(VAddr addr); PAddr VirtualToPhysicalAddress(VAddr addr);
}; };

View File

@ -6,12 +6,15 @@
#include <mutex> #include <mutex>
#include <thread> #include <thread>
#include "common/logging/log.h" #include "common/logging/log.h"
#include "core/frontend/emu_window.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/gpu_command_queue.h" #include "video_core/gpu_command_queue.h"
#include "video_core/gpu_impl.h"
namespace VideoCore { namespace VideoCore {
GPUCommandQueue::GPUCommandQueue(GPU& gpu) : gpu{gpu} { GPUCommandQueue::GPUCommandQueue(GPU& gpu, std::unique_ptr<Frontend::GraphicsContext> context)
: gpu{gpu}, graphics_context{std::move(context)} {
worker_thread = std::make_unique<std::thread>([this] { ProcessCommandQueue(); }); worker_thread = std::make_unique<std::thread>([this] { ProcessCommandQueue(); });
} }
@ -33,6 +36,17 @@ void GPUCommandQueue::WaitForIdle() {
idle_cv.wait(lock, [this] { return is_idle; }); idle_cv.wait(lock, [this] { return is_idle; });
} }
void GPUCommandQueue::SignalFlush() {
// Non-blocking signal that GPU should flush pending work
// Used at frame boundaries where we can't block the timing thread
{
std::lock_guard<std::mutex> lock(queue_mutex);
// Just ensure the worker is awake to process any remaining commands
// Don't wait for completion
}
queue_cv.notify_one();
}
void GPUCommandQueue::Shutdown() { void GPUCommandQueue::Shutdown() {
{ {
std::lock_guard<std::mutex> lock(queue_mutex); std::lock_guard<std::mutex> lock(queue_mutex);
@ -51,6 +65,9 @@ bool GPUCommandQueue::IsIdle() const {
} }
void GPUCommandQueue::ProcessCommandQueue() { void GPUCommandQueue::ProcessCommandQueue() {
// Execute queued commands on a dedicated worker thread.
// Rasterizer access is protected by a mutex to ensure thread safety.
while (true) { while (true) {
Service::GSP::Command command; Service::GSP::Command command;
bool has_command = false; bool has_command = false;
@ -72,8 +89,10 @@ void GPUCommandQueue::ProcessCommandQueue() {
} }
} }
// Process the command outside the lock - no artificial delays // Process the command outside the queue lock but with rasterizer lock held
if (has_command) { if (has_command) {
// Hold rasterizer mutex while executing to prevent races with main thread
std::lock_guard<std::mutex> rasterizer_lock(gpu.impl->rasterizer_mutex);
gpu.ExecuteCommand(command); gpu.ExecuteCommand(command);
// Check if queue is now idle after processing this command // Check if queue is now idle after processing this command

View File

@ -13,35 +13,49 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "core/hle/service/gsp/gsp_gpu.h" #include "core/hle/service/gsp/gsp_gpu.h"
namespace Frontend {
class GraphicsContext;
}
namespace VideoCore { namespace VideoCore {
class GPU; class GPU;
/** /**
* GPU Command Queue for asynchronous GPU command processing. * GPU Command Queue for asynchronous GPU command processing.
* Processes GPU commands on a dedicated worker thread, similar to real 3DS hardware. * Processes GPU commands on a dedicated worker thread with shared OpenGL context.
* *
* Design principles: * Design principles:
* - No artificial delays or busy-waiting * - Main thread queues GPU commands without blocking
* - Worker thread sleeps when queue is empty (OS scheduler handles CPU allocation) * - Worker thread executes with shared GL context (context sharing via frontend)
* - Logic thread not blocked when rendering * - Rasterizer cache protected by mutex for thread safety
* - Efficient synchronization with condition variables * - Game logic runs parallel to GPU work, enabling dynamic FPS
*
* Why this works:
* - OpenGL supports context sharing: worker thread gets shared context
* - GPU objects (shaders, textures) are shared across contexts
* - Rasterizer mutex prevents cache races
* - Game logic only waits when explicitly reading GPU results
*/ */
class GPUCommandQueue { class GPUCommandQueue {
public: public:
explicit GPUCommandQueue(GPU& gpu); explicit GPUCommandQueue(GPU& gpu, std::unique_ptr<Frontend::GraphicsContext> context);
~GPUCommandQueue(); ~GPUCommandQueue();
/// Queue a GPU command for processing /// Queue a GPU command for processing
void QueueCommand(const Service::GSP::Command& command); void QueueCommand(const Service::GSP::Command& command);
/// Wait for all queued commands to be processed /// Wait for all queued commands to be processed (BLOCKING - use sparingly)
void WaitForIdle(); void WaitForIdle();
/// Non-blocking flush: Signals GPU to complete pending work but doesn't wait
/// Used for frame boundaries where we can't block the timing thread
void SignalFlush();
/// Shutdown the command queue and worker thread /// Shutdown the command queue and worker thread
void Shutdown(); void Shutdown();
/// Check if the queue is idle /// Check if the queue is idle (non-blocking check)
[[nodiscard]] bool IsIdle() const; [[nodiscard]] bool IsIdle() const;
private: private:
@ -49,6 +63,7 @@ private:
void ProcessCommandQueue(); void ProcessCommandQueue();
GPU& gpu; GPU& gpu;
std::unique_ptr<Frontend::GraphicsContext> graphics_context;
std::queue<Service::GSP::Command> command_queue; std::queue<Service::GSP::Command> command_queue;
mutable std::mutex queue_mutex; mutable std::mutex queue_mutex;
std::condition_variable queue_cv; std::condition_variable queue_cv;

View File

@ -13,9 +13,9 @@
#include "core/hle/service/plgldr/plgldr.h" #include "core/hle/service/plgldr/plgldr.h"
#include "video_core/debug_utils/debug_utils.h" #include "video_core/debug_utils/debug_utils.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/gpu_command_queue.h"
#include "video_core/gpu_debugger.h" #include "video_core/gpu_debugger.h"
#include "video_core/gpu_impl.h" #include "video_core/gpu_impl.h"
#include "video_core/gpu_command_queue.h"
#include "video_core/pica/pica_core.h" #include "video_core/pica/pica_core.h"
#include "video_core/pica/regs_lcd.h" #include "video_core/pica/regs_lcd.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
@ -23,6 +23,8 @@
#include "video_core/right_eye_disabler.h" #include "video_core/right_eye_disabler.h"
#include "video_core/video_core.h" #include "video_core/video_core.h"
#include <mutex>
namespace VideoCore { namespace VideoCore {
struct GPU::Impl { struct GPU::Impl {
Core::Timing& timing; Core::Timing& timing;
@ -38,6 +40,10 @@ struct GPU::Impl {
Core::TimingEventType* vblank_event; Core::TimingEventType* vblank_event;
Service::GSP::InterruptHandler signal_interrupt; Service::GSP::InterruptHandler signal_interrupt;
// Mutex to protect rasterizer access when async GPU is enabled
// Ensures cache consistency when accessed from multiple threads
mutable std::mutex rasterizer_mutex;
explicit Impl(Core::System& system, Frontend::EmuWindow& emu_window, explicit Impl(Core::System& system, Frontend::EmuWindow& emu_window,
Frontend::EmuWindow* secondary_window) Frontend::EmuWindow* secondary_window)
: timing{system.CoreTiming()}, system{system}, memory{system.Memory()}, : timing{system.CoreTiming()}, system{system}, memory{system.Memory()},