diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index d695b6720..02d9cb879 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -147,6 +147,7 @@ void ConfigureGraphics::SetConfiguration() { ui->disable_spirv_optimizer->setChecked(Settings::values.disable_spirv_optimizer.GetValue()); ui->toggle_async_shaders->setChecked(Settings::values.async_shader_compilation.GetValue()); ui->toggle_async_present->setChecked(Settings::values.async_presentation.GetValue()); + ui->toggle_async_gpu->setChecked(Settings::values.async_gpu.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit.GetValue()); @@ -162,6 +163,8 @@ void ConfigureGraphics::ApplyConfiguration() { ui->toggle_async_shaders, async_shader_compilation); ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_presentation, ui->toggle_async_present, async_presentation); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_gpu, + ui->toggle_async_gpu, async_gpu); ConfigurationShared::ApplyPerGameSetting(&Settings::values.spirv_shader_gen, ui->spirv_shader_gen, spirv_shader_gen); ConfigurationShared::ApplyPerGameSetting(&Settings::values.disable_spirv_optimizer, @@ -203,6 +206,7 @@ void ConfigureGraphics::SetupPerGameUI() { Settings::values.async_shader_compilation.UsingGlobal()); ui->widget_texture_sampling->setEnabled(Settings::values.texture_sampling.UsingGlobal()); ui->toggle_async_present->setEnabled(Settings::values.async_presentation.UsingGlobal()); + ui->toggle_async_gpu->setEnabled(Settings::values.async_gpu.UsingGlobal()); ui->graphics_api_combo->setEnabled(Settings::values.graphics_api.UsingGlobal()); ui->physical_device_combo->setEnabled(Settings::values.physical_device.UsingGlobal()); ui->delay_render_combo->setEnabled( @@ -243,6 +247,8 @@ void ConfigureGraphics::SetupPerGameUI() { async_shader_compilation); ConfigurationShared::SetColoredTristate( ui->toggle_async_present, Settings::values.async_presentation, async_presentation); + ConfigurationShared::SetColoredTristate(ui->toggle_async_gpu, Settings::values.async_gpu, + async_gpu); ConfigurationShared::SetColoredTristate(ui->spirv_shader_gen, Settings::values.spirv_shader_gen, spirv_shader_gen); ConfigurationShared::SetColoredTristate(ui->disable_spirv_optimizer, diff --git a/src/citra_qt/configuration/configure_graphics.h b/src/citra_qt/configuration/configure_graphics.h index 46d74dc19..4545a1b6f 100644 --- a/src/citra_qt/configuration/configure_graphics.h +++ b/src/citra_qt/configuration/configure_graphics.h @@ -41,6 +41,7 @@ private: ConfigurationShared::CheckState use_vsync; ConfigurationShared::CheckState async_shader_compilation; ConfigurationShared::CheckState async_presentation; + ConfigurationShared::CheckState async_gpu; ConfigurationShared::CheckState spirv_shader_gen; ConfigurationShared::CheckState disable_spirv_optimizer; std::unique_ptr ui; diff --git a/src/citra_qt/configuration/configure_graphics.ui b/src/citra_qt/configuration/configure_graphics.ui index 49f818129..949ffa8b7 100644 --- a/src/citra_qt/configuration/configure_graphics.ui +++ b/src/citra_qt/configuration/configure_graphics.ui @@ -241,6 +241,16 @@ + + + + <html><head/><body><p>Process GPU commands asynchronously on a separate thread, similar to real 3DS hardware. Improves performance in CPU-bound scenarios. Works with OpenGL and Software renderers (disabled with Vulkan due to rendering thread requirements).</p></body></html> + + + Enable async GPU + + + diff --git a/src/common/settings.h b/src/common/settings.h index a85ad3c0a..9d03288b7 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -511,6 +511,7 @@ struct Values { SwitchableSetting spirv_shader_gen{true, "spirv_shader_gen"}; SwitchableSetting disable_spirv_optimizer{true, "disable_spirv_optimizer"}; SwitchableSetting async_shader_compilation{false, "async_shader_compilation"}; + SwitchableSetting async_gpu{true, "async_gpu"}; SwitchableSetting async_presentation{true, "async_presentation"}; SwitchableSetting use_hw_shader{true, "use_hw_shader"}; SwitchableSetting use_disk_shader_cache{true, "use_disk_shader_cache"}; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 27076cbe4..4d30c1176 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -11,6 +11,8 @@ add_library(video_core STATIC debug_utils/debug_utils.h gpu.cpp gpu.h + gpu_command_queue.cpp + gpu_command_queue.h gpu_debugger.h gpu_impl.h pica_types.h diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 11516e5d6..bda786d9a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -5,12 +5,14 @@ #include "common/archives.h" #include "common/hacks/hack_manager.h" #include "common/microprofile.h" +#include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" #include "core/hle/service/gsp/gsp_gpu.h" #include "core/hle/service/plgldr/plgldr.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/gpu.h" +#include "video_core/gpu_command_queue.h" #include "video_core/gpu_debugger.h" #include "video_core/gpu_impl.h" #include "video_core/pica/pica_core.h" @@ -39,6 +41,14 @@ GPU::GPU(Core::System& system, Frontend::EmuWindow& emu_window, // Bind the rasterizer to the PICA GPU impl->pica.BindRasterizer(impl->rasterizer); + + // Initialize GPU command queue if async GPU is enabled. + // Note: Async GPU is disabled for Vulkan as it causes threading issues with command buffer + // recording. + if (Settings::values.async_gpu.GetValue() && + Settings::values.graphics_api.GetValue() != Settings::GraphicsAPI::Vulkan) { + impl->command_queue = std::make_unique(*this); + } } GPU::~GPU() = default; @@ -85,6 +95,15 @@ void GPU::ClearAll(bool flush) { } void GPU::Execute(const Service::GSP::Command& command) { + // If async GPU is enabled, queue the command; otherwise execute it directly + if (impl->command_queue) { + impl->command_queue->QueueCommand(command); + } else { + ExecuteCommand(command); + } +} + +void GPU::ExecuteCommand(const Service::GSP::Command& command) { using Service::GSP::CommandId; auto& regs = impl->pica.regs; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index c2d344a2c..e598f2c23 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -63,6 +63,9 @@ public: /// Executes the provided GSP command. void Execute(const Service::GSP::Command& command); + /// Executes a GPU command directly (internal use for async command processing). + void ExecuteCommand(const Service::GSP::Command& command); + /// Updates GPU display framebuffer configuration using the specified parameters. void SetBufferSwap(u32 screen_id, const Service::GSP::FrameBufferInfo& info); diff --git a/src/video_core/gpu_command_queue.cpp b/src/video_core/gpu_command_queue.cpp new file mode 100644 index 000000000..de637f7ff --- /dev/null +++ b/src/video_core/gpu_command_queue.cpp @@ -0,0 +1,91 @@ +// Copyright Citra Emulator Project / Azahar Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include "common/logging/log.h" +#include "video_core/gpu.h" +#include "video_core/gpu_command_queue.h" + +namespace VideoCore { + +GPUCommandQueue::GPUCommandQueue(GPU& gpu) : gpu{gpu} { + worker_thread = std::make_unique([this] { ProcessCommandQueue(); }); +} + +GPUCommandQueue::~GPUCommandQueue() { + Shutdown(); +} + +void GPUCommandQueue::QueueCommand(const Service::GSP::Command& command) { + { + std::lock_guard lock(queue_mutex); + command_queue.push(command); + is_idle = false; + } + queue_cv.notify_one(); +} + +void GPUCommandQueue::WaitForIdle() { + std::unique_lock lock(queue_mutex); + idle_cv.wait(lock, [this] { return is_idle; }); +} + +void GPUCommandQueue::Shutdown() { + { + std::lock_guard lock(queue_mutex); + shutdown_requested = true; + } + queue_cv.notify_one(); + + if (worker_thread && worker_thread->joinable()) { + worker_thread->join(); + } +} + +bool GPUCommandQueue::IsIdle() const { + std::lock_guard lock(queue_mutex); + return is_idle; +} + +void GPUCommandQueue::ProcessCommandQueue() { + while (true) { + Service::GSP::Command command; + bool has_command = false; + + { + std::unique_lock lock(queue_mutex); + + // Wait for commands or shutdown - no timeout, no artificial delays + queue_cv.wait(lock, [this] { return !command_queue.empty() || shutdown_requested; }); + + if (shutdown_requested && command_queue.empty()) { + break; + } + + if (!command_queue.empty()) { + command = command_queue.front(); + command_queue.pop(); + has_command = true; + } + } + + // Process the command outside the lock - no artificial delays + if (has_command) { + gpu.ExecuteCommand(command); + + // Check if queue is now idle after processing this command + { + std::lock_guard lock(queue_mutex); + if (command_queue.empty()) { + is_idle = true; + idle_cv.notify_all(); + } + } + } + } +} + +} // namespace VideoCore diff --git a/src/video_core/gpu_command_queue.h b/src/video_core/gpu_command_queue.h new file mode 100644 index 000000000..015c065c4 --- /dev/null +++ b/src/video_core/gpu_command_queue.h @@ -0,0 +1,61 @@ +// Copyright Citra Emulator Project / Azahar Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "core/hle/service/gsp/gsp_gpu.h" + +namespace VideoCore { + +class GPU; + +/** + * GPU Command Queue for asynchronous GPU command processing. + * Processes GPU commands on a dedicated worker thread, similar to real 3DS hardware. + * + * Design principles: + * - No artificial delays or busy-waiting + * - Worker thread sleeps when queue is empty (OS scheduler handles CPU allocation) + * - Logic thread not blocked when rendering + * - Efficient synchronization with condition variables + */ +class GPUCommandQueue { +public: + explicit GPUCommandQueue(GPU& gpu); + ~GPUCommandQueue(); + + /// Queue a GPU command for processing + void QueueCommand(const Service::GSP::Command& command); + + /// Wait for all queued commands to be processed + void WaitForIdle(); + + /// Shutdown the command queue and worker thread + void Shutdown(); + + /// Check if the queue is idle + [[nodiscard]] bool IsIdle() const; + +private: + /// Worker thread function - processes commands without artificial delays + void ProcessCommandQueue(); + + GPU& gpu; + std::queue command_queue; + mutable std::mutex queue_mutex; + std::condition_variable queue_cv; + std::condition_variable idle_cv; + std::unique_ptr worker_thread; + bool shutdown_requested{false}; + bool is_idle{true}; +}; + +} // namespace VideoCore diff --git a/src/video_core/gpu_impl.h b/src/video_core/gpu_impl.h index 015918fe1..073fa2917 100644 --- a/src/video_core/gpu_impl.h +++ b/src/video_core/gpu_impl.h @@ -15,6 +15,7 @@ #include "video_core/gpu.h" #include "video_core/gpu_debugger.h" #include "video_core/gpu_impl.h" +#include "video_core/gpu_command_queue.h" #include "video_core/pica/pica_core.h" #include "video_core/pica/regs_lcd.h" #include "video_core/renderer_base.h" @@ -33,6 +34,7 @@ struct GPU::Impl { std::unique_ptr renderer; RasterizerInterface* rasterizer; std::unique_ptr sw_blitter; + std::unique_ptr command_queue; Core::TimingEventType* vblank_event; Service::GSP::InterruptHandler signal_interrupt;