diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index f8886e3ff..3af4acba6 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -2081,6 +2081,11 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf // check for `prepareFlip` packet cmdbuf += size - 64; ASSERT_MSG(cmdbuf[0] == 0xc03e1000, "Can't find `prepareFlip` packet"); + // PS4 returns 0x80d11080 instead of crashing + // if (cmdbuf[0] != 0xc03e1000) { + // LOG_ERROR(Lib_GnmDriver, "Can't find `prepareFlip` packet"); + // return 0x80d11080; // SCE_GNM_ERROR_SUBMISSION_AND_FLIP_FAILED_INVALID_COMMAND_BUFFER + // } std::array backup{}; std::memcpy(backup.data(), cmdbuf, backup.size() * sizeof(decltype(backup)::value_type)); @@ -2089,15 +2094,14 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf "Invalid flip packet"); ASSERT_MSG(buf_idx != 0xffff'ffffu, "Invalid VO buffer index"); - const s32 flip_result = VideoOut::sceVideoOutSubmitEopFlip(vo_handle, buf_idx, flip_mode, - flip_arg, nullptr /*unk*/); + const s32 flip_result = liverpool->ReserveFlip(); if (flip_result != 0) { if (flip_result == 0x80290012) { LOG_ERROR(Lib_GnmDriver, "Flip queue is full"); return 0x80d11081; } else { - LOG_ERROR(Lib_GnmDriver, "Flip request failed"); - return flip_result; + LOG_ERROR(Lib_GnmDriver, "Flip request failed with {:#x}", flip_result); + return 0x80d11082; // SCE_GNM_ERROR_SUBMISSION_AND_FLIP_FAILED_REQUEST_FAILED } } @@ -2169,6 +2173,14 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, u32* dcb_gpu_addrs vo_handle, buf_idx, flip_mode, flip_arg); } +// Shared submission loop. When flip has a value, it is associated with the +// last command buffer in the batch so the flip triggers after the final +// command buffer completes. +static s32 SubmitCommandBuffersInternal(u32 count, const u32* dcb_gpu_addrs[], + u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[], + u32* ccb_sizes_in_bytes, + std::optional flip); + s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload( u32 workload, u32 count, u32* dcb_gpu_addrs[], u32* dcb_sizes_in_bytes, u32* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes, u32 vo_handle, u32 buf_idx, u32 flip_mode, s64 flip_arg) { @@ -2183,9 +2195,10 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload( return patch_result; } - return sceGnmSubmitCommandBuffers(count, const_cast(dcb_gpu_addrs), - dcb_sizes_in_bytes, const_cast(ccb_gpu_addrs), - ccb_sizes_in_bytes); + return SubmitCommandBuffersInternal(count, const_cast(dcb_gpu_addrs), + dcb_sizes_in_bytes, const_cast(ccb_gpu_addrs), + ccb_sizes_in_bytes, + AmdGpu::Liverpool::FlipRequest{buf_idx, flip_arg}); } int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count, @@ -2193,6 +2206,14 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count, u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[], u32* ccb_sizes_in_bytes) { + return SubmitCommandBuffersInternal(count, dcb_gpu_addrs, dcb_sizes_in_bytes, ccb_gpu_addrs, + ccb_sizes_in_bytes, std::nullopt); +} + +static s32 SubmitCommandBuffersInternal(u32 count, const u32* dcb_gpu_addrs[], + u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[], + u32* ccb_sizes_in_bytes, + std::optional flip) { HLE_TRACE; LOG_DEBUG(Lib_GnmDriver, "called"); @@ -2288,7 +2309,9 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count, .base_addr = reinterpret_cast(ccb), }); } - liverpool->SubmitGfx(dcb_span, ccb_span); + // Associate the flip with the last command buffer in the batch. + const bool is_last = (cbpair == count - 1); + liverpool->SubmitGfx(dcb_span, ccb_span, is_last ? flip : std::nullopt); } return ORBIS_OK; diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 9db70569b..0288308b0 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -55,17 +55,27 @@ int VideoOutDriver::Open(const ServiceThreadParams* params) { return ORBIS_VIDEO_OUT_ERROR_RESOURCE_BUSY; } main_port.is_open = true; - liverpool->SetVoPort(&main_port); + main_port.flip_status.gc_queue_num = 0; + main_port.flip_status.flip_pending_num = 0; + liverpool->SetVideoOut(&main_port, this); return 1; } void VideoOutDriver::Close(s32 handle) { + // Drain all pending GPU submissions before closing. + // Must be done before taking mutex since the GPU thread needs it to + // enqueue flip requests. + liverpool->WaitGpuIdle(); + std::scoped_lock lock{mutex}; main_port.is_open = false; main_port.flip_rate = 0; main_port.prev_index = -1; + main_port.flip_status.gc_queue_num = 0; + main_port.flip_status.flip_pending_num = 0; ASSERT(main_port.flip_events.empty()); + liverpool->SetVideoOut(nullptr, nullptr); } VideoOutPort* VideoOutDriver::GetPort(int handle) { @@ -220,8 +230,7 @@ void VideoOutDriver::DrawLastFrame() { } } -bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, - bool is_eop /*= false*/) { +bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) { { std::unique_lock lock{port->port_mutex}; if (index != -1 && port->flip_status.flip_pending_num > 16) { @@ -229,23 +238,20 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, return false; } - if (is_eop) { - ++port->flip_status.gc_queue_num; - } - ++port->flip_status.flip_pending_num; // integral GPU and CPU pending flips counter + ++port->flip_status.flip_pending_num; port->flip_status.submit_tsc = Libraries::Kernel::sceKernelReadTsc(); } - if (!is_eop) { - // Non EOP flips can arrive from any thread so ask GPU thread to perform them - liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, is_eop); }); - } else { - SubmitFlipInternal(port, index, flip_arg, is_eop); - } + // CPU flips can arrive from any thread so ask GPU thread to perform them + liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, false); }); return true; } +void VideoOutDriver::EnqueueFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) { + SubmitFlipInternal(port, index, flip_arg, is_eop); +} + void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) { Vulkan::Frame* frame; if (index == -1) { diff --git a/src/core/libraries/videoout/driver.h b/src/core/libraries/videoout/driver.h index 96bd58500..7115cdb94 100644 --- a/src/core/libraries/videoout/driver.h +++ b/src/core/libraries/videoout/driver.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -88,7 +88,11 @@ public: const BufferAttribute* attribute); int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex); - bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false); + bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg); + + // Push a frame to the presenter. Called by Liverpool (GPU thread) when + // a submission with an associated flip completes. + void EnqueueFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop); private: struct Request { diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp index 7714eb2b5..6da34c3db 100644 --- a/src/core/libraries/videoout/video_out.cpp +++ b/src/core/libraries/videoout/video_out.cpp @@ -10,7 +10,6 @@ #include "core/libraries/videoout/driver.h" #include "core/libraries/videoout/video_out.h" #include "core/libraries/videoout/videoout_error.h" -#include "core/platform.h" #include "video_core/renderer_vulkan/vk_presenter.h" extern std::unique_ptr presenter; @@ -342,23 +341,6 @@ s32 PS4_SYSV_ABI sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_a return 16; } -s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, s64 flip_arg, void** unk) { - auto* port = driver->GetPort(handle); - if (!port) { - return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; - } - - Platform::IrqC::Instance()->RegisterOnce( - Platform::InterruptId::GfxFlip, [=](Platform::InterruptId irq) { - ASSERT_MSG(irq == Platform::InterruptId::GfxFlip, "An unexpected IRQ occured"); - ASSERT_MSG(port->buffer_labels[buf_id] == 1, "Out of order flip IRQ"); - const auto result = driver->SubmitFlip(port, buf_id, flip_arg, true); - ASSERT_MSG(result, "EOP flip submission failed"); - }); - - return ORBIS_OK; -} - s32 PS4_SYSV_ABI sceVideoOutGetDeviceCapabilityInfo( s32 handle, SceVideoOutDeviceCapabilityInfo* pDeviceCapabilityInfo) { pDeviceCapabilityInfo->capability = 0; diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h index 09b79e85d..c831f8846 100644 --- a/src/core/libraries/videoout/video_out.h +++ b/src/core/libraries/videoout/video_out.h @@ -138,9 +138,6 @@ s32 PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::OrbisKernelEvent* ev, s64 s32 PS4_SYSV_ABI sceVideoOutColorSettingsSetGamma(SceVideoOutColorSettings* settings, float gamma); s32 PS4_SYSV_ABI sceVideoOutAdjustColor(s32 handle, const SceVideoOutColorSettings* settings); -// Internal system functions -s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, s64 flip_arg, void** unk); - void RegisterLib(Core::Loader::SymbolsResolver* sym); } // namespace Libraries::VideoOut diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 0648df922..6ec4050dc 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -10,7 +10,9 @@ #include "core/debug_state.h" #include "core/emulator_settings.h" #include "core/libraries/kernel/process.h" +#include "core/libraries/kernel/time.h" #include "core/libraries/videoout/driver.h" +#include "core/libraries/videoout/videoout_error.h" #include "core/memory.h" #include "core/platform.h" #include "video_core/amdgpu/liverpool.h" @@ -120,19 +122,37 @@ void Liverpool::Process(std::stop_token stoken) { if (queue.submits.empty()) { continue; } - task = queue.submits.front(); + task = queue.submits.front().task; } task.resume(); if (task.done()) { + std::optional flip; + { + std::scoped_lock lock{queue.m_access}; + flip = std::move(queue.submits.front().flip); + queue.submits.pop(); + } + task.destroy(); - std::scoped_lock lock{queue.m_access}; - queue.submits.pop(); - --num_submits; - std::scoped_lock lock2{submit_mutex}; - submit_cv.notify_all(); + { + std::scoped_lock lock2{submit_mutex}; + submit_cv.notify_all(); + } + + // Perform flip after the submission completes. + auto* port = vo_port.load(std::memory_order_acquire); + auto* drv = vo_driver.load(std::memory_order_acquire); + if (flip && port && drv) { + ASSERT_MSG(flip->buf_id < Libraries::VideoOut::MaxDisplayBuffers, + "Invalid flip buffer index {}", flip->buf_id); + ASSERT_MSG(port->buffer_labels[flip->buf_id] == 1, "Out of order flip IRQ"); + drv->EnqueueFlip(port, flip->buf_id, flip->flip_arg, true); + } else if (flip) { + LOG_WARNING(Lib_GnmDriver, "EOP flip dropped — VideoOut port is not available"); + } } } @@ -263,9 +283,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spandata_block[0]) { case PM4CmdNop::PayloadType::PatchedFlip: { - // There is no evidence that GPU CP drives flip events by parsing - // special NOP packets. For convenience lets assume that it does. - Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip); + // Flip is performed when the submission completes, not here. break; } case PM4CmdNop::PayloadType::DebugMarkerPush: { @@ -811,9 +829,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spanAddress(); - if (vo_port->IsVoLabel(wait_addr) && + auto* port = vo_port.load(std::memory_order_acquire); + if (port && port->IsVoLabel(wait_addr) && num_submits == mapped_queues[GfxQueueId].submits.size()) { - vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); }); + port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); }); break; } while (!wait_reg_mem->Test(regs.reg_array)) { @@ -1199,7 +1218,23 @@ Liverpool::CmdBuffer Liverpool::CopyCmdBuffers(std::span dcb, std::sp return std::make_pair(dcb, ccb); } -void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { +s32 Liverpool::ReserveFlip() { + auto* port = vo_port.load(std::memory_order_acquire); + if (!port) { + return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE; + } + std::unique_lock lock{port->port_mutex}; + if (port->flip_status.flip_pending_num > 16) { + return ORBIS_VIDEO_OUT_ERROR_FLIP_QUEUE_FULL; + } + ++port->flip_status.gc_queue_num; + ++port->flip_status.flip_pending_num; + port->flip_status.submit_tsc = Libraries::Kernel::sceKernelReadTsc(); + return ORBIS_OK; +} + +void Liverpool::SubmitGfx(std::span dcb, std::span ccb, + std::optional flip) { auto& queue = mapped_queues[GfxQueueId]; if (EmulatorSettings.IsCopyGpuBuffers()) { @@ -1209,7 +1244,7 @@ void Liverpool::SubmitGfx(std::span dcb, std::span ccb) { auto task = ProcessGraphics(dcb, ccb); { std::scoped_lock lock{queue.m_access}; - queue.submits.emplace(task.handle); + queue.submits.push({task.handle, std::move(flip)}); } std::scoped_lock lk{submit_mutex}; @@ -1225,7 +1260,7 @@ void Liverpool::SubmitAsc(u32 gnm_vqid, std::span acb) { const auto& task = ProcessCompute(acb, vqid); { std::scoped_lock lock{queue.m_access}; - queue.submits.emplace(task.handle); + queue.submits.push({task.handle, std::nullopt}); } std::scoped_lock lk{submit_mutex}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index 249ea2ba6..a90db6c18 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -26,7 +27,8 @@ class Rasterizer; namespace Libraries::VideoOut { struct VideoOutPort; -} +class VideoOutDriver; +} // namespace Libraries::VideoOut namespace AmdGpu { @@ -67,7 +69,13 @@ public: explicit Liverpool(); ~Liverpool(); - void SubmitGfx(std::span dcb, std::span ccb); + struct FlipRequest { + u32 buf_id; + s64 flip_arg; + }; + + void SubmitGfx(std::span dcb, std::span ccb, + std::optional flip = std::nullopt); void SubmitAsc(u32 gnm_vqid, std::span acb); void SubmitDone() noexcept { @@ -87,10 +95,16 @@ public: return num_submits == 0; } - void SetVoPort(Libraries::VideoOut::VideoOutPort* port) { - vo_port = port; + void SetVideoOut(Libraries::VideoOut::VideoOutPort* port, + Libraries::VideoOut::VideoOutDriver* drv) { + vo_port.store(port, std::memory_order_release); + vo_driver.store(drv, std::memory_order_release); } + // Reserve a flip slot — called at submission time (game thread). + // Returns ORBIS_OK or a VideoOut error code. + s32 ReserveFlip(); + void BindRasterizer(Vulkan::Rasterizer* rasterizer_) { rasterizer = rasterizer_; } @@ -187,12 +201,17 @@ private: void Process(std::stop_token stoken); struct GpuQueue { + struct Submission { + Task::Handle task; + std::optional flip{}; + }; + std::mutex m_access{}; std::atomic dcb_buffer_offset; std::atomic ccb_buffer_offset; std::vector dcb_buffer; std::vector ccb_buffer; - std::queue submits{}; + std::queue submits{}; ComputeProgram cs_state{}; }; std::array mapped_queues{}; @@ -221,7 +240,8 @@ private: } cblock{}; Vulkan::Rasterizer* rasterizer{}; - Libraries::VideoOut::VideoOutPort* vo_port{}; + std::atomic vo_port{}; + std::atomic vo_driver{}; std::jthread process_thread{}; std::atomic num_submits{}; std::atomic num_commands{};