This commit is contained in:
Kravickas 2026-04-02 00:18:09 +03:00 committed by GitHub
commit 9dbcce2bd0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 131 additions and 64 deletions

View File

@ -2081,6 +2081,11 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf
// check for `prepareFlip` packet
cmdbuf += size - 64;
ASSERT_MSG(cmdbuf[0] == 0xc03e1000, "Can't find `prepareFlip` packet");
// PS4 returns 0x80d11080 instead of crashing
// if (cmdbuf[0] != 0xc03e1000) {
// LOG_ERROR(Lib_GnmDriver, "Can't find `prepareFlip` packet");
// return 0x80d11080; // SCE_GNM_ERROR_SUBMISSION_AND_FLIP_FAILED_INVALID_COMMAND_BUFFER
// }
std::array<u32, 7> backup{};
std::memcpy(backup.data(), cmdbuf, backup.size() * sizeof(decltype(backup)::value_type));
@ -2089,15 +2094,14 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf
"Invalid flip packet");
ASSERT_MSG(buf_idx != 0xffff'ffffu, "Invalid VO buffer index");
const s32 flip_result = VideoOut::sceVideoOutSubmitEopFlip(vo_handle, buf_idx, flip_mode,
flip_arg, nullptr /*unk*/);
const s32 flip_result = liverpool->ReserveFlip();
if (flip_result != 0) {
if (flip_result == 0x80290012) {
LOG_ERROR(Lib_GnmDriver, "Flip queue is full");
return 0x80d11081;
} else {
LOG_ERROR(Lib_GnmDriver, "Flip request failed");
return flip_result;
LOG_ERROR(Lib_GnmDriver, "Flip request failed with {:#x}", flip_result);
return 0x80d11082; // SCE_GNM_ERROR_SUBMISSION_AND_FLIP_FAILED_REQUEST_FAILED
}
}
@ -2169,6 +2173,14 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, u32* dcb_gpu_addrs
vo_handle, buf_idx, flip_mode, flip_arg);
}
// Shared submission loop. When flip has a value, it is associated with the
// last command buffer in the batch so the flip triggers after the final
// command buffer completes.
static s32 SubmitCommandBuffersInternal(u32 count, const u32* dcb_gpu_addrs[],
u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes,
std::optional<AmdGpu::Liverpool::FlipRequest> flip);
s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(
u32 workload, u32 count, u32* dcb_gpu_addrs[], u32* dcb_sizes_in_bytes, u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes, u32 vo_handle, u32 buf_idx, u32 flip_mode, s64 flip_arg) {
@ -2183,9 +2195,10 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(
return patch_result;
}
return sceGnmSubmitCommandBuffers(count, const_cast<const u32**>(dcb_gpu_addrs),
dcb_sizes_in_bytes, const_cast<const u32**>(ccb_gpu_addrs),
ccb_sizes_in_bytes);
return SubmitCommandBuffersInternal(count, const_cast<const u32**>(dcb_gpu_addrs),
dcb_sizes_in_bytes, const_cast<const u32**>(ccb_gpu_addrs),
ccb_sizes_in_bytes,
AmdGpu::Liverpool::FlipRequest{buf_idx, flip_arg});
}
int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
@ -2193,6 +2206,14 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
u32* dcb_sizes_in_bytes,
const u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes) {
return SubmitCommandBuffersInternal(count, dcb_gpu_addrs, dcb_sizes_in_bytes, ccb_gpu_addrs,
ccb_sizes_in_bytes, std::nullopt);
}
static s32 SubmitCommandBuffersInternal(u32 count, const u32* dcb_gpu_addrs[],
u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes,
std::optional<AmdGpu::Liverpool::FlipRequest> flip) {
HLE_TRACE;
LOG_DEBUG(Lib_GnmDriver, "called");
@ -2288,7 +2309,9 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
.base_addr = reinterpret_cast<uintptr_t>(ccb),
});
}
liverpool->SubmitGfx(dcb_span, ccb_span);
// Associate the flip with the last command buffer in the batch.
const bool is_last = (cbpair == count - 1);
liverpool->SubmitGfx(dcb_span, ccb_span, is_last ? flip : std::nullopt);
}
return ORBIS_OK;

View File

@ -55,17 +55,27 @@ int VideoOutDriver::Open(const ServiceThreadParams* params) {
return ORBIS_VIDEO_OUT_ERROR_RESOURCE_BUSY;
}
main_port.is_open = true;
liverpool->SetVoPort(&main_port);
main_port.flip_status.gc_queue_num = 0;
main_port.flip_status.flip_pending_num = 0;
liverpool->SetVideoOut(&main_port, this);
return 1;
}
void VideoOutDriver::Close(s32 handle) {
    // Let every outstanding GPU submission finish first. This has to happen
    // while the driver mutex is still free, because the GPU thread needs that
    // mutex to enqueue flip requests.
    liverpool->WaitGpuIdle();

    std::scoped_lock guard{mutex};

    // Put the port back into its closed, idle state.
    main_port.is_open = false;
    main_port.flip_rate = 0;
    main_port.prev_index = -1;

    // Clear the flip bookkeeping counters.
    auto& status = main_port.flip_status;
    status.gc_queue_num = 0;
    status.flip_pending_num = 0;

    ASSERT(main_port.flip_events.empty());

    // Detach both the port and this driver from the GPU front-end.
    liverpool->SetVideoOut(nullptr, nullptr);
}
VideoOutPort* VideoOutDriver::GetPort(int handle) {
@ -220,8 +230,7 @@ void VideoOutDriver::DrawLastFrame() {
}
}
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
bool is_eop /*= false*/) {
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) {
{
std::unique_lock lock{port->port_mutex};
if (index != -1 && port->flip_status.flip_pending_num > 16) {
@ -229,23 +238,20 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
return false;
}
if (is_eop) {
++port->flip_status.gc_queue_num;
}
++port->flip_status.flip_pending_num; // integral GPU and CPU pending flips counter
++port->flip_status.flip_pending_num;
port->flip_status.submit_tsc = Libraries::Kernel::sceKernelReadTsc();
}
if (!is_eop) {
// Non EOP flips can arrive from any thread so ask GPU thread to perform them
liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, is_eop); });
} else {
SubmitFlipInternal(port, index, flip_arg, is_eop);
}
// CPU flips can arrive from any thread so ask GPU thread to perform them
liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, false); });
return true;
}
// Hands a flip straight to SubmitFlipInternal with all arguments unchanged.
//
// Unlike SubmitFlip, this function itself does not check the pending-flip count,
// does not update flip_status, and does not go through liverpool->SendCommand;
// the call is made directly on the calling thread.
// NOTE(review): callers are presumably expected to have reserved the flip slot
// beforehand (see Liverpool::ReserveFlip) — confirm.
void VideoOutDriver::EnqueueFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) {
SubmitFlipInternal(port, index, flip_arg, is_eop);
}
void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) {
Vulkan::Frame* frame;
if (index == -1) {

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
@ -88,7 +88,11 @@ public:
const BufferAttribute* attribute);
int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex);
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg);
// Push a frame to the presenter. Called by Liverpool (GPU thread) when
// a submission with an associated flip completes.
void EnqueueFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop);
private:
struct Request {

View File

@ -10,7 +10,6 @@
#include "core/libraries/videoout/driver.h"
#include "core/libraries/videoout/video_out.h"
#include "core/libraries/videoout/videoout_error.h"
#include "core/platform.h"
#include "video_core/renderer_vulkan/vk_presenter.h"
extern std::unique_ptr<Vulkan::Presenter> presenter;
@ -342,23 +341,6 @@ s32 PS4_SYSV_ABI sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_a
return 16;
}
// Registers a one-shot GfxFlip interrupt callback that submits an EOP flip for
// `buf_id` on the port identified by `handle`.
//
// handle   - VideoOut handle, resolved through driver->GetPort().
// buf_id   - display buffer index; indexes port->buffer_labels and is passed to SubmitFlip.
// mode     - not used by this function.
// flip_arg - forwarded to SubmitFlip.
// unk      - not used by this function.
//
// Returns ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE when no port matches `handle`,
// otherwise ORBIS_OK. The flip itself is only submitted later, inside the callback.
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, s64 flip_arg, void** unk) {
auto* port = driver->GetPort(handle);
if (!port) {
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
}
// The callback runs after this function has returned, so everything it needs
// (port, buf_id, flip_arg) is captured by value.
Platform::IrqC::Instance()->RegisterOnce(
Platform::InterruptId::GfxFlip, [=](Platform::InterruptId irq) {
ASSERT_MSG(irq == Platform::InterruptId::GfxFlip, "An unexpected IRQ occured");
// NOTE(review): buf_id is not range-checked before indexing buffer_labels.
ASSERT_MSG(port->buffer_labels[buf_id] == 1, "Out of order flip IRQ");
const auto result = driver->SubmitFlip(port, buf_id, flip_arg, true);
ASSERT_MSG(result, "EOP flip submission failed");
});
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceVideoOutGetDeviceCapabilityInfo(
s32 handle, SceVideoOutDeviceCapabilityInfo* pDeviceCapabilityInfo) {
pDeviceCapabilityInfo->capability = 0;

View File

@ -138,9 +138,6 @@ s32 PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::OrbisKernelEvent* ev, s64
s32 PS4_SYSV_ABI sceVideoOutColorSettingsSetGamma(SceVideoOutColorSettings* settings, float gamma);
s32 PS4_SYSV_ABI sceVideoOutAdjustColor(s32 handle, const SceVideoOutColorSettings* settings);
// Internal system functions
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, s64 flip_arg, void** unk);
void RegisterLib(Core::Loader::SymbolsResolver* sym);
} // namespace Libraries::VideoOut

View File

@ -10,7 +10,9 @@
#include "core/debug_state.h"
#include "core/emulator_settings.h"
#include "core/libraries/kernel/process.h"
#include "core/libraries/kernel/time.h"
#include "core/libraries/videoout/driver.h"
#include "core/libraries/videoout/videoout_error.h"
#include "core/memory.h"
#include "core/platform.h"
#include "video_core/amdgpu/liverpool.h"
@ -120,19 +122,37 @@ void Liverpool::Process(std::stop_token stoken) {
if (queue.submits.empty()) {
continue;
}
task = queue.submits.front();
task = queue.submits.front().task;
}
task.resume();
if (task.done()) {
std::optional<FlipRequest> flip;
{
std::scoped_lock lock{queue.m_access};
flip = std::move(queue.submits.front().flip);
queue.submits.pop();
}
task.destroy();
std::scoped_lock lock{queue.m_access};
queue.submits.pop();
--num_submits;
std::scoped_lock lock2{submit_mutex};
submit_cv.notify_all();
{
std::scoped_lock lock2{submit_mutex};
submit_cv.notify_all();
}
// Perform flip after the submission completes.
auto* port = vo_port.load(std::memory_order_acquire);
auto* drv = vo_driver.load(std::memory_order_acquire);
if (flip && port && drv) {
ASSERT_MSG(flip->buf_id < Libraries::VideoOut::MaxDisplayBuffers,
"Invalid flip buffer index {}", flip->buf_id);
ASSERT_MSG(port->buffer_labels[flip->buf_id] == 1, "Out of order flip IRQ");
drv->EnqueueFlip(port, flip->buf_id, flip->flip_arg, true);
} else if (flip) {
LOG_WARNING(Lib_GnmDriver, "EOP flip dropped — VideoOut port is not available");
}
}
}
@ -263,9 +283,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
switch (nop->data_block[0]) {
case PM4CmdNop::PayloadType::PatchedFlip: {
// There is no evidence that GPU CP drives flip events by parsing
// special NOP packets. For convenience lets assume that it does.
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
// Flip is performed when the submission completes, not here.
break;
}
case PM4CmdNop::PayloadType::DebugMarkerPush: {
@ -811,9 +829,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
// there are no other submits to yield to we can sleep the thread
// instead and allow other tasks to run.
const u64* wait_addr = wait_reg_mem->Address<u64*>();
if (vo_port->IsVoLabel(wait_addr) &&
auto* port = vo_port.load(std::memory_order_acquire);
if (port && port->IsVoLabel(wait_addr) &&
num_submits == mapped_queues[GfxQueueId].submits.size()) {
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); });
port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); });
break;
}
while (!wait_reg_mem->Test(regs.reg_array)) {
@ -1199,7 +1218,23 @@ Liverpool::CmdBuffer Liverpool::CopyCmdBuffers(std::span<const u32> dcb, std::sp
return std::make_pair(dcb, ccb);
}
void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
s32 Liverpool::ReserveFlip() {
    // Threshold on already-pending flips above which a new reservation is refused.
    constexpr int pending_flip_threshold = 16;

    // Snapshot the currently attached port; it is null when none is attached.
    auto* const out_port = vo_port.load(std::memory_order_acquire);
    if (out_port == nullptr) {
        return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
    }

    std::unique_lock guard{out_port->port_mutex};
    auto& status = out_port->flip_status;
    if (status.flip_pending_num > pending_flip_threshold) {
        return ORBIS_VIDEO_OUT_ERROR_FLIP_QUEUE_FULL;
    }

    // Account for the flip now, at submission time, and stamp when it was requested.
    ++status.gc_queue_num;
    ++status.flip_pending_num;
    status.submit_tsc = Libraries::Kernel::sceKernelReadTsc();
    return ORBIS_OK;
}
void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb,
std::optional<FlipRequest> flip) {
auto& queue = mapped_queues[GfxQueueId];
if (EmulatorSettings.IsCopyGpuBuffers()) {
@ -1209,7 +1244,7 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
auto task = ProcessGraphics(dcb, ccb);
{
std::scoped_lock lock{queue.m_access};
queue.submits.emplace(task.handle);
queue.submits.push({task.handle, std::move(flip)});
}
std::scoped_lock lk{submit_mutex};
@ -1225,7 +1260,7 @@ void Liverpool::SubmitAsc(u32 gnm_vqid, std::span<const u32> acb) {
const auto& task = ProcessCompute(acb, vqid);
{
std::scoped_lock lock{queue.m_access};
queue.submits.emplace(task.handle);
queue.submits.push({task.handle, std::nullopt});
}
std::scoped_lock lk{submit_mutex};

View File

@ -7,6 +7,7 @@
#include <coroutine>
#include <exception>
#include <mutex>
#include <optional>
#include <semaphore>
#include <span>
#include <thread>
@ -26,7 +27,8 @@ class Rasterizer;
namespace Libraries::VideoOut {
struct VideoOutPort;
}
class VideoOutDriver;
} // namespace Libraries::VideoOut
namespace AmdGpu {
@ -67,7 +69,13 @@ public:
explicit Liverpool();
~Liverpool();
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
// Describes a flip to perform once the graphics submission it is attached to
// has finished. Passed to SubmitGfx and stored alongside the queued task.
struct FlipRequest {
// Display buffer index. When the flip is performed it is checked against
// MaxDisplayBuffers and used to index the port's buffer_labels.
u32 buf_id;
// Argument passed through unchanged to VideoOutDriver::EnqueueFlip.
s64 flip_arg;
};
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb,
std::optional<FlipRequest> flip = std::nullopt);
void SubmitAsc(u32 gnm_vqid, std::span<const u32> acb);
void SubmitDone() noexcept {
@ -87,10 +95,16 @@ public:
return num_submits == 0;
}
void SetVoPort(Libraries::VideoOut::VideoOutPort* port) {
vo_port = port;
void SetVideoOut(Libraries::VideoOut::VideoOutPort* port,
Libraries::VideoOut::VideoOutDriver* drv) {
vo_port.store(port, std::memory_order_release);
vo_driver.store(drv, std::memory_order_release);
}
// Reserve a flip slot — called at submission time (game thread).
// Returns ORBIS_OK or a VideoOut error code.
s32 ReserveFlip();
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
rasterizer = rasterizer_;
}
@ -187,12 +201,17 @@ private:
void Process(std::stop_token stoken);
struct GpuQueue {
// One queued unit of work: the coroutine to run plus an optional flip to
// perform after it has finished.
struct Submission {
// Coroutine handle that Process() resumes and destroys once it reports done.
Task::Handle task;
// Flip to perform after `task` completes; empty when the submission has no
// associated flip (SubmitAsc always queues std::nullopt).
std::optional<FlipRequest> flip{};
};
std::mutex m_access{};
std::atomic<u32> dcb_buffer_offset;
std::atomic<u32> ccb_buffer_offset;
std::vector<u32> dcb_buffer;
std::vector<u32> ccb_buffer;
std::queue<Task::Handle> submits{};
std::queue<Submission> submits{};
ComputeProgram cs_state{};
};
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
@ -221,7 +240,8 @@ private:
} cblock{};
Vulkan::Rasterizer* rasterizer{};
Libraries::VideoOut::VideoOutPort* vo_port{};
std::atomic<Libraries::VideoOut::VideoOutPort*> vo_port{};
std::atomic<Libraries::VideoOut::VideoOutDriver*> vo_driver{};
std::jthread process_thread{};
std::atomic<u32> num_submits{};
std::atomic<u32> num_commands{};