mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2026-04-02 11:03:34 -06:00
Merge ed87ea4d0d into deb8c66ffb
This commit is contained in:
commit
9dbcce2bd0
@ -2081,6 +2081,11 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf
|
||||
// check for `prepareFlip` packet
|
||||
cmdbuf += size - 64;
|
||||
ASSERT_MSG(cmdbuf[0] == 0xc03e1000, "Can't find `prepareFlip` packet");
|
||||
// PS4 returns 0x80d11080 instead of crashing
|
||||
// if (cmdbuf[0] != 0xc03e1000) {
|
||||
// LOG_ERROR(Lib_GnmDriver, "Can't find `prepareFlip` packet");
|
||||
// return 0x80d11080; // SCE_GNM_ERROR_SUBMISSION_AND_FLIP_FAILED_INVALID_COMMAND_BUFFER
|
||||
// }
|
||||
|
||||
std::array<u32, 7> backup{};
|
||||
std::memcpy(backup.data(), cmdbuf, backup.size() * sizeof(decltype(backup)::value_type));
|
||||
@ -2089,15 +2094,14 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf
|
||||
"Invalid flip packet");
|
||||
ASSERT_MSG(buf_idx != 0xffff'ffffu, "Invalid VO buffer index");
|
||||
|
||||
const s32 flip_result = VideoOut::sceVideoOutSubmitEopFlip(vo_handle, buf_idx, flip_mode,
|
||||
flip_arg, nullptr /*unk*/);
|
||||
const s32 flip_result = liverpool->ReserveFlip();
|
||||
if (flip_result != 0) {
|
||||
if (flip_result == 0x80290012) {
|
||||
LOG_ERROR(Lib_GnmDriver, "Flip queue is full");
|
||||
return 0x80d11081;
|
||||
} else {
|
||||
LOG_ERROR(Lib_GnmDriver, "Flip request failed");
|
||||
return flip_result;
|
||||
LOG_ERROR(Lib_GnmDriver, "Flip request failed with {:#x}", flip_result);
|
||||
return 0x80d11082; // SCE_GNM_ERROR_SUBMISSION_AND_FLIP_FAILED_REQUEST_FAILED
|
||||
}
|
||||
}
|
||||
|
||||
@ -2169,6 +2173,14 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, u32* dcb_gpu_addrs
|
||||
vo_handle, buf_idx, flip_mode, flip_arg);
|
||||
}
|
||||
|
||||
// Shared submission loop. When flip has a value, it is associated with the
|
||||
// last command buffer in the batch so the flip triggers after the final
|
||||
// command buffer completes.
|
||||
static s32 SubmitCommandBuffersInternal(u32 count, const u32* dcb_gpu_addrs[],
|
||||
u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[],
|
||||
u32* ccb_sizes_in_bytes,
|
||||
std::optional<AmdGpu::Liverpool::FlipRequest> flip);
|
||||
|
||||
s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(
|
||||
u32 workload, u32 count, u32* dcb_gpu_addrs[], u32* dcb_sizes_in_bytes, u32* ccb_gpu_addrs[],
|
||||
u32* ccb_sizes_in_bytes, u32 vo_handle, u32 buf_idx, u32 flip_mode, s64 flip_arg) {
|
||||
@ -2183,9 +2195,10 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(
|
||||
return patch_result;
|
||||
}
|
||||
|
||||
return sceGnmSubmitCommandBuffers(count, const_cast<const u32**>(dcb_gpu_addrs),
|
||||
dcb_sizes_in_bytes, const_cast<const u32**>(ccb_gpu_addrs),
|
||||
ccb_sizes_in_bytes);
|
||||
return SubmitCommandBuffersInternal(count, const_cast<const u32**>(dcb_gpu_addrs),
|
||||
dcb_sizes_in_bytes, const_cast<const u32**>(ccb_gpu_addrs),
|
||||
ccb_sizes_in_bytes,
|
||||
AmdGpu::Liverpool::FlipRequest{buf_idx, flip_arg});
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
|
||||
@ -2193,6 +2206,14 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
|
||||
u32* dcb_sizes_in_bytes,
|
||||
const u32* ccb_gpu_addrs[],
|
||||
u32* ccb_sizes_in_bytes) {
|
||||
return SubmitCommandBuffersInternal(count, dcb_gpu_addrs, dcb_sizes_in_bytes, ccb_gpu_addrs,
|
||||
ccb_sizes_in_bytes, std::nullopt);
|
||||
}
|
||||
|
||||
static s32 SubmitCommandBuffersInternal(u32 count, const u32* dcb_gpu_addrs[],
|
||||
u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[],
|
||||
u32* ccb_sizes_in_bytes,
|
||||
std::optional<AmdGpu::Liverpool::FlipRequest> flip) {
|
||||
HLE_TRACE;
|
||||
LOG_DEBUG(Lib_GnmDriver, "called");
|
||||
|
||||
@ -2288,7 +2309,9 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
|
||||
.base_addr = reinterpret_cast<uintptr_t>(ccb),
|
||||
});
|
||||
}
|
||||
liverpool->SubmitGfx(dcb_span, ccb_span);
|
||||
// Associate the flip with the last command buffer in the batch.
|
||||
const bool is_last = (cbpair == count - 1);
|
||||
liverpool->SubmitGfx(dcb_span, ccb_span, is_last ? flip : std::nullopt);
|
||||
}
|
||||
|
||||
return ORBIS_OK;
|
||||
|
||||
@ -55,17 +55,27 @@ int VideoOutDriver::Open(const ServiceThreadParams* params) {
|
||||
return ORBIS_VIDEO_OUT_ERROR_RESOURCE_BUSY;
|
||||
}
|
||||
main_port.is_open = true;
|
||||
liverpool->SetVoPort(&main_port);
|
||||
main_port.flip_status.gc_queue_num = 0;
|
||||
main_port.flip_status.flip_pending_num = 0;
|
||||
liverpool->SetVideoOut(&main_port, this);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Closes the main VideoOut port: waits for the GPU to go idle, resets the
// port's flip bookkeeping and detaches the port/driver pair from Liverpool.
// NOTE(review): `handle` is not referenced in this body; only `main_port`
// is affected.
void VideoOutDriver::Close(s32 handle) {
|
||||
// Drain all pending GPU submissions before closing.
|
||||
// Must be done before taking mutex since the GPU thread needs it to
|
||||
// enqueue flip requests.
|
||||
liverpool->WaitGpuIdle();
|
||||
|
||||
std::scoped_lock lock{mutex};
|
||||
|
||||
main_port.is_open = false;
|
||||
main_port.flip_rate = 0;
|
||||
main_port.prev_index = -1;
|
||||
// Clear both flip counters so no reservation appears outstanding.
|
||||
main_port.flip_status.gc_queue_num = 0;
|
||||
|
||||
main_port.flip_status.flip_pending_num = 0;
|
||||
|
||||
// No flip events may still be registered on the port at this point.
|
||||
ASSERT(main_port.flip_events.empty());
|
||||
// Detach last: Liverpool then observes null port and driver pointers.
|
||||
liverpool->SetVideoOut(nullptr, nullptr);
|
||||
}
|
||||
|
||||
VideoOutPort* VideoOutDriver::GetPort(int handle) {
|
||||
@ -220,8 +230,7 @@ void VideoOutDriver::DrawLastFrame() {
|
||||
}
|
||||
}
|
||||
|
||||
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||
bool is_eop /*= false*/) {
|
||||
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) {
|
||||
{
|
||||
std::unique_lock lock{port->port_mutex};
|
||||
if (index != -1 && port->flip_status.flip_pending_num > 16) {
|
||||
@ -229,23 +238,20 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_eop) {
|
||||
++port->flip_status.gc_queue_num;
|
||||
}
|
||||
++port->flip_status.flip_pending_num; // integral GPU and CPU pending flips counter
|
||||
++port->flip_status.flip_pending_num;
|
||||
port->flip_status.submit_tsc = Libraries::Kernel::sceKernelReadTsc();
|
||||
}
|
||||
|
||||
if (!is_eop) {
|
||||
// Non EOP flips can arrive from any thread so ask GPU thread to perform them
|
||||
liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, is_eop); });
|
||||
} else {
|
||||
SubmitFlipInternal(port, index, flip_arg, is_eop);
|
||||
}
|
||||
// CPU flips can arrive from any thread so ask GPU thread to perform them
|
||||
liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, false); });
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Public entry point that forwards directly to SubmitFlipInternal.
// It does not touch flip_status counters itself; for EOP flips those are
// incremented earlier by Liverpool::ReserveFlip.
void VideoOutDriver::EnqueueFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) {
|
||||
SubmitFlipInternal(port, index, flip_arg, is_eop);
|
||||
}
|
||||
|
||||
void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) {
|
||||
Vulkan::Frame* frame;
|
||||
if (index == -1) {
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
@ -88,7 +88,11 @@ public:
|
||||
const BufferAttribute* attribute);
|
||||
int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex);
|
||||
|
||||
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
|
||||
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg);
|
||||
|
||||
// Push a frame to the presenter. Called by Liverpool (GPU thread) when
|
||||
// a submission with an associated flip completes.
|
||||
void EnqueueFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop);
|
||||
|
||||
private:
|
||||
struct Request {
|
||||
|
||||
@ -10,7 +10,6 @@
|
||||
#include "core/libraries/videoout/driver.h"
|
||||
#include "core/libraries/videoout/video_out.h"
|
||||
#include "core/libraries/videoout/videoout_error.h"
|
||||
#include "core/platform.h"
|
||||
#include "video_core/renderer_vulkan/vk_presenter.h"
|
||||
|
||||
extern std::unique_ptr<Vulkan::Presenter> presenter;
|
||||
@ -342,23 +341,6 @@ s32 PS4_SYSV_ABI sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_a
|
||||
return 16;
|
||||
}
|
||||
|
||||
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, s64 flip_arg, void** unk) {
|
||||
auto* port = driver->GetPort(handle);
|
||||
if (!port) {
|
||||
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
Platform::IrqC::Instance()->RegisterOnce(
|
||||
Platform::InterruptId::GfxFlip, [=](Platform::InterruptId irq) {
|
||||
ASSERT_MSG(irq == Platform::InterruptId::GfxFlip, "An unexpected IRQ occured");
|
||||
ASSERT_MSG(port->buffer_labels[buf_id] == 1, "Out of order flip IRQ");
|
||||
const auto result = driver->SubmitFlip(port, buf_id, flip_arg, true);
|
||||
ASSERT_MSG(result, "EOP flip submission failed");
|
||||
});
|
||||
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceVideoOutGetDeviceCapabilityInfo(
|
||||
s32 handle, SceVideoOutDeviceCapabilityInfo* pDeviceCapabilityInfo) {
|
||||
pDeviceCapabilityInfo->capability = 0;
|
||||
|
||||
@ -138,9 +138,6 @@ s32 PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::OrbisKernelEvent* ev, s64
|
||||
s32 PS4_SYSV_ABI sceVideoOutColorSettingsSetGamma(SceVideoOutColorSettings* settings, float gamma);
|
||||
s32 PS4_SYSV_ABI sceVideoOutAdjustColor(s32 handle, const SceVideoOutColorSettings* settings);
|
||||
|
||||
// Internal system functions
|
||||
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, s64 flip_arg, void** unk);
|
||||
|
||||
void RegisterLib(Core::Loader::SymbolsResolver* sym);
|
||||
|
||||
} // namespace Libraries::VideoOut
|
||||
|
||||
@ -10,7 +10,9 @@
|
||||
#include "core/debug_state.h"
|
||||
#include "core/emulator_settings.h"
|
||||
#include "core/libraries/kernel/process.h"
|
||||
#include "core/libraries/kernel/time.h"
|
||||
#include "core/libraries/videoout/driver.h"
|
||||
#include "core/libraries/videoout/videoout_error.h"
|
||||
#include "core/memory.h"
|
||||
#include "core/platform.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
@ -120,19 +122,37 @@ void Liverpool::Process(std::stop_token stoken) {
|
||||
if (queue.submits.empty()) {
|
||||
continue;
|
||||
}
|
||||
task = queue.submits.front();
|
||||
task = queue.submits.front().task;
|
||||
}
|
||||
task.resume();
|
||||
|
||||
if (task.done()) {
|
||||
std::optional<FlipRequest> flip;
|
||||
{
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
flip = std::move(queue.submits.front().flip);
|
||||
queue.submits.pop();
|
||||
}
|
||||
|
||||
task.destroy();
|
||||
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
queue.submits.pop();
|
||||
|
||||
--num_submits;
|
||||
std::scoped_lock lock2{submit_mutex};
|
||||
submit_cv.notify_all();
|
||||
{
|
||||
std::scoped_lock lock2{submit_mutex};
|
||||
submit_cv.notify_all();
|
||||
}
|
||||
|
||||
// Perform flip after the submission completes.
|
||||
auto* port = vo_port.load(std::memory_order_acquire);
|
||||
auto* drv = vo_driver.load(std::memory_order_acquire);
|
||||
if (flip && port && drv) {
|
||||
ASSERT_MSG(flip->buf_id < Libraries::VideoOut::MaxDisplayBuffers,
|
||||
"Invalid flip buffer index {}", flip->buf_id);
|
||||
ASSERT_MSG(port->buffer_labels[flip->buf_id] == 1, "Out of order flip IRQ");
|
||||
drv->EnqueueFlip(port, flip->buf_id, flip->flip_arg, true);
|
||||
} else if (flip) {
|
||||
LOG_WARNING(Lib_GnmDriver, "EOP flip dropped — VideoOut port is not available");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -263,9 +283,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
|
||||
switch (nop->data_block[0]) {
|
||||
case PM4CmdNop::PayloadType::PatchedFlip: {
|
||||
// There is no evidence that GPU CP drives flip events by parsing
|
||||
// special NOP packets. For convenience lets assume that it does.
|
||||
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
|
||||
// Flip is performed when the submission completes, not here.
|
||||
break;
|
||||
}
|
||||
case PM4CmdNop::PayloadType::DebugMarkerPush: {
|
||||
@ -811,9 +829,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
|
||||
// there are no other submits to yield to we can sleep the thread
|
||||
// instead and allow other tasks to run.
|
||||
const u64* wait_addr = wait_reg_mem->Address<u64*>();
|
||||
if (vo_port->IsVoLabel(wait_addr) &&
|
||||
auto* port = vo_port.load(std::memory_order_acquire);
|
||||
if (port && port->IsVoLabel(wait_addr) &&
|
||||
num_submits == mapped_queues[GfxQueueId].submits.size()) {
|
||||
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); });
|
||||
port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); });
|
||||
break;
|
||||
}
|
||||
while (!wait_reg_mem->Test(regs.reg_array)) {
|
||||
@ -1199,7 +1218,23 @@ Liverpool::CmdBuffer Liverpool::CopyCmdBuffers(std::span<const u32> dcb, std::sp
|
||||
return std::make_pair(dcb, ccb);
|
||||
}
|
||||
|
||||
void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||
// Reserves a pending-flip slot on the currently attached VideoOut port.
// Returns:
//   ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE  - no port is attached,
//   ORBIS_VIDEO_OUT_ERROR_FLIP_QUEUE_FULL - more than 16 flips already pending,
//   ORBIS_OK                              - counters were incremented.
s32 Liverpool::ReserveFlip() {
|
||||
auto* port = vo_port.load(std::memory_order_acquire);
|
||||
if (!port) {
|
||||
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
|
||||
}
|
||||
// Counters are only read and modified while holding the port mutex.
|
||||
std::unique_lock lock{port->port_mutex};
|
||||
// Same limit as the check in VideoOutDriver::SubmitFlip.
|
||||
if (port->flip_status.flip_pending_num > 16) {
|
||||
return ORBIS_VIDEO_OUT_ERROR_FLIP_QUEUE_FULL;
|
||||
}
|
||||
++port->flip_status.gc_queue_num;
|
||||
++port->flip_status.flip_pending_num;
|
||||
port->flip_status.submit_tsc = Libraries::Kernel::sceKernelReadTsc();
|
||||
return ORBIS_OK;
|
||||
}
|
||||
|
||||
void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb,
|
||||
std::optional<FlipRequest> flip) {
|
||||
auto& queue = mapped_queues[GfxQueueId];
|
||||
|
||||
if (EmulatorSettings.IsCopyGpuBuffers()) {
|
||||
@ -1209,7 +1244,7 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
|
||||
auto task = ProcessGraphics(dcb, ccb);
|
||||
{
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
queue.submits.emplace(task.handle);
|
||||
queue.submits.push({task.handle, std::move(flip)});
|
||||
}
|
||||
|
||||
std::scoped_lock lk{submit_mutex};
|
||||
@ -1225,7 +1260,7 @@ void Liverpool::SubmitAsc(u32 gnm_vqid, std::span<const u32> acb) {
|
||||
const auto& task = ProcessCompute(acb, vqid);
|
||||
{
|
||||
std::scoped_lock lock{queue.m_access};
|
||||
queue.submits.emplace(task.handle);
|
||||
queue.submits.push({task.handle, std::nullopt});
|
||||
}
|
||||
|
||||
std::scoped_lock lk{submit_mutex};
|
||||
|
||||
@ -7,6 +7,7 @@
|
||||
#include <coroutine>
|
||||
#include <exception>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <semaphore>
|
||||
#include <span>
|
||||
#include <thread>
|
||||
@ -26,7 +27,8 @@ class Rasterizer;
|
||||
|
||||
namespace Libraries::VideoOut {
|
||||
struct VideoOutPort;
|
||||
}
|
||||
class VideoOutDriver;
|
||||
} // namespace Libraries::VideoOut
|
||||
|
||||
namespace AmdGpu {
|
||||
|
||||
@ -67,7 +69,13 @@ public:
|
||||
explicit Liverpool();
|
||||
~Liverpool();
|
||||
|
||||
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb);
|
||||
// Flip to perform once the submission it is attached to has completed.
struct FlipRequest {
|
||||
// Display buffer index; Process asserts it is below MaxDisplayBuffers.
u32 buf_id;
|
||||
// Value passed through unchanged to VideoOutDriver::EnqueueFlip.
s64 flip_arg;
|
||||
};
|
||||
|
||||
void SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb,
|
||||
std::optional<FlipRequest> flip = std::nullopt);
|
||||
void SubmitAsc(u32 gnm_vqid, std::span<const u32> acb);
|
||||
|
||||
void SubmitDone() noexcept {
|
||||
@ -87,10 +95,16 @@ public:
|
||||
return num_submits == 0;
|
||||
}
|
||||
|
||||
void SetVoPort(Libraries::VideoOut::VideoOutPort* port) {
|
||||
vo_port = port;
|
||||
// Publishes the VideoOut port and driver pointers; passing nullptr for both
// detaches them.
// NOTE(review): the two stores are independent atomics, so a reader can
// observe one updated without the other; Process checks both for null.
void SetVideoOut(Libraries::VideoOut::VideoOutPort* port,
|
||||
Libraries::VideoOut::VideoOutDriver* drv) {
|
||||
vo_port.store(port, std::memory_order_release);
|
||||
vo_driver.store(drv, std::memory_order_release);
|
||||
}
|
||||
|
||||
// Reserve a flip slot — called at submission time (game thread).
|
||||
// Returns ORBIS_OK or a VideoOut error code.
|
||||
s32 ReserveFlip();
|
||||
|
||||
// Stores the rasterizer pointer in the `rasterizer` member.
void BindRasterizer(Vulkan::Rasterizer* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
@ -187,12 +201,17 @@ private:
|
||||
void Process(std::stop_token stoken);
|
||||
|
||||
struct GpuQueue {
|
||||
struct Submission {
|
||||
Task::Handle task;
|
||||
std::optional<FlipRequest> flip{};
|
||||
};
|
||||
|
||||
std::mutex m_access{};
|
||||
std::atomic<u32> dcb_buffer_offset;
|
||||
std::atomic<u32> ccb_buffer_offset;
|
||||
std::vector<u32> dcb_buffer;
|
||||
std::vector<u32> ccb_buffer;
|
||||
std::queue<Task::Handle> submits{};
|
||||
std::queue<Submission> submits{};
|
||||
ComputeProgram cs_state{};
|
||||
};
|
||||
std::array<GpuQueue, NumTotalQueues> mapped_queues{};
|
||||
@ -221,7 +240,8 @@ private:
|
||||
} cblock{};
|
||||
|
||||
Vulkan::Rasterizer* rasterizer{};
|
||||
Libraries::VideoOut::VideoOutPort* vo_port{};
|
||||
std::atomic<Libraries::VideoOut::VideoOutPort*> vo_port{};
|
||||
std::atomic<Libraries::VideoOut::VideoOutDriver*> vo_driver{};
|
||||
std::jthread process_thread{};
|
||||
std::atomic<u32> num_submits{};
|
||||
std::atomic<u32> num_commands{};
|
||||
|
||||
Loading…
Reference in New Issue
Block a user