mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2026-04-02 19:08:03 -06:00
Merge branch 'main' into user_and_settings
This commit is contained in:
commit
a8f51584bf
@ -71,7 +71,7 @@ public:
|
||||
~FileBackend() = default;
|
||||
|
||||
void Write(const Entry& entry) {
|
||||
if (!enabled) {
|
||||
if (!enabled && entry.log_level != Level::Critical) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -12,28 +12,28 @@ void* PS4_SYSV_ABI AvPlayer::Allocate(void* handle, u32 alignment, u32 size) {
|
||||
const auto* const self = reinterpret_cast<AvPlayer*>(handle);
|
||||
const auto allocate = self->m_init_data_original.memory_replacement.allocate;
|
||||
const auto ptr = self->m_init_data_original.memory_replacement.object_ptr;
|
||||
return Core::ExecuteGuest(allocate, ptr, alignment, size);
|
||||
return allocate(ptr, alignment, size);
|
||||
}
|
||||
|
||||
void PS4_SYSV_ABI AvPlayer::Deallocate(void* handle, void* memory) {
|
||||
const auto* const self = reinterpret_cast<AvPlayer*>(handle);
|
||||
const auto deallocate = self->m_init_data_original.memory_replacement.deallocate;
|
||||
const auto ptr = self->m_init_data_original.memory_replacement.object_ptr;
|
||||
return Core::ExecuteGuest(deallocate, ptr, memory);
|
||||
return deallocate(ptr, memory);
|
||||
}
|
||||
|
||||
void* PS4_SYSV_ABI AvPlayer::AllocateTexture(void* handle, u32 alignment, u32 size) {
|
||||
const auto* const self = reinterpret_cast<AvPlayer*>(handle);
|
||||
const auto allocate = self->m_init_data_original.memory_replacement.allocate_texture;
|
||||
const auto ptr = self->m_init_data_original.memory_replacement.object_ptr;
|
||||
return Core::ExecuteGuest(allocate, ptr, alignment, size);
|
||||
return allocate(ptr, alignment, size);
|
||||
}
|
||||
|
||||
void PS4_SYSV_ABI AvPlayer::DeallocateTexture(void* handle, void* memory) {
|
||||
const auto* const self = reinterpret_cast<AvPlayer*>(handle);
|
||||
const auto deallocate = self->m_init_data_original.memory_replacement.deallocate_texture;
|
||||
const auto ptr = self->m_init_data_original.memory_replacement.object_ptr;
|
||||
return Core::ExecuteGuest(deallocate, ptr, memory);
|
||||
return deallocate(ptr, memory);
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI AvPlayer::OpenFile(void* handle, const char* filename) {
|
||||
@ -42,7 +42,7 @@ int PS4_SYSV_ABI AvPlayer::OpenFile(void* handle, const char* filename) {
|
||||
|
||||
const auto open = self->m_init_data_original.file_replacement.open;
|
||||
const auto ptr = self->m_init_data_original.file_replacement.object_ptr;
|
||||
return Core::ExecuteGuest(open, ptr, filename);
|
||||
return open(ptr, filename);
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI AvPlayer::CloseFile(void* handle) {
|
||||
@ -51,7 +51,7 @@ int PS4_SYSV_ABI AvPlayer::CloseFile(void* handle) {
|
||||
|
||||
const auto close = self->m_init_data_original.file_replacement.close;
|
||||
const auto ptr = self->m_init_data_original.file_replacement.object_ptr;
|
||||
return Core::ExecuteGuest(close, ptr);
|
||||
return close(ptr);
|
||||
}
|
||||
|
||||
int PS4_SYSV_ABI AvPlayer::ReadOffsetFile(void* handle, u8* buffer, u64 position, u32 length) {
|
||||
@ -60,7 +60,7 @@ int PS4_SYSV_ABI AvPlayer::ReadOffsetFile(void* handle, u8* buffer, u64 position
|
||||
|
||||
const auto read_offset = self->m_init_data_original.file_replacement.read_offset;
|
||||
const auto ptr = self->m_init_data_original.file_replacement.object_ptr;
|
||||
return Core::ExecuteGuest(read_offset, ptr, buffer, position, length);
|
||||
return read_offset(ptr, buffer, position, length);
|
||||
}
|
||||
|
||||
u64 PS4_SYSV_ABI AvPlayer::SizeFile(void* handle) {
|
||||
@ -69,7 +69,7 @@ u64 PS4_SYSV_ABI AvPlayer::SizeFile(void* handle) {
|
||||
|
||||
const auto size = self->m_init_data_original.file_replacement.size;
|
||||
const auto ptr = self->m_init_data_original.file_replacement.object_ptr;
|
||||
return Core::ExecuteGuest(size, ptr);
|
||||
return size(ptr);
|
||||
}
|
||||
|
||||
AvPlayerInitData AvPlayer::StubInitData(const AvPlayerInitData& data) {
|
||||
|
||||
@ -92,7 +92,7 @@ void AvPlayerState::DefaultEventCallback(void* opaque, AvPlayerEvents event_id,
|
||||
const auto callback = self->m_event_replacement.event_callback;
|
||||
const auto ptr = self->m_event_replacement.object_ptr;
|
||||
if (callback != nullptr) {
|
||||
Core::ExecuteGuest(callback, ptr, event_id, 0, event_data);
|
||||
callback(ptr, event_id, 0, event_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -99,16 +99,16 @@ public:
|
||||
if (m_ime_mode) {
|
||||
OrbisImeParam param = m_param.ime;
|
||||
if (use_param_handler) {
|
||||
Core::ExecuteGuest(param.handler, param.arg, event);
|
||||
param.handler(param.arg, event);
|
||||
} else {
|
||||
Core::ExecuteGuest(handler, param.arg, event);
|
||||
handler(param.arg, event);
|
||||
}
|
||||
} else {
|
||||
OrbisImeKeyboardParam param = m_param.key;
|
||||
if (use_param_handler) {
|
||||
Core::ExecuteGuest(param.handler, param.arg, event);
|
||||
param.handler(param.arg, event);
|
||||
} else {
|
||||
Core::ExecuteGuest(handler, param.arg, event);
|
||||
handler(param.arg, event);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,8 +131,7 @@ bool ImeDialogState::CallTextFilter() {
|
||||
return false;
|
||||
}
|
||||
|
||||
int ret =
|
||||
Core::ExecuteGuest(text_filter, out_text, &out_text_length, src_text, src_text_length);
|
||||
int ret = text_filter(out_text, &out_text_length, src_text, src_text_length);
|
||||
|
||||
if (ret != 0) {
|
||||
return false;
|
||||
@ -153,7 +152,7 @@ bool ImeDialogState::CallKeyboardFilter(const OrbisImeKeycode* src_keycode, u16*
|
||||
return true;
|
||||
}
|
||||
|
||||
int ret = Core::ExecuteGuest(keyboard_filter, src_keycode, out_keycode, out_status, nullptr);
|
||||
int ret = keyboard_filter(src_keycode, out_keycode, out_status, nullptr);
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
#include "common/logging/log.h"
|
||||
#include "core/libraries/kernel/equeue.h"
|
||||
#include "core/libraries/kernel/orbis_error.h"
|
||||
#include "core/libraries/kernel/time.h"
|
||||
#include "core/libraries/libs.h"
|
||||
|
||||
namespace Libraries::Kernel {
|
||||
@ -15,23 +16,39 @@ namespace Libraries::Kernel {
|
||||
extern boost::asio::io_context io_context;
|
||||
extern void KernelSignalRequest();
|
||||
|
||||
static constexpr auto HrTimerSpinlockThresholdUs = 1200u;
|
||||
static constexpr auto HrTimerSpinlockThresholdNs = 1200000u;
|
||||
|
||||
// Events are uniquely identified by id and filter.
|
||||
|
||||
bool EqueueInternal::AddEvent(EqueueEvent& event) {
|
||||
std::scoped_lock lock{m_mutex};
|
||||
|
||||
// Calculate timer interval
|
||||
event.time_added = std::chrono::steady_clock::now();
|
||||
if (event.event.filter == SceKernelEvent::Filter::Timer ||
|
||||
event.event.filter == SceKernelEvent::Filter::HrTimer) {
|
||||
// HrTimer events are offset by the threshold of time at the end that we spinlock for
|
||||
// greater accuracy.
|
||||
const auto offset =
|
||||
event.event.filter == SceKernelEvent::Filter::HrTimer ? HrTimerSpinlockThresholdUs : 0u;
|
||||
event.timer_interval = std::chrono::microseconds(event.event.data - offset);
|
||||
// Set timer interval
|
||||
event.timer_interval = std::chrono::nanoseconds(event.event.data);
|
||||
}
|
||||
|
||||
// First, check if there's already an event with the same id and filter.
|
||||
u64 id = event.event.ident;
|
||||
SceKernelEvent::Filter filter = event.event.filter;
|
||||
const auto& find_it = std::ranges::find_if(m_events, [id, filter](auto& ev) {
|
||||
return ev.event.ident == id && ev.event.filter == filter;
|
||||
});
|
||||
// If there is a duplicate event, we need to update that instead.
|
||||
if (find_it != m_events.cend()) {
|
||||
// Specifically, update user data and timer_interval.
|
||||
// Trigger status and event data should remain intact.
|
||||
auto& old_event = *find_it;
|
||||
old_event.timer_interval = event.timer_interval;
|
||||
old_event.event.udata = event.event.udata;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Clear input data from event.
|
||||
event.event.data = 0;
|
||||
|
||||
// Remove add flag from event
|
||||
event.event.flags &= ~SceKernelEvent::Flags::Add;
|
||||
|
||||
@ -157,6 +174,9 @@ bool EqueueInternal::TriggerEvent(u64 ident, s16 filter, void* trigger_data) {
|
||||
event.TriggerDisplay(trigger_data);
|
||||
} else if (filter == SceKernelEvent::Filter::User) {
|
||||
event.TriggerUser(trigger_data);
|
||||
} else if (filter == SceKernelEvent::Filter::Timer ||
|
||||
filter == SceKernelEvent::Filter::HrTimer) {
|
||||
event.TriggerTimer();
|
||||
} else {
|
||||
event.Trigger(trigger_data);
|
||||
}
|
||||
@ -197,7 +217,7 @@ bool EqueueInternal::AddSmallTimer(EqueueEvent& ev) {
|
||||
SmallTimer st;
|
||||
st.event = ev.event;
|
||||
st.added = std::chrono::steady_clock::now();
|
||||
st.interval = std::chrono::microseconds{ev.event.data};
|
||||
st.interval = std::chrono::nanoseconds{ev.event.data};
|
||||
{
|
||||
std::scoped_lock lock{m_mutex};
|
||||
m_small_timers[st.event.ident] = std::move(st);
|
||||
@ -307,30 +327,23 @@ int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int
|
||||
}
|
||||
|
||||
static void HrTimerCallback(SceKernelEqueue eq, const SceKernelEvent& kevent) {
|
||||
static EqueueEvent event;
|
||||
event.event = kevent;
|
||||
event.event.data = HrTimerSpinlockThresholdUs;
|
||||
eq->AddSmallTimer(event);
|
||||
eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::HrTimer, kevent.udata);
|
||||
}
|
||||
|
||||
s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* ts, void* udata) {
|
||||
s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, OrbisKernelTimespec* ts,
|
||||
void* udata) {
|
||||
if (eq == nullptr) {
|
||||
return ORBIS_KERNEL_ERROR_EBADF;
|
||||
}
|
||||
|
||||
if (ts->tv_sec > 100 || ts->tv_nsec < 100'000) {
|
||||
return ORBIS_KERNEL_ERROR_EINVAL;
|
||||
}
|
||||
ASSERT(ts->tv_nsec > 1000); // assume 1us resolution
|
||||
const auto total_us = ts->tv_sec * 1000'000 + ts->tv_nsec / 1000;
|
||||
const auto total_ns = ts->tv_sec * 1000000000 + ts->tv_nsec;
|
||||
|
||||
EqueueEvent event{};
|
||||
event.event.ident = id;
|
||||
event.event.filter = SceKernelEvent::Filter::HrTimer;
|
||||
event.event.flags = SceKernelEvent::Flags::Add | SceKernelEvent::Flags::OneShot;
|
||||
event.event.fflags = 0;
|
||||
event.event.data = total_us;
|
||||
event.event.data = total_ns;
|
||||
event.event.udata = udata;
|
||||
|
||||
// HR timers cannot be implemented within the existing event queue architecture due to the
|
||||
@ -340,12 +353,7 @@ s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec*
|
||||
// `HrTimerSpinlockThresholdUs`) and fall back to boost asio timers if the time to tick is
|
||||
// large. Even for large delays, we truncate a small portion to complete the wait
|
||||
// using the spinlock, prioritizing precision.
|
||||
|
||||
if (eq->EventExists(event.event.ident, event.event.filter)) {
|
||||
eq->RemoveEvent(id, SceKernelEvent::Filter::HrTimer);
|
||||
}
|
||||
|
||||
if (total_us < HrTimerSpinlockThresholdUs) {
|
||||
if (total_ns < HrTimerSpinlockThresholdNs) {
|
||||
return eq->AddSmallTimer(event) ? ORBIS_OK : ORBIS_KERNEL_ERROR_ENOMEM;
|
||||
}
|
||||
|
||||
@ -391,16 +399,9 @@ int PS4_SYSV_ABI sceKernelAddTimerEvent(SceKernelEqueue eq, int id, SceKernelUse
|
||||
event.event.filter = SceKernelEvent::Filter::Timer;
|
||||
event.event.flags = SceKernelEvent::Flags::Add;
|
||||
event.event.fflags = 0;
|
||||
event.event.data = usec;
|
||||
event.event.data = usec * 1000;
|
||||
event.event.udata = udata;
|
||||
|
||||
if (eq->EventExists(event.event.ident, event.event.filter)) {
|
||||
eq->RemoveEvent(id, SceKernelEvent::Filter::Timer);
|
||||
LOG_DEBUG(Kernel_Event,
|
||||
"Timer event already exists, removing it: queue name={}, queue id={}",
|
||||
eq->GetName(), event.event.ident);
|
||||
}
|
||||
|
||||
LOG_DEBUG(Kernel_Event, "Added timing event: queue name={}, queue id={}, usec={}, pointer={:x}",
|
||||
eq->GetName(), event.event.ident, usec, reinterpret_cast<uintptr_t>(udata));
|
||||
|
||||
|
||||
@ -81,7 +81,7 @@ struct EqueueEvent {
|
||||
SceKernelEvent event;
|
||||
void* data = nullptr;
|
||||
std::chrono::steady_clock::time_point time_added;
|
||||
std::chrono::microseconds timer_interval;
|
||||
std::chrono::nanoseconds timer_interval;
|
||||
std::unique_ptr<boost::asio::steady_timer> timer;
|
||||
|
||||
void Clear() {
|
||||
@ -92,7 +92,6 @@ struct EqueueEvent {
|
||||
|
||||
void Trigger(void* data) {
|
||||
is_triggered = true;
|
||||
event.fflags++;
|
||||
event.data = reinterpret_cast<uintptr_t>(data);
|
||||
}
|
||||
|
||||
@ -101,6 +100,11 @@ struct EqueueEvent {
|
||||
event.udata = data;
|
||||
}
|
||||
|
||||
void TriggerTimer() {
|
||||
is_triggered = true;
|
||||
event.data++;
|
||||
}
|
||||
|
||||
void TriggerDisplay(void* data) {
|
||||
is_triggered = true;
|
||||
if (data != nullptr) {
|
||||
@ -135,7 +139,7 @@ class EqueueInternal {
|
||||
struct SmallTimer {
|
||||
SceKernelEvent event;
|
||||
std::chrono::steady_clock::time_point added;
|
||||
std::chrono::microseconds interval;
|
||||
std::chrono::nanoseconds interval;
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
@ -194,6 +194,21 @@ int PS4_SYSV_ABI posix_pthread_detach(PthreadT pthread) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __clang__
|
||||
__attribute__((optnone))
|
||||
#else
|
||||
__attribute__((optimize("O0")))
|
||||
#endif
|
||||
void ClearStack(const PthreadAttr& attr) {
|
||||
void* sp;
|
||||
asm("mov %%rsp, %0" : "=rm"(sp));
|
||||
// leave a safety net of 128 bytes for memset
|
||||
const u64 size = (u64)sp - (u64)attr.stackaddr_attr - 128;
|
||||
volatile void* buf = alloca(size);
|
||||
memset(const_cast<void*>(buf), 0, size);
|
||||
buf = nullptr;
|
||||
}
|
||||
|
||||
static void RunThread(void* arg) {
|
||||
auto* curthread = static_cast<Pthread*>(arg);
|
||||
g_curthread = curthread;
|
||||
@ -202,7 +217,12 @@ static void RunThread(void* arg) {
|
||||
|
||||
/* Run the current thread's start routine with argument: */
|
||||
curthread->native_thr.Initialize();
|
||||
void* ret = Core::ExecuteGuest(curthread->start_routine, curthread->arg);
|
||||
Core::EnsureThreadInitialized();
|
||||
|
||||
// Clear the stack before running the guest thread
|
||||
ClearStack(curthread->attr);
|
||||
|
||||
void* ret = curthread->start_routine(curthread->arg);
|
||||
|
||||
/* Remove thread from tracking */
|
||||
DebugState.RemoveCurrentThreadFromGuestList();
|
||||
|
||||
@ -84,7 +84,7 @@ void _thread_cleanupspecific() {
|
||||
* destructor:
|
||||
*/
|
||||
lk.unlock();
|
||||
Core::ExecuteGuest(destructor, data);
|
||||
destructor(data);
|
||||
lk.lock();
|
||||
}
|
||||
}
|
||||
|
||||
@ -52,7 +52,7 @@ void NetCtlInternal::CheckCallback() {
|
||||
: ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED;
|
||||
for (const auto [func, arg] : callbacks) {
|
||||
if (func != nullptr) {
|
||||
Core::ExecuteGuest(func, event, arg);
|
||||
func(event, arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -64,7 +64,7 @@ void NetCtlInternal::CheckNpToolkitCallback() {
|
||||
: ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED;
|
||||
for (const auto [func, arg] : nptool_callbacks) {
|
||||
if (func != nullptr) {
|
||||
Core::ExecuteGuest(func, event, arg);
|
||||
func(event, arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -160,13 +160,13 @@ s32 PS4_SYSV_ABI sceNgs2SystemCreateWithAllocator(const OrbisNgs2SystemOption* o
|
||||
result = SystemSetup(option, &bufferInfo, 0, 0);
|
||||
if (result >= 0) {
|
||||
uintptr_t sysUserData = allocator->userData;
|
||||
result = Core::ExecuteGuest(hostAlloc, &bufferInfo);
|
||||
result = hostAlloc(&bufferInfo);
|
||||
if (result >= 0) {
|
||||
OrbisNgs2Handle* handleCopy = outHandle;
|
||||
result = SystemSetup(option, &bufferInfo, hostFree, handleCopy);
|
||||
if (result < 0) {
|
||||
if (hostFree) {
|
||||
Core::ExecuteGuest(hostFree, &bufferInfo);
|
||||
hostFree(&bufferInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -3,6 +3,8 @@
|
||||
|
||||
#include "dimensions.h"
|
||||
|
||||
#include "core/tls.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
@ -622,6 +624,8 @@ libusb_transfer_status DimensionsBackend::HandleAsyncTransfer(libusb_transfer* t
|
||||
s32 DimensionsBackend::SubmitTransfer(libusb_transfer* transfer) {
|
||||
if (transfer->endpoint == 0x01) {
|
||||
std::thread write_thread([this, transfer] {
|
||||
Core::EnsureThreadInitialized();
|
||||
|
||||
HandleAsyncTransfer(transfer);
|
||||
|
||||
const u8 flags = transfer->flags;
|
||||
|
||||
@ -317,20 +317,25 @@ void VideoOutDriver::PresentThread(std::stop_token token) {
|
||||
{
|
||||
// Needs lock here as can be concurrently read by `sceVideoOutGetVblankStatus`
|
||||
std::scoped_lock lock{main_port.vo_mutex};
|
||||
|
||||
// Trigger flip events for the port
|
||||
for (auto& event : main_port.vblank_events) {
|
||||
if (event != nullptr) {
|
||||
event->TriggerEvent(static_cast<u64>(OrbisVideoOutInternalEventId::Vblank),
|
||||
Kernel::SceKernelEvent::Filter::VideoOut,
|
||||
reinterpret_cast<void*>(
|
||||
static_cast<u64>(OrbisVideoOutInternalEventId::Vblank) |
|
||||
(vblank_status.count << 16)));
|
||||
}
|
||||
}
|
||||
|
||||
// Update vblank status
|
||||
vblank_status.count++;
|
||||
vblank_status.process_time = Libraries::Kernel::sceKernelGetProcessTime();
|
||||
vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc();
|
||||
main_port.vblank_cv.notify_all();
|
||||
}
|
||||
|
||||
// Trigger flip events for the port.
|
||||
for (auto& event : main_port.vblank_events) {
|
||||
if (event != nullptr) {
|
||||
event->TriggerEvent(static_cast<u64>(OrbisVideoOutInternalEventId::Vblank),
|
||||
Kernel::SceKernelEvent::Filter::VideoOut, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
timer.End();
|
||||
}
|
||||
}
|
||||
|
||||
@ -136,7 +136,8 @@ void Linker::Execute(const std::vector<std::string>& args) {
|
||||
}
|
||||
}
|
||||
params.entry_addr = module->GetEntryAddress();
|
||||
ExecuteGuest(RunMainEntry, ¶ms);
|
||||
Core::EnsureThreadInitialized();
|
||||
RunMainEntry(¶ms);
|
||||
});
|
||||
}
|
||||
|
||||
@ -380,8 +381,7 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) {
|
||||
if (!addr) {
|
||||
// Module was just loaded by above code. Allocate TLS block for it.
|
||||
const u32 init_image_size = module->tls.init_image_size;
|
||||
u8* dest = reinterpret_cast<u8*>(
|
||||
Core::ExecuteGuest(heap_api->heap_malloc, module->tls.image_size));
|
||||
u8* dest = reinterpret_cast<u8*>(heap_api->heap_malloc(module->tls.image_size));
|
||||
const u8* src = reinterpret_cast<const u8*>(module->tls.image_virtual_addr);
|
||||
std::memcpy(dest, src, init_image_size);
|
||||
std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size);
|
||||
@ -413,7 +413,7 @@ void* Linker::AllocateTlsForThread(bool is_primary) {
|
||||
ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread");
|
||||
} else {
|
||||
if (heap_api) {
|
||||
addr_out = Core::ExecuteGuest(heap_api->heap_malloc, total_tls_size);
|
||||
addr_out = heap_api->heap_malloc(total_tls_size);
|
||||
} else {
|
||||
addr_out = std::malloc(total_tls_size);
|
||||
}
|
||||
@ -423,7 +423,7 @@ void* Linker::AllocateTlsForThread(bool is_primary) {
|
||||
|
||||
void Linker::FreeTlsForNonPrimaryThread(void* pointer) {
|
||||
if (heap_api) {
|
||||
Core::ExecuteGuest(heap_api->heap_free, pointer);
|
||||
heap_api->heap_free(pointer);
|
||||
} else {
|
||||
std::free(pointer);
|
||||
}
|
||||
|
||||
@ -97,7 +97,8 @@ Module::~Module() = default;
|
||||
s32 Module::Start(u64 args, const void* argp, void* param) {
|
||||
LOG_INFO(Core_Linker, "Module started : {}", name);
|
||||
const VAddr addr = dynamic_info.init_virtual_addr + GetBaseAddress();
|
||||
return ExecuteGuest(reinterpret_cast<EntryFunc>(addr), args, argp, param);
|
||||
Core::EnsureThreadInitialized();
|
||||
return reinterpret_cast<EntryFunc>(addr)(args, argp, param);
|
||||
}
|
||||
|
||||
void Module::LoadModuleToMemory(u32& max_tls_index) {
|
||||
|
||||
@ -45,29 +45,6 @@ Tcb* GetTcbBase();
|
||||
/// Makes sure TLS is initialized for the thread before entering guest.
|
||||
void EnsureThreadInitialized();
|
||||
|
||||
template <size_t size>
|
||||
#ifdef __clang__
|
||||
__attribute__((optnone))
|
||||
#else
|
||||
__attribute__((optimize("O0")))
|
||||
#endif
|
||||
void ClearStack() {
|
||||
volatile void* buf = alloca(size);
|
||||
memset(const_cast<void*>(buf), 0, size);
|
||||
buf = nullptr;
|
||||
}
|
||||
|
||||
template <class ReturnType, class... FuncArgs, class... CallArgs>
|
||||
ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&&... args) {
|
||||
EnsureThreadInitialized();
|
||||
// clear stack to avoid trash from EnsureThreadInitialized
|
||||
auto* tcb = GetTcbBase();
|
||||
if (tcb != nullptr && tcb->tcb_fiber == nullptr) {
|
||||
ClearStack<12_KB>();
|
||||
}
|
||||
return func(std::forward<CallArgs>(args)...);
|
||||
}
|
||||
|
||||
template <class F, F f>
|
||||
struct HostCallWrapperImpl;
|
||||
|
||||
|
||||
@ -462,50 +462,134 @@ static std::pair<u32, u32> SanitizeCopyLayers(const ImageInfo& src_info, const I
|
||||
void Image::CopyImage(Image& src_image) {
|
||||
const auto& src_info = src_image.info;
|
||||
const u32 num_mips = std::min(src_info.resources.levels, info.resources.levels);
|
||||
ASSERT(src_info.resources.layers == info.resources.layers || num_mips == 1);
|
||||
|
||||
// Check format compatibility
|
||||
if (src_info.pixel_format != info.pixel_format) {
|
||||
LOG_DEBUG(Render_Vulkan,
|
||||
"Copy between different formats: src={}, dst={}. Color may be incorrect.",
|
||||
vk::to_string(src_info.pixel_format), vk::to_string(info.pixel_format));
|
||||
}
|
||||
|
||||
const u32 width = src_info.size.width;
|
||||
const u32 height = src_info.size.height;
|
||||
const u32 depth =
|
||||
const u32 base_depth =
|
||||
info.type == AmdGpu::ImageType::Color3D ? info.size.depth : src_info.size.depth;
|
||||
|
||||
auto [test_src_layers, test_dst_layers] = SanitizeCopyLayers(src_info, info, base_depth);
|
||||
|
||||
ASSERT(test_src_layers == test_dst_layers || num_mips == 1 ||
|
||||
(ConvertImageType(src_info.type) != ConvertImageType(info.type) &&
|
||||
(test_src_layers == 1 || test_dst_layers == 1)));
|
||||
|
||||
SetBackingSamples(info.num_samples, false);
|
||||
src_image.SetBackingSamples(src_info.num_samples);
|
||||
|
||||
boost::container::small_vector<vk::ImageCopy, 8> image_copies;
|
||||
|
||||
const bool src_is_2d = ConvertImageType(src_info.type) == vk::ImageType::e2D;
|
||||
const bool src_is_3d = ConvertImageType(src_info.type) == vk::ImageType::e3D;
|
||||
const bool dst_is_2d = ConvertImageType(info.type) == vk::ImageType::e2D;
|
||||
const bool dst_is_3d = ConvertImageType(info.type) == vk::ImageType::e3D;
|
||||
|
||||
const bool is_2d_to_3d = src_is_2d && dst_is_3d;
|
||||
const bool is_3d_to_2d = src_is_3d && dst_is_2d;
|
||||
const bool is_same_type = !is_2d_to_3d && !is_3d_to_2d;
|
||||
|
||||
// Determine aspect mask - exclude stencil
|
||||
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor;
|
||||
|
||||
// For depth/stencil images, only copy the depth aspect (skip stencil)
|
||||
if (src_image.aspect_mask & vk::ImageAspectFlagBits::eDepth) {
|
||||
aspect = vk::ImageAspectFlagBits::eDepth;
|
||||
}
|
||||
|
||||
for (u32 mip = 0; mip < num_mips; ++mip) {
|
||||
const auto mip_w = std::max(width >> mip, 1u);
|
||||
const auto mip_h = std::max(height >> mip, 1u);
|
||||
const auto mip_d = std::max(depth >> mip, 1u);
|
||||
const auto [src_layers, dst_layers] = SanitizeCopyLayers(src_info, info, mip_d);
|
||||
const auto mip_d = std::max(base_depth >> mip, 1u);
|
||||
|
||||
image_copies.emplace_back(vk::ImageCopy{
|
||||
.srcSubresource{
|
||||
.aspectMask = src_image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
|
||||
.mipLevel = mip,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = src_layers,
|
||||
},
|
||||
.dstSubresource{
|
||||
.aspectMask = aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
|
||||
.mipLevel = mip,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = dst_layers,
|
||||
},
|
||||
.extent = {mip_w, mip_h, mip_d},
|
||||
});
|
||||
auto [src_layers, dst_layers] = SanitizeCopyLayers(src_info, info, mip_d);
|
||||
|
||||
if (is_same_type) {
|
||||
u32 copy_layers = std::min(src_layers, dst_layers);
|
||||
|
||||
if (src_is_3d)
|
||||
src_layers = 1;
|
||||
if (dst_is_3d)
|
||||
dst_layers = 1;
|
||||
|
||||
vk::ImageCopy copy_region = {
|
||||
.srcSubresource{
|
||||
.aspectMask = aspect,
|
||||
.mipLevel = mip,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = copy_layers,
|
||||
},
|
||||
.dstSubresource{
|
||||
.aspectMask = aspect,
|
||||
.mipLevel = mip,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = copy_layers,
|
||||
},
|
||||
.extent = vk::Extent3D(mip_w, mip_h, mip_d),
|
||||
};
|
||||
image_copies.push_back(copy_region);
|
||||
} else if (is_2d_to_3d) {
|
||||
vk::ImageCopy copy_region = {
|
||||
.srcSubresource{
|
||||
.aspectMask = aspect,
|
||||
.mipLevel = mip,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = src_layers,
|
||||
},
|
||||
.dstSubresource{
|
||||
.aspectMask = aspect,
|
||||
.mipLevel = mip,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.extent = vk::Extent3D(mip_w, mip_h, src_layers),
|
||||
};
|
||||
image_copies.push_back(copy_region);
|
||||
} else if (is_3d_to_2d) {
|
||||
vk::ImageCopy copy_region = {
|
||||
.srcSubresource{
|
||||
.aspectMask = aspect,
|
||||
.mipLevel = mip,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.dstSubresource{
|
||||
.aspectMask = aspect,
|
||||
.mipLevel = mip,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = dst_layers,
|
||||
},
|
||||
.extent = vk::Extent3D(mip_w, mip_h, dst_layers),
|
||||
};
|
||||
image_copies.push_back(copy_region);
|
||||
}
|
||||
}
|
||||
|
||||
scheduler->EndRendering();
|
||||
|
||||
// Remove the pipeline stage flags - they don't belong here
|
||||
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
|
||||
|
||||
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
|
||||
|
||||
auto cmdbuf = scheduler->CommandBuffer();
|
||||
cmdbuf.copyImage(src_image.GetImage(), src_image.backing->state.layout, GetImage(),
|
||||
backing->state.layout, image_copies);
|
||||
|
||||
Transit(vk::ImageLayout::eGeneral,
|
||||
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
|
||||
if (!image_copies.empty()) {
|
||||
cmdbuf.copyImage(src_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, GetImage(),
|
||||
vk::ImageLayout::eTransferDstOptimal, image_copies);
|
||||
}
|
||||
|
||||
// Remove pipeline stage flags here too
|
||||
src_image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead,
|
||||
{});
|
||||
|
||||
Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {});
|
||||
}
|
||||
|
||||
void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset) {
|
||||
|
||||
@ -297,6 +297,14 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||
if (image_info.guest_address == cache_image.info.guest_address) {
|
||||
const u32 lhs_block_size = image_info.num_bits * image_info.num_samples;
|
||||
const u32 rhs_block_size = cache_image.info.num_bits * cache_image.info.num_samples;
|
||||
|
||||
if (image_info.pitch != cache_image.info.pitch) {
|
||||
if (safe_to_delete) {
|
||||
FreeImage(cache_image_id);
|
||||
}
|
||||
return {merged_image_id, -1, -1};
|
||||
}
|
||||
|
||||
if (image_info.BlockDim() != cache_image.info.BlockDim() ||
|
||||
lhs_block_size != rhs_block_size) {
|
||||
// Very likely this kind of overlap is caused by allocation from a pool.
|
||||
@ -346,6 +354,111 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
|
||||
return {merged_image_id, -1, -1};
|
||||
}
|
||||
|
||||
// Enhanced debug logging for unreachable case
|
||||
// Calculate expected size based on format and dimensions
|
||||
u64 expected_size =
|
||||
(static_cast<u64>(image_info.size.width) * static_cast<u64>(image_info.size.height) *
|
||||
static_cast<u64>(image_info.size.depth) * static_cast<u64>(image_info.num_bits) / 8);
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Unresolvable image overlap with equal memory address:\n"
|
||||
"=== OLD IMAGE (cached) ===\n"
|
||||
" Address: {:#x}\n"
|
||||
" Size: {:#x} bytes\n"
|
||||
" Format: {}\n"
|
||||
" Type: {}\n"
|
||||
" Width: {}\n"
|
||||
" Height: {}\n"
|
||||
" Depth: {}\n"
|
||||
" Pitch: {}\n"
|
||||
" Mip levels: {}\n"
|
||||
" Array layers: {}\n"
|
||||
" Samples: {}\n"
|
||||
" Tile mode: {:#x}\n"
|
||||
" Block size: {} bits\n"
|
||||
" Is block-comp: {}\n"
|
||||
" Guest size: {:#x}\n"
|
||||
" Last accessed: tick {}\n"
|
||||
" Safe to delete: {}\n"
|
||||
"\n"
|
||||
"=== NEW IMAGE (requested) ===\n"
|
||||
" Address: {:#x}\n"
|
||||
" Size: {:#x} bytes\n"
|
||||
" Format: {}\n"
|
||||
" Type: {}\n"
|
||||
" Width: {}\n"
|
||||
" Height: {}\n"
|
||||
" Depth: {}\n"
|
||||
" Pitch: {}\n"
|
||||
" Mip levels: {}\n"
|
||||
" Array layers: {}\n"
|
||||
" Samples: {}\n"
|
||||
" Tile mode: {:#x}\n"
|
||||
" Block size: {} bits\n"
|
||||
" Is block-comp: {}\n"
|
||||
" Guest size: {:#x}\n"
|
||||
"\n"
|
||||
"=== COMPARISON ===\n"
|
||||
" Same format: {}\n"
|
||||
" Same type: {}\n"
|
||||
" Same tile mode: {}\n"
|
||||
" Same block size: {}\n"
|
||||
" Same BlockDim: {}\n"
|
||||
" Same pitch: {}\n"
|
||||
" Old resources <= new: {} (old: {}, new: {})\n"
|
||||
" Old size <= new size: {}\n"
|
||||
" Expected size (calc): {} bytes\n"
|
||||
" Size ratio (new/expected): {:.2f}x\n"
|
||||
" Size ratio (new/old): {:.2f}x\n"
|
||||
" Old vs expected diff: {} bytes ({:+.2f}%)\n"
|
||||
" New vs expected diff: {} bytes ({:+.2f}%)\n"
|
||||
" Merged image ID: {}\n"
|
||||
" Binding type: {}\n"
|
||||
" Current tick: {}\n"
|
||||
" Age (ticks since last access): {}",
|
||||
|
||||
// Old image details
|
||||
cache_image.info.guest_address, cache_image.info.guest_size,
|
||||
vk::to_string(cache_image.info.pixel_format),
|
||||
static_cast<int>(cache_image.info.type), cache_image.info.size.width,
|
||||
cache_image.info.size.height, cache_image.info.size.depth, cache_image.info.pitch,
|
||||
cache_image.info.resources.levels, cache_image.info.resources.layers,
|
||||
cache_image.info.num_samples, static_cast<u32>(cache_image.info.tile_mode),
|
||||
cache_image.info.num_bits, cache_image.info.props.is_block,
|
||||
cache_image.info.guest_size, cache_image.tick_accessed_last, safe_to_delete,
|
||||
|
||||
// New image details
|
||||
image_info.guest_address, image_info.guest_size,
|
||||
vk::to_string(image_info.pixel_format), static_cast<int>(image_info.type),
|
||||
image_info.size.width, image_info.size.height, image_info.size.depth,
|
||||
image_info.pitch, image_info.resources.levels, image_info.resources.layers,
|
||||
image_info.num_samples, static_cast<u32>(image_info.tile_mode),
|
||||
image_info.num_bits, image_info.props.is_block, image_info.guest_size,
|
||||
|
||||
// Comparison
|
||||
(image_info.pixel_format == cache_image.info.pixel_format),
|
||||
(image_info.type == cache_image.info.type),
|
||||
(image_info.tile_mode == cache_image.info.tile_mode),
|
||||
(image_info.num_bits == cache_image.info.num_bits),
|
||||
(image_info.BlockDim() == cache_image.info.BlockDim()),
|
||||
(image_info.pitch == cache_image.info.pitch),
|
||||
(cache_image.info.resources <= image_info.resources),
|
||||
cache_image.info.resources.levels, image_info.resources.levels,
|
||||
(cache_image.info.guest_size <= image_info.guest_size), expected_size,
|
||||
|
||||
// Size ratios
|
||||
static_cast<double>(image_info.guest_size) / expected_size,
|
||||
static_cast<double>(image_info.guest_size) / cache_image.info.guest_size,
|
||||
|
||||
// Difference between actual and expected sizes with percentages
|
||||
static_cast<s64>(cache_image.info.guest_size) - static_cast<s64>(expected_size),
|
||||
(static_cast<double>(cache_image.info.guest_size) / expected_size - 1.0) * 100.0,
|
||||
|
||||
static_cast<s64>(image_info.guest_size) - static_cast<s64>(expected_size),
|
||||
(static_cast<double>(image_info.guest_size) / expected_size - 1.0) * 100.0,
|
||||
|
||||
merged_image_id.index, static_cast<int>(binding), scheduler.CurrentTick(),
|
||||
scheduler.CurrentTick() - cache_image.tick_accessed_last);
|
||||
|
||||
UNREACHABLE_MSG("Encountered unresolvable image overlap with equal memory address.");
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user