diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index e778704cd..6636fca48 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -71,7 +71,7 @@ public: ~FileBackend() = default; void Write(const Entry& entry) { - if (!enabled) { + if (!enabled && entry.log_level != Level::Critical) { return; } diff --git a/src/core/libraries/avplayer/avplayer_impl.cpp b/src/core/libraries/avplayer/avplayer_impl.cpp index 138747da4..db32862ad 100644 --- a/src/core/libraries/avplayer/avplayer_impl.cpp +++ b/src/core/libraries/avplayer/avplayer_impl.cpp @@ -12,28 +12,28 @@ void* PS4_SYSV_ABI AvPlayer::Allocate(void* handle, u32 alignment, u32 size) { const auto* const self = reinterpret_cast(handle); const auto allocate = self->m_init_data_original.memory_replacement.allocate; const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; - return Core::ExecuteGuest(allocate, ptr, alignment, size); + return allocate(ptr, alignment, size); } void PS4_SYSV_ABI AvPlayer::Deallocate(void* handle, void* memory) { const auto* const self = reinterpret_cast(handle); const auto deallocate = self->m_init_data_original.memory_replacement.deallocate; const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; - return Core::ExecuteGuest(deallocate, ptr, memory); + return deallocate(ptr, memory); } void* PS4_SYSV_ABI AvPlayer::AllocateTexture(void* handle, u32 alignment, u32 size) { const auto* const self = reinterpret_cast(handle); const auto allocate = self->m_init_data_original.memory_replacement.allocate_texture; const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; - return Core::ExecuteGuest(allocate, ptr, alignment, size); + return allocate(ptr, alignment, size); } void PS4_SYSV_ABI AvPlayer::DeallocateTexture(void* handle, void* memory) { const auto* const self = reinterpret_cast(handle); const auto deallocate = self->m_init_data_original.memory_replacement.deallocate_texture; const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; - return Core::ExecuteGuest(deallocate, ptr, memory); + return deallocate(ptr, memory); } int PS4_SYSV_ABI AvPlayer::OpenFile(void* handle, const char* filename) { @@ -42,7 +42,7 @@ int PS4_SYSV_ABI AvPlayer::OpenFile(void* handle, const char* filename) { const auto open = self->m_init_data_original.file_replacement.open; const auto ptr = self->m_init_data_original.file_replacement.object_ptr; - return Core::ExecuteGuest(open, ptr, filename); + return open(ptr, filename); } int PS4_SYSV_ABI AvPlayer::CloseFile(void* handle) { @@ -51,7 +51,7 @@ int PS4_SYSV_ABI AvPlayer::CloseFile(void* handle) { const auto close = self->m_init_data_original.file_replacement.close; const auto ptr = self->m_init_data_original.file_replacement.object_ptr; - return Core::ExecuteGuest(close, ptr); + return close(ptr); } int PS4_SYSV_ABI AvPlayer::ReadOffsetFile(void* handle, u8* buffer, u64 position, u32 length) { @@ -60,7 +60,7 @@ int PS4_SYSV_ABI AvPlayer::ReadOffsetFile(void* handle, u8* buffer, u64 position const auto read_offset = self->m_init_data_original.file_replacement.read_offset; const auto ptr = self->m_init_data_original.file_replacement.object_ptr; - return Core::ExecuteGuest(read_offset, ptr, buffer, position, length); + return read_offset(ptr, buffer, position, length); } u64 PS4_SYSV_ABI AvPlayer::SizeFile(void* handle) { @@ -69,7 +69,7 @@ u64 PS4_SYSV_ABI AvPlayer::SizeFile(void* handle) { const auto size = self->m_init_data_original.file_replacement.size; const auto ptr = self->m_init_data_original.file_replacement.object_ptr; - return Core::ExecuteGuest(size, ptr); + return size(ptr); } AvPlayerInitData AvPlayer::StubInitData(const AvPlayerInitData& data) { diff --git a/src/core/libraries/avplayer/avplayer_state.cpp b/src/core/libraries/avplayer/avplayer_state.cpp index e1b11840e..dbaa36d18 100644 --- a/src/core/libraries/avplayer/avplayer_state.cpp +++ b/src/core/libraries/avplayer/avplayer_state.cpp @@ -92,7 +92,7 @@ void AvPlayerState::DefaultEventCallback(void* opaque, AvPlayerEvents event_id, const auto callback = self->m_event_replacement.event_callback; const auto ptr = self->m_event_replacement.object_ptr; if (callback != nullptr) { - Core::ExecuteGuest(callback, ptr, event_id, 0, event_data); + callback(ptr, event_id, 0, event_data); } } diff --git a/src/core/libraries/ime/ime.cpp b/src/core/libraries/ime/ime.cpp index 258cc61e1..96ae446fa 100644 --- a/src/core/libraries/ime/ime.cpp +++ b/src/core/libraries/ime/ime.cpp @@ -99,16 +99,16 @@ public: if (m_ime_mode) { OrbisImeParam param = m_param.ime; if (use_param_handler) { - Core::ExecuteGuest(param.handler, param.arg, event); + param.handler(param.arg, event); } else { - Core::ExecuteGuest(handler, param.arg, event); + handler(param.arg, event); } } else { OrbisImeKeyboardParam param = m_param.key; if (use_param_handler) { - Core::ExecuteGuest(param.handler, param.arg, event); + param.handler(param.arg, event); } else { - Core::ExecuteGuest(handler, param.arg, event); + handler(param.arg, event); } } } diff --git a/src/core/libraries/ime/ime_dialog_ui.cpp b/src/core/libraries/ime/ime_dialog_ui.cpp index 4a95c60c9..9611e7c49 100644 --- a/src/core/libraries/ime/ime_dialog_ui.cpp +++ b/src/core/libraries/ime/ime_dialog_ui.cpp @@ -131,8 +131,7 @@ bool ImeDialogState::CallTextFilter() { return false; } - int ret = - Core::ExecuteGuest(text_filter, out_text, &out_text_length, src_text, src_text_length); + int ret = text_filter(out_text, &out_text_length, src_text, src_text_length); if (ret != 0) { return false; @@ -153,7 +152,7 @@ bool ImeDialogState::CallKeyboardFilter(const OrbisImeKeycode* src_keycode, u16* return true; } - int ret = Core::ExecuteGuest(keyboard_filter, src_keycode, out_keycode, out_status, nullptr); + int ret = keyboard_filter(src_keycode, out_keycode, out_status, nullptr); return ret == 0; } diff --git a/src/core/libraries/kernel/equeue.cpp b/src/core/libraries/kernel/equeue.cpp index 72e38b265..9291c0a1a 100644 --- a/src/core/libraries/kernel/equeue.cpp +++ b/src/core/libraries/kernel/equeue.cpp @@ -8,6 +8,7 @@ #include "common/logging/log.h" #include "core/libraries/kernel/equeue.h" #include "core/libraries/kernel/orbis_error.h" +#include "core/libraries/kernel/time.h" #include "core/libraries/libs.h" namespace Libraries::Kernel { @@ -15,23 +16,39 @@ namespace Libraries::Kernel { extern boost::asio::io_context io_context; extern void KernelSignalRequest(); -static constexpr auto HrTimerSpinlockThresholdUs = 1200u; +static constexpr auto HrTimerSpinlockThresholdNs = 1200000u; // Events are uniquely identified by id and filter. - bool EqueueInternal::AddEvent(EqueueEvent& event) { std::scoped_lock lock{m_mutex}; + // Calculate timer interval event.time_added = std::chrono::steady_clock::now(); if (event.event.filter == SceKernelEvent::Filter::Timer || event.event.filter == SceKernelEvent::Filter::HrTimer) { - // HrTimer events are offset by the threshold of time at the end that we spinlock for - // greater accuracy. - const auto offset = - event.event.filter == SceKernelEvent::Filter::HrTimer ? HrTimerSpinlockThresholdUs : 0u; - event.timer_interval = std::chrono::microseconds(event.event.data - offset); + // Set timer interval + event.timer_interval = std::chrono::nanoseconds(event.event.data); } + // First, check if there's already an event with the same id and filter. + u64 id = event.event.ident; + SceKernelEvent::Filter filter = event.event.filter; + const auto& find_it = std::ranges::find_if(m_events, [id, filter](auto& ev) { + return ev.event.ident == id && ev.event.filter == filter; + }); + // If there is a duplicate event, we need to update that instead. + if (find_it != m_events.cend()) { + // Specifically, update user data and timer_interval. + // Trigger status and event data should remain intact. + auto& old_event = *find_it; + old_event.timer_interval = event.timer_interval; + old_event.event.udata = event.event.udata; + return true; + } + + // Clear input data from event. + event.event.data = 0; + // Remove add flag from event event.event.flags &= ~SceKernelEvent::Flags::Add; @@ -157,6 +174,9 @@ bool EqueueInternal::TriggerEvent(u64 ident, s16 filter, void* trigger_data) { event.TriggerDisplay(trigger_data); } else if (filter == SceKernelEvent::Filter::User) { event.TriggerUser(trigger_data); + } else if (filter == SceKernelEvent::Filter::Timer || + filter == SceKernelEvent::Filter::HrTimer) { + event.TriggerTimer(); } else { event.Trigger(trigger_data); } @@ -197,7 +217,7 @@ bool EqueueInternal::AddSmallTimer(EqueueEvent& ev) { SmallTimer st; st.event = ev.event; st.added = std::chrono::steady_clock::now(); - st.interval = std::chrono::microseconds{ev.event.data}; + st.interval = std::chrono::nanoseconds{ev.event.data}; { std::scoped_lock lock{m_mutex}; m_small_timers[st.event.ident] = std::move(st); @@ -307,30 +327,23 @@ int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int } static void HrTimerCallback(SceKernelEqueue eq, const SceKernelEvent& kevent) { - static EqueueEvent event; - event.event = kevent; - event.event.data = HrTimerSpinlockThresholdUs; - eq->AddSmallTimer(event); eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::HrTimer, kevent.udata); } -s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* ts, void* udata) { +s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, OrbisKernelTimespec* ts, + void* udata) { if (eq == nullptr) { return ORBIS_KERNEL_ERROR_EBADF; } - if (ts->tv_sec > 100 || ts->tv_nsec < 100'000) { - return ORBIS_KERNEL_ERROR_EINVAL; - } - ASSERT(ts->tv_nsec > 1000); // assume 1us resolution - const auto total_us = ts->tv_sec * 1000'000 + ts->tv_nsec / 1000; + const auto total_ns = ts->tv_sec * 1000000000 + ts->tv_nsec; EqueueEvent event{}; event.event.ident = id; event.event.filter = SceKernelEvent::Filter::HrTimer; event.event.flags = SceKernelEvent::Flags::Add | SceKernelEvent::Flags::OneShot; event.event.fflags = 0; - event.event.data = total_us; + event.event.data = total_ns; event.event.udata = udata; // HR timers cannot be implemented within the existing event queue architecture due to the @@ -340,12 +353,7 @@ s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* // `HrTimerSpinlockThresholdUs`) and fall back to boost asio timers if the time to tick is // large. Even for large delays, we truncate a small portion to complete the wait // using the spinlock, prioritizing precision. - - if (eq->EventExists(event.event.ident, event.event.filter)) { - eq->RemoveEvent(id, SceKernelEvent::Filter::HrTimer); - } - - if (total_us < HrTimerSpinlockThresholdUs) { + if (total_ns < HrTimerSpinlockThresholdNs) { return eq->AddSmallTimer(event) ? ORBIS_OK : ORBIS_KERNEL_ERROR_ENOMEM; } @@ -391,16 +399,9 @@ int PS4_SYSV_ABI sceKernelAddTimerEvent(SceKernelEqueue eq, int id, SceKernelUse event.event.filter = SceKernelEvent::Filter::Timer; event.event.flags = SceKernelEvent::Flags::Add; event.event.fflags = 0; - event.event.data = usec; + event.event.data = usec * 1000; event.event.udata = udata; - if (eq->EventExists(event.event.ident, event.event.filter)) { - eq->RemoveEvent(id, SceKernelEvent::Filter::Timer); - LOG_DEBUG(Kernel_Event, - "Timer event already exists, removing it: queue name={}, queue id={}", - eq->GetName(), event.event.ident); - } - LOG_DEBUG(Kernel_Event, "Added timing event: queue name={}, queue id={}, usec={}, pointer={:x}", eq->GetName(), event.event.ident, usec, reinterpret_cast(udata)); diff --git a/src/core/libraries/kernel/equeue.h b/src/core/libraries/kernel/equeue.h index 06b667008..83b4b8689 100644 --- a/src/core/libraries/kernel/equeue.h +++ b/src/core/libraries/kernel/equeue.h @@ -81,7 +81,7 @@ struct EqueueEvent { SceKernelEvent event; void* data = nullptr; std::chrono::steady_clock::time_point time_added; - std::chrono::microseconds timer_interval; + std::chrono::nanoseconds timer_interval; std::unique_ptr timer; void Clear() { @@ -92,7 +92,6 @@ struct EqueueEvent { void Trigger(void* data) { is_triggered = true; - event.fflags++; event.data = reinterpret_cast(data); } @@ -101,6 +100,11 @@ struct EqueueEvent { event.udata = data; } + void TriggerTimer() { + is_triggered = true; + event.data++; + } + void TriggerDisplay(void* data) { is_triggered = true; if (data != nullptr) { @@ -135,7 +139,7 @@ class EqueueInternal { struct SmallTimer { SceKernelEvent event; std::chrono::steady_clock::time_point added; - std::chrono::microseconds interval; + std::chrono::nanoseconds interval; }; public: diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 20bd20f4b..0218285f7 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -194,6 +194,21 @@ int PS4_SYSV_ABI posix_pthread_detach(PthreadT pthread) { return 0; } +#ifdef __clang__ +__attribute__((optnone)) +#else +__attribute__((optimize("O0"))) +#endif +void ClearStack(const PthreadAttr& attr) { + void* sp; + asm("mov %%rsp, %0" : "=rm"(sp)); + // leave a safety net of 128 bytes for memset + const u64 size = (u64)sp - (u64)attr.stackaddr_attr - 128; + volatile void* buf = alloca(size); + memset(const_cast(buf), 0, size); + buf = nullptr; +} + static void RunThread(void* arg) { auto* curthread = static_cast(arg); g_curthread = curthread; @@ -202,7 +217,12 @@ static void RunThread(void* arg) { /* Run the current thread's start routine with argument: */ curthread->native_thr.Initialize(); - void* ret = Core::ExecuteGuest(curthread->start_routine, curthread->arg); + Core::EnsureThreadInitialized(); + + // Clear the stack before running the guest thread + ClearStack(curthread->attr); + + void* ret = curthread->start_routine(curthread->arg); /* Remove thread from tracking */ DebugState.RemoveCurrentThreadFromGuestList(); diff --git a/src/core/libraries/kernel/threads/pthread_spec.cpp b/src/core/libraries/kernel/threads/pthread_spec.cpp index 094866a5a..38032f174 100644 --- a/src/core/libraries/kernel/threads/pthread_spec.cpp +++ b/src/core/libraries/kernel/threads/pthread_spec.cpp @@ -84,7 +84,7 @@ void _thread_cleanupspecific() { * destructor: */ lk.unlock(); - Core::ExecuteGuest(destructor, data); + destructor(data); lk.lock(); } } diff --git a/src/core/libraries/network/net_ctl_obj.cpp b/src/core/libraries/network/net_ctl_obj.cpp index fe9a51100..7be379ca1 100644 --- a/src/core/libraries/network/net_ctl_obj.cpp +++ b/src/core/libraries/network/net_ctl_obj.cpp @@ -52,7 +52,7 @@ void NetCtlInternal::CheckCallback() { : ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED; for (const auto [func, arg] : callbacks) { if (func != nullptr) { - Core::ExecuteGuest(func, event, arg); + func(event, arg); } } } @@ -64,7 +64,7 @@ void NetCtlInternal::CheckNpToolkitCallback() { : ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED; for (const auto [func, arg] : nptool_callbacks) { if (func != nullptr) { - Core::ExecuteGuest(func, event, arg); + func(event, arg); } } } diff --git a/src/core/libraries/ngs2/ngs2.cpp b/src/core/libraries/ngs2/ngs2.cpp index 2f785f9a0..97d19c352 100644 --- a/src/core/libraries/ngs2/ngs2.cpp +++ b/src/core/libraries/ngs2/ngs2.cpp @@ -160,13 +160,13 @@ s32 PS4_SYSV_ABI sceNgs2SystemCreateWithAllocator(const OrbisNgs2SystemOption* o result = SystemSetup(option, &bufferInfo, 0, 0); if (result >= 0) { uintptr_t sysUserData = allocator->userData; - result = Core::ExecuteGuest(hostAlloc, &bufferInfo); + result = hostAlloc(&bufferInfo); if (result >= 0) { OrbisNgs2Handle* handleCopy = outHandle; result = SystemSetup(option, &bufferInfo, hostFree, handleCopy); if (result < 0) { if (hostFree) { - Core::ExecuteGuest(hostFree, &bufferInfo); + hostFree(&bufferInfo); } } } diff --git a/src/core/libraries/usbd/emulated/dimensions.cpp b/src/core/libraries/usbd/emulated/dimensions.cpp index 272f2f649..4d38c66fa 100644 --- a/src/core/libraries/usbd/emulated/dimensions.cpp +++ b/src/core/libraries/usbd/emulated/dimensions.cpp @@ -3,6 +3,8 @@ #include "dimensions.h" +#include "core/tls.h" + #include #include @@ -622,6 +624,8 @@ libusb_transfer_status DimensionsBackend::HandleAsyncTransfer(libusb_transfer* t s32 DimensionsBackend::SubmitTransfer(libusb_transfer* transfer) { if (transfer->endpoint == 0x01) { std::thread write_thread([this, transfer] { + Core::EnsureThreadInitialized(); + HandleAsyncTransfer(transfer); const u8 flags = transfer->flags; diff --git a/src/core/libraries/videoout/driver.cpp b/src/core/libraries/videoout/driver.cpp index 22b4fb853..4672aadb4 100644 --- a/src/core/libraries/videoout/driver.cpp +++ b/src/core/libraries/videoout/driver.cpp @@ -317,20 +317,25 @@ void VideoOutDriver::PresentThread(std::stop_token token) { { // Needs lock here as can be concurrently read by `sceVideoOutGetVblankStatus` std::scoped_lock lock{main_port.vo_mutex}; + + // Trigger flip events for the port + for (auto& event : main_port.vblank_events) { + if (event != nullptr) { + event->TriggerEvent(static_cast(OrbisVideoOutInternalEventId::Vblank), + Kernel::SceKernelEvent::Filter::VideoOut, + reinterpret_cast( + static_cast(OrbisVideoOutInternalEventId::Vblank) | + (vblank_status.count << 16))); + } + } + + // Update vblank status vblank_status.count++; vblank_status.process_time = Libraries::Kernel::sceKernelGetProcessTime(); vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc(); main_port.vblank_cv.notify_all(); } - // Trigger flip events for the port. - for (auto& event : main_port.vblank_events) { - if (event != nullptr) { - event->TriggerEvent(static_cast(OrbisVideoOutInternalEventId::Vblank), - Kernel::SceKernelEvent::Filter::VideoOut, nullptr); - } - } - timer.End(); } } diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 9e34e5902..41e787efd 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -136,7 +136,8 @@ void Linker::Execute(const std::vector& args) { } } params.entry_addr = module->GetEntryAddress(); - ExecuteGuest(RunMainEntry, ¶ms); + Core::EnsureThreadInitialized(); + RunMainEntry(¶ms); }); } @@ -380,8 +381,7 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { if (!addr) { // Module was just loaded by above code. Allocate TLS block for it. const u32 init_image_size = module->tls.init_image_size; - u8* dest = reinterpret_cast( - Core::ExecuteGuest(heap_api->heap_malloc, module->tls.image_size)); + u8* dest = reinterpret_cast(heap_api->heap_malloc(module->tls.image_size)); const u8* src = reinterpret_cast(module->tls.image_virtual_addr); std::memcpy(dest, src, init_image_size); std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size); @@ -413,7 +413,7 @@ void* Linker::AllocateTlsForThread(bool is_primary) { ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread"); } else { if (heap_api) { - addr_out = Core::ExecuteGuest(heap_api->heap_malloc, total_tls_size); + addr_out = heap_api->heap_malloc(total_tls_size); } else { addr_out = std::malloc(total_tls_size); } @@ -423,7 +423,7 @@ void* Linker::AllocateTlsForThread(bool is_primary) { void Linker::FreeTlsForNonPrimaryThread(void* pointer) { if (heap_api) { - Core::ExecuteGuest(heap_api->heap_free, pointer); + heap_api->heap_free(pointer); } else { std::free(pointer); } diff --git a/src/core/module.cpp b/src/core/module.cpp index 127e74293..d0fae3a9f 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -97,7 +97,8 @@ Module::~Module() = default; s32 Module::Start(u64 args, const void* argp, void* param) { LOG_INFO(Core_Linker, "Module started : {}", name); const VAddr addr = dynamic_info.init_virtual_addr + GetBaseAddress(); - return ExecuteGuest(reinterpret_cast(addr), args, argp, param); + Core::EnsureThreadInitialized(); + return reinterpret_cast(addr)(args, argp, param); } void Module::LoadModuleToMemory(u32& max_tls_index) { diff --git a/src/core/tls.h b/src/core/tls.h index 27de518ea..00eba188e 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -45,29 +45,6 @@ Tcb* GetTcbBase(); /// Makes sure TLS is initialized for the thread before entering guest. void EnsureThreadInitialized(); -template -#ifdef __clang__ -__attribute__((optnone)) -#else -__attribute__((optimize("O0"))) -#endif -void ClearStack() { - volatile void* buf = alloca(size); - memset(const_cast(buf), 0, size); - buf = nullptr; -} - -template -ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&&... args) { - EnsureThreadInitialized(); - // clear stack to avoid trash from EnsureThreadInitialized - auto* tcb = GetTcbBase(); - if (tcb != nullptr && tcb->tcb_fiber == nullptr) { - ClearStack<12_KB>(); - } - return func(std::forward(args)...); -} - template struct HostCallWrapperImpl; diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index cce0cd281..418641bc3 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -462,50 +462,134 @@ static std::pair SanitizeCopyLayers(const ImageInfo& src_info, const I void Image::CopyImage(Image& src_image) { const auto& src_info = src_image.info; const u32 num_mips = std::min(src_info.resources.levels, info.resources.levels); - ASSERT(src_info.resources.layers == info.resources.layers || num_mips == 1); + + // Check format compatibility + if (src_info.pixel_format != info.pixel_format) { + LOG_DEBUG(Render_Vulkan, + "Copy between different formats: src={}, dst={}. Color may be incorrect.", + vk::to_string(src_info.pixel_format), vk::to_string(info.pixel_format)); + } const u32 width = src_info.size.width; const u32 height = src_info.size.height; - const u32 depth = + const u32 base_depth = info.type == AmdGpu::ImageType::Color3D ? info.size.depth : src_info.size.depth; + auto [test_src_layers, test_dst_layers] = SanitizeCopyLayers(src_info, info, base_depth); + + ASSERT(test_src_layers == test_dst_layers || num_mips == 1 || + (ConvertImageType(src_info.type) != ConvertImageType(info.type) && + (test_src_layers == 1 || test_dst_layers == 1))); + SetBackingSamples(info.num_samples, false); src_image.SetBackingSamples(src_info.num_samples); boost::container::small_vector image_copies; + + const bool src_is_2d = ConvertImageType(src_info.type) == vk::ImageType::e2D; + const bool src_is_3d = ConvertImageType(src_info.type) == vk::ImageType::e3D; + const bool dst_is_2d = ConvertImageType(info.type) == vk::ImageType::e2D; + const bool dst_is_3d = ConvertImageType(info.type) == vk::ImageType::e3D; + + const bool is_2d_to_3d = src_is_2d && dst_is_3d; + const bool is_3d_to_2d = src_is_3d && dst_is_2d; + const bool is_same_type = !is_2d_to_3d && !is_3d_to_2d; + + // Determine aspect mask - exclude stencil + vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor; + + // For depth/stencil images, only copy the depth aspect (skip stencil) + if (src_image.aspect_mask & vk::ImageAspectFlagBits::eDepth) { + aspect = vk::ImageAspectFlagBits::eDepth; + } + for (u32 mip = 0; mip < num_mips; ++mip) { const auto mip_w = std::max(width >> mip, 1u); const auto mip_h = std::max(height >> mip, 1u); - const auto mip_d = std::max(depth >> mip, 1u); - const auto [src_layers, dst_layers] = SanitizeCopyLayers(src_info, info, mip_d); + const auto mip_d = std::max(base_depth >> mip, 1u); - image_copies.emplace_back(vk::ImageCopy{ - .srcSubresource{ - .aspectMask = src_image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, - .mipLevel = mip, - .baseArrayLayer = 0, - .layerCount = src_layers, - }, - .dstSubresource{ - .aspectMask = aspect_mask & ~vk::ImageAspectFlagBits::eStencil, - .mipLevel = mip, - .baseArrayLayer = 0, - .layerCount = dst_layers, - }, - .extent = {mip_w, mip_h, mip_d}, - }); + auto [src_layers, dst_layers] = SanitizeCopyLayers(src_info, info, mip_d); + + if (is_same_type) { + u32 copy_layers = std::min(src_layers, dst_layers); + + if (src_is_3d) + src_layers = 1; + if (dst_is_3d) + dst_layers = 1; + + vk::ImageCopy copy_region = { + .srcSubresource{ + .aspectMask = aspect, + .mipLevel = mip, + .baseArrayLayer = 0, + .layerCount = copy_layers, + }, + .dstSubresource{ + .aspectMask = aspect, + .mipLevel = mip, + .baseArrayLayer = 0, + .layerCount = copy_layers, + }, + .extent = vk::Extent3D(mip_w, mip_h, mip_d), + }; + image_copies.push_back(copy_region); + } else if (is_2d_to_3d) { + vk::ImageCopy copy_region = { + .srcSubresource{ + .aspectMask = aspect, + .mipLevel = mip, + .baseArrayLayer = 0, + .layerCount = src_layers, + }, + .dstSubresource{ + .aspectMask = aspect, + .mipLevel = mip, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .extent = vk::Extent3D(mip_w, mip_h, src_layers), + }; + image_copies.push_back(copy_region); + } else if (is_3d_to_2d) { + vk::ImageCopy copy_region = { + .srcSubresource{ + .aspectMask = aspect, + .mipLevel = mip, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstSubresource{ + .aspectMask = aspect, + .mipLevel = mip, + .baseArrayLayer = 0, + .layerCount = dst_layers, + }, + .extent = vk::Extent3D(mip_w, mip_h, dst_layers), + }; + image_copies.push_back(copy_region); + } } scheduler->EndRendering(); + + // Remove the pipeline stage flags - they don't belong here src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); + Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {}); auto cmdbuf = scheduler->CommandBuffer(); - cmdbuf.copyImage(src_image.GetImage(), src_image.backing->state.layout, GetImage(), - backing->state.layout, image_copies); - Transit(vk::ImageLayout::eGeneral, - vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {}); + if (!image_copies.empty()) { + cmdbuf.copyImage(src_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, GetImage(), + vk::ImageLayout::eTransferDstOptimal, image_copies); + } + + // Remove pipeline stage flags here too + src_image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, + {}); + + Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {}); } void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset) { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index ad81c3f0b..9bbfeafb5 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -297,6 +297,14 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag if (image_info.guest_address == cache_image.info.guest_address) { const u32 lhs_block_size = image_info.num_bits * image_info.num_samples; const u32 rhs_block_size = cache_image.info.num_bits * cache_image.info.num_samples; + + if (image_info.pitch != cache_image.info.pitch) { + if (safe_to_delete) { + FreeImage(cache_image_id); + } + return {merged_image_id, -1, -1}; + } + if (image_info.BlockDim() != cache_image.info.BlockDim() || lhs_block_size != rhs_block_size) { // Very likely this kind of overlap is caused by allocation from a pool. @@ -346,6 +354,111 @@ std::tuple TextureCache::ResolveOverlap(const ImageInfo& imag return {merged_image_id, -1, -1}; } + // Enhanced debug logging for unreachable case + // Calculate expected size based on format and dimensions + u64 expected_size = + (static_cast(image_info.size.width) * static_cast(image_info.size.height) * + static_cast(image_info.size.depth) * static_cast(image_info.num_bits) / 8); + LOG_ERROR(Render_Vulkan, + "Unresolvable image overlap with equal memory address:\n" + "=== OLD IMAGE (cached) ===\n" + " Address: {:#x}\n" + " Size: {:#x} bytes\n" + " Format: {}\n" + " Type: {}\n" + " Width: {}\n" + " Height: {}\n" + " Depth: {}\n" + " Pitch: {}\n" + " Mip levels: {}\n" + " Array layers: {}\n" + " Samples: {}\n" + " Tile mode: {:#x}\n" + " Block size: {} bits\n" + " Is block-comp: {}\n" + " Guest size: {:#x}\n" + " Last accessed: tick {}\n" + " Safe to delete: {}\n" + "\n" + "=== NEW IMAGE (requested) ===\n" + " Address: {:#x}\n" + " Size: {:#x} bytes\n" + " Format: {}\n" + " Type: {}\n" + " Width: {}\n" + " Height: {}\n" + " Depth: {}\n" + " Pitch: {}\n" + " Mip levels: {}\n" + " Array layers: {}\n" + " Samples: {}\n" + " Tile mode: {:#x}\n" + " Block size: {} bits\n" + " Is block-comp: {}\n" + " Guest size: {:#x}\n" + "\n" + "=== COMPARISON ===\n" + " Same format: {}\n" + " Same type: {}\n" + " Same tile mode: {}\n" + " Same block size: {}\n" + " Same BlockDim: {}\n" + " Same pitch: {}\n" + " Old resources <= new: {} (old: {}, new: {})\n" + " Old size <= new size: {}\n" + " Expected size (calc): {} bytes\n" + " Size ratio (new/expected): {:.2f}x\n" + " Size ratio (new/old): {:.2f}x\n" + " Old vs expected diff: {} bytes ({:+.2f}%)\n" + " New vs expected diff: {} bytes ({:+.2f}%)\n" + " Merged image ID: {}\n" + " Binding type: {}\n" + " Current tick: {}\n" + " Age (ticks since last access): {}", + + // Old image details + cache_image.info.guest_address, cache_image.info.guest_size, + vk::to_string(cache_image.info.pixel_format), + static_cast(cache_image.info.type), cache_image.info.size.width, + cache_image.info.size.height, cache_image.info.size.depth, cache_image.info.pitch, + cache_image.info.resources.levels, cache_image.info.resources.layers, + cache_image.info.num_samples, static_cast(cache_image.info.tile_mode), + cache_image.info.num_bits, cache_image.info.props.is_block, + cache_image.info.guest_size, cache_image.tick_accessed_last, safe_to_delete, + + // New image details + image_info.guest_address, image_info.guest_size, + vk::to_string(image_info.pixel_format), static_cast(image_info.type), + image_info.size.width, image_info.size.height, image_info.size.depth, + image_info.pitch, image_info.resources.levels, image_info.resources.layers, + image_info.num_samples, static_cast(image_info.tile_mode), + image_info.num_bits, image_info.props.is_block, image_info.guest_size, + + // Comparison + (image_info.pixel_format == cache_image.info.pixel_format), + (image_info.type == cache_image.info.type), + (image_info.tile_mode == cache_image.info.tile_mode), + (image_info.num_bits == cache_image.info.num_bits), + (image_info.BlockDim() == cache_image.info.BlockDim()), + (image_info.pitch == cache_image.info.pitch), + (cache_image.info.resources <= image_info.resources), + cache_image.info.resources.levels, image_info.resources.levels, + (cache_image.info.guest_size <= image_info.guest_size), expected_size, + + // Size ratios + static_cast(image_info.guest_size) / expected_size, + static_cast(image_info.guest_size) / cache_image.info.guest_size, + + // Difference between actual and expected sizes with percentages + static_cast(cache_image.info.guest_size) - static_cast(expected_size), + (static_cast(cache_image.info.guest_size) / expected_size - 1.0) * 100.0, + + static_cast(image_info.guest_size) - static_cast(expected_size), + (static_cast(image_info.guest_size) / expected_size - 1.0) * 100.0, + + merged_image_id.index, static_cast(binding), scheduler.CurrentTick(), + scheduler.CurrentTick() - cache_image.tick_accessed_last); + UNREACHABLE_MSG("Encountered unresolvable image overlap with equal memory address."); }