Merge branch 'main' into user_and_settings

This commit is contained in:
georgemoralis 2026-02-21 09:56:50 +02:00 committed by GitHub
commit a8f51584bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 327 additions and 119 deletions

View File

@@ -71,7 +71,7 @@ public:
~FileBackend() = default;
void Write(const Entry& entry) {
if (!enabled) {
if (!enabled && entry.log_level != Level::Critical) {
return;
}

View File

@@ -12,28 +12,28 @@ void* PS4_SYSV_ABI AvPlayer::Allocate(void* handle, u32 alignment, u32 size) {
const auto* const self = reinterpret_cast<AvPlayer*>(handle);
const auto allocate = self->m_init_data_original.memory_replacement.allocate;
const auto ptr = self->m_init_data_original.memory_replacement.object_ptr;
return Core::ExecuteGuest(allocate, ptr, alignment, size);
return allocate(ptr, alignment, size);
}
void PS4_SYSV_ABI AvPlayer::Deallocate(void* handle, void* memory) {
const auto* const self = reinterpret_cast<AvPlayer*>(handle);
const auto deallocate = self->m_init_data_original.memory_replacement.deallocate;
const auto ptr = self->m_init_data_original.memory_replacement.object_ptr;
return Core::ExecuteGuest(deallocate, ptr, memory);
return deallocate(ptr, memory);
}
void* PS4_SYSV_ABI AvPlayer::AllocateTexture(void* handle, u32 alignment, u32 size) {
const auto* const self = reinterpret_cast<AvPlayer*>(handle);
const auto allocate = self->m_init_data_original.memory_replacement.allocate_texture;
const auto ptr = self->m_init_data_original.memory_replacement.object_ptr;
return Core::ExecuteGuest(allocate, ptr, alignment, size);
return allocate(ptr, alignment, size);
}
void PS4_SYSV_ABI AvPlayer::DeallocateTexture(void* handle, void* memory) {
const auto* const self = reinterpret_cast<AvPlayer*>(handle);
const auto deallocate = self->m_init_data_original.memory_replacement.deallocate_texture;
const auto ptr = self->m_init_data_original.memory_replacement.object_ptr;
return Core::ExecuteGuest(deallocate, ptr, memory);
return deallocate(ptr, memory);
}
int PS4_SYSV_ABI AvPlayer::OpenFile(void* handle, const char* filename) {
@@ -42,7 +42,7 @@ int PS4_SYSV_ABI AvPlayer::OpenFile(void* handle, const char* filename) {
const auto open = self->m_init_data_original.file_replacement.open;
const auto ptr = self->m_init_data_original.file_replacement.object_ptr;
return Core::ExecuteGuest(open, ptr, filename);
return open(ptr, filename);
}
int PS4_SYSV_ABI AvPlayer::CloseFile(void* handle) {
@@ -51,7 +51,7 @@ int PS4_SYSV_ABI AvPlayer::CloseFile(void* handle) {
const auto close = self->m_init_data_original.file_replacement.close;
const auto ptr = self->m_init_data_original.file_replacement.object_ptr;
return Core::ExecuteGuest(close, ptr);
return close(ptr);
}
int PS4_SYSV_ABI AvPlayer::ReadOffsetFile(void* handle, u8* buffer, u64 position, u32 length) {
@@ -60,7 +60,7 @@ int PS4_SYSV_ABI AvPlayer::ReadOffsetFile(void* handle, u8* buffer, u64 position
const auto read_offset = self->m_init_data_original.file_replacement.read_offset;
const auto ptr = self->m_init_data_original.file_replacement.object_ptr;
return Core::ExecuteGuest(read_offset, ptr, buffer, position, length);
return read_offset(ptr, buffer, position, length);
}
u64 PS4_SYSV_ABI AvPlayer::SizeFile(void* handle) {
@@ -69,7 +69,7 @@ u64 PS4_SYSV_ABI AvPlayer::SizeFile(void* handle) {
const auto size = self->m_init_data_original.file_replacement.size;
const auto ptr = self->m_init_data_original.file_replacement.object_ptr;
return Core::ExecuteGuest(size, ptr);
return size(ptr);
}
AvPlayerInitData AvPlayer::StubInitData(const AvPlayerInitData& data) {

View File

@@ -92,7 +92,7 @@ void AvPlayerState::DefaultEventCallback(void* opaque, AvPlayerEvents event_id,
const auto callback = self->m_event_replacement.event_callback;
const auto ptr = self->m_event_replacement.object_ptr;
if (callback != nullptr) {
Core::ExecuteGuest(callback, ptr, event_id, 0, event_data);
callback(ptr, event_id, 0, event_data);
}
}

View File

@@ -99,16 +99,16 @@ public:
if (m_ime_mode) {
OrbisImeParam param = m_param.ime;
if (use_param_handler) {
Core::ExecuteGuest(param.handler, param.arg, event);
param.handler(param.arg, event);
} else {
Core::ExecuteGuest(handler, param.arg, event);
handler(param.arg, event);
}
} else {
OrbisImeKeyboardParam param = m_param.key;
if (use_param_handler) {
Core::ExecuteGuest(param.handler, param.arg, event);
param.handler(param.arg, event);
} else {
Core::ExecuteGuest(handler, param.arg, event);
handler(param.arg, event);
}
}
}

View File

@@ -131,8 +131,7 @@ bool ImeDialogState::CallTextFilter() {
return false;
}
int ret =
Core::ExecuteGuest(text_filter, out_text, &out_text_length, src_text, src_text_length);
int ret = text_filter(out_text, &out_text_length, src_text, src_text_length);
if (ret != 0) {
return false;
@@ -153,7 +152,7 @@ bool ImeDialogState::CallKeyboardFilter(const OrbisImeKeycode* src_keycode, u16*
return true;
}
int ret = Core::ExecuteGuest(keyboard_filter, src_keycode, out_keycode, out_status, nullptr);
int ret = keyboard_filter(src_keycode, out_keycode, out_status, nullptr);
return ret == 0;
}

View File

@@ -8,6 +8,7 @@
#include "common/logging/log.h"
#include "core/libraries/kernel/equeue.h"
#include "core/libraries/kernel/orbis_error.h"
#include "core/libraries/kernel/time.h"
#include "core/libraries/libs.h"
namespace Libraries::Kernel {
@@ -15,23 +16,39 @@ namespace Libraries::Kernel {
extern boost::asio::io_context io_context;
extern void KernelSignalRequest();
static constexpr auto HrTimerSpinlockThresholdUs = 1200u;
static constexpr auto HrTimerSpinlockThresholdNs = 1200000u;
// Events are uniquely identified by id and filter.
bool EqueueInternal::AddEvent(EqueueEvent& event) {
std::scoped_lock lock{m_mutex};
// Calculate timer interval
event.time_added = std::chrono::steady_clock::now();
if (event.event.filter == SceKernelEvent::Filter::Timer ||
event.event.filter == SceKernelEvent::Filter::HrTimer) {
// HrTimer events are offset by the threshold of time at the end that we spinlock for
// greater accuracy.
const auto offset =
event.event.filter == SceKernelEvent::Filter::HrTimer ? HrTimerSpinlockThresholdUs : 0u;
event.timer_interval = std::chrono::microseconds(event.event.data - offset);
// Set timer interval
event.timer_interval = std::chrono::nanoseconds(event.event.data);
}
// First, check if there's already an event with the same id and filter.
u64 id = event.event.ident;
SceKernelEvent::Filter filter = event.event.filter;
const auto& find_it = std::ranges::find_if(m_events, [id, filter](auto& ev) {
return ev.event.ident == id && ev.event.filter == filter;
});
// If there is a duplicate event, we need to update that instead.
if (find_it != m_events.cend()) {
// Specifically, update user data and timer_interval.
// Trigger status and event data should remain intact.
auto& old_event = *find_it;
old_event.timer_interval = event.timer_interval;
old_event.event.udata = event.event.udata;
return true;
}
// Clear input data from event.
event.event.data = 0;
// Remove add flag from event
event.event.flags &= ~SceKernelEvent::Flags::Add;
@@ -157,6 +174,9 @@ bool EqueueInternal::TriggerEvent(u64 ident, s16 filter, void* trigger_data) {
event.TriggerDisplay(trigger_data);
} else if (filter == SceKernelEvent::Filter::User) {
event.TriggerUser(trigger_data);
} else if (filter == SceKernelEvent::Filter::Timer ||
filter == SceKernelEvent::Filter::HrTimer) {
event.TriggerTimer();
} else {
event.Trigger(trigger_data);
}
@@ -197,7 +217,7 @@ bool EqueueInternal::AddSmallTimer(EqueueEvent& ev) {
SmallTimer st;
st.event = ev.event;
st.added = std::chrono::steady_clock::now();
st.interval = std::chrono::microseconds{ev.event.data};
st.interval = std::chrono::nanoseconds{ev.event.data};
{
std::scoped_lock lock{m_mutex};
m_small_timers[st.event.ident] = std::move(st);
@@ -307,30 +327,23 @@ int PS4_SYSV_ABI sceKernelWaitEqueue(SceKernelEqueue eq, SceKernelEvent* ev, int
}
static void HrTimerCallback(SceKernelEqueue eq, const SceKernelEvent& kevent) {
static EqueueEvent event;
event.event = kevent;
event.event.data = HrTimerSpinlockThresholdUs;
eq->AddSmallTimer(event);
eq->TriggerEvent(kevent.ident, SceKernelEvent::Filter::HrTimer, kevent.udata);
}
s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec* ts, void* udata) {
s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, OrbisKernelTimespec* ts,
void* udata) {
if (eq == nullptr) {
return ORBIS_KERNEL_ERROR_EBADF;
}
if (ts->tv_sec > 100 || ts->tv_nsec < 100'000) {
return ORBIS_KERNEL_ERROR_EINVAL;
}
ASSERT(ts->tv_nsec > 1000); // assume 1us resolution
const auto total_us = ts->tv_sec * 1000'000 + ts->tv_nsec / 1000;
const auto total_ns = ts->tv_sec * 1000000000 + ts->tv_nsec;
EqueueEvent event{};
event.event.ident = id;
event.event.filter = SceKernelEvent::Filter::HrTimer;
event.event.flags = SceKernelEvent::Flags::Add | SceKernelEvent::Flags::OneShot;
event.event.fflags = 0;
event.event.data = total_us;
event.event.data = total_ns;
event.event.udata = udata;
// HR timers cannot be implemented within the existing event queue architecture due to the
@@ -340,12 +353,7 @@ s32 PS4_SYSV_ABI sceKernelAddHRTimerEvent(SceKernelEqueue eq, int id, timespec*
// `HrTimerSpinlockThresholdUs`) and fall back to boost asio timers if the time to tick is
// large. Even for large delays, we truncate a small portion to complete the wait
// using the spinlock, prioritizing precision.
if (eq->EventExists(event.event.ident, event.event.filter)) {
eq->RemoveEvent(id, SceKernelEvent::Filter::HrTimer);
}
if (total_us < HrTimerSpinlockThresholdUs) {
if (total_ns < HrTimerSpinlockThresholdNs) {
return eq->AddSmallTimer(event) ? ORBIS_OK : ORBIS_KERNEL_ERROR_ENOMEM;
}
@@ -391,16 +399,9 @@ int PS4_SYSV_ABI sceKernelAddTimerEvent(SceKernelEqueue eq, int id, SceKernelUse
event.event.filter = SceKernelEvent::Filter::Timer;
event.event.flags = SceKernelEvent::Flags::Add;
event.event.fflags = 0;
event.event.data = usec;
event.event.data = usec * 1000;
event.event.udata = udata;
if (eq->EventExists(event.event.ident, event.event.filter)) {
eq->RemoveEvent(id, SceKernelEvent::Filter::Timer);
LOG_DEBUG(Kernel_Event,
"Timer event already exists, removing it: queue name={}, queue id={}",
eq->GetName(), event.event.ident);
}
LOG_DEBUG(Kernel_Event, "Added timing event: queue name={}, queue id={}, usec={}, pointer={:x}",
eq->GetName(), event.event.ident, usec, reinterpret_cast<uintptr_t>(udata));

View File

@@ -81,7 +81,7 @@ struct EqueueEvent {
SceKernelEvent event;
void* data = nullptr;
std::chrono::steady_clock::time_point time_added;
std::chrono::microseconds timer_interval;
std::chrono::nanoseconds timer_interval;
std::unique_ptr<boost::asio::steady_timer> timer;
void Clear() {
@@ -92,7 +92,6 @@ struct EqueueEvent {
void Trigger(void* data) {
is_triggered = true;
event.fflags++;
event.data = reinterpret_cast<uintptr_t>(data);
}
@@ -101,6 +100,11 @@ struct EqueueEvent {
event.udata = data;
}
void TriggerTimer() {
is_triggered = true;
event.data++;
}
void TriggerDisplay(void* data) {
is_triggered = true;
if (data != nullptr) {
@@ -135,7 +139,7 @@ class EqueueInternal {
struct SmallTimer {
SceKernelEvent event;
std::chrono::steady_clock::time_point added;
std::chrono::microseconds interval;
std::chrono::nanoseconds interval;
};
public:

View File

@@ -194,6 +194,21 @@ int PS4_SYSV_ABI posix_pthread_detach(PthreadT pthread) {
return 0;
}
#ifdef __clang__
__attribute__((optnone))
#else
__attribute__((optimize("O0")))
#endif
void ClearStack(const PthreadAttr& attr) {
void* sp;
asm("mov %%rsp, %0" : "=rm"(sp));
// leave a safety net of 128 bytes for memset
const u64 size = (u64)sp - (u64)attr.stackaddr_attr - 128;
volatile void* buf = alloca(size);
memset(const_cast<void*>(buf), 0, size);
buf = nullptr;
}
static void RunThread(void* arg) {
auto* curthread = static_cast<Pthread*>(arg);
g_curthread = curthread;
@@ -202,7 +217,12 @@ static void RunThread(void* arg) {
/* Run the current thread's start routine with argument: */
curthread->native_thr.Initialize();
void* ret = Core::ExecuteGuest(curthread->start_routine, curthread->arg);
Core::EnsureThreadInitialized();
// Clear the stack before running the guest thread
ClearStack(curthread->attr);
void* ret = curthread->start_routine(curthread->arg);
/* Remove thread from tracking */
DebugState.RemoveCurrentThreadFromGuestList();

View File

@@ -84,7 +84,7 @@ void _thread_cleanupspecific() {
* destructor:
*/
lk.unlock();
Core::ExecuteGuest(destructor, data);
destructor(data);
lk.lock();
}
}

View File

@@ -52,7 +52,7 @@ void NetCtlInternal::CheckCallback() {
: ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED;
for (const auto [func, arg] : callbacks) {
if (func != nullptr) {
Core::ExecuteGuest(func, event, arg);
func(event, arg);
}
}
}
@@ -64,7 +64,7 @@ void NetCtlInternal::CheckNpToolkitCallback() {
: ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED;
for (const auto [func, arg] : nptool_callbacks) {
if (func != nullptr) {
Core::ExecuteGuest(func, event, arg);
func(event, arg);
}
}
}

View File

@@ -160,13 +160,13 @@ s32 PS4_SYSV_ABI sceNgs2SystemCreateWithAllocator(const OrbisNgs2SystemOption* o
result = SystemSetup(option, &bufferInfo, 0, 0);
if (result >= 0) {
uintptr_t sysUserData = allocator->userData;
result = Core::ExecuteGuest(hostAlloc, &bufferInfo);
result = hostAlloc(&bufferInfo);
if (result >= 0) {
OrbisNgs2Handle* handleCopy = outHandle;
result = SystemSetup(option, &bufferInfo, hostFree, handleCopy);
if (result < 0) {
if (hostFree) {
Core::ExecuteGuest(hostFree, &bufferInfo);
hostFree(&bufferInfo);
}
}
}

View File

@@ -3,6 +3,8 @@
#include "dimensions.h"
#include "core/tls.h"
#include <mutex>
#include <thread>
@@ -622,6 +624,8 @@ libusb_transfer_status DimensionsBackend::HandleAsyncTransfer(libusb_transfer* t
s32 DimensionsBackend::SubmitTransfer(libusb_transfer* transfer) {
if (transfer->endpoint == 0x01) {
std::thread write_thread([this, transfer] {
Core::EnsureThreadInitialized();
HandleAsyncTransfer(transfer);
const u8 flags = transfer->flags;

View File

@@ -317,20 +317,25 @@ void VideoOutDriver::PresentThread(std::stop_token token) {
{
// Needs lock here as can be concurrently read by `sceVideoOutGetVblankStatus`
std::scoped_lock lock{main_port.vo_mutex};
// Trigger flip events for the port
for (auto& event : main_port.vblank_events) {
if (event != nullptr) {
event->TriggerEvent(static_cast<u64>(OrbisVideoOutInternalEventId::Vblank),
Kernel::SceKernelEvent::Filter::VideoOut,
reinterpret_cast<void*>(
static_cast<u64>(OrbisVideoOutInternalEventId::Vblank) |
(vblank_status.count << 16)));
}
}
// Update vblank status
vblank_status.count++;
vblank_status.process_time = Libraries::Kernel::sceKernelGetProcessTime();
vblank_status.tsc = Libraries::Kernel::sceKernelReadTsc();
main_port.vblank_cv.notify_all();
}
// Trigger flip events for the port.
for (auto& event : main_port.vblank_events) {
if (event != nullptr) {
event->TriggerEvent(static_cast<u64>(OrbisVideoOutInternalEventId::Vblank),
Kernel::SceKernelEvent::Filter::VideoOut, nullptr);
}
}
timer.End();
}
}

View File

@@ -136,7 +136,8 @@ void Linker::Execute(const std::vector<std::string>& args) {
}
}
params.entry_addr = module->GetEntryAddress();
ExecuteGuest(RunMainEntry, &params);
Core::EnsureThreadInitialized();
RunMainEntry(&params);
});
}
@@ -380,8 +381,7 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) {
if (!addr) {
// Module was just loaded by above code. Allocate TLS block for it.
const u32 init_image_size = module->tls.init_image_size;
u8* dest = reinterpret_cast<u8*>(
Core::ExecuteGuest(heap_api->heap_malloc, module->tls.image_size));
u8* dest = reinterpret_cast<u8*>(heap_api->heap_malloc(module->tls.image_size));
const u8* src = reinterpret_cast<const u8*>(module->tls.image_virtual_addr);
std::memcpy(dest, src, init_image_size);
std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size);
@@ -413,7 +413,7 @@ void* Linker::AllocateTlsForThread(bool is_primary) {
ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread");
} else {
if (heap_api) {
addr_out = Core::ExecuteGuest(heap_api->heap_malloc, total_tls_size);
addr_out = heap_api->heap_malloc(total_tls_size);
} else {
addr_out = std::malloc(total_tls_size);
}
@@ -423,7 +423,7 @@
void Linker::FreeTlsForNonPrimaryThread(void* pointer) {
if (heap_api) {
Core::ExecuteGuest(heap_api->heap_free, pointer);
heap_api->heap_free(pointer);
} else {
std::free(pointer);
}

View File

@@ -97,7 +97,8 @@ Module::~Module() = default;
s32 Module::Start(u64 args, const void* argp, void* param) {
LOG_INFO(Core_Linker, "Module started : {}", name);
const VAddr addr = dynamic_info.init_virtual_addr + GetBaseAddress();
return ExecuteGuest(reinterpret_cast<EntryFunc>(addr), args, argp, param);
Core::EnsureThreadInitialized();
return reinterpret_cast<EntryFunc>(addr)(args, argp, param);
}
void Module::LoadModuleToMemory(u32& max_tls_index) {

View File

@@ -45,29 +45,6 @@ Tcb* GetTcbBase();
/// Makes sure TLS is initialized for the thread before entering guest.
void EnsureThreadInitialized();
template <size_t size>
#ifdef __clang__
__attribute__((optnone))
#else
__attribute__((optimize("O0")))
#endif
void ClearStack() {
volatile void* buf = alloca(size);
memset(const_cast<void*>(buf), 0, size);
buf = nullptr;
}
template <class ReturnType, class... FuncArgs, class... CallArgs>
ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&&... args) {
EnsureThreadInitialized();
// clear stack to avoid trash from EnsureThreadInitialized
auto* tcb = GetTcbBase();
if (tcb != nullptr && tcb->tcb_fiber == nullptr) {
ClearStack<12_KB>();
}
return func(std::forward<CallArgs>(args)...);
}
template <class F, F f>
struct HostCallWrapperImpl;

View File

@@ -462,50 +462,134 @@ static std::pair<u32, u32> SanitizeCopyLayers(const ImageInfo& src_info, const I
void Image::CopyImage(Image& src_image) {
const auto& src_info = src_image.info;
const u32 num_mips = std::min(src_info.resources.levels, info.resources.levels);
ASSERT(src_info.resources.layers == info.resources.layers || num_mips == 1);
// Check format compatibility
if (src_info.pixel_format != info.pixel_format) {
LOG_DEBUG(Render_Vulkan,
"Copy between different formats: src={}, dst={}. Color may be incorrect.",
vk::to_string(src_info.pixel_format), vk::to_string(info.pixel_format));
}
const u32 width = src_info.size.width;
const u32 height = src_info.size.height;
const u32 depth =
const u32 base_depth =
info.type == AmdGpu::ImageType::Color3D ? info.size.depth : src_info.size.depth;
auto [test_src_layers, test_dst_layers] = SanitizeCopyLayers(src_info, info, base_depth);
ASSERT(test_src_layers == test_dst_layers || num_mips == 1 ||
(ConvertImageType(src_info.type) != ConvertImageType(info.type) &&
(test_src_layers == 1 || test_dst_layers == 1)));
SetBackingSamples(info.num_samples, false);
src_image.SetBackingSamples(src_info.num_samples);
boost::container::small_vector<vk::ImageCopy, 8> image_copies;
const bool src_is_2d = ConvertImageType(src_info.type) == vk::ImageType::e2D;
const bool src_is_3d = ConvertImageType(src_info.type) == vk::ImageType::e3D;
const bool dst_is_2d = ConvertImageType(info.type) == vk::ImageType::e2D;
const bool dst_is_3d = ConvertImageType(info.type) == vk::ImageType::e3D;
const bool is_2d_to_3d = src_is_2d && dst_is_3d;
const bool is_3d_to_2d = src_is_3d && dst_is_2d;
const bool is_same_type = !is_2d_to_3d && !is_3d_to_2d;
// Determine aspect mask - exclude stencil
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eColor;
// For depth/stencil images, only copy the depth aspect (skip stencil)
if (src_image.aspect_mask & vk::ImageAspectFlagBits::eDepth) {
aspect = vk::ImageAspectFlagBits::eDepth;
}
for (u32 mip = 0; mip < num_mips; ++mip) {
const auto mip_w = std::max(width >> mip, 1u);
const auto mip_h = std::max(height >> mip, 1u);
const auto mip_d = std::max(depth >> mip, 1u);
const auto [src_layers, dst_layers] = SanitizeCopyLayers(src_info, info, mip_d);
const auto mip_d = std::max(base_depth >> mip, 1u);
image_copies.emplace_back(vk::ImageCopy{
.srcSubresource{
.aspectMask = src_image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = src_layers,
},
.dstSubresource{
.aspectMask = aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = dst_layers,
},
.extent = {mip_w, mip_h, mip_d},
});
auto [src_layers, dst_layers] = SanitizeCopyLayers(src_info, info, mip_d);
if (is_same_type) {
u32 copy_layers = std::min(src_layers, dst_layers);
if (src_is_3d)
src_layers = 1;
if (dst_is_3d)
dst_layers = 1;
vk::ImageCopy copy_region = {
.srcSubresource{
.aspectMask = aspect,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = copy_layers,
},
.dstSubresource{
.aspectMask = aspect,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = copy_layers,
},
.extent = vk::Extent3D(mip_w, mip_h, mip_d),
};
image_copies.push_back(copy_region);
} else if (is_2d_to_3d) {
vk::ImageCopy copy_region = {
.srcSubresource{
.aspectMask = aspect,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = src_layers,
},
.dstSubresource{
.aspectMask = aspect,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = 1,
},
.extent = vk::Extent3D(mip_w, mip_h, src_layers),
};
image_copies.push_back(copy_region);
} else if (is_3d_to_2d) {
vk::ImageCopy copy_region = {
.srcSubresource{
.aspectMask = aspect,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = 1,
},
.dstSubresource{
.aspectMask = aspect,
.mipLevel = mip,
.baseArrayLayer = 0,
.layerCount = dst_layers,
},
.extent = vk::Extent3D(mip_w, mip_h, dst_layers),
};
image_copies.push_back(copy_region);
}
}
scheduler->EndRendering();
// Remove the pipeline stage flags - they don't belong here
src_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {});
auto cmdbuf = scheduler->CommandBuffer();
cmdbuf.copyImage(src_image.GetImage(), src_image.backing->state.layout, GetImage(),
backing->state.layout, image_copies);
Transit(vk::ImageLayout::eGeneral,
vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eTransferRead, {});
if (!image_copies.empty()) {
cmdbuf.copyImage(src_image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, GetImage(),
vk::ImageLayout::eTransferDstOptimal, image_copies);
}
// Remove pipeline stage flags here too
src_image.Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead,
{});
Transit(vk::ImageLayout::eShaderReadOnlyOptimal, vk::AccessFlagBits2::eShaderRead, {});
}
void Image::CopyImageWithBuffer(Image& src_image, vk::Buffer buffer, u64 offset) {

View File

@@ -297,6 +297,14 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
if (image_info.guest_address == cache_image.info.guest_address) {
const u32 lhs_block_size = image_info.num_bits * image_info.num_samples;
const u32 rhs_block_size = cache_image.info.num_bits * cache_image.info.num_samples;
if (image_info.pitch != cache_image.info.pitch) {
if (safe_to_delete) {
FreeImage(cache_image_id);
}
return {merged_image_id, -1, -1};
}
if (image_info.BlockDim() != cache_image.info.BlockDim() ||
lhs_block_size != rhs_block_size) {
// Very likely this kind of overlap is caused by allocation from a pool.
@@ -346,6 +354,111 @@ std::tuple<ImageId, int, int> TextureCache::ResolveOverlap(const ImageInfo& imag
return {merged_image_id, -1, -1};
}
// Enhanced debug logging for unreachable case
// Calculate expected size based on format and dimensions
u64 expected_size =
(static_cast<u64>(image_info.size.width) * static_cast<u64>(image_info.size.height) *
static_cast<u64>(image_info.size.depth) * static_cast<u64>(image_info.num_bits) / 8);
LOG_ERROR(Render_Vulkan,
"Unresolvable image overlap with equal memory address:\n"
"=== OLD IMAGE (cached) ===\n"
" Address: {:#x}\n"
" Size: {:#x} bytes\n"
" Format: {}\n"
" Type: {}\n"
" Width: {}\n"
" Height: {}\n"
" Depth: {}\n"
" Pitch: {}\n"
" Mip levels: {}\n"
" Array layers: {}\n"
" Samples: {}\n"
" Tile mode: {:#x}\n"
" Block size: {} bits\n"
" Is block-comp: {}\n"
" Guest size: {:#x}\n"
" Last accessed: tick {}\n"
" Safe to delete: {}\n"
"\n"
"=== NEW IMAGE (requested) ===\n"
" Address: {:#x}\n"
" Size: {:#x} bytes\n"
" Format: {}\n"
" Type: {}\n"
" Width: {}\n"
" Height: {}\n"
" Depth: {}\n"
" Pitch: {}\n"
" Mip levels: {}\n"
" Array layers: {}\n"
" Samples: {}\n"
" Tile mode: {:#x}\n"
" Block size: {} bits\n"
" Is block-comp: {}\n"
" Guest size: {:#x}\n"
"\n"
"=== COMPARISON ===\n"
" Same format: {}\n"
" Same type: {}\n"
" Same tile mode: {}\n"
" Same block size: {}\n"
" Same BlockDim: {}\n"
" Same pitch: {}\n"
" Old resources <= new: {} (old: {}, new: {})\n"
" Old size <= new size: {}\n"
" Expected size (calc): {} bytes\n"
" Size ratio (new/expected): {:.2f}x\n"
" Size ratio (new/old): {:.2f}x\n"
" Old vs expected diff: {} bytes ({:+.2f}%)\n"
" New vs expected diff: {} bytes ({:+.2f}%)\n"
" Merged image ID: {}\n"
" Binding type: {}\n"
" Current tick: {}\n"
" Age (ticks since last access): {}",
// Old image details
cache_image.info.guest_address, cache_image.info.guest_size,
vk::to_string(cache_image.info.pixel_format),
static_cast<int>(cache_image.info.type), cache_image.info.size.width,
cache_image.info.size.height, cache_image.info.size.depth, cache_image.info.pitch,
cache_image.info.resources.levels, cache_image.info.resources.layers,
cache_image.info.num_samples, static_cast<u32>(cache_image.info.tile_mode),
cache_image.info.num_bits, cache_image.info.props.is_block,
cache_image.info.guest_size, cache_image.tick_accessed_last, safe_to_delete,
// New image details
image_info.guest_address, image_info.guest_size,
vk::to_string(image_info.pixel_format), static_cast<int>(image_info.type),
image_info.size.width, image_info.size.height, image_info.size.depth,
image_info.pitch, image_info.resources.levels, image_info.resources.layers,
image_info.num_samples, static_cast<u32>(image_info.tile_mode),
image_info.num_bits, image_info.props.is_block, image_info.guest_size,
// Comparison
(image_info.pixel_format == cache_image.info.pixel_format),
(image_info.type == cache_image.info.type),
(image_info.tile_mode == cache_image.info.tile_mode),
(image_info.num_bits == cache_image.info.num_bits),
(image_info.BlockDim() == cache_image.info.BlockDim()),
(image_info.pitch == cache_image.info.pitch),
(cache_image.info.resources <= image_info.resources),
cache_image.info.resources.levels, image_info.resources.levels,
(cache_image.info.guest_size <= image_info.guest_size), expected_size,
// Size ratios
static_cast<double>(image_info.guest_size) / expected_size,
static_cast<double>(image_info.guest_size) / cache_image.info.guest_size,
// Difference between actual and expected sizes with percentages
static_cast<s64>(cache_image.info.guest_size) - static_cast<s64>(expected_size),
(static_cast<double>(cache_image.info.guest_size) / expected_size - 1.0) * 100.0,
static_cast<s64>(image_info.guest_size) - static_cast<s64>(expected_size),
(static_cast<double>(image_info.guest_size) / expected_size - 1.0) * 100.0,
merged_image_id.index, static_cast<int>(binding), scheduler.CurrentTick(),
scheduler.CurrentTick() - cache_image.tick_accessed_last);
UNREACHABLE_MSG("Encountered unresolvable image overlap with equal memory address.");
}