From f6d71646c0a6e450e9edab472ccbbb435bdc35c1 Mon Sep 17 00:00:00 2001 From: Vladislav Mikhalin Date: Thu, 19 Feb 2026 21:26:33 +0300 Subject: [PATCH] threads: initialize TLS on thread creation (#4048) * initialize TLS on thread creation * initialize tls in dimensions toypad writer thread * clear most of the stack on thread init with some black magic --- src/core/libraries/avplayer/avplayer_impl.cpp | 16 ++++++------- .../libraries/avplayer/avplayer_state.cpp | 2 +- src/core/libraries/ime/ime.cpp | 8 +++---- src/core/libraries/ime/ime_dialog_ui.cpp | 5 ++-- src/core/libraries/kernel/threads/pthread.cpp | 22 +++++++++++++++++- .../libraries/kernel/threads/pthread_spec.cpp | 2 +- src/core/libraries/network/net_ctl_obj.cpp | 4 ++-- src/core/libraries/ngs2/ngs2.cpp | 4 ++-- .../libraries/usbd/emulated/dimensions.cpp | 4 ++++ src/core/linker.cpp | 10 ++++---- src/core/module.cpp | 3 ++- src/core/tls.h | 23 ------------------- 12 files changed, 52 insertions(+), 51 deletions(-) diff --git a/src/core/libraries/avplayer/avplayer_impl.cpp b/src/core/libraries/avplayer/avplayer_impl.cpp index 138747da4..db32862ad 100644 --- a/src/core/libraries/avplayer/avplayer_impl.cpp +++ b/src/core/libraries/avplayer/avplayer_impl.cpp @@ -12,28 +12,28 @@ void* PS4_SYSV_ABI AvPlayer::Allocate(void* handle, u32 alignment, u32 size) { const auto* const self = reinterpret_cast(handle); const auto allocate = self->m_init_data_original.memory_replacement.allocate; const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; - return Core::ExecuteGuest(allocate, ptr, alignment, size); + return allocate(ptr, alignment, size); } void PS4_SYSV_ABI AvPlayer::Deallocate(void* handle, void* memory) { const auto* const self = reinterpret_cast(handle); const auto deallocate = self->m_init_data_original.memory_replacement.deallocate; const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; - return Core::ExecuteGuest(deallocate, ptr, memory); + return deallocate(ptr, memory); } void* PS4_SYSV_ABI AvPlayer::AllocateTexture(void* handle, u32 alignment, u32 size) { const auto* const self = reinterpret_cast(handle); const auto allocate = self->m_init_data_original.memory_replacement.allocate_texture; const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; - return Core::ExecuteGuest(allocate, ptr, alignment, size); + return allocate(ptr, alignment, size); } void PS4_SYSV_ABI AvPlayer::DeallocateTexture(void* handle, void* memory) { const auto* const self = reinterpret_cast(handle); const auto deallocate = self->m_init_data_original.memory_replacement.deallocate_texture; const auto ptr = self->m_init_data_original.memory_replacement.object_ptr; - return Core::ExecuteGuest(deallocate, ptr, memory); + return deallocate(ptr, memory); } int PS4_SYSV_ABI AvPlayer::OpenFile(void* handle, const char* filename) { @@ -42,7 +42,7 @@ int PS4_SYSV_ABI AvPlayer::OpenFile(void* handle, const char* filename) { const auto open = self->m_init_data_original.file_replacement.open; const auto ptr = self->m_init_data_original.file_replacement.object_ptr; - return Core::ExecuteGuest(open, ptr, filename); + return open(ptr, filename); } int PS4_SYSV_ABI AvPlayer::CloseFile(void* handle) { @@ -51,7 +51,7 @@ int PS4_SYSV_ABI AvPlayer::CloseFile(void* handle) { const auto close = self->m_init_data_original.file_replacement.close; const auto ptr = self->m_init_data_original.file_replacement.object_ptr; - return Core::ExecuteGuest(close, ptr); + return close(ptr); } int PS4_SYSV_ABI AvPlayer::ReadOffsetFile(void* handle, u8* buffer, u64 position, u32 length) { @@ -60,7 +60,7 @@ int PS4_SYSV_ABI AvPlayer::ReadOffsetFile(void* handle, u8* buffer, u64 position const auto read_offset = self->m_init_data_original.file_replacement.read_offset; const auto ptr = self->m_init_data_original.file_replacement.object_ptr; - return Core::ExecuteGuest(read_offset, ptr, buffer, position, length); + return read_offset(ptr, buffer, position, length); } u64 PS4_SYSV_ABI AvPlayer::SizeFile(void* handle) { @@ -69,7 +69,7 @@ u64 PS4_SYSV_ABI AvPlayer::SizeFile(void* handle) { const auto size = self->m_init_data_original.file_replacement.size; const auto ptr = self->m_init_data_original.file_replacement.object_ptr; - return Core::ExecuteGuest(size, ptr); + return size(ptr); } AvPlayerInitData AvPlayer::StubInitData(const AvPlayerInitData& data) { diff --git a/src/core/libraries/avplayer/avplayer_state.cpp b/src/core/libraries/avplayer/avplayer_state.cpp index e1b11840e..dbaa36d18 100644 --- a/src/core/libraries/avplayer/avplayer_state.cpp +++ b/src/core/libraries/avplayer/avplayer_state.cpp @@ -92,7 +92,7 @@ void AvPlayerState::DefaultEventCallback(void* opaque, AvPlayerEvents event_id, const auto callback = self->m_event_replacement.event_callback; const auto ptr = self->m_event_replacement.object_ptr; if (callback != nullptr) { - Core::ExecuteGuest(callback, ptr, event_id, 0, event_data); + callback(ptr, event_id, 0, event_data); } } diff --git a/src/core/libraries/ime/ime.cpp b/src/core/libraries/ime/ime.cpp index 258cc61e1..96ae446fa 100644 --- a/src/core/libraries/ime/ime.cpp +++ b/src/core/libraries/ime/ime.cpp @@ -99,16 +99,16 @@ public: if (m_ime_mode) { OrbisImeParam param = m_param.ime; if (use_param_handler) { - Core::ExecuteGuest(param.handler, param.arg, event); + param.handler(param.arg, event); } else { - Core::ExecuteGuest(handler, param.arg, event); + handler(param.arg, event); } } else { OrbisImeKeyboardParam param = m_param.key; if (use_param_handler) { - Core::ExecuteGuest(param.handler, param.arg, event); + param.handler(param.arg, event); } else { - Core::ExecuteGuest(handler, param.arg, event); + handler(param.arg, event); } } } diff --git a/src/core/libraries/ime/ime_dialog_ui.cpp b/src/core/libraries/ime/ime_dialog_ui.cpp index 4a95c60c9..9611e7c49 100644 --- a/src/core/libraries/ime/ime_dialog_ui.cpp +++ b/src/core/libraries/ime/ime_dialog_ui.cpp @@ -131,8 +131,7 @@ bool ImeDialogState::CallTextFilter() { return false; } - int ret = - Core::ExecuteGuest(text_filter, out_text, &out_text_length, src_text, src_text_length); + int ret = text_filter(out_text, &out_text_length, src_text, src_text_length); if (ret != 0) { return false; @@ -153,7 +152,7 @@ bool ImeDialogState::CallKeyboardFilter(const OrbisImeKeycode* src_keycode, u16* return true; } - int ret = Core::ExecuteGuest(keyboard_filter, src_keycode, out_keycode, out_status, nullptr); + int ret = keyboard_filter(src_keycode, out_keycode, out_status, nullptr); return ret == 0; } diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index 20bd20f4b..0218285f7 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -194,6 +194,21 @@ int PS4_SYSV_ABI posix_pthread_detach(PthreadT pthread) { return 0; } +#ifdef __clang__ +__attribute__((optnone)) +#else +__attribute__((optimize("O0"))) +#endif +void ClearStack(const PthreadAttr& attr) { + void* sp; + asm("mov %%rsp, %0" : "=rm"(sp)); + // leave a safety net of 128 bytes for memset + const u64 size = (u64)sp - (u64)attr.stackaddr_attr - 128; + volatile void* buf = alloca(size); + memset(const_cast(buf), 0, size); + buf = nullptr; +} + static void RunThread(void* arg) { auto* curthread = static_cast(arg); g_curthread = curthread; @@ -202,7 +217,12 @@ static void RunThread(void* arg) { /* Run the current thread's start routine with argument: */ curthread->native_thr.Initialize(); - void* ret = Core::ExecuteGuest(curthread->start_routine, curthread->arg); + Core::EnsureThreadInitialized(); + + // Clear the stack before running the guest thread + ClearStack(curthread->attr); + + void* ret = curthread->start_routine(curthread->arg); /* Remove thread from tracking */ DebugState.RemoveCurrentThreadFromGuestList(); diff --git a/src/core/libraries/kernel/threads/pthread_spec.cpp b/src/core/libraries/kernel/threads/pthread_spec.cpp index 094866a5a..38032f174 100644 --- a/src/core/libraries/kernel/threads/pthread_spec.cpp +++ b/src/core/libraries/kernel/threads/pthread_spec.cpp @@ -84,7 +84,7 @@ void _thread_cleanupspecific() { * destructor: */ lk.unlock(); - Core::ExecuteGuest(destructor, data); + destructor(data); lk.lock(); } } diff --git a/src/core/libraries/network/net_ctl_obj.cpp b/src/core/libraries/network/net_ctl_obj.cpp index a295477b6..a4081cd11 100644 --- a/src/core/libraries/network/net_ctl_obj.cpp +++ b/src/core/libraries/network/net_ctl_obj.cpp @@ -50,7 +50,7 @@ void NetCtlInternal::CheckCallback() { : ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED; for (const auto [func, arg] : callbacks) { if (func != nullptr) { - Core::ExecuteGuest(func, event, arg); + func(event, arg); } } } @@ -61,7 +61,7 @@ void NetCtlInternal::CheckNpToolkitCallback() { : ORBIS_NET_CTL_EVENT_TYPE_DISCONNECTED; for (const auto [func, arg] : nptool_callbacks) { if (func != nullptr) { - Core::ExecuteGuest(func, event, arg); + func(event, arg); } } } diff --git a/src/core/libraries/ngs2/ngs2.cpp b/src/core/libraries/ngs2/ngs2.cpp index 2f785f9a0..97d19c352 100644 --- a/src/core/libraries/ngs2/ngs2.cpp +++ b/src/core/libraries/ngs2/ngs2.cpp @@ -160,13 +160,13 @@ s32 PS4_SYSV_ABI sceNgs2SystemCreateWithAllocator(const OrbisNgs2SystemOption* o result = SystemSetup(option, &bufferInfo, 0, 0); if (result >= 0) { uintptr_t sysUserData = allocator->userData; - result = Core::ExecuteGuest(hostAlloc, &bufferInfo); + result = hostAlloc(&bufferInfo); if (result >= 0) { OrbisNgs2Handle* handleCopy = outHandle; result = SystemSetup(option, &bufferInfo, hostFree, handleCopy); if (result < 0) { if (hostFree) { - Core::ExecuteGuest(hostFree, &bufferInfo); + hostFree(&bufferInfo); } } } diff --git a/src/core/libraries/usbd/emulated/dimensions.cpp b/src/core/libraries/usbd/emulated/dimensions.cpp index 272f2f649..4d38c66fa 100644 --- a/src/core/libraries/usbd/emulated/dimensions.cpp +++ b/src/core/libraries/usbd/emulated/dimensions.cpp @@ -3,6 +3,8 @@ #include "dimensions.h" +#include "core/tls.h" + #include #include @@ -622,6 +624,8 @@ libusb_transfer_status DimensionsBackend::HandleAsyncTransfer(libusb_transfer* t s32 DimensionsBackend::SubmitTransfer(libusb_transfer* transfer) { if (transfer->endpoint == 0x01) { std::thread write_thread([this, transfer] { + Core::EnsureThreadInitialized(); + HandleAsyncTransfer(transfer); const u8 flags = transfer->flags; diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 7a0653e9f..20d81409e 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -135,7 +135,8 @@ void Linker::Execute(const std::vector& args) { } } params.entry_addr = module->GetEntryAddress(); - ExecuteGuest(RunMainEntry, ¶ms); + Core::EnsureThreadInitialized(); + RunMainEntry(¶ms); }); } @@ -379,8 +380,7 @@ void* Linker::TlsGetAddr(u64 module_index, u64 offset) { if (!addr) { // Module was just loaded by above code. Allocate TLS block for it. const u32 init_image_size = module->tls.init_image_size; - u8* dest = reinterpret_cast( - Core::ExecuteGuest(heap_api->heap_malloc, module->tls.image_size)); + u8* dest = reinterpret_cast(heap_api->heap_malloc(module->tls.image_size)); const u8* src = reinterpret_cast(module->tls.image_virtual_addr); std::memcpy(dest, src, init_image_size); std::memset(dest + init_image_size, 0, module->tls.image_size - init_image_size); @@ -412,7 +412,7 @@ void* Linker::AllocateTlsForThread(bool is_primary) { ASSERT_MSG(ret == 0, "Unable to allocate TLS+TCB for the primary thread"); } else { if (heap_api) { - addr_out = Core::ExecuteGuest(heap_api->heap_malloc, total_tls_size); + addr_out = heap_api->heap_malloc(total_tls_size); } else { addr_out = std::malloc(total_tls_size); } @@ -422,7 +422,7 @@ void* Linker::AllocateTlsForThread(bool is_primary) { void Linker::FreeTlsForNonPrimaryThread(void* pointer) { if (heap_api) { - Core::ExecuteGuest(heap_api->heap_free, pointer); + heap_api->heap_free(pointer); } else { std::free(pointer); } diff --git a/src/core/module.cpp b/src/core/module.cpp index 127e74293..d0fae3a9f 100644 --- a/src/core/module.cpp +++ b/src/core/module.cpp @@ -97,7 +97,8 @@ Module::~Module() = default; s32 Module::Start(u64 args, const void* argp, void* param) { LOG_INFO(Core_Linker, "Module started : {}", name); const VAddr addr = dynamic_info.init_virtual_addr + GetBaseAddress(); - return ExecuteGuest(reinterpret_cast(addr), args, argp, param); + Core::EnsureThreadInitialized(); + return reinterpret_cast(addr)(args, argp, param); } void Module::LoadModuleToMemory(u32& max_tls_index) { diff --git a/src/core/tls.h b/src/core/tls.h index 27de518ea..00eba188e 100644 --- a/src/core/tls.h +++ b/src/core/tls.h @@ -45,29 +45,6 @@ Tcb* GetTcbBase(); /// Makes sure TLS is initialized for the thread before entering guest. void EnsureThreadInitialized(); -template -#ifdef __clang__ -__attribute__((optnone)) -#else -__attribute__((optimize("O0"))) -#endif -void ClearStack() { - volatile void* buf = alloca(size); - memset(const_cast(buf), 0, size); - buf = nullptr; -} - -template -ReturnType ExecuteGuest(PS4_SYSV_ABI ReturnType (*func)(FuncArgs...), CallArgs&&... args) { - EnsureThreadInitialized(); - // clear stack to avoid trash from EnsureThreadInitialized - auto* tcb = GetTcbBase(); - if (tcb != nullptr && tcb->tcb_fiber == nullptr) { - ClearStack<12_KB>(); - } - return func(std::forward(args)...); -} - template struct HostCallWrapperImpl;