From 4fb2be05e56793cdf2467fc0d76a1be4452324e4 Mon Sep 17 00:00:00 2001 From: Elad <18193363+elad335@users.noreply.github.com> Date: Sat, 9 May 2026 15:29:34 +0300 Subject: [PATCH] PPU: Clean up macOS executable write protection guards --- Utilities/JIT.h | 28 +++++++++++-- rpcs3/Emu/Cell/PPUThread.cpp | 55 ++++++++------------------ rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 25 ++++++------ rpcs3/Emu/Cell/lv2/lv2.cpp | 8 ---- rpcs3/Emu/System.cpp | 2 +- 5 files changed, 55 insertions(+), 63 deletions(-) diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 86fc72ed55..ebf15cbb56 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -434,13 +434,35 @@ namespace asmjit #endif } +#ifdef __APPLE__ +struct jit_write_guard +{ + jit_write_guard() noexcept + { + pthread_jit_write_protect_np(false); + + // Ensure stores are not reordered by the compiler + atomic_fence_acq_rel(); + } + + ~jit_write_guard() noexcept + { + // Ensure stores are not reordered by the compiler + atomic_fence_seq_cst(); + + pthread_jit_write_protect_np(true); + } +}; +#else +#define jit_write_guard [[maybe_unused]] int +#endif + // Build runtime function with asmjit::X86Assembler template inline FT build_function_asm(std::string_view name, F&& builder, ::jit_runtime* custom_runtime = nullptr, bool reduced_size = false) { -#ifdef __APPLE__ - pthread_jit_write_protect_np(false); -#endif + jit_write_guard jit_guard; + using namespace asmjit; auto& rt = custom_runtime ? *custom_runtime : get_global_runtime(); diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index f5d91cc519..13f07c8e95 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -2174,18 +2174,12 @@ void ppu_thread::cpu_task() } case ppu_cmd::lle_call: { -#ifdef __APPLE__ - pthread_jit_write_protect_np(true); -#endif const vm::ptr opd(arg < 32 ?
vm::cast(gpr[arg]) : vm::cast(arg)); cmd_pop(), fast_call(opd[0], opd[1]); break; } case ppu_cmd::entry_call: { -#ifdef __APPLE__ - pthread_jit_write_protect_np(true); -#endif cmd_pop(), fast_call(entry_func.addr, entry_func.rtoc, true); break; } @@ -2196,9 +2190,6 @@ void ppu_thread::cpu_task() } case ppu_cmd::opd_call: { -#ifdef __APPLE__ - pthread_jit_write_protect_np(true); -#endif const ppu_func_opd_t opd = cmd_get(1).as(); cmd_pop(1), fast_call(opd.addr, opd.rtoc); break; @@ -2217,9 +2208,6 @@ void ppu_thread::cpu_task() } case ppu_cmd::initialize: { -#ifdef __APPLE__ - pthread_jit_write_protect_np(false); -#endif cmd_pop(); ppu_initialize(); @@ -2231,9 +2219,6 @@ void ppu_thread::cpu_task() spu_cache::initialize(); -#ifdef __APPLE__ - pthread_jit_write_protect_np(true); -#endif #ifdef ARCH_ARM64 // Flush all cache lines after potentially writing executable code asm("ISB"); @@ -2380,6 +2365,11 @@ void ppu_thread::cpu_wait(bs_t old) void ppu_thread::exec_task() { +#ifdef __APPLE__ + // Ensure correct state before executing JIT code + pthread_jit_write_protect_np(true); +#endif + if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static) { // HVContext push to allow recursion. This happens with guest callback invocations. @@ -2461,9 +2451,6 @@ ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u3 syscall_history.data.resize(g_cfg.core.ppu_call_history ? syscall_history_max_size : 1); syscall_history.count_debug_arguments = static_cast(g_cfg.core.ppu_call_history ? 
std::size(syscall_history.data[0].args) : 0); -#ifdef __APPLE__ - pthread_jit_write_protect_np(true); -#endif #ifdef ARCH_ARM64 // Flush all cache lines after potentially writing executable code asm("ISB"); @@ -3993,9 +3980,8 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector(software_thread_limit, cpu_thread_limit), [&] { -#ifdef __APPLE__ - pthread_jit_write_protect_np(false); -#endif + jit_write_guard jit_guard; + // Set low priority thread_ctrl::scoped_priority low_prio(-1); u32 inc_fdone = 1; @@ -4221,9 +4207,6 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector& info, bool check_only, u64 file_s progress_dialog.emplace(get_localized_string(localized_string_id::PROGRESS_DIALOG_LOADING_PPU_MODULES)); } + jit_write_guard jit_guard; + // Permanently loaded compiled PPU modules (name -> data) jit_module& jit_mod = g_fxo->get().get(cache_path + "_" + std::to_string(std::bit_cast(info.segs[0].ptr))); @@ -4785,9 +4770,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s // Try to make the code fit in 16 bytes, may fail and fallback if (*full_sample && abs_diff(*full_sample, reinterpret_cast(jit_runtime::peek(true) + 3 * 4)) < (128u << 20)) { -#ifdef __APPLE__ - pthread_jit_write_protect_np(false); -#endif u8* code = jit_runtime::alloc(12, 4, true); code_ptr = reinterpret_cast(code); @@ -4902,6 +4884,11 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s #endif }, runtime.get(), true); +#ifdef __APPLE__ + // Re-enable JIT write access (the jit_write_guard inside build_function_asm turned write protection back on when it went out of scope) + pthread_jit_write_protect_np(false); +#endif + // Full sample may exist already, but is very far away // So in this case, a new sample is written ensure(code_size_until_jump != umax); @@ -5288,9 +5275,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s // Set low priority thread_ctrl::scoped_priority low_prio(-1); - #ifdef __APPLE__ - pthread_jit_write_protect_np(false); - #endif + jit_write_guard
jit_guard; + for (u32 i = work_cv++; i < workload.size(); i = work_cv++, g_progr_pdone++) { if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped()) @@ -5447,10 +5433,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s } } -#ifdef __APPLE__ - // Symbol resolver is in JIT mem, so we must enable execution - pthread_jit_write_protect_np(true); -#endif { usz index = umax; @@ -5463,11 +5445,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s } } -#ifdef __APPLE__ - // Symbol resolver is in JIT mem, so we must enable execution - pthread_jit_write_protect_np(false); -#endif - // Find a BLR-only function in order to copy it to all BLRs (some games need it) for (const auto& func : info.get_funcs()) { diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index dffca21cae..bac41283c8 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -119,9 +119,7 @@ static void ghc_cpp_trampoline(u64 fn_target, native_asm& c, auto& args) DECLARE(spu_runtime::tr_dispatch) = [] { -#ifdef __APPLE__ - pthread_jit_write_protect_np(false); -#endif + jit_write_guard jit_guard; #if defined(ARCH_X64) // Generate a special trampoline to spu_recompiler_base::dispatch with pause instruction u8* const trptr = jit_runtime::alloc(32, 16); @@ -149,6 +147,7 @@ DECLARE(spu_runtime::tr_dispatch) = [] DECLARE(spu_runtime::tr_branch) = [] { + jit_write_guard jit_guard; #if defined(ARCH_X64) // Generate a trampoline to spu_recompiler_base::branch u8* const trptr = jit_runtime::alloc(32, 16); @@ -174,6 +173,7 @@ DECLARE(spu_runtime::tr_branch) = [] DECLARE(spu_runtime::tr_interpreter) = [] { + jit_write_guard jit_guard; #if defined(ARCH_X64) u8* const trptr = jit_runtime::alloc(32, 16); u8* raw = move_args_ghc_to_native(trptr); @@ -195,6 +195,8 @@ DECLARE(spu_runtime::tr_interpreter) = [] DECLARE(spu_runtime::g_dispatcher) = [] { + jit_write_guard jit_guard; + // Allocate 
2^20 positions in data area const auto ptr = reinterpret_cast>(jit_runtime::alloc(sizeof(*g_dispatcher), 64, false)); @@ -208,6 +210,8 @@ DECLARE(spu_runtime::g_dispatcher) = [] DECLARE(spu_runtime::tr_all) = [] { + jit_write_guard jit_guard; + #if defined(ARCH_X64) u8* const trptr = jit_runtime::alloc(32, 16); u8* raw = trptr; @@ -732,6 +736,8 @@ void spu_cache::add(const spu_program& func) void spu_cache::initialize(bool build_existing_cache) { + jit_write_guard jit_guard; + spu_runtime::g_interpreter = spu_runtime::g_gateway; if (g_cfg.core.spu_decoder == spu_decoder_type::_static || g_cfg.core.spu_decoder == spu_decoder_type::dynamic) @@ -862,15 +868,7 @@ void spu_cache::initialize(bool build_existing_cache) // every exit path (return, exception, etc.). Leaving a worker // in write mode at teardown can leave per-thread state // inconsistent on AArch64. - pthread_jit_write_protect_np(false); - - struct jit_write_guard - { - ~jit_write_guard() - { - pthread_jit_write_protect_np(true); - } - } _jit_guard; + jit_write_guard jit_guard; #endif // Set low priority thread_ctrl::scoped_priority low_prio(-1); @@ -2096,6 +2094,9 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip) return; } +#if defined(__APPLE__) + pthread_jit_write_protect_np(false); +#endif const auto func = spu.jit->compile(spu.jit->analyse(spu._ptr(0), spu.pc)); if (!func) diff --git a/rpcs3/Emu/Cell/lv2/lv2.cpp b/rpcs3/Emu/Cell/lv2/lv2.cpp index dbe11039e7..cc0c7fff69 100644 --- a/rpcs3/Emu/Cell/lv2/lv2.cpp +++ b/rpcs3/Emu/Cell/lv2/lv2.cpp @@ -1252,16 +1252,8 @@ extern void ppu_execute_syscall(ppu_thread& ppu, u64 code) if (const auto func = g_ppu_syscall_table[code].first) { -#ifdef __APPLE__ - pthread_jit_write_protect_np(false); -#endif func(ppu, {}, vm::_ptr(ppu.cia), nullptr); ppu_log.trace("Syscall '%s' (%llu) finished, r3=0x%llx", ppu_syscall_code(code), code, ppu.gpr[3]); - -#ifdef __APPLE__ - pthread_jit_write_protect_np(true); - // No need to flush cache lines after a 
syscall, since we didn't generate any code. -#endif return; } } diff --git a/rpcs3/Emu/System.cpp b/rpcs3/Emu/System.cpp index 722286cf14..847b1c07d5 100644 --- a/rpcs3/Emu/System.cpp +++ b/rpcs3/Emu/System.cpp @@ -1847,7 +1847,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch, struct jit_write_guard { - ~jit_write_guard() + ~jit_write_guard() noexcept { pthread_jit_write_protect_np(true); }