PPU: Clean up macOS executable write-protection guards

This commit is contained in:
Elad 2026-05-09 15:29:34 +03:00
parent 64f0d94792
commit 4fb2be05e5
5 changed files with 55 additions and 63 deletions

View File

@ -434,13 +434,35 @@ namespace asmjit
#endif
}
#ifdef __APPLE__
// RAII guard for writing to JIT (MAP_JIT) memory on Apple Silicon macOS:
// construction makes JIT pages writable for the calling thread, destruction
// restores execute protection on every scope-exit path (return, exception).
struct jit_write_guard
{
jit_write_guard() noexcept
{
// Disable write protection for this thread's JIT pages
pthread_jit_write_protect_np(false);
// Ensure stores are not reordered by the compiler
atomic_fence_acq_rel();
}
~jit_write_guard() noexcept
{
// Ensure stores are not reordered by the compiler
// (all JIT-memory writes must complete before protection is re-enabled)
atomic_fence_seq_cst();
// Re-enable write protection (execute-only) for this thread's JIT pages
pthread_jit_write_protect_np(true);
}
};
#else
// On non-Apple platforms a "jit_write_guard jit_guard;" declaration expands
// to "[[maybe_unused]] int jit_guard;" — a harmless dummy with no effect.
#define jit_write_guard [[maybe_unused]] int
#endif
// Build runtime function with asmjit::X86Assembler
template <typename FT, typename Asm = native_asm, typename F>
inline FT build_function_asm(std::string_view name, F&& builder, ::jit_runtime* custom_runtime = nullptr, bool reduced_size = false)
{
#ifdef __APPLE__
pthread_jit_write_protect_np(false);
#endif
jit_write_guard jit_guard;
using namespace asmjit;
auto& rt = custom_runtime ? *custom_runtime : get_global_runtime();

View File

@ -2174,18 +2174,12 @@ void ppu_thread::cpu_task()
}
case ppu_cmd::lle_call:
{
#ifdef __APPLE__
pthread_jit_write_protect_np(true);
#endif
const vm::ptr<u32> opd(arg < 32 ? vm::cast(gpr[arg]) : vm::cast(arg));
cmd_pop(), fast_call(opd[0], opd[1]);
break;
}
case ppu_cmd::entry_call:
{
#ifdef __APPLE__
pthread_jit_write_protect_np(true);
#endif
cmd_pop(), fast_call(entry_func.addr, entry_func.rtoc, true);
break;
}
@ -2196,9 +2190,6 @@ void ppu_thread::cpu_task()
}
case ppu_cmd::opd_call:
{
#ifdef __APPLE__
pthread_jit_write_protect_np(true);
#endif
const ppu_func_opd_t opd = cmd_get(1).as<ppu_func_opd_t>();
cmd_pop(1), fast_call(opd.addr, opd.rtoc);
break;
@ -2217,9 +2208,6 @@ void ppu_thread::cpu_task()
}
case ppu_cmd::initialize:
{
#ifdef __APPLE__
pthread_jit_write_protect_np(false);
#endif
cmd_pop();
ppu_initialize();
@ -2231,9 +2219,6 @@ void ppu_thread::cpu_task()
spu_cache::initialize();
#ifdef __APPLE__
pthread_jit_write_protect_np(true);
#endif
#ifdef ARCH_ARM64
// Flush all cache lines after potentially writing executable code
asm("ISB");
@ -2380,6 +2365,11 @@ void ppu_thread::cpu_wait(bs_t<cpu_flag> old)
void ppu_thread::exec_task()
{
#ifdef __APPLE__
// Ensure correct state before executing JIT code
pthread_jit_write_protect_np(true);
#endif
if (g_cfg.core.ppu_decoder != ppu_decoder_type::_static)
{
// HVContext push to allow recursion. This happens with guest callback invocations.
@ -2461,9 +2451,6 @@ ppu_thread::ppu_thread(const ppu_thread_params& param, std::string_view name, u3
syscall_history.data.resize(g_cfg.core.ppu_call_history ? syscall_history_max_size : 1);
syscall_history.count_debug_arguments = static_cast<u32>(g_cfg.core.ppu_call_history ? std::size(syscall_history.data[0].args) : 0);
#ifdef __APPLE__
pthread_jit_write_protect_np(true);
#endif
#ifdef ARCH_ARM64
// Flush all cache lines after potentially writing executable code
asm("ISB");
@ -3993,9 +3980,8 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
named_thread_group workers("SPRX Worker ", std::min<u32>(software_thread_limit, cpu_thread_limit), [&]
{
#ifdef __APPLE__
pthread_jit_write_protect_np(false);
#endif
jit_write_guard jit_guard;
// Set low priority
thread_ctrl::scoped_priority low_prio(-1);
u32 inc_fdone = 1;
@ -4221,9 +4207,6 @@ extern void ppu_precompile(std::vector<std::string>& dir_queue, std::vector<ppu_
return;
}
#ifdef __APPLE__
pthread_jit_write_protect_np(false);
#endif
// Set low priority
thread_ctrl::scoped_priority low_prio(-1);
@ -4602,6 +4585,8 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
progress_dialog.emplace(get_localized_string(localized_string_id::PROGRESS_DIALOG_LOADING_PPU_MODULES));
}
jit_write_guard jit_guard;
// Permanently loaded compiled PPU modules (name -> data)
jit_module& jit_mod = g_fxo->get<jit_module_manager>().get(cache_path + "_" + std::to_string(std::bit_cast<usz>(info.segs[0].ptr)));
@ -4785,9 +4770,6 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
// Try to make the code fit in 16 bytes, may fail and fallback
if (*full_sample && abs_diff(*full_sample, reinterpret_cast<u64>(jit_runtime::peek(true) + 3 * 4)) < (128u << 20))
{
#ifdef __APPLE__
pthread_jit_write_protect_np(false);
#endif
u8* code = jit_runtime::alloc(12, 4, true);
code_ptr = reinterpret_cast<u64>(code);
@ -4902,6 +4884,11 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
#endif
}, runtime.get(), true);
#ifdef __APPLE__
// Restore write-protection state (modified by build_function_asm)
pthread_jit_write_protect_np(false);
#endif
// Full sample may exist already, but is very far away
// So in this case, a new sample is written
ensure(code_size_until_jump != umax);
@ -5288,9 +5275,8 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
// Set low priority
thread_ctrl::scoped_priority low_prio(-1);
#ifdef __APPLE__
pthread_jit_write_protect_np(false);
#endif
jit_write_guard jit_guard;
for (u32 i = work_cv++; i < workload.size(); i = work_cv++, g_progr_pdone++)
{
if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
@ -5447,10 +5433,6 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
}
}
#ifdef __APPLE__
// Symbol resolver is in JIT mem, so we must enable execution
pthread_jit_write_protect_np(true);
#endif
{
usz index = umax;
@ -5463,11 +5445,6 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
}
}
#ifdef __APPLE__
// Symbol resolver is in JIT mem, so we must enable execution
pthread_jit_write_protect_np(false);
#endif
// Find a BLR-only function in order to copy it to all BLRs (some games need it)
for (const auto& func : info.get_funcs())
{

View File

@ -119,9 +119,7 @@ static void ghc_cpp_trampoline(u64 fn_target, native_asm& c, auto& args)
DECLARE(spu_runtime::tr_dispatch) = []
{
#ifdef __APPLE__
pthread_jit_write_protect_np(false);
#endif
jit_write_guard jit_guard;
#if defined(ARCH_X64)
// Generate a special trampoline to spu_recompiler_base::dispatch with pause instruction
u8* const trptr = jit_runtime::alloc(32, 16);
@ -149,6 +147,7 @@ DECLARE(spu_runtime::tr_dispatch) = []
DECLARE(spu_runtime::tr_branch) = []
{
jit_write_guard jit_guard;
#if defined(ARCH_X64)
// Generate a trampoline to spu_recompiler_base::branch
u8* const trptr = jit_runtime::alloc(32, 16);
@ -174,6 +173,7 @@ DECLARE(spu_runtime::tr_branch) = []
DECLARE(spu_runtime::tr_interpreter) = []
{
jit_write_guard jit_guard;
#if defined(ARCH_X64)
u8* const trptr = jit_runtime::alloc(32, 16);
u8* raw = move_args_ghc_to_native(trptr);
@ -195,6 +195,8 @@ DECLARE(spu_runtime::tr_interpreter) = []
DECLARE(spu_runtime::g_dispatcher) = []
{
jit_write_guard jit_guard;
// Allocate 2^20 positions in data area
const auto ptr = reinterpret_cast<std::remove_const_t<decltype(spu_runtime::g_dispatcher)>>(jit_runtime::alloc(sizeof(*g_dispatcher), 64, false));
@ -208,6 +210,8 @@ DECLARE(spu_runtime::g_dispatcher) = []
DECLARE(spu_runtime::tr_all) = []
{
jit_write_guard jit_guard;
#if defined(ARCH_X64)
u8* const trptr = jit_runtime::alloc(32, 16);
u8* raw = trptr;
@ -732,6 +736,8 @@ void spu_cache::add(const spu_program& func)
void spu_cache::initialize(bool build_existing_cache)
{
jit_write_guard jit_guard;
spu_runtime::g_interpreter = spu_runtime::g_gateway;
if (g_cfg.core.spu_decoder == spu_decoder_type::_static || g_cfg.core.spu_decoder == spu_decoder_type::dynamic)
@ -862,15 +868,7 @@ void spu_cache::initialize(bool build_existing_cache)
// every exit path (return, exception, etc.). Leaving a worker
// in write mode at teardown can leave per-thread state
// inconsistent on AArch64.
pthread_jit_write_protect_np(false);
struct jit_write_guard
{
~jit_write_guard()
{
pthread_jit_write_protect_np(true);
}
} _jit_guard;
jit_write_guard jit_guard;
#endif
// Set low priority
thread_ctrl::scoped_priority low_prio(-1);
@ -2096,6 +2094,9 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
return;
}
#if defined(__APPLE__)
pthread_jit_write_protect_np(false);
#endif
const auto func = spu.jit->compile(spu.jit->analyse(spu._ptr<u32>(0), spu.pc));
if (!func)

View File

@ -1252,16 +1252,8 @@ extern void ppu_execute_syscall(ppu_thread& ppu, u64 code)
if (const auto func = g_ppu_syscall_table[code].first)
{
#ifdef __APPLE__
pthread_jit_write_protect_np(false);
#endif
func(ppu, {}, vm::_ptr<u32>(ppu.cia), nullptr);
ppu_log.trace("Syscall '%s' (%llu) finished, r3=0x%llx", ppu_syscall_code(code), code, ppu.gpr[3]);
#ifdef __APPLE__
pthread_jit_write_protect_np(true);
// No need to flush cache lines after a syscall, since we didn't generate any code.
#endif
return;
}
}

View File

@ -1847,7 +1847,7 @@ game_boot_result Emulator::Load(const std::string& title_id, bool is_disc_patch,
struct jit_write_guard
{
~jit_write_guard()
~jit_write_guard() noexcept
{
pthread_jit_write_protect_np(true);
}