From 78afea131278577521588332c89e9277547035ad Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 27 Dec 2024 14:47:41 +0100 Subject: [PATCH 1/3] PowerPC: Track registers used in memory breakpoint conditions --- Source/Core/Core/PowerPC/BreakPoints.cpp | 31 ++++++++++++++++++++++-- Source/Core/Core/PowerPC/BreakPoints.h | 9 +++++++ Source/Core/Core/PowerPC/Expression.cpp | 24 ++++++++++++++++++ Source/Core/Core/PowerPC/Expression.h | 20 +++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/BreakPoints.cpp b/Source/Core/Core/PowerPC/BreakPoints.cpp index a100a69b6ec..c806c906ff3 100644 --- a/Source/Core/Core/PowerPC/BreakPoints.cpp +++ b/Source/Core/Core/PowerPC/BreakPoints.cpp @@ -10,6 +10,7 @@ #include #include +#include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" #include "Core/Core.h" @@ -341,8 +342,13 @@ void MemChecks::Update() { const Core::CPUThreadGuard guard(m_system); - // Clear the JIT cache so it can switch the watchpoint-compatible mode. - if (m_mem_breakpoints_set != HasAny()) + const bool registers_changed = UpdateRegistersUsedInConditions(); + + // If we've added a first memcheck, clear the JIT cache so it can switch to watchpoint-compatible + // code. Or, if we've added a memcheck whose condition wants to read from a new register, clear + // the JIT cache to make the slow memory access code flush that register. And conversely, if the + // aforementioned functionality is no longer needed, clear the JIT cache to switch to faster code. + if (registers_changed || m_mem_breakpoints_set != HasAny()) { m_system.GetJitInterface().ClearCache(guard); m_mem_breakpoints_set = HasAny(); @@ -351,6 +357,27 @@ void MemChecks::Update() m_system.GetMMU().DBATUpdated(); } +bool MemChecks::UpdateRegistersUsedInConditions() +{ + BitSet32 gprs_used, fprs_used; + for (TMemCheck& mem_check : m_mem_checks) + { + if (mem_check.condition) + { + gprs_used |= mem_check.condition->GetGPRsUsed(); + fprs_used |= mem_check.condition->GetFPRsUsed(); + } + } + + const bool registers_changed = + gprs_used != m_gprs_used_in_conditions || fprs_used != m_fprs_used_in_conditions; + + m_gprs_used_in_conditions = gprs_used; + m_fprs_used_in_conditions = fprs_used; + + return registers_changed; +} + TMemCheck* MemChecks::GetMemCheck(u32 address, size_t size) { const auto iter = std::ranges::find_if(m_mem_checks, [address, size](const auto& mc) { diff --git a/Source/Core/Core/PowerPC/BreakPoints.h b/Source/Core/Core/PowerPC/BreakPoints.h index 6408a542523..03fc0e039a5 100644 --- a/Source/Core/Core/PowerPC/BreakPoints.h +++ b/Source/Core/Core/PowerPC/BreakPoints.h @@ -8,6 +8,7 @@ #include #include +#include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Core/PowerPC/Expression.h" @@ -129,9 +130,17 @@ public: void Clear(); bool HasAny() const { return !m_mem_checks.empty(); } + BitSet32 GetGPRsUsedInConditions() { return m_gprs_used_in_conditions; } + BitSet32 GetFPRsUsedInConditions() { return m_fprs_used_in_conditions; } + private: + // Returns whether any change was made + bool UpdateRegistersUsedInConditions(); + TMemChecks m_mem_checks; Core::System& m_system; + BitSet32 m_gprs_used_in_conditions; + BitSet32 m_fprs_used_in_conditions; bool m_mem_breakpoints_set = false; }; diff --git a/Source/Core/Core/PowerPC/Expression.cpp b/Source/Core/Core/PowerPC/Expression.cpp index bd97e8a8b1e..6dda61040aa 100644 --- a/Source/Core/Core/PowerPC/Expression.cpp +++ b/Source/Core/Core/PowerPC/Expression.cpp @@ -25,6 +25,7 @@ using std::isinf; using std::isnan; #include +#include "Common/BitSet.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" #include "Core/Core.h" @@ -503,3 +504,26 @@ std::string Expression::GetText() const { return m_text; } + +void Expression::ComputeRegistersUsed() +{ + if (m_has_computed_registers_used) + return; + + for (const VarBinding& bind : m_binds) + { + switch (bind.type) + { + case VarBindingType::GPR: + m_gprs_used[bind.index] = true; + break; + case VarBindingType::FPR: + m_fprs_used[bind.index] = true; + break; + default: + break; + } + } + + m_has_computed_registers_used = true; +} diff --git a/Source/Core/Core/PowerPC/Expression.h b/Source/Core/Core/PowerPC/Expression.h index 3c439f9e9e5..deee9e69ad8 100644 --- a/Source/Core/Core/PowerPC/Expression.h +++ b/Source/Core/Core/PowerPC/Expression.h @@ -9,6 +9,8 @@ #include #include +#include "Common/BitSet.h" + struct expr; struct expr_var_list; @@ -41,6 +43,18 @@ public: std::string GetText() const; + BitSet32 GetGPRsUsed() + { + ComputeRegistersUsed(); + return m_gprs_used; + } + + BitSet32 GetFPRsUsed() + { + ComputeRegistersUsed(); + return m_fprs_used; + } + private: enum class SynchronizeDirection { @@ -69,10 +83,16 @@ private: void SynchronizeBindings(Core::System& system, SynchronizeDirection dir) const; void Reporting(const double result) const; + void ComputeRegistersUsed(); + std::string m_text; ExprPointer m_expr; ExprVarListPointer m_vars; std::vector m_binds; + + BitSet32 m_gprs_used; + BitSet32 m_fprs_used; + bool m_has_computed_registers_used = false; }; inline bool EvaluateCondition(Core::System& system, const std::optional& condition) From a06bc5417dac625c975338f70f69a1ff86e62fdb Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 27 Dec 2024 14:05:54 +0100 Subject: [PATCH 2/3] JitArm64: Document an assumption we've been making in EmitBackpatchRoutine The next commit will add another piece of code that depends on this assumption that we've been making. Good opportunity to document it. In practice, all callers of EmitBackpatchRoutine are locking X30. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index fd5cc4e32eb..5554bc69559 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -273,8 +273,11 @@ protected: // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30 // !emitting_routine && mode == Auto && jo.fastmem: X30 // - // Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push - // may be clobbered if mode != AlwaysFastAccess. + // Furthermore: + // - Any callee-saved register which isn't marked in gprs_to_push/fprs_to_push may be + // clobbered if mode != AlwaysFastAccess. + // - If !emitting_routine && mode != AlwaysFastAccess && jo.memcheck, X30 must not + // contain a guest register. void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0), BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false); From 02e32ffaaf191151ec4449cee9a254bb400ec3c0 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 27 Dec 2024 16:01:08 +0100 Subject: [PATCH 3/3] Jit: Flush registers used in memory breakpoint conditions Aims to fix https://bugs.dolphin-emu.org/issues/13686. I'm using a less efficient approach for Jit64 than for JitArm64. In JitArm64, I'm flushing in the slow access code, but in Jit64, I'm flushing before the split between the slow access code and the fast access code, because Jit64 doesn't keep register mappings around for when it's time to emit the slow access code. But the flushing code is only emitted when there are memory breakpoints with conditions, so I'd say that this is a performance loss we can live with. --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 19 ++++++++++++ Source/Core/Core/PowerPC/Jit64/Jit.h | 2 ++ .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 14 ++++++++- .../PowerPC/Jit64/Jit_LoadStoreFloating.cpp | 6 ++++ .../PowerPC/Jit64/Jit_LoadStorePaired.cpp | 16 ++++++---- .../Core/PowerPC/Jit64Common/EmuCodeBlock.cpp | 31 +++++++++---------- .../Core/PowerPC/Jit64Common/EmuCodeBlock.h | 2 ++ Source/Core/Core/PowerPC/JitArm64/Jit.h | 3 ++ .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 31 +++++++++++++------ .../JitArm64/JitArm64_LoadStorePaired.cpp | 8 ++--- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 15 +++++---- 11 files changed, 102 insertions(+), 45 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index b09279d4585..ef4660b2fdc 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -1294,6 +1294,25 @@ void Jit64::IntializeSpeculativeConstants() } } +void Jit64::FlushRegistersBeforeSlowAccess() +{ + // Register values can be used by memory watchpoint conditions. + MemChecks& mem_checks = m_system.GetPowerPC().GetMemChecks(); + if (mem_checks.HasAny()) + { + BitSet32 gprs = mem_checks.GetGPRsUsedInConditions(); + BitSet32 fprs = mem_checks.GetFPRsUsedInConditions(); + if (gprs || fprs) + { + RCForkGuard gpr_guard = gpr.Fork(); + RCForkGuard fpr_guard = fpr.Fork(); + + gpr.Flush(gprs); + fpr.Flush(fprs); + } + } +} + bool Jit64::HandleFunctionHooking(u32 address) { const auto result = HLE::TryReplaceFunction(m_ppc_symbol_db, address, PowerPC::CoreMode::JIT); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 0e98981a469..e72397ccc52 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -81,6 +81,8 @@ public: void IntializeSpeculativeConstants(); + void FlushRegistersBeforeSlowAccess(); + JitBlockCache* GetBlockCache() override { return &blocks; } void Trace(); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 05bd6906948..a200689f35e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -110,6 +110,8 @@ void Jit64::lXXx(UGeckoInstruction inst) PanicAlertFmt("Invalid instruction"); } + FlushRegistersBeforeSlowAccess(); + // PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find // it. if (CanMergeNextInstructions(1) && accessSize == 8 && js.op[1].inst.OPCD == 31 && @@ -439,6 +441,8 @@ void Jit64::dcbz(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; + FlushRegistersBeforeSlowAccess(); + { RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0); RCOpArg Rb = gpr.Use(b, RCMode::Read); @@ -477,7 +481,7 @@ void Jit64::dcbz(UGeckoInstruction inst) SwitchToFarCode(); SetJumpTarget(slow); } - MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + FlushPCBeforeSlowAccess(); BitSet32 registersInUse = CallerSavedRegistersInUse(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit, &m_mmu, RSCRATCH); @@ -524,6 +528,8 @@ void Jit64::stX(UGeckoInstruction inst) return; } + FlushRegistersBeforeSlowAccess(); + // If we already know the address of the write if (!a || gpr.IsImm(a)) { @@ -602,6 +608,8 @@ void Jit64::stXx(UGeckoInstruction inst) break; } + FlushRegistersBeforeSlowAccess(); + const bool does_clobber = WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse); RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read); @@ -634,6 +642,8 @@ void Jit64::lmw(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; + FlushRegistersBeforeSlowAccess(); + // TODO: This doesn't handle rollback on DSI correctly { RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0); @@ -657,6 +667,8 @@ void Jit64::stmw(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; + FlushRegistersBeforeSlowAccess(); + // TODO: This doesn't handle rollback on DSI correctly for (int i = d; i < 32; i++) { diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp index 7ac34f5d6ee..7b347a37428 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp @@ -30,6 +30,8 @@ void Jit64::lfXXX(UGeckoInstruction inst) FALLBACK_IF(!indexed && !a); + FlushRegistersBeforeSlowAccess(); + s32 offset = 0; RCOpArg addr = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read); RegCache::Realize(addr); @@ -103,6 +105,8 @@ void Jit64::stfXXX(UGeckoInstruction inst) FALLBACK_IF(update && jo.memcheck && indexed && a == b); + FlushRegistersBeforeSlowAccess(); + if (single) { if (js.fpr_is_store_safe[s] && js.op->fprIsSingle[s]) @@ -196,6 +200,8 @@ void Jit64::stfiwx(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; + FlushRegistersBeforeSlowAccess(); + RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0); RCOpArg Rb = gpr.Use(b, RCMode::Read); RCOpArg Rs = fpr.Use(s, RCMode::Read); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp index 91865b61a08..78b00b99f0b 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp @@ -35,6 +35,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst) int w = indexed ? inst.Wx : inst.W; FALLBACK_IF(!a); + FlushRegistersBeforeSlowAccess(); + RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA); RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read); RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset); @@ -69,8 +71,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst) } else { - // Stash PC in case asm routine needs to call into C++ - MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + FlushPCBeforeSlowAccess(); + // We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly // with just the scale bits. MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00)); @@ -83,8 +85,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst) } else { - // Stash PC in case asm routine needs to call into C++ - MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + FlushPCBeforeSlowAccess(); + // Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code. // Hence, we need to mask out the unused bits. The layout of the GQR register is // UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with @@ -124,6 +126,8 @@ void Jit64::psq_lXX(UGeckoInstruction inst) int w = indexed ? inst.Wx : inst.W; FALLBACK_IF(!a); + FlushRegistersBeforeSlowAccess(); + RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA); RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read); RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset); @@ -144,8 +148,8 @@ void Jit64::psq_lXX(UGeckoInstruction inst) } else { - // Stash PC in case asm routine needs to call into C++ - MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + FlushPCBeforeSlowAccess(); + // Get the high part of the GQR register OpArg gqr = PPCSTATE_SPR(SPR_GQR0 + i); gqr.AddMemOffset(2); diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp index 02c8ba18934..b7f90db5e97 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp @@ -92,6 +92,13 @@ void EmuCodeBlock::SwitchToNearCode() SetCodePtr(m_near_code, m_near_code_end, m_near_code_write_failed); } +void EmuCodeBlock::FlushPCBeforeSlowAccess() +{ + // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe + // interrupt checks, and printing accurate PC locations in debug logs. + MOV(32, PPCSTATE(pc), Imm32(m_jit.js.compilerPC)); +} + FixupBranch EmuCodeBlock::BATAddressLookup(X64Reg addr, X64Reg tmp, const void* bat_table) { MOV(64, R(tmp), ImmPtr(bat_table)); @@ -386,14 +393,11 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress, SetJumpTarget(slow); } - // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe - // interrupt checks, and printing accurate PC locations in debug logs. - // - // In the case of Jit64AsmCommon routines, we don't know the PC here, - // so the caller has to store the PC themselves. + // In the case of Jit64AsmCommon routines, the state we want to store here isn't known + // when compiling the routine, so the caller has to store it themselves. if (!(flags & SAFE_LOADSTORE_NO_UPDATE_PC)) { - MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + FlushPCBeforeSlowAccess(); } size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0; @@ -457,8 +461,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc return; } - // Helps external systems know which instruction triggered the read. - MOV(32, PPCSTATE(pc), Imm32(m_jit.js.compilerPC)); + FlushPCBeforeSlowAccess(); // Fall back to general-case code. ABI_PushRegistersAndAdjustStack(registersInUse, 0); @@ -560,14 +563,11 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces SetJumpTarget(slow); } - // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe - // interrupt checks, and printing accurate PC locations in debug logs. - // - // In the case of Jit64AsmCommon routines, we don't know the PC here, - // so the caller has to store the PC themselves. + // In the case of Jit64AsmCommon routines, the state we want to store here isn't known + // when compiling the routine, so the caller has to store it themselves. if (!(flags & SAFE_LOADSTORE_NO_UPDATE_PC)) { - MOV(32, PPCSTATE(pc), Imm32(js.compilerPC)); + FlushPCBeforeSlowAccess(); } size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0; @@ -663,8 +663,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address, } else { - // Helps external systems know which instruction triggered the write - MOV(32, PPCSTATE(pc), Imm32(m_jit.js.compilerPC)); + FlushPCBeforeSlowAccess(); ABI_PushRegistersAndAdjustStack(registersInUse, 0); switch (accessSize) diff --git a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h index ba9433134f2..54cade23c5a 100644 --- a/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h +++ b/Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.h @@ -49,6 +49,8 @@ public: return Gen::M(m_const_pool.GetConstant(&value, sizeof(T), N, index)); } + void FlushPCBeforeSlowAccess(); + // Writes upper 15 bits of physical address to addr and clobbers the lower 17 bits of addr. // Jumps to the returned FixupBranch if lookup fails. Gen::FixupBranch BATAddressLookup(Gen::X64Reg addr, Gen::X64Reg tmp, const void* bat_table); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 5554bc69559..77ba6d0bb35 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -282,6 +282,9 @@ protected: Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0), BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false); + // temp_gpr must be a valid register, but temp_fpr can be INVALID_REG. + void FlushPPCStateBeforeSlowAccess(Arm64Gen::ARM64Reg temp_gpr, Arm64Gen::ARM64Reg temp_fpr); + // Loadstore routines void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update); void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset, bool update); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 1c54c00ebea..654d6bed6f3 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -21,6 +21,7 @@ #include "Core/PowerPC/JitArmCommon/BackPatch.h" #include "Core/PowerPC/MMU.h" #include "Core/PowerPC/PowerPC.h" +#include "Core/System.h" using namespace Arm64Gen; @@ -171,6 +172,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, const ARM64Reg temp_gpr = ARM64Reg::W1; const int temp_gpr_index = DecodeReg(temp_gpr); + const ARM64Reg temp_fpr = fprs_to_push[0] ? ARM64Reg::INVALID_REG : ARM64Reg::Q0; BitSet32 gprs_to_push_early = {}; if (memcheck) @@ -189,16 +191,10 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ABI_PushRegisters(gprs_to_push & ~gprs_to_push_early); m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30); - // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe - // interrupt checks, and printing accurate PC locations in debug logs. - // - // In the case of JitAsm routines, we don't know the PC here, - // so the caller has to store the PC themselves. + // In the case of JitAsm routines, the state we want to store here isn't known + // when compiling the routine, so the caller has to store it themselves. if (!emitting_routine) - { - MOVI2R(ARM64Reg::W30, js.compilerPC); - STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc)); - } + FlushPPCStateBeforeSlowAccess(ARM64Reg::W30, temp_fpr); if (flags & BackPatchInfo::FLAG_STORE) { @@ -265,7 +261,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, if (memcheck) { - const ARM64Reg temp_fpr = fprs_to_push[0] ? ARM64Reg::INVALID_REG : ARM64Reg::Q0; const u64 early_push_count = (gprs_to_push & gprs_to_push_early).Count(); const u64 early_push_size = Common::AlignUp(early_push_count, 2) * 8; @@ -316,6 +311,22 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, } } +void JitArm64::FlushPPCStateBeforeSlowAccess(ARM64Reg temp_gpr, ARM64Reg temp_fpr) +{ + // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe + // interrupt checks, and printing accurate PC locations in debug logs. + MOVI2R(temp_gpr, js.compilerPC); + STR(IndexType::Unsigned, temp_gpr, PPC_REG, PPCSTATE_OFF(pc)); + + // Register values can be used by memory watchpoint conditions. + MemChecks& mem_checks = m_system.GetPowerPC().GetMemChecks(); + if (mem_checks.HasAny()) + { + gpr.StoreRegisters(mem_checks.GetGPRsUsedInConditions(), temp_gpr, FlushMode::MaintainState); + fpr.StoreRegisters(mem_checks.GetFPRsUsedInConditions(), temp_fpr, FlushMode::MaintainState); + } +} + bool JitArm64::HandleFastmemFault(SContext* ctx) { const u8* pc = reinterpret_cast(ctx->CTX_PC); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 2d5c5d93554..31c1317097e 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -102,9 +102,7 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) { LDR(IndexType::Unsigned, scale_reg, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + i)); - // Stash PC in case asm routine needs to call into C++ - MOVI2R(ARM64Reg::W30, js.compilerPC); - STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc)); + FlushPPCStateBeforeSlowAccess(ARM64Reg::W30, ARM64Reg::Q1); UBFM(type_reg, scale_reg, 16, 18); // Type UBFM(scale_reg, scale_reg, 24, 29); // Scale @@ -260,9 +258,7 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) { LDR(IndexType::Unsigned, scale_reg, PPC_REG, PPCSTATE_OFF_SPR(SPR_GQR0 + i)); - // Stash PC in case asm routine needs to call into C++ - MOVI2R(ARM64Reg::W30, js.compilerPC); - STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc)); + FlushPPCStateBeforeSlowAccess(ARM64Reg::W30, ARM64Reg::Q1); UBFM(type_reg, scale_reg, 0, 2); // Type UBFM(scale_reg, scale_reg, 8, 13); // Scale diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 5164745cd80..990b2dcee79 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -388,14 +388,16 @@ public: BitSet32 GetDirtyGPRs() const; - void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG) + void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG, + FlushMode flush_mode = FlushMode::All) { - FlushRegisters(regs, FlushMode::All, tmp_reg, IgnoreDiscardedRegisters::No); + FlushRegisters(regs, flush_mode, tmp_reg, IgnoreDiscardedRegisters::No); } - void StoreCRRegisters(BitSet8 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG) + void StoreCRRegisters(BitSet8 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG, + FlushMode flush_mode = FlushMode::All) { - FlushCRRegisters(regs, FlushMode::All, tmp_reg, IgnoreDiscardedRegisters::No); + FlushCRRegisters(regs, flush_mode, tmp_reg, IgnoreDiscardedRegisters::No); } void DiscardCRRegisters(BitSet8 regs); @@ -459,9 +461,10 @@ public: void FixSinglePrecision(size_t preg); - void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG) + void StoreRegisters(BitSet32 regs, Arm64Gen::ARM64Reg tmp_reg = Arm64Gen::ARM64Reg::INVALID_REG, + FlushMode flush_mode = FlushMode::All) { - FlushRegisters(regs, FlushMode::All, tmp_reg); + FlushRegisters(regs, flush_mode, tmp_reg); } protected: