From c9457cf90624aeeda8c13b86e2abc473682f5b17 Mon Sep 17 00:00:00 2001
From: Martino Fontana
Date: Sun, 8 Feb 2026 10:42:23 +0100
Subject: [PATCH] Jit: Emit Branch Watch code only if it's enabled

JIT code related to Branch Watch was emitted whenever the debugging UI was
active: the emitted code would dynamically check whether Branch Watch is
active. However, this caused two problems:

1. It decreased performance simply by having the debugging UI enabled, even
   while Branch Watch itself was not in use.
2. It cluttered the host assembly in the JIT tab, making it harder to read
   (unaware readers will wonder what these instructions are for).

With this PR, code related to Branch Watch is emitted only if Branch Watch
itself is active, fixing the issues above. The JIT cache is now wiped
whenever the feature is toggled, causing a slight stutter. However, this
isn't the kind of feature that is toggled over and over, so IMO it is an
acceptable trade-off.
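To illustrate the idea, here is a minimal, self-contained sketch. ToyJit and
its members are invented stand-ins, not Dolphin's real emitter API; the
actual change is in the diff below.

    // Sketch: the Branch Watch test moves from the generated code (run on
    // every executed branch) to JIT compile time (run once per block).
    #include <cstdio>
    #include <functional>
    #include <vector>

    struct ToyJit
    {
      bool branch_watch_active = false;          // stands in for IsBranchWatchEnabled()
      std::vector<std::function<void()>> block;  // the "emitted" host code

      // Old scheme: unconditionally emit a runtime test of the flag.
      void EmitBranchOld()
      {
        block.push_back([this] {
          if (branch_watch_active)               // tested on every execution
            std::puts("BranchWatch hit");
        });
      }

      // New scheme: test once at emit time; emit nothing when disabled.
      void EmitBranchNew()
      {
        if (branch_watch_active)                 // tested once, at JIT time
          block.push_back([] { std::puts("BranchWatch hit"); });
      }

      void Run() const
      {
        for (const auto& op : block)
          op();
      }
    };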
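Continuing the sketch, the catch is that blocks emitted under the new scheme
bake the flag's value in, which is why toggling the flag must invalidate
compiled code (in the real patch, SetRecordingActive() does this via
JitInterface::ClearCache):

    // Sketch only: the real code clears the JIT block cache instead.
    void SetBranchWatchActive(ToyJit& jit, bool active)
    {
      jit.branch_watch_active = active;
      jit.block.clear();  // stale: this code was emitted under the old value
      // ...blocks are then recompiled on demand with the new setting.
    }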
---
 Source/Core/Core/Debugger/BranchWatch.h       |  29 ++-
 Source/Core/Core/PowerPC/Jit64/Jit.cpp        |   5 +-
 Source/Core/Core/PowerPC/Jit64/Jit.h          |   6 +-
 Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp | 169 +++++-------------
 .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp   |  46 ++---
 .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp |  19 +-
 Source/Core/Core/PowerPC/JitArm64/Jit.cpp     |   9 +-
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |   4 +-
 .../Core/PowerPC/JitArm64/JitArm64_Branch.cpp | 146 +++++----------
 .../PowerPC/JitArm64/JitArm64_LoadStore.cpp   |  23 +--
 Source/Core/Core/PowerPC/JitCommon/JitBase.h  |   6 +
 11 files changed, 130 insertions(+), 332 deletions(-)

diff --git a/Source/Core/Core/Debugger/BranchWatch.h b/Source/Core/Core/Debugger/BranchWatch.h
index a4e5ce1775..f455d91898 100644
--- a/Source/Core/Core/Debugger/BranchWatch.h
+++ b/Source/Core/Core/Debugger/BranchWatch.h
@@ -12,7 +12,10 @@
 #include

 #include "Common/CommonTypes.h"
+#include "Core/Core.h"
 #include "Core/PowerPC/Gekko.h"
+#include "Core/PowerPC/JitInterface.h"
+#include "Core/System.h"

 namespace Core
 {
@@ -110,7 +113,7 @@ enum class BranchWatchPhase : bool
   Reduction,
 };

-class BranchWatch final  // Class is final to enforce the safety of GetOffsetOfRecordingActive().
+class BranchWatch final
 {
 public:
   using Collection = BranchWatchCollection;
@@ -117,7 +120,12 @@ public:
   using Selection = BranchWatchSelection;
   using SelectionInspection = BranchWatchSelectionInspection;
   bool GetRecordingActive() const { return m_recording_active; }
-  void SetRecordingActive(const CPUThreadGuard& guard, bool active) { m_recording_active = active; }
+  void SetRecordingActive(const CPUThreadGuard& guard, bool active)
+  {
+    m_recording_active = active;
+    auto& system = guard.GetSystem();
+    system.GetJitInterface().ClearCache(guard);
+  }

   void Clear(const CPUThreadGuard& guard);
   void Save(const CPUThreadGuard& guard, std::FILE* file) const;
@@ -226,19 +234,6 @@ public:
     HitPhysicalFalse(this, origin, destination, inst.hex);
   }

-  // The JIT needs this value, but doesn't need to be a full-on friend.
-  static constexpr int GetOffsetOfRecordingActive()
-  {
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Winvalid-offsetof"
-#endif
-    return offsetof(BranchWatch, m_recording_active);
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
-  }
-
 private:
   Collection& GetCollectionV(bool condition)
   {
@@ -273,8 +268,4 @@ private:
   Collection m_collection_pf;  // physical address space | false path
   Selection m_selection;
 };
-
-#if _M_X86_64
-static_assert(BranchWatch::GetOffsetOfRecordingActive() < 0x80);  // Makes JIT code smaller.
-#endif
 }  // namespace Core
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index 78bda8451b..0b976eb1c8 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -1067,13 +1067,12 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)

     if (op.skip)
     {
-      if (IsDebuggingEnabled())
+      if (IsBranchWatchEnabled())
       {
         // The only thing that currently sets op.skip is the BLR following optimization.
         // If any non-branch instruction starts setting that too, this will need to be changed.
         ASSERT(op.inst.hex == 0x4e800020);
-        WriteBranchWatch<true>(op.address, op.branchTo, op.inst, RSCRATCH, RSCRATCH2,
-                               CallerSavedRegistersInUse());
+        WriteBranchWatch<true>(op.address, op.branchTo, op.inst, CallerSavedRegistersInUse());
       }
     }
     else
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index fde8691b48..2d8ef2be55 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -110,10 +110,8 @@ public:
   void WriteRfiExitDestInRSCRATCH();
   void WriteIdleExit(u32 destination);
   template <bool condition>
-  void WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, Gen::X64Reg reg_a,
-                        Gen::X64Reg reg_b, BitSet32 caller_save);
-  void WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, Gen::X64Reg reg_a,
-                                      Gen::X64Reg reg_b, BitSet32 caller_save);
+  void WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, BitSet32 caller_save);
+  void WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, BitSet32 caller_save);

   bool Cleanup();

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
index 81e7ad8a09..4667238720 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
@@ -67,65 +67,42 @@ void Jit64::rfi(UGeckoInstruction inst)
 }

 template <bool condition>
-void Jit64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, X64Reg reg_a,
-                             X64Reg reg_b, BitSet32 caller_save)
+void Jit64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst,
+                             BitSet32 caller_save)
 {
-  MOV(64, R(reg_a), ImmPtr(&m_branch_watch));
-  MOVZX(32, 8, reg_b, MDisp(reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
-  TEST(32, R(reg_b), R(reg_b));
-
-  FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
-  SwitchToFarCode();
-  SetJumpTarget(branch_in);
-
-  ABI_PushRegistersAndAdjustStack(caller_save, 0);
-  // Some call sites have an optimization to use ABI_PARAM1 as a scratch register.
-  if (reg_a != ABI_PARAM1)
-    MOV(64, R(ABI_PARAM1), R(reg_a));
-  MOV(64, R(ABI_PARAM2), Imm64(Core::FakeBranchWatchCollectionKey{origin, destination}));
-  MOV(32, R(ABI_PARAM3), Imm32(inst.hex));
-  ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
-                                                     &Core::BranchWatch::HitVirtualFalse_fk) :
-                                        (condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
-                                                     &Core::BranchWatch::HitPhysicalFalse_fk));
-  ABI_PopRegistersAndAdjustStack(caller_save, 0);
-
-  FixupBranch branch_out = J(Jump::Near);
-  SwitchToNearCode();
-  SetJumpTarget(branch_out);
+  if (IsBranchWatchEnabled())
+  {
+    ABI_PushRegistersAndAdjustStack(caller_save, 0);
+    MOV(64, R(ABI_PARAM1), ImmPtr(&m_branch_watch));
+    MOV(64, R(ABI_PARAM2), Imm64(Core::FakeBranchWatchCollectionKey{origin, destination}));
+    MOV(32, R(ABI_PARAM3), Imm32(inst.hex));
+    ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
+                                                       &Core::BranchWatch::HitVirtualFalse_fk) :
+                                          (condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
+                                                       &Core::BranchWatch::HitPhysicalFalse_fk));
+    ABI_PopRegistersAndAdjustStack(caller_save, 0);
+  }
 }

-template void Jit64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, X64Reg, X64Reg, BitSet32);
-template void Jit64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, X64Reg, X64Reg, BitSet32);
+template void Jit64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, BitSet32);
+template void Jit64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, BitSet32);

-void Jit64::WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, X64Reg reg_a,
-                                           X64Reg reg_b, BitSet32 caller_save)
+void Jit64::WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, BitSet32 caller_save)
 {
-  MOV(64, R(reg_a), ImmPtr(&m_branch_watch));
-  MOVZX(32, 8, reg_b, MDisp(reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
-  TEST(32, R(reg_b), R(reg_b));
+  if (IsBranchWatchEnabled())
+  {
+    // Assert RSCRATCH won't be clobbered before it is moved from.
+    static_assert(ABI_PARAM1 != RSCRATCH);

-  FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
-  SwitchToFarCode();
-  SetJumpTarget(branch_in);
-
-  // Assert RSCRATCH won't be clobbered before it is moved from.
-  static_assert(ABI_PARAM1 != RSCRATCH);
-
-  ABI_PushRegistersAndAdjustStack(caller_save, 0);
-  // Some call sites have an optimization to use ABI_PARAM1 as a scratch register.
-  if (reg_a != ABI_PARAM1)
-    MOV(64, R(ABI_PARAM1), R(reg_a));
-  MOV(32, R(ABI_PARAM3), R(RSCRATCH));
-  MOV(32, R(ABI_PARAM2), Imm32(origin));
-  MOV(32, R(ABI_PARAM4), Imm32(inst.hex));
-  ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
-                                        &Core::BranchWatch::HitPhysicalTrue);
-  ABI_PopRegistersAndAdjustStack(caller_save, 0);
-
-  FixupBranch branch_out = J(Jump::Near);
-  SwitchToNearCode();
-  SetJumpTarget(branch_out);
+    ABI_PushRegistersAndAdjustStack(caller_save, 0);
+    MOV(64, R(ABI_PARAM1), ImmPtr(&m_branch_watch));
+    MOV(32, R(ABI_PARAM3), R(RSCRATCH));
+    MOV(32, R(ABI_PARAM2), Imm32(origin));
+    MOV(32, R(ABI_PARAM4), Imm32(inst.hex));
+    ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
+                                          &Core::BranchWatch::HitPhysicalTrue);
+    ABI_PopRegistersAndAdjustStack(caller_save, 0);
+  }
 }

 void Jit64::bx(UGeckoInstruction inst)
@@ -143,11 +120,7 @@ void Jit64::bx(UGeckoInstruction inst)
   // Because PPCAnalyst::Flatten() merged the blocks.
   if (!js.isLastInstruction)
   {
-    if (IsDebuggingEnabled())
-    {
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, RSCRATCH, RSCRATCH2,
-                             CallerSavedRegistersInUse());
-    }
+    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, CallerSavedRegistersInUse());
     if (inst.LK && !js.op->skipLRStack)
     {
       // We have to fake the stack as the RET instruction was not
@@ -161,11 +134,7 @@ void Jit64::bx(UGeckoInstruction inst)

   gpr.Flush();
   fpr.Flush();
-  if (IsDebuggingEnabled())
-  {
-    // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
-  }
+  WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {});
 #ifdef ACID_TEST
   if (inst.LK)
     AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
 #endif
@@ -216,11 +185,7 @@ void Jit64::bcx(UGeckoInstruction inst)
   if (!js.isLastInstruction && (inst.BO & BO_DONT_DECREMENT_FLAG) &&
       (inst.BO & BO_DONT_CHECK_CONDITION))
   {
-    if (IsDebuggingEnabled())
-    {
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, RSCRATCH, RSCRATCH2,
-                             CallerSavedRegistersInUse());
-    }
+    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, CallerSavedRegistersInUse());
     if (inst.LK && !js.op->skipLRStack)
     {
       // We have to fake the stack as the RET instruction was not
@@ -237,11 +202,7 @@ void Jit64::bcx(UGeckoInstruction inst)

     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {});
     if (js.op->branchIsIdleLoop)
     {
       WriteIdleExit(js.op->branchTo);
@@ -261,18 +222,10 @@ void Jit64::bcx(UGeckoInstruction inst)
   {
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {});
     WriteExit(js.compilerPC + 4);
   }
-  else if (IsDebuggingEnabled())
-  {
-    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
-                            CallerSavedRegistersInUse());
-  }
+  WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, CallerSavedRegistersInUse());
 }

 void Jit64::bcctrx(UGeckoInstruction inst)
@@ -296,12 +249,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
     if (inst.LK_3)
       MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));  // LR = PC + 4;
     AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
-                                     BitSet32{RSCRATCH});
-    }
+    WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, BitSet32{RSCRATCH});
     WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
   }
   else
   {
@@ -324,12 +272,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
     RCForkGuard fpr_guard = fpr.Fork();
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
-                                     BitSet32{RSCRATCH});
-    }
+    WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, BitSet32{RSCRATCH});
     WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
     // Would really like to continue the block here, but it ends. TODO.
   }
@@ -339,18 +282,10 @@ void Jit64::bcctrx(UGeckoInstruction inst)
   {
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {});
     WriteExit(js.compilerPC + 4);
   }
-  else if (IsDebuggingEnabled())
-  {
-    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
-                            CallerSavedRegistersInUse());
-  }
+  WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, CallerSavedRegistersInUse());
   }
 }
@@ -399,21 +334,12 @@ void Jit64::bclrx(UGeckoInstruction inst)

     if (js.op->branchIsIdleLoop)
     {
-      if (IsDebuggingEnabled())
-      {
-        // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-        WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
-      }
+      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {});
       WriteIdleExit(js.op->branchTo);
     }
     else
     {
-      if (IsDebuggingEnabled())
-      {
-        // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-        WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
-                                       BitSet32{RSCRATCH});
-      }
+      WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, BitSet32{RSCRATCH});
       WriteBLRExit();
     }
   }
@@ -427,16 +353,11 @@ void Jit64::bclrx(UGeckoInstruction inst)
   {
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {});
     WriteExit(js.compilerPC + 4);
   }
-  else if (IsDebuggingEnabled())
+  else
   {
-    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
-                            CallerSavedRegistersInUse());
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, CallerSavedRegistersInUse());
   }
 }
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index 90fd009787..f3c329cef4 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -387,11 +387,7 @@ void Jit64::DoMergedBranch()
     MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));

     const u32 destination = js.op[1].branchTo;
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<true>(nextPC, destination, next, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<true>(nextPC, destination, next, {});
     WriteIdleExit(destination);
   }
   else if (next.OPCD == 16)  // bcx
@@ -400,11 +396,7 @@ void Jit64::DoMergedBranch()
     MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));

     const u32 destination = js.op[1].branchTo;
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<true>(nextPC, destination, next, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<true>(nextPC, destination, next, {});
     WriteExit(destination, next.LK, nextPC + 4);
   }
   else if ((next.OPCD == 19) && (next.SUBOP10 == 528))  // bcctrx
@@ -413,11 +405,7 @@ void Jit64::DoMergedBranch()
     MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
     MOV(32, R(RSCRATCH), PPCSTATE_SPR(SPR_CTR));
     AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatchDestInRSCRATCH(nextPC, next, ABI_PARAM1, RSCRATCH2, BitSet32{RSCRATCH});
-    }
+    WriteBranchWatchDestInRSCRATCH(nextPC, next, BitSet32{RSCRATCH});
     WriteExitDestInRSCRATCH(next.LK, nextPC + 4);
   }
   else if ((next.OPCD == 19) && (next.SUBOP10 == 16))  // bclrx
@@ -427,11 +415,7 @@ void Jit64::DoMergedBranch()
     MOV(32, R(RSCRATCH), PPCSTATE_SPR(SPR_LR));
     AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
     if (next.LK)
       MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatchDestInRSCRATCH(nextPC, next, ABI_PARAM1, RSCRATCH2, BitSet32{RSCRATCH});
-    }
+    WriteBranchWatchDestInRSCRATCH(nextPC, next, BitSet32{RSCRATCH});
     WriteBLRExit();
   }
   else
@@ -488,17 +472,12 @@ void Jit64::DoMergedBranchCondition()
   {
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<false>(nextPC, nextPC + 4, next, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<false>(nextPC, nextPC + 4, next, {});
     WriteExit(nextPC + 4);
   }
-  else if (IsDebuggingEnabled())
+  else
   {
-    WriteBranchWatch<false>(nextPC, nextPC + 4, next, RSCRATCH, RSCRATCH2,
-                            CallerSavedRegistersInUse());
+    WriteBranchWatch<false>(nextPC, nextPC + 4, next, CallerSavedRegistersInUse());
   }
 }

@@ -540,17 +519,12 @@ void Jit64::DoMergedBranchImmediate(s64 val)
   {
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<false>(nextPC, nextPC + 4, next, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<false>(nextPC, nextPC + 4, next, {});
     WriteExit(nextPC + 4);
   }
-  else if (IsDebuggingEnabled())
+  else
   {
-    WriteBranchWatch<false>(nextPC, nextPC + 4, next, RSCRATCH, RSCRATCH2,
-                            CallerSavedRegistersInUse());
+    WriteBranchWatch<false>(nextPC, nextPC + 4, next, CallerSavedRegistersInUse());
   }
 }
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index 4b11f96942..b4a3b4ef43 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -305,25 +305,16 @@ void Jit64::dcbx(UGeckoInstruction inst)
   // Load the loop_counter register with the amount of invalidations to execute.
   LEA(32, loop_counter, MDisp(RSCRATCH2, 1));

-  if (IsDebuggingEnabled())
+  if (IsBranchWatchEnabled())
   {
-    const X64Reg bw_reg_a = reg_cycle_count, bw_reg_b = reg_downcount;
     const BitSet32 bw_caller_save = (CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}) &
-                                    ~BitSet32{int(bw_reg_a), int(bw_reg_b)};
-
-    MOV(64, R(bw_reg_a), ImmPtr(&m_branch_watch));
-    MOVZX(32, 8, bw_reg_b, MDisp(bw_reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
-    TEST(32, R(bw_reg_b), R(bw_reg_b));
-
-    FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
-    SwitchToFarCode();
-    SetJumpTarget(branch_in);
+                                    ~BitSet32{int(reg_cycle_count), int(reg_downcount)};

     // Assert RSCRATCH2 won't be clobbered before it is moved from.
     static_assert(RSCRATCH2 != ABI_PARAM1);

     ABI_PushRegistersAndAdjustStack(bw_caller_save, 0);
-    MOV(64, R(ABI_PARAM1), R(bw_reg_a));
+    MOV(64, R(ABI_PARAM1), ImmPtr(&m_branch_watch));
     // RSCRATCH2 holds the amount of faked branch watch hits. Move RSCRATCH2 first, because
     // ABI_PARAM2 clobbers RSCRATCH2 on Windows and ABI_PARAM3 clobbers RSCRATCH2 on Linux!
     MOV(32, R(ABI_PARAM4), R(RSCRATCH2));
@@ -333,10 +324,6 @@ void Jit64::dcbx(UGeckoInstruction inst)
     ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue_fk_n :
                                           &Core::BranchWatch::HitPhysicalTrue_fk_n);
     ABI_PopRegistersAndAdjustStack(bw_caller_save, 0);
-
-    FixupBranch branch_out = J(Jump::Near);
-    SwitchToNearCode();
-    SetJumpTarget(branch_out);
   }
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index c04c45783d..ce5dd96c74 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -1296,16 +1296,13 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)

     if (op.skip)
     {
-      if (IsDebuggingEnabled())
+      if (IsBranchWatchEnabled())
       {
         // The only thing that currently sets op.skip is the BLR following optimization.
         // If any non-branch instruction starts setting that too, this will need to be changed.
         ASSERT(op.inst.hex == 0x4e800020);
-        const auto bw_reg_a = gpr.GetScopedReg(), bw_reg_b = gpr.GetScopedReg();
-        const BitSet32 gpr_caller_save =
-            gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
-        WriteBranchWatch<true>(op.address, op.branchTo, op.inst, bw_reg_a, bw_reg_b,
-                               gpr_caller_save, fpr.GetCallerSavedUsed());
+        WriteBranchWatch<true>(op.address, op.branchTo, op.inst, gpr.GetCallerSavedUsed(),
+                               fpr.GetCallerSavedUsed());
       }
     }
     else
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 7a46eb58fe..5b42dff3dd 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -334,11 +334,9 @@ protected:
   // Branch Watch
   template <bool condition>
   void WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst,
-                        Arm64Gen::ARM64Reg reg_a, Arm64Gen::ARM64Reg reg_b,
                         BitSet32 gpr_caller_save, BitSet32 fpr_caller_save);
   void WriteBranchWatchDestInRegister(u32 origin, Arm64Gen::ARM64Reg destination,
-                                      UGeckoInstruction inst, Arm64Gen::ARM64Reg reg_a,
-                                      Arm64Gen::ARM64Reg reg_b, BitSet32 gpr_caller_save,
+                                      UGeckoInstruction inst, BitSet32 gpr_caller_save,
                                       BitSet32 fpr_caller_save);

   // Exits
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp
index a9df905b10..191c423876 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp
@@ -75,67 +75,35 @@ void JitArm64::rfi(UGeckoInstruction inst)
 }

 template <bool condition>
-void JitArm64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, ARM64Reg reg_a,
-                                ARM64Reg reg_b, BitSet32 gpr_caller_save, BitSet32 fpr_caller_save)
+void JitArm64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst,
+                                BitSet32 gpr_caller_save, BitSet32 fpr_caller_save)
 {
-  const ARM64Reg branch_watch = EncodeRegTo64(reg_a);
-  MOVP2R(branch_watch, &m_branch_watch);
-  LDRB(IndexType::Unsigned, reg_b, branch_watch, Core::BranchWatch::GetOffsetOfRecordingActive());
-  FixupBranch branch_over = CBZ(reg_b);
-
-  FixupBranch branch_in = B();
-  SwitchToFarCode();
-  SetJumpTarget(branch_in);
-
-  const ARM64Reg float_emit_tmp = EncodeRegTo64(reg_b);
   ABI_PushRegisters(gpr_caller_save);
-  m_float_emit.ABI_PushRegisters(fpr_caller_save, float_emit_tmp);
+  m_float_emit.ABI_PushRegisters(fpr_caller_save, ARM64Reg::X8);
   ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
                                                      &Core::BranchWatch::HitVirtualFalse_fk) :
                                         (condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
                                                      &Core::BranchWatch::HitPhysicalFalse_fk),
-                   branch_watch, Core::FakeBranchWatchCollectionKey{origin, destination}, inst.hex);
-  m_float_emit.ABI_PopRegisters(fpr_caller_save, float_emit_tmp);
+                   &m_branch_watch, Core::FakeBranchWatchCollectionKey{origin, destination},
+                   inst.hex);
+  m_float_emit.ABI_PopRegisters(fpr_caller_save, ARM64Reg::X8);
   ABI_PopRegisters(gpr_caller_save);
-
-  FixupBranch branch_out = B();
-  SwitchToNearCode();
-  SetJumpTarget(branch_out);
-  SetJumpTarget(branch_over);
 }

-template void JitArm64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, ARM64Reg, ARM64Reg,
-                                               BitSet32, BitSet32);
-template void JitArm64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, ARM64Reg, ARM64Reg,
-                                                BitSet32, BitSet32);
+template void JitArm64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, BitSet32, BitSet32);
+template void JitArm64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, BitSet32, BitSet32);

 void JitArm64::WriteBranchWatchDestInRegister(u32 origin, ARM64Reg destination,
-                                              UGeckoInstruction inst, ARM64Reg reg_a,
-                                              ARM64Reg reg_b, BitSet32 gpr_caller_save,
+                                              UGeckoInstruction inst, BitSet32 gpr_caller_save,
                                               BitSet32 fpr_caller_save)
 {
-  const ARM64Reg branch_watch = EncodeRegTo64(reg_a);
-  MOVP2R(branch_watch, &m_branch_watch);
-  LDRB(IndexType::Unsigned, reg_b, branch_watch, Core::BranchWatch::GetOffsetOfRecordingActive());
-  FixupBranch branch_over = CBZ(reg_b);
-
-  FixupBranch branch_in = B();
-  SwitchToFarCode();
-  SetJumpTarget(branch_in);
-
-  const ARM64Reg float_emit_tmp = EncodeRegTo64(reg_b);
   ABI_PushRegisters(gpr_caller_save);
-  m_float_emit.ABI_PushRegisters(fpr_caller_save, float_emit_tmp);
+  m_float_emit.ABI_PushRegisters(fpr_caller_save, ARM64Reg::X8);
   ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
                                         &Core::BranchWatch::HitPhysicalTrue,
-                   branch_watch, origin, destination, inst.hex);
-  m_float_emit.ABI_PopRegisters(fpr_caller_save, float_emit_tmp);
+                   &m_branch_watch, origin, destination, inst.hex);
+  m_float_emit.ABI_PopRegisters(fpr_caller_save, ARM64Reg::X8);
   ABI_PopRegisters(gpr_caller_save);
-
-  FixupBranch branch_out = B();
-  SwitchToNearCode();
-  SetJumpTarget(branch_out);
-  SetJumpTarget(branch_over);
 }

 void JitArm64::bx(UGeckoInstruction inst)
@@ -153,13 +121,12 @@ void JitArm64::bx(UGeckoInstruction inst)

   if (!js.isLastInstruction)
   {
-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
-      const auto WB = gpr.GetScopedReg(), WC = gpr.GetScopedReg();
-      BitSet32 gpr_caller_save =
-          gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB), DecodeReg(WC)};
+      BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed();
       if (WA != ARM64Reg::INVALID_REG && js.op->skipLRStack)
         gpr_caller_save[DecodeReg(WA)] = false;
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, WB, WC, gpr_caller_save,
+      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, gpr_caller_save,
                              fpr.GetCallerSavedUsed());
     }
     if (inst.LK && !js.op->skipLRStack)
@@ -178,15 +145,14 @@ void JitArm64::bx(UGeckoInstruction inst)

   if (js.op->branchIsIdleLoop)
   {
+    if (IsBranchWatchEnabled())
+    {
+      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {}, {});
+    }
+
     if (WA == ARM64Reg::INVALID_REG)
       WA = gpr.GetScopedReg();

-    if (IsDebuggingEnabled())
-    {
-      const auto WB = gpr.GetScopedReg();
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, WA, WB, {}, {});
-    }
-
     // make idle loops go faster
     ARM64Reg XA = EncodeRegTo64(WA);

@@ -198,12 +164,11 @@ void JitArm64::bx(UGeckoInstruction inst)
     return;
   }

-  if (IsDebuggingEnabled())
+  if (IsBranchWatchEnabled())
   {
-    const auto WB = gpr.GetScopedReg(), WC = gpr.GetScopedReg();
     const BitSet32 gpr_caller_save =
         WA != ARM64Reg::INVALID_REG ? BitSet32{DecodeReg(WA)} & CALLER_SAVED_GPRS : BitSet32{};
-    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, WB, WC, gpr_caller_save, {});
+    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, gpr_caller_save, {});
   }
   WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, WA);
 }
@@ -214,14 +179,11 @@ void JitArm64::bcx(UGeckoInstruction inst)
   JITDISABLE(bJITBranchOff);

   auto WA = gpr.GetScopedReg();
-  auto WB = inst.LK || IsDebuggingEnabled() ? gpr.GetScopedReg() :
-                                              Arm64GPRCache::ScopedARM64Reg(WA.GetReg());
+  // If WA isn't needed for WriteExit, it can be safely clobbered.
+  auto WB = (inst.LK && !js.op->branchIsIdleLoop) ? gpr.GetScopedReg() :
+                                                    Arm64GPRCache::ScopedARM64Reg(WA.GetReg());

   {
-    auto WC = IsDebuggingEnabled() && inst.LK && !js.op->branchIsIdleLoop ?
-                  gpr.GetScopedReg() :
-                  Arm64GPRCache::ScopedARM64Reg(ARM64Reg::INVALID_REG);
-
     FixupBranch pCTRDontBranch;
     if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)  // Decrement and test CTR
     {
@@ -252,18 +214,11 @@ void JitArm64::bcx(UGeckoInstruction inst)
     gpr.Flush(FlushMode::MaintainState, WB);
     fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);

-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
-      ARM64Reg bw_reg_a, bw_reg_b;
-      // WC is only allocated when WA is needed for WriteExit and cannot be clobbered.
-      if (WC == ARM64Reg::INVALID_REG)
-        bw_reg_a = WA, bw_reg_b = WB;
-      else
-        bw_reg_a = WB, bw_reg_b = WC;
-      const BitSet32 gpr_caller_save =
-          gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, bw_reg_a, bw_reg_b,
-                             gpr_caller_save, fpr.GetCallerSavedUsed());
+      BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB)};
+      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, gpr_caller_save,
+                             fpr.GetCallerSavedUsed());
     }
     if (js.op->branchIsIdleLoop)
     {
@@ -290,17 +245,15 @@ void JitArm64::bcx(UGeckoInstruction inst)
   {
     gpr.Flush(FlushMode::All, WA);
     fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
-      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, {}, {});
+      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {}, {});
     }
     WriteExit(js.compilerPC + 4);
   }
-  else if (IsDebuggingEnabled())
+  else if (IsBranchWatchEnabled())
   {
-    const BitSet32 gpr_caller_save =
-        gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WA), DecodeReg(WB)};
-    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, gpr_caller_save,
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, gpr.GetCallerSavedUsed(),
                             fpr.GetCallerSavedUsed());
   }
 }
@@ -338,14 +291,13 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
     LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR));
     AND(WA, WA, LogicalImm(~0x3, GPRSize::B32));

-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
-      const auto WC = gpr.GetScopedReg(), WD = gpr.GetScopedReg();
       BitSet32 gpr_caller_save = BitSet32{DecodeReg(WA)};
       if (WB != ARM64Reg::INVALID_REG)
         gpr_caller_save[DecodeReg(WB)] = true;
       gpr_caller_save &= CALLER_SAVED_GPRS;
-      WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, WC, WD, gpr_caller_save, {});
+      WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, gpr_caller_save, {});
     }
     WriteExit(WA, inst.LK_3, js.compilerPC + 4, WB);
   }
@@ -360,18 +312,10 @@ void JitArm64::bclrx(UGeckoInstruction inst)

   auto WA = gpr.GetScopedReg();
   Arm64GPRCache::ScopedARM64Reg WB;
-  if (conditional || inst.LK || IsDebuggingEnabled())
-  {
+  if (conditional || inst.LK)
     WB = gpr.GetScopedReg();
-  }

   {
-    Arm64GPRCache::ScopedARM64Reg WC;
-    if (IsDebuggingEnabled())
-    {
-      WC = gpr.GetScopedReg();
-    }
-
     FixupBranch pCTRDontBranch;
     if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)  // Decrement and test CTR
     {
@@ -404,13 +348,13 @@ void JitArm64::bclrx(UGeckoInstruction inst)
     gpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, WB);
     fpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, ARM64Reg::INVALID_REG);

-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
       BitSet32 gpr_caller_save;
       BitSet32 fpr_caller_save;
       if (conditional)
       {
-        gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB), DecodeReg(WC)};
+        gpr_caller_save = gpr.GetCallerSavedUsed();
         if (js.op->branchIsIdleLoop)
           gpr_caller_save[DecodeReg(WA)] = false;
         fpr_caller_save = fpr.GetCallerSavedUsed();
       }
@@ -421,8 +365,7 @@ void JitArm64::bclrx(UGeckoInstruction inst)
       else
       {
         gpr_caller_save =
             js.op->branchIsIdleLoop ? BitSet32{} : BitSet32{DecodeReg(WA)} & CALLER_SAVED_GPRS;
         fpr_caller_save = {};
       }
-      WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, WB, WC, gpr_caller_save,
-                                     fpr_caller_save);
+      WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, gpr_caller_save, fpr_caller_save);
     }
     if (js.op->branchIsIdleLoop)
     {
@@ -449,17 +392,16 @@ void JitArm64::bclrx(UGeckoInstruction inst)
   {
     gpr.Flush(FlushMode::All, WA);
     fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
-      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, {}, {});
+      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {}, {});
     }
     WriteExit(js.compilerPC + 4);
   }
-  else if (IsDebuggingEnabled())
+  else if (IsBranchWatchEnabled())
   {
-    const BitSet32 gpr_caller_save =
-        gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WA), DecodeReg(WB)};
-    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, gpr_caller_save,
+    const BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WA)};
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, gpr_caller_save,
                             fpr.GetCallerSavedUsed());
   }
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 2da5d113aa..bcab1ede76 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -826,36 +826,21 @@ void JitArm64::dcbx(UGeckoInstruction inst)
   // Load the loop_counter register with the amount of invalidations to execute.
   ADD(loop_counter, WA, 1);

-  if (IsDebuggingEnabled())
+  if (IsBranchWatchEnabled())
   {
-    const ARM64Reg branch_watch = EncodeRegTo64(reg_cycle_count);
-    MOVP2R(branch_watch, &m_branch_watch);
-    LDRB(IndexType::Unsigned, WB, branch_watch, Core::BranchWatch::GetOffsetOfRecordingActive());
-    FixupBranch branch_over = CBZ(WB);
-
-    FixupBranch branch_in = B();
-    SwitchToFarCode();
-    SetJumpTarget(branch_in);
-
     const BitSet32 gpr_caller_save =
         gpr.GetCallerSavedUsed() &
         ~BitSet32{DecodeReg(WB), DecodeReg(reg_cycle_count), DecodeReg(reg_downcount)};
     ABI_PushRegisters(gpr_caller_save);
-    const ARM64Reg float_emit_tmp = EncodeRegTo64(WB);
     const BitSet32 fpr_caller_save = fpr.GetCallerSavedUsed();
-    m_float_emit.ABI_PushRegisters(fpr_caller_save, float_emit_tmp);
+    m_float_emit.ABI_PushRegisters(fpr_caller_save, ARM64Reg::X8);
     const PPCAnalyst::CodeOp& op = js.op[2];
     ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue_fk_n :
                                           &Core::BranchWatch::HitPhysicalTrue_fk_n,
-                     branch_watch, Core::FakeBranchWatchCollectionKey{op.address, op.branchTo},
+                     &m_branch_watch, Core::FakeBranchWatchCollectionKey{op.address, op.branchTo},
                      op.inst.hex, WA);
-    m_float_emit.ABI_PopRegisters(fpr_caller_save, float_emit_tmp);
+    m_float_emit.ABI_PopRegisters(fpr_caller_save, ARM64Reg::X8);
     ABI_PopRegisters(gpr_caller_save);
-
-    FixupBranch branch_out = B();
-    SwitchToNearCode();
-    SetJumpTarget(branch_out);
-    SetJumpTarget(branch_over);
   }
 }
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
index db615ce7e1..2f366ad07f 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
@@ -23,6 +23,7 @@
 #include "Core/PowerPC/JitCommon/JitAsmCommon.h"
 #include "Core/PowerPC/JitCommon/JitCache.h"
 #include "Core/PowerPC/PPCAnalyst.h"
+#include "Core/PowerPC/PowerPC.h"

 namespace Core
 {
@@ -200,6 +201,11 @@ public:
   bool IsProfilingEnabled() const { return m_enable_profiling && m_enable_debugging; }
   bool IsDebuggingEnabled() const { return m_enable_debugging; }
+  bool IsBranchWatchEnabled() const
+  {
+    auto& branch_watch = m_system.GetPowerPC().GetBranchWatch();
+    return branch_watch.GetRecordingActive();
+  }

   static const u8* Dispatch(JitBase& jit);

   virtual JitBaseBlockCache* GetBlockCache() = 0;