diff --git a/Source/Core/Core/Debugger/BranchWatch.h b/Source/Core/Core/Debugger/BranchWatch.h
index a4e5ce1775..f455d91898 100644
--- a/Source/Core/Core/Debugger/BranchWatch.h
+++ b/Source/Core/Core/Debugger/BranchWatch.h
@@ -12,7 +12,10 @@
 #include
 
 #include "Common/CommonTypes.h"
+#include "Core/Core.h"
 #include "Core/PowerPC/Gekko.h"
+#include "Core/PowerPC/JitInterface.h"
+#include "Core/System.h"
 
 namespace Core
 {
@@ -110,7 +113,7 @@ enum class BranchWatchPhase : bool
   Reduction,
 };
 
-class BranchWatch final  // Class is final to enforce the safety of GetOffsetOfRecordingActive().
+class BranchWatch final
 {
 public:
   using Collection = BranchWatchCollection;
@@ -119,7 +122,12 @@ public:
   using SelectionInspection = BranchWatchSelectionInspection;
 
   bool GetRecordingActive() const { return m_recording_active; }
-  void SetRecordingActive(const CPUThreadGuard& guard, bool active) { m_recording_active = active; }
+  void SetRecordingActive(const CPUThreadGuard& guard, bool active)
+  {
+    m_recording_active = active;
+    auto& system = guard.GetSystem();
+    system.GetJitInterface().ClearCache(guard);
+  }
   void Clear(const CPUThreadGuard& guard);
 
   void Save(const CPUThreadGuard& guard, std::FILE* file) const;
@@ -226,19 +234,6 @@ public:
     HitPhysicalFalse(this, origin, destination, inst.hex);
   }
 
-  // The JIT needs this value, but doesn't need to be a full-on friend.
-  static constexpr int GetOffsetOfRecordingActive()
-  {
-#ifdef __GNUC__
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Winvalid-offsetof"
-#endif
-    return offsetof(BranchWatch, m_recording_active);
-#ifdef __GNUC__
-#pragma GCC diagnostic pop
-#endif
-  }
-
 private:
   Collection& GetCollectionV(bool condition)
   {
@@ -273,8 +268,4 @@ private:
   Collection m_collection_pf;  // physical address space | false path
   Selection m_selection;
 };
-
-#if _M_X86_64
-static_assert(BranchWatch::GetOffsetOfRecordingActive() < 0x80);  // Makes JIT code smaller.
-#endif
 }  // namespace Core
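The deleted accessor existed so the JIT could load `m_recording_active` straight from `BranchWatch` memory: `offsetof` (hence the `-Winvalid-offsetof` pragma dance on a non-standard-layout class) exposed the member's byte offset, and the `static_assert` kept that offset below 0x80 so x86-64 could encode the load with a one-byte disp8. With the runtime check gone, toggling recording instead invalidates every compiled block via `JitInterface::ClearCache`, so each block is recompiled with the decision baked in. A minimal standalone sketch of what the removed machinery was for (a toy struct, not the Dolphin class):

```cpp
// Toy illustration of the removed offsetof/disp8 trick; not Dolphin code.
#include <cstddef>
#include <cstdio>

struct Toy
{
  bool recording_active;  // the flag the JIT used to load directly
  int other_state[8];
};

int main()
{
  // x86-64 [reg+disp8] addressing covers offsets 0x00..0x7F in one byte,
  // which is why the deleted static_assert demanded an offset below 0x80.
  constexpr std::size_t offset = offsetof(Toy, recording_active);
  static_assert(offset < 0x80, "fits in an x86 disp8");
  std::printf("offset of recording_active: %zu\n", offset);
}
```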
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index 78bda8451b..0b976eb1c8 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -1067,13 +1067,12 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
 
     if (op.skip)
     {
-      if (IsDebuggingEnabled())
+      if (IsBranchWatchEnabled())
       {
        // The only thing that currently sets op.skip is the BLR following optimization.
        // If any non-branch instruction starts setting that too, this will need to be changed.
        ASSERT(op.inst.hex == 0x4e800020);
-        WriteBranchWatch<true>(op.address, op.branchTo, op.inst, RSCRATCH, RSCRATCH2,
-                               CallerSavedRegistersInUse());
+        WriteBranchWatch<true>(op.address, op.branchTo, op.inst, CallerSavedRegistersInUse());
       }
     }
     else
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index fde8691b48..2d8ef2be55 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -110,10 +110,8 @@ public:
   void WriteRfiExitDestInRSCRATCH();
   void WriteIdleExit(u32 destination);
   template <bool condition>
-  void WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, Gen::X64Reg reg_a,
-                        Gen::X64Reg reg_b, BitSet32 caller_save);
-  void WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, Gen::X64Reg reg_a,
-                                      Gen::X64Reg reg_b, BitSet32 caller_save);
+  void WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, BitSet32 caller_save);
+  void WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, BitSet32 caller_save);
 
   bool Cleanup();
 
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
index 81e7ad8a09..4667238720 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Branch.cpp
@@ -67,65 +67,42 @@ void Jit64::rfi(UGeckoInstruction inst)
 }
 
 template <bool condition>
-void Jit64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, X64Reg reg_a,
-                             X64Reg reg_b, BitSet32 caller_save)
+void Jit64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst,
+                             BitSet32 caller_save)
 {
-  MOV(64, R(reg_a), ImmPtr(&m_branch_watch));
-  MOVZX(32, 8, reg_b, MDisp(reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
-  TEST(32, R(reg_b), R(reg_b));
-
-  FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
-  SwitchToFarCode();
-  SetJumpTarget(branch_in);
-
-  ABI_PushRegistersAndAdjustStack(caller_save, 0);
-  // Some call sites have an optimization to use ABI_PARAM1 as a scratch register.
-  if (reg_a != ABI_PARAM1)
-    MOV(64, R(ABI_PARAM1), R(reg_a));
-  MOV(64, R(ABI_PARAM2), Imm64(Core::FakeBranchWatchCollectionKey{origin, destination}));
-  MOV(32, R(ABI_PARAM3), Imm32(inst.hex));
-  ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
-                                                     &Core::BranchWatch::HitVirtualFalse_fk) :
-                                        (condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
-                                                     &Core::BranchWatch::HitPhysicalFalse_fk));
-  ABI_PopRegistersAndAdjustStack(caller_save, 0);
-
-  FixupBranch branch_out = J(Jump::Near);
-  SwitchToNearCode();
-  SetJumpTarget(branch_out);
+  if (IsBranchWatchEnabled())
+  {
+    ABI_PushRegistersAndAdjustStack(caller_save, 0);
+    MOV(64, R(ABI_PARAM1), ImmPtr(&m_branch_watch));
+    MOV(64, R(ABI_PARAM2), Imm64(Core::FakeBranchWatchCollectionKey{origin, destination}));
+    MOV(32, R(ABI_PARAM3), Imm32(inst.hex));
+    ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
+                                                       &Core::BranchWatch::HitVirtualFalse_fk) :
+                                          (condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
+                                                       &Core::BranchWatch::HitPhysicalFalse_fk));
+    ABI_PopRegistersAndAdjustStack(caller_save, 0);
+  }
 }
 
-template void Jit64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, X64Reg, X64Reg, BitSet32);
-template void Jit64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, X64Reg, X64Reg, BitSet32);
+template void Jit64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, BitSet32);
+template void Jit64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, BitSet32);
 
-void Jit64::WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, X64Reg reg_a,
-                                           X64Reg reg_b, BitSet32 caller_save)
+void Jit64::WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, BitSet32 caller_save)
 {
-  MOV(64, R(reg_a), ImmPtr(&m_branch_watch));
-  MOVZX(32, 8, reg_b, MDisp(reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
-  TEST(32, R(reg_b), R(reg_b));
+  if (IsBranchWatchEnabled())
+  {
+    // Assert RSCRATCH won't be clobbered before it is moved from.
+    static_assert(ABI_PARAM1 != RSCRATCH);
 
-  FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
-  SwitchToFarCode();
-  SetJumpTarget(branch_in);
-
-  // Assert RSCRATCH won't be clobbered before it is moved from.
-  static_assert(ABI_PARAM1 != RSCRATCH);
-
-  ABI_PushRegistersAndAdjustStack(caller_save, 0);
-  // Some call sites have an optimization to use ABI_PARAM1 as a scratch register.
-  if (reg_a != ABI_PARAM1)
-    MOV(64, R(ABI_PARAM1), R(reg_a));
-  MOV(32, R(ABI_PARAM3), R(RSCRATCH));
-  MOV(32, R(ABI_PARAM2), Imm32(origin));
-  MOV(32, R(ABI_PARAM4), Imm32(inst.hex));
-  ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
-                                        &Core::BranchWatch::HitPhysicalTrue);
-  ABI_PopRegistersAndAdjustStack(caller_save, 0);
-
-  FixupBranch branch_out = J(Jump::Near);
-  SwitchToNearCode();
-  SetJumpTarget(branch_out);
+    ABI_PushRegistersAndAdjustStack(caller_save, 0);
+    MOV(64, R(ABI_PARAM1), ImmPtr(&m_branch_watch));
+    MOV(32, R(ABI_PARAM3), R(RSCRATCH));
+    MOV(32, R(ABI_PARAM2), Imm32(origin));
+    MOV(32, R(ABI_PARAM4), Imm32(inst.hex));
+    ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
+                                          &Core::BranchWatch::HitPhysicalTrue);
+    ABI_PopRegistersAndAdjustStack(caller_save, 0);
+  }
 }
 
 void Jit64::bx(UGeckoInstruction inst)
@@ -143,11 +120,7 @@ void Jit64::bx(UGeckoInstruction inst)
   // Because PPCAnalyst::Flatten() merged the blocks.
   if (!js.isLastInstruction)
   {
-    if (IsDebuggingEnabled())
-    {
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, RSCRATCH, RSCRATCH2,
-                             CallerSavedRegistersInUse());
-    }
+    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, CallerSavedRegistersInUse());
     if (inst.LK && !js.op->skipLRStack)
     {
       // We have to fake the stack as the RET instruction was not
@@ -161,11 +134,7 @@ void Jit64::bx(UGeckoInstruction inst)
   gpr.Flush();
   fpr.Flush();
 
-  if (IsDebuggingEnabled())
-  {
-    // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
-  }
+  WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {});
 #ifdef ACID_TEST
   if (inst.LK)
     AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
@@ -216,11 +185,7 @@ void Jit64::bcx(UGeckoInstruction inst)
   if (!js.isLastInstruction && (inst.BO & BO_DONT_DECREMENT_FLAG) &&
       (inst.BO & BO_DONT_CHECK_CONDITION))
   {
-    if (IsDebuggingEnabled())
-    {
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, RSCRATCH, RSCRATCH2,
-                             CallerSavedRegistersInUse());
-    }
+    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, CallerSavedRegistersInUse());
     if (inst.LK && !js.op->skipLRStack)
     {
       // We have to fake the stack as the RET instruction was not
@@ -237,11 +202,7 @@ void Jit64::bcx(UGeckoInstruction inst)
     gpr.Flush();
     fpr.Flush();
 
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {});
     if (js.op->branchIsIdleLoop)
     {
       WriteIdleExit(js.op->branchTo);
@@ -261,18 +222,10 @@ void Jit64::bcx(UGeckoInstruction inst)
   {
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {});
     WriteExit(js.compilerPC + 4);
   }
-  else if (IsDebuggingEnabled())
-  {
-    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
-                            CallerSavedRegistersInUse());
-  }
+  WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, CallerSavedRegistersInUse());
 }
 
 void Jit64::bcctrx(UGeckoInstruction inst)
@@ -296,12 +249,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
     if (inst.LK_3)
       MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4));  // LR = PC + 4;
     AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
-                                     BitSet32{RSCRATCH});
-    }
+    WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, BitSet32{RSCRATCH});
     WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
   }
   else
@@ -324,12 +272,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
     RCForkGuard fpr_guard = fpr.Fork();
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
-                                     BitSet32{RSCRATCH});
-    }
+    WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, BitSet32{RSCRATCH});
     WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
     // Would really like to continue the block here, but it ends. TODO.
   }
@@ -339,18 +282,10 @@ void Jit64::bcctrx(UGeckoInstruction inst)
    {
      gpr.Flush();
      fpr.Flush();
-      if (IsDebuggingEnabled())
-      {
-        // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-        WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
-      }
+      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {});
      WriteExit(js.compilerPC + 4);
    }
-    else if (IsDebuggingEnabled())
-    {
-      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
-                              CallerSavedRegistersInUse());
-    }
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, CallerSavedRegistersInUse());
   }
 }
@@ -399,21 +334,12 @@ void Jit64::bclrx(UGeckoInstruction inst)
 
     if (js.op->branchIsIdleLoop)
     {
-      if (IsDebuggingEnabled())
-      {
-        // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-        WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
-      }
+      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {});
       WriteIdleExit(js.op->branchTo);
     }
     else
     {
-      if (IsDebuggingEnabled())
-      {
-        // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-        WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
-                                       BitSet32{RSCRATCH});
-      }
+      WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, BitSet32{RSCRATCH});
       WriteBLRExit();
     }
   }
@@ -427,16 +353,11 @@ void Jit64::bclrx(UGeckoInstruction inst)
   {
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {});
     WriteExit(js.compilerPC + 4);
   }
-  else if (IsDebuggingEnabled())
+  else
   {
-    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
-                            CallerSavedRegistersInUse());
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, CallerSavedRegistersInUse());
   }
 }
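With the recording flag fixed at JIT-compile time, `WriteBranchWatch` turns from emitted far code behind a `TEST` into a plain emit-or-skip decision: blocks compiled while recording is off carry no branch-watch code at all, and the `ClearCache` call in `SetRecordingActive` guarantees they are rebuilt when that changes. A standalone sketch of the shape of that change, with a hypothetical `Emitter` standing in for `Jit64`/`Gen::XEmitter`:

```cpp
// Sketch of a compile-time guard replacing an emitted runtime test.
// Emitter, EmitCall, and the printed "assembly" are hypothetical stand-ins.
#include <cstdint>
#include <cstdio>

struct Emitter
{
  bool recording_active;  // known while the block is being compiled

  void EmitCall(const char* target) { std::printf("  call %s\n", target); }

  void WriteBranchWatch(uint32_t origin, uint32_t destination)
  {
    // Old shape: always emit a load/TEST plus a far-code call path.
    // New shape: emit the unconditional call only when recording is on.
    if (!recording_active)
      return;
    std::printf("  ; record hit %08x -> %08x\n", static_cast<unsigned>(origin),
                static_cast<unsigned>(destination));
    EmitCall("Core::BranchWatch::Hit*");
  }
};

int main()
{
  Emitter off{false};
  Emitter on{true};
  off.WriteBranchWatch(0x80001234, 0x80001238);  // emits nothing at all
  on.WriteBranchWatch(0x80001234, 0x80001238);   // emits the recording call
}
```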
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index 90fd009787..f3c329cef4 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -387,11 +387,7 @@ void Jit64::DoMergedBranch()
       MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
 
     const u32 destination = js.op[1].branchTo;
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<true>(nextPC, destination, next, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<true>(nextPC, destination, next, {});
     WriteIdleExit(destination);
   }
   else if (next.OPCD == 16)  // bcx
@@ -400,11 +396,7 @@ void Jit64::DoMergedBranch()
       MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
 
     const u32 destination = js.op[1].branchTo;
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<true>(nextPC, destination, next, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<true>(nextPC, destination, next, {});
     WriteExit(destination, next.LK, nextPC + 4);
   }
   else if ((next.OPCD == 19) && (next.SUBOP10 == 528))  // bcctrx
@@ -413,11 +405,7 @@ void Jit64::DoMergedBranch()
       MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
     MOV(32, R(RSCRATCH), PPCSTATE_SPR(SPR_CTR));
     AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatchDestInRSCRATCH(nextPC, next, ABI_PARAM1, RSCRATCH2, BitSet32{RSCRATCH});
-    }
+    WriteBranchWatchDestInRSCRATCH(nextPC, next, BitSet32{RSCRATCH});
     WriteExitDestInRSCRATCH(next.LK, nextPC + 4);
   }
   else if ((next.OPCD == 19) && (next.SUBOP10 == 16))  // bclrx
@@ -427,11 +415,7 @@ void Jit64::DoMergedBranch()
     MOV(32, R(RSCRATCH), PPCSTATE_SPR(SPR_LR));
     AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
     if (next.LK)
       MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatchDestInRSCRATCH(nextPC, next, ABI_PARAM1, RSCRATCH2, BitSet32{RSCRATCH});
-    }
+    WriteBranchWatchDestInRSCRATCH(nextPC, next, BitSet32{RSCRATCH});
     WriteBLRExit();
   }
   else
@@ -488,17 +472,12 @@ void Jit64::DoMergedBranchCondition()
   {
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<false>(nextPC, nextPC + 4, next, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<false>(nextPC, nextPC + 4, next, {});
     WriteExit(nextPC + 4);
   }
-  else if (IsDebuggingEnabled())
+  else
   {
-    WriteBranchWatch<false>(nextPC, nextPC + 4, next, RSCRATCH, RSCRATCH2,
-                            CallerSavedRegistersInUse());
+    WriteBranchWatch<false>(nextPC, nextPC + 4, next, CallerSavedRegistersInUse());
   }
 }
@@ -540,17 +519,12 @@ void Jit64::DoMergedBranchImmediate(s64 val)
   {
     gpr.Flush();
     fpr.Flush();
-    if (IsDebuggingEnabled())
-    {
-      // ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
-      WriteBranchWatch<false>(nextPC, nextPC + 4, next, ABI_PARAM1, RSCRATCH, {});
-    }
+    WriteBranchWatch<false>(nextPC, nextPC + 4, next, {});
     WriteExit(nextPC + 4);
   }
-  else if (IsDebuggingEnabled())
+  else
   {
-    WriteBranchWatch<false>(nextPC, nextPC + 4, next, RSCRATCH, RSCRATCH2,
-                            CallerSavedRegistersInUse());
+    WriteBranchWatch<false>(nextPC, nextPC + 4, next, CallerSavedRegistersInUse());
   }
 }
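These call sites drop their `IsDebuggingEnabled()` wrappers because the check now lives inside the helpers. One subtlety survives into the `dcbx` hunk just below: the x86-64 argument registers alias Dolphin's scratch registers differently per ABI, which is why `RSCRATCH2` must be parked in `ABI_PARAM4` before `ABI_PARAM2` or `ABI_PARAM3` is written. For reference (standard x86-64 calling conventions; `RSCRATCH2` is `RDX`, as the kept comment implies):

```cpp
// Integer argument registers under the two x86-64 ABIs Dolphin targets.
// With RSCRATCH2 = RDX, writing ABI_PARAM2 (RDX on Windows) or ABI_PARAM3
// (RDX on System V) would clobber it before it is moved into ABI_PARAM4,
// hence the "move RSCRATCH2 first" comment retained in the new code.
#include <cstdio>

int main()
{
  const char* win64[] = {"RCX", "RDX", "R8", "R9"};
  const char* sysv[] = {"RDI", "RSI", "RDX", "RCX", "R8", "R9"};

  std::printf("Win64 ABI_PARAM1..4: ");
  for (const char* reg : win64)
    std::printf("%s ", reg);
  std::printf("\nSysV  ABI_PARAM1..6: ");
  for (const char* reg : sysv)
    std::printf("%s ", reg);
  std::printf("\n");
}
```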
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
index 4b11f96942..b4a3b4ef43 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
@@ -305,25 +305,16 @@ void Jit64::dcbx(UGeckoInstruction inst)
   // Load the loop_counter register with the amount of invalidations to execute.
   LEA(32, loop_counter, MDisp(RSCRATCH2, 1));
 
-  if (IsDebuggingEnabled())
+  if (IsBranchWatchEnabled())
   {
-    const X64Reg bw_reg_a = reg_cycle_count, bw_reg_b = reg_downcount;
     const BitSet32 bw_caller_save = (CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}) &
-                                    ~BitSet32{int(bw_reg_a), int(bw_reg_b)};
-
-    MOV(64, R(bw_reg_a), ImmPtr(&m_branch_watch));
-    MOVZX(32, 8, bw_reg_b, MDisp(bw_reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
-    TEST(32, R(bw_reg_b), R(bw_reg_b));
-
-    FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
-    SwitchToFarCode();
-    SetJumpTarget(branch_in);
+                                    ~BitSet32{int(reg_cycle_count), int(reg_downcount)};
 
     // Assert RSCRATCH2 won't be clobbered before it is moved from.
     static_assert(RSCRATCH2 != ABI_PARAM1);
 
     ABI_PushRegistersAndAdjustStack(bw_caller_save, 0);
-    MOV(64, R(ABI_PARAM1), R(bw_reg_a));
+    MOV(64, R(ABI_PARAM1), ImmPtr(&m_branch_watch));
     // RSCRATCH2 holds the amount of faked branch watch hits. Move RSCRATCH2 first, because
     // ABI_PARAM2 clobbers RSCRATCH2 on Windows and ABI_PARAM3 clobbers RSCRATCH2 on Linux!
     MOV(32, R(ABI_PARAM4), R(RSCRATCH2));
@@ -333,10 +324,6 @@ void Jit64::dcbx(UGeckoInstruction inst)
     ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue_fk_n :
                                           &Core::BranchWatch::HitPhysicalTrue_fk_n);
     ABI_PopRegistersAndAdjustStack(bw_caller_save, 0);
-
-    FixupBranch branch_out = J(Jump::Near);
-    SwitchToNearCode();
-    SetJumpTarget(branch_out);
   }
 }
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
index c04c45783d..ce5dd96c74 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp
@@ -1296,16 +1296,13 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
 
     if (op.skip)
     {
-      if (IsDebuggingEnabled())
+      if (IsBranchWatchEnabled())
      {
        // The only thing that currently sets op.skip is the BLR following optimization.
        // If any non-branch instruction starts setting that too, this will need to be changed.
        ASSERT(op.inst.hex == 0x4e800020);
-        const auto bw_reg_a = gpr.GetScopedReg(), bw_reg_b = gpr.GetScopedReg();
-        const BitSet32 gpr_caller_save =
-            gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
-        WriteBranchWatch<true>(op.address, op.branchTo, op.inst, bw_reg_a, bw_reg_b,
-                               gpr_caller_save, fpr.GetCallerSavedUsed());
+        WriteBranchWatch<true>(op.address, op.branchTo, op.inst, gpr.GetCallerSavedUsed(),
+                               fpr.GetCallerSavedUsed());
       }
     }
     else
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index 7a46eb58fe..5b42dff3dd 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -334,11 +334,9 @@ protected:
   // Branch Watch
   template <bool condition>
   void WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst,
-                        Arm64Gen::ARM64Reg reg_a, Arm64Gen::ARM64Reg reg_b,
                         BitSet32 gpr_caller_save, BitSet32 fpr_caller_save);
   void WriteBranchWatchDestInRegister(u32 origin, Arm64Gen::ARM64Reg destination,
-                                      UGeckoInstruction inst, Arm64Gen::ARM64Reg reg_a,
-                                      Arm64Gen::ARM64Reg reg_b, BitSet32 gpr_caller_save,
+                                      UGeckoInstruction inst, BitSet32 gpr_caller_save,
                                       BitSet32 fpr_caller_save);
 
   // Exits
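In the AArch64 helpers rewritten below, the two scratch registers disappear entirely: `ABI_CallFunction` can pass `&m_branch_watch` as an ordinary first argument, and `X8` becomes the fixed temporary for pushing the FPR set. Under AAPCS64 that choice is safe because X0 through X7 carry the integer arguments while X8, the indirect-result register, is caller-saved and never used for argument passing. A compact reminder of those roles (plain AAPCS64 facts, not Dolphin code):

```cpp
// Standard AAPCS64 register roles relevant to the ARM64 rewrite below.
#include <cstdio>

int main()
{
  const char* roles[] = {
      "X0-X7  : integer/pointer arguments (X0 also the return value)",
      "X8     : indirect-result register; caller-saved, never an argument",
      "X9-X15 : caller-saved temporaries",
  };
  for (const char* role : roles)
    std::printf("%s\n", role);
}
```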
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp
index a9df905b10..191c423876 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Branch.cpp
@@ -75,67 +75,35 @@ void JitArm64::rfi(UGeckoInstruction inst)
 }
 
 template <bool condition>
-void JitArm64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, ARM64Reg reg_a,
-                                ARM64Reg reg_b, BitSet32 gpr_caller_save, BitSet32 fpr_caller_save)
+void JitArm64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst,
+                                BitSet32 gpr_caller_save, BitSet32 fpr_caller_save)
 {
-  const ARM64Reg branch_watch = EncodeRegTo64(reg_a);
-  MOVP2R(branch_watch, &m_branch_watch);
-  LDRB(IndexType::Unsigned, reg_b, branch_watch, Core::BranchWatch::GetOffsetOfRecordingActive());
-  FixupBranch branch_over = CBZ(reg_b);
-
-  FixupBranch branch_in = B();
-  SwitchToFarCode();
-  SetJumpTarget(branch_in);
-
-  const ARM64Reg float_emit_tmp = EncodeRegTo64(reg_b);
   ABI_PushRegisters(gpr_caller_save);
-  m_float_emit.ABI_PushRegisters(fpr_caller_save, float_emit_tmp);
+  m_float_emit.ABI_PushRegisters(fpr_caller_save, ARM64Reg::X8);
   ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
                                                      &Core::BranchWatch::HitVirtualFalse_fk) :
                                         (condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
                                                      &Core::BranchWatch::HitPhysicalFalse_fk),
-                   branch_watch, Core::FakeBranchWatchCollectionKey{origin, destination}, inst.hex);
-  m_float_emit.ABI_PopRegisters(fpr_caller_save, float_emit_tmp);
+                   &m_branch_watch, Core::FakeBranchWatchCollectionKey{origin, destination},
+                   inst.hex);
+  m_float_emit.ABI_PopRegisters(fpr_caller_save, ARM64Reg::X8);
   ABI_PopRegisters(gpr_caller_save);
-
-  FixupBranch branch_out = B();
-  SwitchToNearCode();
-  SetJumpTarget(branch_out);
-  SetJumpTarget(branch_over);
 }
 
-template void JitArm64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, ARM64Reg, ARM64Reg,
-                                               BitSet32, BitSet32);
-template void JitArm64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, ARM64Reg, ARM64Reg,
-                                                BitSet32, BitSet32);
+template void JitArm64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, BitSet32, BitSet32);
+template void JitArm64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, BitSet32, BitSet32);
 
 void JitArm64::WriteBranchWatchDestInRegister(u32 origin, ARM64Reg destination,
-                                              UGeckoInstruction inst, ARM64Reg reg_a,
-                                              ARM64Reg reg_b, BitSet32 gpr_caller_save,
+                                              UGeckoInstruction inst, BitSet32 gpr_caller_save,
                                               BitSet32 fpr_caller_save)
 {
-  const ARM64Reg branch_watch = EncodeRegTo64(reg_a);
-  MOVP2R(branch_watch, &m_branch_watch);
-  LDRB(IndexType::Unsigned, reg_b, branch_watch, Core::BranchWatch::GetOffsetOfRecordingActive());
-  FixupBranch branch_over = CBZ(reg_b);
-
-  FixupBranch branch_in = B();
-  SwitchToFarCode();
-  SetJumpTarget(branch_in);
-
-  const ARM64Reg float_emit_tmp = EncodeRegTo64(reg_b);
   ABI_PushRegisters(gpr_caller_save);
-  m_float_emit.ABI_PushRegisters(fpr_caller_save, float_emit_tmp);
+  m_float_emit.ABI_PushRegisters(fpr_caller_save, ARM64Reg::X8);
   ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
                                         &Core::BranchWatch::HitPhysicalTrue,
-                   branch_watch, origin, destination, inst.hex);
-  m_float_emit.ABI_PopRegisters(fpr_caller_save, float_emit_tmp);
+                   &m_branch_watch, origin, destination, inst.hex);
+  m_float_emit.ABI_PopRegisters(fpr_caller_save, ARM64Reg::X8);
   ABI_PopRegisters(gpr_caller_save);
-
-  FixupBranch branch_out = B();
-  SwitchToNearCode();
-  SetJumpTarget(branch_out);
-  SetJumpTarget(branch_over);
 }
 
 void JitArm64::bx(UGeckoInstruction inst)
@@ -153,13 +121,12 @@ void JitArm64::bx(UGeckoInstruction inst)
 
   if (!js.isLastInstruction)
   {
-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
-      const auto WB = gpr.GetScopedReg(), WC = gpr.GetScopedReg();
-      BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB), DecodeReg(WC)};
+      BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed();
       if (WA != ARM64Reg::INVALID_REG && js.op->skipLRStack)
         gpr_caller_save[DecodeReg(WA)] = false;
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, WB, WC, gpr_caller_save,
+      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, gpr_caller_save,
                              fpr.GetCallerSavedUsed());
     }
     if (inst.LK && !js.op->skipLRStack)
@@ -178,15 +145,14 @@ void JitArm64::bx(UGeckoInstruction inst)
 
   if (js.op->branchIsIdleLoop)
   {
+    if (IsBranchWatchEnabled())
+    {
+      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {}, {});
+    }
+
     if (WA == ARM64Reg::INVALID_REG)
       WA = gpr.GetScopedReg();
 
-    if (IsDebuggingEnabled())
-    {
-      const auto WB = gpr.GetScopedReg();
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, WA, WB, {}, {});
-    }
-
     // make idle loops go faster
     ARM64Reg XA = EncodeRegTo64(WA);
 
@@ -198,12 +164,11 @@ void JitArm64::bx(UGeckoInstruction inst)
     return;
   }
 
-  if (IsDebuggingEnabled())
+  if (IsBranchWatchEnabled())
   {
-    const auto WB = gpr.GetScopedReg(), WC = gpr.GetScopedReg();
     const BitSet32 gpr_caller_save =
         WA != ARM64Reg::INVALID_REG ? BitSet32{DecodeReg(WA)} & CALLER_SAVED_GPRS : BitSet32{};
-    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, WB, WC, gpr_caller_save, {});
+    WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, gpr_caller_save, {});
   }
   WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, WA);
 }
@@ -214,14 +179,11 @@ void JitArm64::bcx(UGeckoInstruction inst)
   JITDISABLE(bJITBranchOff);
 
   auto WA = gpr.GetScopedReg();
-  auto WB = inst.LK || IsDebuggingEnabled() ? gpr.GetScopedReg() :
-                                              Arm64GPRCache::ScopedARM64Reg(WA.GetReg());
+  // If WA isn't needed for WriteExit, it can be safely clobbered.
+  auto WB = (inst.LK && !js.op->branchIsIdleLoop) ? gpr.GetScopedReg() :
+                                                    Arm64GPRCache::ScopedARM64Reg(WA.GetReg());
 
   {
-    auto WC = IsDebuggingEnabled() && inst.LK && !js.op->branchIsIdleLoop ?
-                  gpr.GetScopedReg() :
-                  Arm64GPRCache::ScopedARM64Reg(ARM64Reg::INVALID_REG);
-
     FixupBranch pCTRDontBranch;
     if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)  // Decrement and test CTR
     {
@@ -252,18 +214,11 @@ void JitArm64::bcx(UGeckoInstruction inst)
     gpr.Flush(FlushMode::MaintainState, WB);
     fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
 
-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
-      ARM64Reg bw_reg_a, bw_reg_b;
-      // WC is only allocated when WA is needed for WriteExit and cannot be clobbered.
-      if (WC == ARM64Reg::INVALID_REG)
-        bw_reg_a = WA, bw_reg_b = WB;
-      else
-        bw_reg_a = WB, bw_reg_b = WC;
-      const BitSet32 gpr_caller_save =
-          gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
-      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, bw_reg_a, bw_reg_b,
-                             gpr_caller_save, fpr.GetCallerSavedUsed());
+      BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB)};
+      WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, gpr_caller_save,
+                             fpr.GetCallerSavedUsed());
     }
     if (js.op->branchIsIdleLoop)
     {
@@ -290,17 +245,15 @@ void JitArm64::bcx(UGeckoInstruction inst)
   {
     gpr.Flush(FlushMode::All, WA);
     fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
-      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, {}, {});
+      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {}, {});
     }
     WriteExit(js.compilerPC + 4);
   }
-  else if (IsDebuggingEnabled())
+  else if (IsBranchWatchEnabled())
   {
-    const BitSet32 gpr_caller_save =
-        gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WA), DecodeReg(WB)};
-    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, gpr_caller_save,
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, gpr.GetCallerSavedUsed(),
                             fpr.GetCallerSavedUsed());
   }
 }
@@ -338,14 +291,13 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
     LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR));
     AND(WA, WA, LogicalImm(~0x3, GPRSize::B32));
 
-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
-      const auto WC = gpr.GetScopedReg(), WD = gpr.GetScopedReg();
       BitSet32 gpr_caller_save = BitSet32{DecodeReg(WA)};
       if (WB != ARM64Reg::INVALID_REG)
         gpr_caller_save[DecodeReg(WB)] = true;
       gpr_caller_save &= CALLER_SAVED_GPRS;
-      WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, WC, WD, gpr_caller_save, {});
+      WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, gpr_caller_save, {});
     }
     WriteExit(WA, inst.LK_3, js.compilerPC + 4, WB);
   }
@@ -360,18 +312,10 @@ void JitArm64::bclrx(UGeckoInstruction inst)
   auto WA = gpr.GetScopedReg();
   Arm64GPRCache::ScopedARM64Reg WB;
-  if (conditional || inst.LK || IsDebuggingEnabled())
-  {
+  if (conditional || inst.LK)
     WB = gpr.GetScopedReg();
-  }
 
   {
-    Arm64GPRCache::ScopedARM64Reg WC;
-    if (IsDebuggingEnabled())
-    {
-      WC = gpr.GetScopedReg();
-    }
-
     FixupBranch pCTRDontBranch;
     if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0)  // Decrement and test CTR
     {
@@ -404,13 +348,13 @@ void JitArm64::bclrx(UGeckoInstruction inst)
     gpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, WB);
     fpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, ARM64Reg::INVALID_REG);
 
-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
       BitSet32 gpr_caller_save;
       BitSet32 fpr_caller_save;
       if (conditional)
       {
-        gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB), DecodeReg(WC)};
+        gpr_caller_save = gpr.GetCallerSavedUsed();
         if (js.op->branchIsIdleLoop)
           gpr_caller_save[DecodeReg(WA)] = false;
         fpr_caller_save = fpr.GetCallerSavedUsed();
@@ -421,8 +365,7 @@ void JitArm64::bclrx(UGeckoInstruction inst)
             js.op->branchIsIdleLoop ? BitSet32{} : BitSet32{DecodeReg(WA)} & CALLER_SAVED_GPRS;
         fpr_caller_save = {};
       }
-      WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, WB, WC, gpr_caller_save,
-                                     fpr_caller_save);
+      WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, gpr_caller_save, fpr_caller_save);
     }
     if (js.op->branchIsIdleLoop)
     {
@@ -449,17 +392,16 @@ void JitArm64::bclrx(UGeckoInstruction inst)
   {
     gpr.Flush(FlushMode::All, WA);
     fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
-    if (IsDebuggingEnabled())
+    if (IsBranchWatchEnabled())
     {
-      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, {}, {});
+      WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {}, {});
     }
     WriteExit(js.compilerPC + 4);
   }
-  else if (IsDebuggingEnabled())
+  else if (IsBranchWatchEnabled())
   {
-    const BitSet32 gpr_caller_save =
-        gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WA), DecodeReg(WB)};
-    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, gpr_caller_save,
+    const BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WA)};
+    WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, gpr_caller_save,
                             fpr.GetCallerSavedUsed());
   }
 }
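A recurring pattern in the rewritten call sites above is mask bookkeeping: a register that is dead across the call, such as `WB` in `bcx` right after a flush, is removed from the caller-saved set before it is pushed. A trimmed-down sketch of that bookkeeping, with a stand-in for `Common/BitSet.h`'s `BitSet32`:

```cpp
// Minimal stand-in for Dolphin's BitSet32 to show the caller-save masking.
#include <cstdint>
#include <cstdio>

struct BitSet32
{
  uint32_t bits = 0;
  BitSet32 operator&(BitSet32 rhs) const { return BitSet32{bits & rhs.bits}; }
  BitSet32 operator|(BitSet32 rhs) const { return BitSet32{bits | rhs.bits}; }
  BitSet32 operator~() const { return BitSet32{~bits}; }
};

static BitSet32 Bit(int n)
{
  return BitSet32{1u << n};
}

int main()
{
  const int WB = 1;                           // hypothetical register index
  BitSet32 live = Bit(0) | Bit(1) | Bit(19);  // what the register cache reports in use
  BitSet32 to_push = live & ~Bit(WB);         // WB is dead across the call, skip saving it
  std::printf("push mask: %08x\n", static_cast<unsigned>(to_push.bits));  // 00080001
}
```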
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 2da5d113aa..bcab1ede76 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -826,36 +826,21 @@ void JitArm64::dcbx(UGeckoInstruction inst)
   // Load the loop_counter register with the amount of invalidations to execute.
   ADD(loop_counter, WA, 1);
 
-  if (IsDebuggingEnabled())
+  if (IsBranchWatchEnabled())
   {
-    const ARM64Reg branch_watch = EncodeRegTo64(reg_cycle_count);
-    MOVP2R(branch_watch, &m_branch_watch);
-    LDRB(IndexType::Unsigned, WB, branch_watch, Core::BranchWatch::GetOffsetOfRecordingActive());
-    FixupBranch branch_over = CBZ(WB);
-
-    FixupBranch branch_in = B();
-    SwitchToFarCode();
-    SetJumpTarget(branch_in);
-
     const BitSet32 gpr_caller_save =
         gpr.GetCallerSavedUsed() &
         ~BitSet32{DecodeReg(WB), DecodeReg(reg_cycle_count), DecodeReg(reg_downcount)};
     ABI_PushRegisters(gpr_caller_save);
-    const ARM64Reg float_emit_tmp = EncodeRegTo64(WB);
     const BitSet32 fpr_caller_save = fpr.GetCallerSavedUsed();
-    m_float_emit.ABI_PushRegisters(fpr_caller_save, float_emit_tmp);
+    m_float_emit.ABI_PushRegisters(fpr_caller_save, ARM64Reg::X8);
     const PPCAnalyst::CodeOp& op = js.op[2];
     ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue_fk_n :
                                           &Core::BranchWatch::HitPhysicalTrue_fk_n,
-                     branch_watch, Core::FakeBranchWatchCollectionKey{op.address, op.branchTo},
+                     &m_branch_watch, Core::FakeBranchWatchCollectionKey{op.address, op.branchTo},
                      op.inst.hex, WA);
-    m_float_emit.ABI_PopRegisters(fpr_caller_save, float_emit_tmp);
+    m_float_emit.ABI_PopRegisters(fpr_caller_save, ARM64Reg::X8);
     ABI_PopRegisters(gpr_caller_save);
-
-    FixupBranch branch_out = B();
-    SwitchToNearCode();
-    SetJumpTarget(branch_out);
-    SetJumpTarget(branch_over);
   }
 }
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
index db615ce7e1..2f366ad07f 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
@@ -23,6 +23,7 @@
 #include "Core/PowerPC/JitCommon/JitAsmCommon.h"
 #include "Core/PowerPC/JitCommon/JitCache.h"
 #include "Core/PowerPC/PPCAnalyst.h"
+#include "Core/PowerPC/PowerPC.h"
 
 namespace Core
 {
@@ -200,6 +201,11 @@ public:
   bool IsProfilingEnabled() const { return m_enable_profiling && m_enable_debugging; }
   bool IsDebuggingEnabled() const { return m_enable_debugging; }
+  bool IsBranchWatchEnabled() const
+  {
+    auto& branch_watch = m_system.GetPowerPC().GetBranchWatch();
+    return branch_watch.GetRecordingActive();
+  }
 
   static const u8* Dispatch(JitBase& jit);
 
   virtual JitBaseBlockCache* GetBlockCache() = 0;
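`IsBranchWatchEnabled()` is queried while a block is being compiled, not while it runs, so the scheme is only sound together with the `ClearCache` call added to `SetRecordingActive` in the first hunk: no block compiled under the old recording state may survive a toggle. A minimal end-to-end sketch of that invariant, with simplified stand-ins for the Dolphin classes:

```cpp
// Simplified stand-ins for JitBase/BranchWatch/JitInterface; not Dolphin code.
#include <cassert>
#include <cstdio>
#include <optional>

struct Block
{
  bool has_branch_watch_calls;
};

struct Jit
{
  bool recording = false;
  std::optional<Block> cached;

  Block& Compile()
  {
    if (!cached)
      cached = Block{recording};  // decision baked in at compile time
    return *cached;
  }

  void SetRecordingActive(bool active)
  {
    recording = active;
    cached.reset();  // ClearCache(): stale decisions may not survive a toggle
  }
};

int main()
{
  Jit jit;
  assert(!jit.Compile().has_branch_watch_calls);
  jit.SetRecordingActive(true);
  assert(jit.Compile().has_branch_watch_calls);  // recompiled with calls
  std::printf("invariant holds\n");
}
```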