Merge pull request #13991 from SuperSamus/branch-watch-optional-emit

Jit: Emit Branch Watch code only if it's enabled
Authored by JosJuice on 2026-02-15 12:07:35 +01:00; committed by GitHub
commit 5aacc1ca7c
11 changed files with 130 additions and 332 deletions
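
In short: previously, every compiled branch loaded Core::BranchWatch::m_recording_active and tested it at runtime (the GetOffsetOfRecordingActive()/far-code sequences removed below); now the JIT consults the new IsBranchWatchEnabled() helper while compiling, emits the branch-watch call only when recording is active, and SetRecordingActive() clears the JIT cache so existing blocks are recompiled under the new setting. A minimal sketch of that pattern follows — the BranchWatch and Jit types here are hypothetical stand-ins for illustration, not Dolphin's real classes:

#include <cstdio>

// Hypothetical stand-in for Core::BranchWatch.
struct BranchWatch
{
  bool recording_active = false;
};

// Hypothetical stand-in for JitBase and its subclasses.
struct Jit
{
  BranchWatch branch_watch;

  bool IsBranchWatchEnabled() const { return branch_watch.recording_active; }

  // The decision now happens once, at JIT-compile time, instead of being
  // baked into the emitted machine code as a per-execution runtime test.
  void CompileBranch()
  {
    if (IsBranchWatchEnabled())
      std::puts("emit call to BranchWatch::Hit*()");  // slow path emitted
    else
      std::puts("emit no branch-watch code");  // zero overhead when off
  }

  // Toggling recording invalidates blocks compiled under the old decision,
  // so they are rebuilt with the new one.
  void SetRecordingActive(bool active)
  {
    branch_watch.recording_active = active;
    ClearCache();
  }

  void ClearCache() { std::puts("JIT cache cleared"); }
};

int main()
{
  Jit jit;
  jit.CompileBranch();           // prints: emit no branch-watch code
  jit.SetRecordingActive(true);  // prints: JIT cache cleared
  jit.CompileBranch();           // prints: emit call to BranchWatch::Hit*()
}

The trade-off is visible in the diff stats above: blocks compiled while recording is off carry no branch-watch code at all, at the cost of a full cache flush whenever recording is toggled.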


@@ -12,7 +12,10 @@
#include <vector>
#include "Common/CommonTypes.h"
#include "Core/Core.h"
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/System.h"
namespace Core
{
@@ -110,7 +113,7 @@ enum class BranchWatchPhase : bool
Reduction,
};
class BranchWatch final // Class is final to enforce the safety of GetOffsetOfRecordingActive().
class BranchWatch final
{
public:
using Collection = BranchWatchCollection;
@@ -119,7 +122,12 @@ public:
using SelectionInspection = BranchWatchSelectionInspection;
bool GetRecordingActive() const { return m_recording_active; }
void SetRecordingActive(const CPUThreadGuard& guard, bool active) { m_recording_active = active; }
void SetRecordingActive(const CPUThreadGuard& guard, bool active)
{
m_recording_active = active;
auto& system = guard.GetSystem();
system.GetJitInterface().ClearCache(guard);
}
void Clear(const CPUThreadGuard& guard);
void Save(const CPUThreadGuard& guard, std::FILE* file) const;
@@ -226,19 +234,6 @@ public:
HitPhysicalFalse(this, origin, destination, inst.hex);
}
// The JIT needs this value, but doesn't need to be a full-on friend.
static constexpr int GetOffsetOfRecordingActive()
{
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winvalid-offsetof"
#endif
return offsetof(BranchWatch, m_recording_active);
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
}
private:
Collection& GetCollectionV(bool condition)
{
@@ -273,8 +268,4 @@ private:
Collection m_collection_pf; // physical address space | false path
Selection m_selection;
};
#if _M_X86_64
static_assert(BranchWatch::GetOffsetOfRecordingActive() < 0x80); // Makes JIT code smaller.
#endif
} // namespace Core


@@ -1067,13 +1067,12 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
if (op.skip)
{
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
// The only thing that currently sets op.skip is the BLR following optimization.
// If any non-branch instruction starts setting that too, this will need to be changed.
ASSERT(op.inst.hex == 0x4e800020);
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, CallerSavedRegistersInUse());
}
}
else


@@ -110,10 +110,8 @@ public:
void WriteRfiExitDestInRSCRATCH();
void WriteIdleExit(u32 destination);
template <bool condition>
void WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, Gen::X64Reg reg_a,
Gen::X64Reg reg_b, BitSet32 caller_save);
void WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, Gen::X64Reg reg_a,
Gen::X64Reg reg_b, BitSet32 caller_save);
void WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, BitSet32 caller_save);
void WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, BitSet32 caller_save);
bool Cleanup();


@@ -67,65 +67,42 @@ void Jit64::rfi(UGeckoInstruction inst)
}
template <bool condition>
void Jit64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, X64Reg reg_a,
X64Reg reg_b, BitSet32 caller_save)
void Jit64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst,
BitSet32 caller_save)
{
MOV(64, R(reg_a), ImmPtr(&m_branch_watch));
MOVZX(32, 8, reg_b, MDisp(reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
TEST(32, R(reg_b), R(reg_b));
FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
SwitchToFarCode();
SetJumpTarget(branch_in);
ABI_PushRegistersAndAdjustStack(caller_save, 0);
// Some call sites have an optimization to use ABI_PARAM1 as a scratch register.
if (reg_a != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg_a));
MOV(64, R(ABI_PARAM2), Imm64(Core::FakeBranchWatchCollectionKey{origin, destination}));
MOV(32, R(ABI_PARAM3), Imm32(inst.hex));
ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
&Core::BranchWatch::HitVirtualFalse_fk) :
(condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
&Core::BranchWatch::HitPhysicalFalse_fk));
ABI_PopRegistersAndAdjustStack(caller_save, 0);
FixupBranch branch_out = J(Jump::Near);
SwitchToNearCode();
SetJumpTarget(branch_out);
if (IsBranchWatchEnabled())
{
ABI_PushRegistersAndAdjustStack(caller_save, 0);
MOV(64, R(ABI_PARAM1), ImmPtr(&m_branch_watch));
MOV(64, R(ABI_PARAM2), Imm64(Core::FakeBranchWatchCollectionKey{origin, destination}));
MOV(32, R(ABI_PARAM3), Imm32(inst.hex));
ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
&Core::BranchWatch::HitVirtualFalse_fk) :
(condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
&Core::BranchWatch::HitPhysicalFalse_fk));
ABI_PopRegistersAndAdjustStack(caller_save, 0);
}
}
template void Jit64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, X64Reg, X64Reg, BitSet32);
template void Jit64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, X64Reg, X64Reg, BitSet32);
template void Jit64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, BitSet32);
template void Jit64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, BitSet32);
void Jit64::WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, X64Reg reg_a,
X64Reg reg_b, BitSet32 caller_save)
void Jit64::WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, BitSet32 caller_save)
{
MOV(64, R(reg_a), ImmPtr(&m_branch_watch));
MOVZX(32, 8, reg_b, MDisp(reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
TEST(32, R(reg_b), R(reg_b));
if (IsBranchWatchEnabled())
{
// Assert RSCRATCH won't be clobbered before it is moved from.
static_assert(ABI_PARAM1 != RSCRATCH);
FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
SwitchToFarCode();
SetJumpTarget(branch_in);
// Assert RSCRATCH won't be clobbered before it is moved from.
static_assert(ABI_PARAM1 != RSCRATCH);
ABI_PushRegistersAndAdjustStack(caller_save, 0);
// Some call sites have an optimization to use ABI_PARAM1 as a scratch register.
if (reg_a != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg_a));
MOV(32, R(ABI_PARAM3), R(RSCRATCH));
MOV(32, R(ABI_PARAM2), Imm32(origin));
MOV(32, R(ABI_PARAM4), Imm32(inst.hex));
ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
&Core::BranchWatch::HitPhysicalTrue);
ABI_PopRegistersAndAdjustStack(caller_save, 0);
FixupBranch branch_out = J(Jump::Near);
SwitchToNearCode();
SetJumpTarget(branch_out);
ABI_PushRegistersAndAdjustStack(caller_save, 0);
MOV(64, R(ABI_PARAM1), ImmPtr(&m_branch_watch));
MOV(32, R(ABI_PARAM3), R(RSCRATCH));
MOV(32, R(ABI_PARAM2), Imm32(origin));
MOV(32, R(ABI_PARAM4), Imm32(inst.hex));
ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
&Core::BranchWatch::HitPhysicalTrue);
ABI_PopRegistersAndAdjustStack(caller_save, 0);
}
}
void Jit64::bx(UGeckoInstruction inst)
@@ -143,11 +120,7 @@ void Jit64::bx(UGeckoInstruction inst)
// Because PPCAnalyst::Flatten() merged the blocks.
if (!js.isLastInstruction)
{
if (IsDebuggingEnabled())
{
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, CallerSavedRegistersInUse());
if (inst.LK && !js.op->skipLRStack)
{
// We have to fake the stack as the RET instruction was not
@@ -161,11 +134,7 @@ void Jit64::bx(UGeckoInstruction inst)
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
}
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {});
#ifdef ACID_TEST
if (inst.LK)
AND(32, PPCSTATE(cr), Imm32(~(0xFF000000)));
@@ -216,11 +185,7 @@ void Jit64::bcx(UGeckoInstruction inst)
if (!js.isLastInstruction && (inst.BO & BO_DONT_DECREMENT_FLAG) &&
(inst.BO & BO_DONT_CHECK_CONDITION))
{
if (IsDebuggingEnabled())
{
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, CallerSavedRegistersInUse());
if (inst.LK && !js.op->skipLRStack)
{
// We have to fake the stack as the RET instruction was not
@@ -237,11 +202,7 @@ void Jit64::bcx(UGeckoInstruction inst)
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
}
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {});
if (js.op->branchIsIdleLoop)
{
WriteIdleExit(js.op->branchTo);
@@ -261,18 +222,10 @@ void Jit64::bcx(UGeckoInstruction inst)
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
}
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {});
WriteExit(js.compilerPC + 4);
}
else if (IsDebuggingEnabled())
{
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, CallerSavedRegistersInUse());
}
void Jit64::bcctrx(UGeckoInstruction inst)
@@ -296,12 +249,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
if (inst.LK_3)
MOV(32, PPCSTATE_LR, Imm32(js.compilerPC + 4)); // LR = PC + 4;
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
BitSet32{RSCRATCH});
}
WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, BitSet32{RSCRATCH});
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
}
else
@@ -324,12 +272,7 @@ void Jit64::bcctrx(UGeckoInstruction inst)
RCForkGuard fpr_guard = fpr.Fork();
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
BitSet32{RSCRATCH});
}
WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, BitSet32{RSCRATCH});
WriteExitDestInRSCRATCH(inst.LK_3, js.compilerPC + 4);
// Would really like to continue the block here, but it ends. TODO.
}
@@ -339,18 +282,10 @@ void Jit64::bcctrx(UGeckoInstruction inst)
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
}
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {});
WriteExit(js.compilerPC + 4);
}
else if (IsDebuggingEnabled())
{
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
}
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, CallerSavedRegistersInUse());
}
}
@@ -399,21 +334,12 @@ void Jit64::bclrx(UGeckoInstruction inst)
if (js.op->branchIsIdleLoop)
{
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, ABI_PARAM1, RSCRATCH, {});
}
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {});
WriteIdleExit(js.op->branchTo);
}
else
{
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, ABI_PARAM1, RSCRATCH2,
BitSet32{RSCRATCH});
}
WriteBranchWatchDestInRSCRATCH(js.compilerPC, inst, BitSet32{RSCRATCH});
WriteBLRExit();
}
}
@@ -427,16 +353,11 @@ void Jit64::bclrx(UGeckoInstruction inst)
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, ABI_PARAM1, RSCRATCH, {});
}
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {});
WriteExit(js.compilerPC + 4);
}
else if (IsDebuggingEnabled())
else
{
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, CallerSavedRegistersInUse());
}
}


@@ -387,11 +387,7 @@ void Jit64::DoMergedBranch()
MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
const u32 destination = js.op[1].branchTo;
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<true>(nextPC, destination, next, ABI_PARAM1, RSCRATCH, {});
}
WriteBranchWatch<true>(nextPC, destination, next, {});
WriteIdleExit(destination);
}
else if (next.OPCD == 16) // bcx
@@ -400,11 +396,7 @@ void Jit64::DoMergedBranch()
MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
const u32 destination = js.op[1].branchTo;
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<true>(nextPC, destination, next, ABI_PARAM1, RSCRATCH, {});
}
WriteBranchWatch<true>(nextPC, destination, next, {});
WriteExit(destination, next.LK, nextPC + 4);
}
else if ((next.OPCD == 19) && (next.SUBOP10 == 528)) // bcctrx
@@ -413,11 +405,7 @@ void Jit64::DoMergedBranch()
MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
MOV(32, R(RSCRATCH), PPCSTATE_SPR(SPR_CTR));
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatchDestInRSCRATCH(nextPC, next, ABI_PARAM1, RSCRATCH2, BitSet32{RSCRATCH});
}
WriteBranchWatchDestInRSCRATCH(nextPC, next, BitSet32{RSCRATCH});
WriteExitDestInRSCRATCH(next.LK, nextPC + 4);
}
else if ((next.OPCD == 19) && (next.SUBOP10 == 16)) // bclrx
@@ -427,11 +415,7 @@ void Jit64::DoMergedBranch()
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
if (next.LK)
MOV(32, PPCSTATE_SPR(SPR_LR), Imm32(nextPC + 4));
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatchDestInRSCRATCH(nextPC, next, ABI_PARAM1, RSCRATCH2, BitSet32{RSCRATCH});
}
WriteBranchWatchDestInRSCRATCH(nextPC, next, BitSet32{RSCRATCH});
WriteBLRExit();
}
else
@@ -488,17 +472,12 @@ void Jit64::DoMergedBranchCondition()
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<false>(nextPC, nextPC + 4, next, ABI_PARAM1, RSCRATCH, {});
}
WriteBranchWatch<false>(nextPC, nextPC + 4, next, {});
WriteExit(nextPC + 4);
}
else if (IsDebuggingEnabled())
else
{
WriteBranchWatch<false>(nextPC, nextPC + 4, next, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
WriteBranchWatch<false>(nextPC, nextPC + 4, next, CallerSavedRegistersInUse());
}
}
@@ -540,17 +519,12 @@ void Jit64::DoMergedBranchImmediate(s64 val)
{
gpr.Flush();
fpr.Flush();
if (IsDebuggingEnabled())
{
// ABI_PARAM1 is safe to use after a GPR flush for an optimization in this function.
WriteBranchWatch<false>(nextPC, nextPC + 4, next, ABI_PARAM1, RSCRATCH, {});
}
WriteBranchWatch<false>(nextPC, nextPC + 4, next, {});
WriteExit(nextPC + 4);
}
else if (IsDebuggingEnabled())
else
{
WriteBranchWatch<false>(nextPC, nextPC + 4, next, RSCRATCH, RSCRATCH2,
CallerSavedRegistersInUse());
WriteBranchWatch<false>(nextPC, nextPC + 4, next, CallerSavedRegistersInUse());
}
}


@@ -305,25 +305,16 @@ void Jit64::dcbx(UGeckoInstruction inst)
// Load the loop_counter register with the amount of invalidations to execute.
LEA(32, loop_counter, MDisp(RSCRATCH2, 1));
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
const X64Reg bw_reg_a = reg_cycle_count, bw_reg_b = reg_downcount;
const BitSet32 bw_caller_save = (CallerSavedRegistersInUse() | BitSet32{RSCRATCH2}) &
~BitSet32{int(bw_reg_a), int(bw_reg_b)};
MOV(64, R(bw_reg_a), ImmPtr(&m_branch_watch));
MOVZX(32, 8, bw_reg_b, MDisp(bw_reg_a, Core::BranchWatch::GetOffsetOfRecordingActive()));
TEST(32, R(bw_reg_b), R(bw_reg_b));
FixupBranch branch_in = J_CC(CC_NZ, Jump::Near);
SwitchToFarCode();
SetJumpTarget(branch_in);
~BitSet32{int(reg_cycle_count), int(reg_downcount)};
// Assert RSCRATCH2 won't be clobbered before it is moved from.
static_assert(RSCRATCH2 != ABI_PARAM1);
ABI_PushRegistersAndAdjustStack(bw_caller_save, 0);
MOV(64, R(ABI_PARAM1), R(bw_reg_a));
MOV(64, R(ABI_PARAM1), ImmPtr(&m_branch_watch));
// RSCRATCH2 holds the amount of faked branch watch hits. Move RSCRATCH2 first, because
// ABI_PARAM2 clobbers RSCRATCH2 on Windows and ABI_PARAM3 clobbers RSCRATCH2 on Linux!
MOV(32, R(ABI_PARAM4), R(RSCRATCH2));
@@ -333,10 +324,6 @@ void Jit64::dcbx(UGeckoInstruction inst)
ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue_fk_n :
&Core::BranchWatch::HitPhysicalTrue_fk_n);
ABI_PopRegistersAndAdjustStack(bw_caller_save, 0);
FixupBranch branch_out = J(Jump::Near);
SwitchToNearCode();
SetJumpTarget(branch_out);
}
}


@@ -1296,16 +1296,13 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
if (op.skip)
{
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
// The only thing that currently sets op.skip is the BLR following optimization.
// If any non-branch instruction starts setting that too, this will need to be changed.
ASSERT(op.inst.hex == 0x4e800020);
const auto bw_reg_a = gpr.GetScopedReg(), bw_reg_b = gpr.GetScopedReg();
const BitSet32 gpr_caller_save =
gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, bw_reg_a, bw_reg_b,
gpr_caller_save, fpr.GetCallerSavedUsed());
WriteBranchWatch<true>(op.address, op.branchTo, op.inst, gpr.GetCallerSavedUsed(),
fpr.GetCallerSavedUsed());
}
}
else


@@ -334,11 +334,9 @@ protected:
// Branch Watch
template <bool condition>
void WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst,
Arm64Gen::ARM64Reg reg_a, Arm64Gen::ARM64Reg reg_b,
BitSet32 gpr_caller_save, BitSet32 fpr_caller_save);
void WriteBranchWatchDestInRegister(u32 origin, Arm64Gen::ARM64Reg destination,
UGeckoInstruction inst, Arm64Gen::ARM64Reg reg_a,
Arm64Gen::ARM64Reg reg_b, BitSet32 gpr_caller_save,
UGeckoInstruction inst, BitSet32 gpr_caller_save,
BitSet32 fpr_caller_save);
// Exits


@@ -75,67 +75,35 @@ void JitArm64::rfi(UGeckoInstruction inst)
}
template <bool condition>
void JitArm64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst, ARM64Reg reg_a,
ARM64Reg reg_b, BitSet32 gpr_caller_save, BitSet32 fpr_caller_save)
void JitArm64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst,
BitSet32 gpr_caller_save, BitSet32 fpr_caller_save)
{
const ARM64Reg branch_watch = EncodeRegTo64(reg_a);
MOVP2R(branch_watch, &m_branch_watch);
LDRB(IndexType::Unsigned, reg_b, branch_watch, Core::BranchWatch::GetOffsetOfRecordingActive());
FixupBranch branch_over = CBZ(reg_b);
FixupBranch branch_in = B();
SwitchToFarCode();
SetJumpTarget(branch_in);
const ARM64Reg float_emit_tmp = EncodeRegTo64(reg_b);
ABI_PushRegisters(gpr_caller_save);
m_float_emit.ABI_PushRegisters(fpr_caller_save, float_emit_tmp);
m_float_emit.ABI_PushRegisters(fpr_caller_save, ARM64Reg::X8);
ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
&Core::BranchWatch::HitVirtualFalse_fk) :
(condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
&Core::BranchWatch::HitPhysicalFalse_fk),
branch_watch, Core::FakeBranchWatchCollectionKey{origin, destination}, inst.hex);
m_float_emit.ABI_PopRegisters(fpr_caller_save, float_emit_tmp);
&m_branch_watch, Core::FakeBranchWatchCollectionKey{origin, destination},
inst.hex);
m_float_emit.ABI_PopRegisters(fpr_caller_save, ARM64Reg::X8);
ABI_PopRegisters(gpr_caller_save);
FixupBranch branch_out = B();
SwitchToNearCode();
SetJumpTarget(branch_out);
SetJumpTarget(branch_over);
}
template void JitArm64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, ARM64Reg, ARM64Reg,
BitSet32, BitSet32);
template void JitArm64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, ARM64Reg, ARM64Reg,
BitSet32, BitSet32);
template void JitArm64::WriteBranchWatch<true>(u32, u32, UGeckoInstruction, BitSet32, BitSet32);
template void JitArm64::WriteBranchWatch<false>(u32, u32, UGeckoInstruction, BitSet32, BitSet32);
void JitArm64::WriteBranchWatchDestInRegister(u32 origin, ARM64Reg destination,
UGeckoInstruction inst, ARM64Reg reg_a,
ARM64Reg reg_b, BitSet32 gpr_caller_save,
UGeckoInstruction inst, BitSet32 gpr_caller_save,
BitSet32 fpr_caller_save)
{
const ARM64Reg branch_watch = EncodeRegTo64(reg_a);
MOVP2R(branch_watch, &m_branch_watch);
LDRB(IndexType::Unsigned, reg_b, branch_watch, Core::BranchWatch::GetOffsetOfRecordingActive());
FixupBranch branch_over = CBZ(reg_b);
FixupBranch branch_in = B();
SwitchToFarCode();
SetJumpTarget(branch_in);
const ARM64Reg float_emit_tmp = EncodeRegTo64(reg_b);
ABI_PushRegisters(gpr_caller_save);
m_float_emit.ABI_PushRegisters(fpr_caller_save, float_emit_tmp);
m_float_emit.ABI_PushRegisters(fpr_caller_save, ARM64Reg::X8);
ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
&Core::BranchWatch::HitPhysicalTrue,
branch_watch, origin, destination, inst.hex);
m_float_emit.ABI_PopRegisters(fpr_caller_save, float_emit_tmp);
&m_branch_watch, origin, destination, inst.hex);
m_float_emit.ABI_PopRegisters(fpr_caller_save, ARM64Reg::X8);
ABI_PopRegisters(gpr_caller_save);
FixupBranch branch_out = B();
SwitchToNearCode();
SetJumpTarget(branch_out);
SetJumpTarget(branch_over);
}
void JitArm64::bx(UGeckoInstruction inst)
@@ -153,13 +121,12 @@ void JitArm64::bx(UGeckoInstruction inst)
if (!js.isLastInstruction)
{
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
const auto WB = gpr.GetScopedReg(), WC = gpr.GetScopedReg();
BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB), DecodeReg(WC)};
BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed();
if (WA != ARM64Reg::INVALID_REG && js.op->skipLRStack)
gpr_caller_save[DecodeReg(WA)] = false;
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, WB, WC, gpr_caller_save,
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, gpr_caller_save,
fpr.GetCallerSavedUsed());
}
if (inst.LK && !js.op->skipLRStack)
@@ -178,15 +145,14 @@ void JitArm64::bx(UGeckoInstruction inst)
if (js.op->branchIsIdleLoop)
{
if (IsBranchWatchEnabled())
{
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, {}, {});
}
if (WA == ARM64Reg::INVALID_REG)
WA = gpr.GetScopedReg();
if (IsDebuggingEnabled())
{
const auto WB = gpr.GetScopedReg();
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, WA, WB, {}, {});
}
// make idle loops go faster
ARM64Reg XA = EncodeRegTo64(WA);
@@ -198,12 +164,11 @@ void JitArm64::bx(UGeckoInstruction inst)
return;
}
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
const auto WB = gpr.GetScopedReg(), WC = gpr.GetScopedReg();
const BitSet32 gpr_caller_save =
WA != ARM64Reg::INVALID_REG ? BitSet32{DecodeReg(WA)} & CALLER_SAVED_GPRS : BitSet32{};
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, WB, WC, gpr_caller_save, {});
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, gpr_caller_save, {});
}
WriteExit(js.op->branchTo, inst.LK, js.compilerPC + 4, WA);
}
@@ -214,14 +179,11 @@ void JitArm64::bcx(UGeckoInstruction inst)
JITDISABLE(bJITBranchOff);
auto WA = gpr.GetScopedReg();
auto WB = inst.LK || IsDebuggingEnabled() ? gpr.GetScopedReg() :
Arm64GPRCache::ScopedARM64Reg(WA.GetReg());
// If WA isn't needed for WriteExit, it can be safely clobbered.
auto WB = (inst.LK && !js.op->branchIsIdleLoop) ? gpr.GetScopedReg() :
Arm64GPRCache::ScopedARM64Reg(WA.GetReg());
{
auto WC = IsDebuggingEnabled() && inst.LK && !js.op->branchIsIdleLoop ?
gpr.GetScopedReg() :
Arm64GPRCache::ScopedARM64Reg(ARM64Reg::INVALID_REG);
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{
@@ -252,18 +214,11 @@ void JitArm64::bcx(UGeckoInstruction inst)
gpr.Flush(FlushMode::MaintainState, WB);
fpr.Flush(FlushMode::MaintainState, ARM64Reg::INVALID_REG);
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
ARM64Reg bw_reg_a, bw_reg_b;
// WC is only allocated when WA is needed for WriteExit and cannot be clobbered.
if (WC == ARM64Reg::INVALID_REG)
bw_reg_a = WA, bw_reg_b = WB;
else
bw_reg_a = WB, bw_reg_b = WC;
const BitSet32 gpr_caller_save =
gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(bw_reg_a), DecodeReg(bw_reg_b)};
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, bw_reg_a, bw_reg_b,
gpr_caller_save, fpr.GetCallerSavedUsed());
BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB)};
WriteBranchWatch<true>(js.compilerPC, js.op->branchTo, inst, gpr_caller_save,
fpr.GetCallerSavedUsed());
}
if (js.op->branchIsIdleLoop)
{
@@ -290,17 +245,15 @@ void JitArm64::bcx(UGeckoInstruction inst)
{
gpr.Flush(FlushMode::All, WA);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, {}, {});
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {}, {});
}
WriteExit(js.compilerPC + 4);
}
else if (IsDebuggingEnabled())
else if (IsBranchWatchEnabled())
{
const BitSet32 gpr_caller_save =
gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WA), DecodeReg(WB)};
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, gpr_caller_save,
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, gpr.GetCallerSavedUsed(),
fpr.GetCallerSavedUsed());
}
}
@@ -338,14 +291,13 @@ void JitArm64::bcctrx(UGeckoInstruction inst)
LDR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR));
AND(WA, WA, LogicalImm(~0x3, GPRSize::B32));
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
const auto WC = gpr.GetScopedReg(), WD = gpr.GetScopedReg();
BitSet32 gpr_caller_save = BitSet32{DecodeReg(WA)};
if (WB != ARM64Reg::INVALID_REG)
gpr_caller_save[DecodeReg(WB)] = true;
gpr_caller_save &= CALLER_SAVED_GPRS;
WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, WC, WD, gpr_caller_save, {});
WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, gpr_caller_save, {});
}
WriteExit(WA, inst.LK_3, js.compilerPC + 4, WB);
}
@@ -360,18 +312,10 @@ void JitArm64::bclrx(UGeckoInstruction inst)
auto WA = gpr.GetScopedReg();
Arm64GPRCache::ScopedARM64Reg WB;
if (conditional || inst.LK || IsDebuggingEnabled())
{
if (conditional || inst.LK)
WB = gpr.GetScopedReg();
}
{
Arm64GPRCache::ScopedARM64Reg WC;
if (IsDebuggingEnabled())
{
WC = gpr.GetScopedReg();
}
FixupBranch pCTRDontBranch;
if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR
{
@@ -404,13 +348,13 @@ void JitArm64::bclrx(UGeckoInstruction inst)
gpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, WB);
fpr.Flush(conditional ? FlushMode::MaintainState : FlushMode::All, ARM64Reg::INVALID_REG);
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
BitSet32 gpr_caller_save;
BitSet32 fpr_caller_save;
if (conditional)
{
gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WB), DecodeReg(WC)};
gpr_caller_save = gpr.GetCallerSavedUsed();
if (js.op->branchIsIdleLoop)
gpr_caller_save[DecodeReg(WA)] = false;
fpr_caller_save = fpr.GetCallerSavedUsed();
@@ -421,8 +365,7 @@ void JitArm64::bclrx(UGeckoInstruction inst)
js.op->branchIsIdleLoop ? BitSet32{} : BitSet32{DecodeReg(WA)} & CALLER_SAVED_GPRS;
fpr_caller_save = {};
}
WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, WB, WC, gpr_caller_save,
fpr_caller_save);
WriteBranchWatchDestInRegister(js.compilerPC, WA, inst, gpr_caller_save, fpr_caller_save);
}
if (js.op->branchIsIdleLoop)
{
@@ -449,17 +392,16 @@ void JitArm64::bclrx(UGeckoInstruction inst)
{
gpr.Flush(FlushMode::All, WA);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, {}, {});
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, {}, {});
}
WriteExit(js.compilerPC + 4);
}
else if (IsDebuggingEnabled())
else if (IsBranchWatchEnabled())
{
const BitSet32 gpr_caller_save =
gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WA), DecodeReg(WB)};
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, WA, WB, gpr_caller_save,
const BitSet32 gpr_caller_save = gpr.GetCallerSavedUsed() & ~BitSet32{DecodeReg(WA)};
WriteBranchWatch<false>(js.compilerPC, js.compilerPC + 4, inst, gpr_caller_save,
fpr.GetCallerSavedUsed());
}
}


@@ -826,36 +826,21 @@ void JitArm64::dcbx(UGeckoInstruction inst)
// Load the loop_counter register with the amount of invalidations to execute.
ADD(loop_counter, WA, 1);
if (IsDebuggingEnabled())
if (IsBranchWatchEnabled())
{
const ARM64Reg branch_watch = EncodeRegTo64(reg_cycle_count);
MOVP2R(branch_watch, &m_branch_watch);
LDRB(IndexType::Unsigned, WB, branch_watch, Core::BranchWatch::GetOffsetOfRecordingActive());
FixupBranch branch_over = CBZ(WB);
FixupBranch branch_in = B();
SwitchToFarCode();
SetJumpTarget(branch_in);
const BitSet32 gpr_caller_save =
gpr.GetCallerSavedUsed() &
~BitSet32{DecodeReg(WB), DecodeReg(reg_cycle_count), DecodeReg(reg_downcount)};
ABI_PushRegisters(gpr_caller_save);
const ARM64Reg float_emit_tmp = EncodeRegTo64(WB);
const BitSet32 fpr_caller_save = fpr.GetCallerSavedUsed();
m_float_emit.ABI_PushRegisters(fpr_caller_save, float_emit_tmp);
m_float_emit.ABI_PushRegisters(fpr_caller_save, ARM64Reg::X8);
const PPCAnalyst::CodeOp& op = js.op[2];
ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue_fk_n :
&Core::BranchWatch::HitPhysicalTrue_fk_n,
branch_watch, Core::FakeBranchWatchCollectionKey{op.address, op.branchTo},
&m_branch_watch, Core::FakeBranchWatchCollectionKey{op.address, op.branchTo},
op.inst.hex, WA);
m_float_emit.ABI_PopRegisters(fpr_caller_save, float_emit_tmp);
m_float_emit.ABI_PopRegisters(fpr_caller_save, ARM64Reg::X8);
ABI_PopRegisters(gpr_caller_save);
FixupBranch branch_out = B();
SwitchToNearCode();
SetJumpTarget(branch_out);
SetJumpTarget(branch_over);
}
}


@@ -23,6 +23,7 @@
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
#include "Core/PowerPC/PPCAnalyst.h"
#include "Core/PowerPC/PowerPC.h"
namespace Core
{
@@ -200,6 +201,11 @@ public:
bool IsProfilingEnabled() const { return m_enable_profiling && m_enable_debugging; }
bool IsDebuggingEnabled() const { return m_enable_debugging; }
bool IsBranchWatchEnabled() const
{
auto& branch_watch = m_system.GetPowerPC().GetBranchWatch();
return branch_watch.GetRecordingActive();
}
static const u8* Dispatch(JitBase& jit);
virtual JitBaseBlockCache* GetBlockCache() = 0;