Merge pull request #12134 from JosJuice/jit-constprop

Jit: Extract immediate handling to separate ConstantPropagation class
This commit is contained in:
JosJuice 2025-11-16 18:34:49 +01:00 committed by GitHub
commit 3ff4985120
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
19 changed files with 1569 additions and 1332 deletions

View File

@ -508,6 +508,8 @@ add_library(core
PowerPC/Interpreter/Interpreter_Tables.cpp
PowerPC/Interpreter/Interpreter.cpp
PowerPC/Interpreter/Interpreter.h
PowerPC/JitCommon/ConstantPropagation.cpp
PowerPC/JitCommon/ConstantPropagation.h
PowerPC/JitCommon/DivUtils.cpp
PowerPC/JitCommon/DivUtils.h
PowerPC/JitCommon/JitAsmCommon.cpp

View File

@ -42,6 +42,7 @@
#include "Core/PowerPC/Jit64Common/Jit64Constants.h"
#include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h"
#include "Core/PowerPC/Jit64Common/TrampolineCache.h"
#include "Core/PowerPC/JitCommon/ConstantPropagation.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/MMU.h"
#include "Core/PowerPC/PPCAnalyst.h"
@ -369,6 +370,9 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
gpr.Reset(js.op->regsOut);
fpr.Reset(js.op->GetFregsOut());
// We must also update constant propagation
m_constant_propagation.ClearGPRs(js.op->regsOut);
if (js.op->opinfo->flags & FL_SET_MSR)
EmitUpdateMembase();
@ -921,6 +925,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
gpr.Start();
fpr.Start();
m_constant_propagation.Clear();
js.downcountAmount = 0;
js.skipInstructions = 0;
js.carryFlag = CarryFlag::InPPCState;
@ -1105,21 +1111,55 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
{
gpr.Flush();
fpr.Flush();
m_constant_propagation.Clear();
CompileInstruction(op);
}
else
{
// If we have an input register that is going to be used again, load it pre-emptively,
// even if the instruction doesn't strictly need it in a register, to avoid redundant
// loads later. Of course, don't do this if we're already out of registers.
// As a bit of a heuristic, make sure we have at least one register left over for the
// output, which needs to be bound in the actual instruction compilation.
// TODO: make this smarter in the case that we're actually register-starved, i.e.
// prioritize the more important registers.
gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
}
const JitCommon::ConstantPropagationResult constant_propagation_result =
m_constant_propagation.EvaluateInstruction(op.inst, opinfo->flags);
CompileInstruction(op);
if (!constant_propagation_result.instruction_fully_executed)
{
if (!bJITRegisterCacheOff)
{
// If we have an input register that is going to be used again, load it pre-emptively,
// even if the instruction doesn't strictly need it in a register, to avoid redundant
// loads later. Of course, don't do this if we're already out of registers.
// As a bit of a heuristic, make sure we have at least one register left over for the
// output, which needs to be bound in the actual instruction compilation.
// TODO: make this smarter in the case that we're actually register-starved, i.e.
// prioritize the more important registers.
gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable);
fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable);
}
CompileInstruction(op);
}
m_constant_propagation.Apply(constant_propagation_result);
if (constant_propagation_result.gpr >= 0)
{
// Mark the GPR as dirty in the register cache
gpr.SetImmediate32(constant_propagation_result.gpr,
constant_propagation_result.gpr_value);
}
if (constant_propagation_result.instruction_fully_executed)
{
if (constant_propagation_result.carry)
FinalizeCarry(*constant_propagation_result.carry);
if (constant_propagation_result.overflow)
GenerateConstantOverflow(*constant_propagation_result.overflow);
// FinalizeImmediateRC is called last, because it may trigger branch merging
if (constant_propagation_result.compute_rc)
FinalizeImmediateRC(constant_propagation_result.gpr_value);
}
}
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;

View File

@ -31,6 +31,7 @@
#include "Core/PowerPC/Jit64Common/BlockCache.h"
#include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h"
#include "Core/PowerPC/Jit64Common/TrampolineCache.h"
#include "Core/PowerPC/JitCommon/ConstantPropagation.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/JitCommon/JitCache.h"
@ -83,6 +84,8 @@ public:
void FlushRegistersBeforeSlowAccess();
JitCommon::ConstantPropagation& GetConstantPropagation() { return m_constant_propagation; }
JitBlockCache* GetBlockCache() override { return &blocks; }
void Trace();
@ -122,6 +125,7 @@ public:
void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca);
void ComputeRC(preg_t preg, bool needs_test = true, bool needs_sext = true);
void FinalizeImmediateRC(s32 value);
void AndWithMask(Gen::X64Reg reg, u32 mask);
void RotateLeft(int bits, Gen::X64Reg regOp, const Gen::OpArg& arg, u8 rotate);
@ -288,6 +292,8 @@ private:
GPRRegCache gpr{*this};
FPURegCache fpr{*this};
JitCommon::ConstantPropagation m_constant_propagation;
Jit64AsmRoutineManager asm_routines{*this};
HyoutaUtilities::RangeSizeSet<u8*> m_free_ranges_near;

View File

@ -150,7 +150,10 @@ void Jit64::ComputeRC(preg_t preg, bool needs_test, bool needs_sext)
if (arg.IsImm())
{
MOV(64, PPCSTATE_CR(0), Imm32(arg.SImm32()));
const s32 value = arg.SImm32();
arg.Unlock();
FinalizeImmediateRC(value);
return;
}
else if (needs_sext)
{
@ -164,33 +167,32 @@ void Jit64::ComputeRC(preg_t preg, bool needs_test, bool needs_sext)
if (CheckMergedBranch(0))
{
if (arg.IsImm())
if (needs_test)
{
s32 offset = arg.SImm32();
TEST(32, arg, arg);
arg.Unlock();
DoMergedBranchImmediate(offset);
}
else
{
if (needs_test)
{
TEST(32, arg, arg);
arg.Unlock();
}
else
{
// If an operand to the cmp/rc op we're merging with the branch isn't used anymore, it'd be
// better to flush it here so that we don't have to flush it on both sides of the branch.
// We don't want to do this if a test is needed though, because it would interrupt macro-op
// fusion.
arg.Unlock();
gpr.Flush(~js.op->gprInUse);
}
DoMergedBranchCondition();
// If an operand to the cmp/rc op we're merging with the branch isn't used anymore, it'd be
// better to flush it here so that we don't have to flush it on both sides of the branch.
// We don't want to do this if a test is needed though, because it would interrupt macro-op
// fusion.
arg.Unlock();
gpr.Flush(~js.op->gprInUse);
}
DoMergedBranchCondition();
}
}
// Finalizes CR0 for an Rc instruction whose result is a known constant.
// Stores the sign-extended immediate into CR0's internal 64-bit slot and,
// when possible, merges the following conditional branch using the known
// value (this is why callers invoke it last — it may trigger branch merging).
void Jit64::FinalizeImmediateRC(s32 value)
{
// Write the constant comparison result into CR0 (64-bit internal format).
MOV(64, PPCSTATE_CR(0), Imm32(value));
// If the next instruction is a mergeable branch on CR0, resolve it now
// against the constant instead of emitting a runtime test.
if (CheckMergedBranch(0))
DoMergedBranchImmediate(value);
}
// we can't do this optimization in the emitter because MOVZX and AND have different effects on
// flags.
void Jit64::AndWithMask(X64Reg reg, u32 mask)
@ -258,25 +260,18 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop,
if (a || binary || carry)
{
carry &= js.op->wantsCA;
if (gpr.IsImm(a) && !carry)
RCOpArg Ra = gpr.Use(a, RCMode::Read);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rd);
if (doop == Add && Ra.IsSimpleReg() && !carry && d != a)
{
gpr.SetImmediate32(d, doop(gpr.Imm32(a), value));
LEA(32, Rd, MDisp(Ra.GetSimpleReg(), value));
}
else
{
RCOpArg Ra = gpr.Use(a, RCMode::Read);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rd);
if (doop == Add && Ra.IsSimpleReg() && !carry && d != a)
{
LEA(32, Rd, MDisp(Ra.GetSimpleReg(), value));
}
else
{
if (d != a)
MOV(32, Rd, Ra);
(this->*op)(32, Rd, Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
}
if (d != a)
MOV(32, Rd, Ra);
(this->*op)(32, Rd, Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
}
if (carry)
FinalizeCarry(CC_C);
@ -302,12 +297,8 @@ void Jit64::reg_imm(UGeckoInstruction inst)
switch (inst.OPCD)
{
case 14: // addi
// occasionally used as MOV - emulate, with immediate propagation
if (a != 0 && d != a && gpr.IsImm(a))
{
gpr.SetImmediate32(d, gpr.Imm32(a) + (u32)(s32)inst.SIMM_16);
}
else if (a != 0 && d != a && inst.SIMM_16 == 0)
// occasionally used as MOV
if (a != 0 && d != a && inst.SIMM_16 == 0)
{
RCOpArg Ra = gpr.Use(a, RCMode::Read);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
@ -325,14 +316,6 @@ void Jit64::reg_imm(UGeckoInstruction inst)
case 24: // ori
case 25: // oris
{
// check for nop
if (a == s && inst.UIMM == 0)
{
// Make the nop visible in the generated code. not much use but interesting if we see one.
NOP();
return;
}
const u32 immediate = inst.OPCD == 24 ? inst.UIMM : inst.UIMM << 16;
regimmop(a, s, true, immediate, Or, &XEmitter::OR);
break;
@ -346,13 +329,6 @@ void Jit64::reg_imm(UGeckoInstruction inst)
case 26: // xori
case 27: // xoris
{
if (s == a && inst.UIMM == 0)
{
// Make the nop visible in the generated code.
NOP();
return;
}
const u32 immediate = inst.OPCD == 26 ? inst.UIMM : inst.UIMM << 16;
regimmop(a, s, true, immediate, Xor, &XEmitter::XOR, false);
break;
@ -704,29 +680,7 @@ void Jit64::boolX(UGeckoInstruction inst)
bool needs_test = false;
DEBUG_ASSERT_MSG(DYNA_REC, inst.OPCD == 31, "Invalid boolX");
if (gpr.IsImm(s, b))
{
const u32 rs_offset = gpr.Imm32(s);
const u32 rb_offset = gpr.Imm32(b);
if (inst.SUBOP10 == 28) // andx
gpr.SetImmediate32(a, rs_offset & rb_offset);
else if (inst.SUBOP10 == 476) // nandx
gpr.SetImmediate32(a, ~(rs_offset & rb_offset));
else if (inst.SUBOP10 == 60) // andcx
gpr.SetImmediate32(a, rs_offset & (~rb_offset));
else if (inst.SUBOP10 == 444) // orx
gpr.SetImmediate32(a, rs_offset | rb_offset);
else if (inst.SUBOP10 == 124) // norx
gpr.SetImmediate32(a, ~(rs_offset | rb_offset));
else if (inst.SUBOP10 == 412) // orcx
gpr.SetImmediate32(a, rs_offset | (~rb_offset));
else if (inst.SUBOP10 == 316) // xorx
gpr.SetImmediate32(a, rs_offset ^ rb_offset);
else if (inst.SUBOP10 == 284) // eqvx
gpr.SetImmediate32(a, ~(rs_offset ^ rb_offset));
}
else if (gpr.IsImm(s) || gpr.IsImm(b))
if (gpr.IsImm(s) || gpr.IsImm(b))
{
const auto [i, j] = gpr.IsImm(s) ? std::pair(s, b) : std::pair(b, s);
u32 imm = gpr.Imm32(i);
@ -780,53 +734,46 @@ void Jit64::boolX(UGeckoInstruction inst)
}
else if (is_and)
{
if (imm == 0)
RCOpArg Rj = gpr.Use(j, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Rj, Ra);
if (imm == 0xFFFFFFFF)
{
gpr.SetImmediate32(a, final_not ? 0xFFFFFFFF : 0);
if (a != j)
MOV(32, Ra, Rj);
if (final_not || complement_b)
NOT(32, Ra);
needs_test = true;
}
else if (complement_b)
{
if (a != j)
MOV(32, Ra, Rj);
NOT(32, Ra);
AND(32, Ra, Imm32(imm));
}
else
{
RCOpArg Rj = gpr.Use(j, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Rj, Ra);
if (imm == 0xFFFFFFFF)
if (a == j)
{
if (a != j)
MOV(32, Ra, Rj);
if (final_not || complement_b)
NOT(32, Ra);
needs_test = true;
AND(32, Ra, Imm32(imm));
}
else if (complement_b)
else if (s32(imm) >= -128 && s32(imm) <= 127)
{
if (a != j)
MOV(32, Ra, Rj);
NOT(32, Ra);
MOV(32, Ra, Rj);
AND(32, Ra, Imm32(imm));
}
else
{
if (a == j)
{
AND(32, Ra, Imm32(imm));
}
else if (s32(imm) >= -128 && s32(imm) <= 127)
{
MOV(32, Ra, Rj);
AND(32, Ra, Imm32(imm));
}
else
{
MOV(32, Ra, Imm32(imm));
AND(32, Ra, Rj);
}
MOV(32, Ra, Imm32(imm));
AND(32, Ra, Rj);
}
if (final_not)
{
NOT(32, Ra);
needs_test = true;
}
if (final_not)
{
NOT(32, Ra);
needs_test = true;
}
}
}
@ -1079,13 +1026,8 @@ void Jit64::extsXx(UGeckoInstruction inst)
int a = inst.RA, s = inst.RS;
int size = inst.SUBOP10 == 922 ? 16 : 8;
if (gpr.IsImm(s))
{
gpr.SetImmediate32(a, (u32)(s32)(size == 16 ? (s16)gpr.Imm32(s) : (s8)gpr.Imm32(s)));
}
else
{
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RCOpArg Rs = gpr.UseNoImm(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Rs, Ra);
MOVSX(32, size, Ra, Rs);
@ -1100,14 +1042,6 @@ void Jit64::subfic(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
int a = inst.RA, d = inst.RD, imm = inst.SIMM_16;
if (gpr.IsImm(a))
{
u32 i = imm, j = gpr.Imm32(a);
gpr.SetImmediate32(d, i - j);
FinalizeCarry(j == 0 || (i > j - 1));
return;
}
RCOpArg Ra = gpr.Use(a, RCMode::Read);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rd);
@ -1155,24 +1089,7 @@ void Jit64::subfx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
const bool carry = !(inst.SUBOP10 & (1 << 5));
if (a == b)
{
gpr.SetImmediate32(d, 0);
if (carry)
FinalizeCarry(true);
if (inst.OE)
GenerateConstantOverflow(false);
}
else if (gpr.IsImm(a, b))
{
s32 i = gpr.SImm32(b), j = gpr.SImm32(a);
gpr.SetImmediate32(d, i - j);
if (carry)
FinalizeCarry(j == 0 || Interpreter::Helper_Carry((u32)i, 0u - (u32)j));
if (inst.OE)
GenerateConstantOverflow((s64)i - (s64)j);
}
else if (gpr.IsImm(a))
if (gpr.IsImm(a))
{
s32 j = gpr.SImm32(a);
RCOpArg Rb = gpr.Use(b, RCMode::Read);
@ -1259,13 +1176,6 @@ void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow)
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rd);
// simplest cases first
if (imm == 0)
{
XOR(32, Rd, Rd);
return;
}
if (imm == (u32)-1)
{
if (d != a)
@ -1320,14 +1230,7 @@ void Jit64::mulli(UGeckoInstruction inst)
int a = inst.RA, d = inst.RD;
u32 imm = inst.SIMM_16;
if (gpr.IsImm(a))
{
gpr.SetImmediate32(d, gpr.Imm32(a) * imm);
}
else
{
MultiplyImmediate(imm, a, d, false);
}
MultiplyImmediate(imm, a, d, false);
}
void Jit64::mullwx(UGeckoInstruction inst)
@ -1336,14 +1239,7 @@ void Jit64::mullwx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a, b))
{
s32 i = gpr.SImm32(a), j = gpr.SImm32(b);
gpr.SetImmediate32(d, i * j);
if (inst.OE)
GenerateConstantOverflow((s64)i * (s64)j);
}
else if (gpr.IsImm(a) || gpr.IsImm(b))
if (gpr.IsImm(a) || gpr.IsImm(b))
{
u32 imm = gpr.IsImm(a) ? gpr.Imm32(a) : gpr.Imm32(b);
int src = gpr.IsImm(a) ? b : a;
@ -1385,14 +1281,7 @@ void Jit64::mulhwXx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
bool sign = inst.SUBOP10 == 75;
if (gpr.IsImm(a, b))
{
if (sign)
gpr.SetImmediate32(d, (u32)((u64)(((s64)gpr.SImm32(a) * (s64)gpr.SImm32(b))) >> 32));
else
gpr.SetImmediate32(d, (u32)(((u64)gpr.Imm32(a) * (u64)gpr.Imm32(b)) >> 32));
}
else if (sign)
if (sign)
{
RCOpArg Ra = gpr.Use(a, RCMode::Read);
RCOpArg Rb = gpr.UseNoImm(b, RCMode::Read);
@ -1432,22 +1321,7 @@ void Jit64::divwux(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a, b))
{
if (gpr.Imm32(b) == 0)
{
gpr.SetImmediate32(d, 0);
if (inst.OE)
GenerateConstantOverflow(true);
}
else
{
gpr.SetImmediate32(d, gpr.Imm32(a) / gpr.Imm32(b));
if (inst.OE)
GenerateConstantOverflow(false);
}
}
else if (gpr.IsImm(b))
if (gpr.IsImm(b))
{
u32 divisor = gpr.Imm32(b);
if (divisor == 0)
@ -1559,24 +1433,7 @@ void Jit64::divwx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a, b))
{
s32 i = gpr.SImm32(a), j = gpr.SImm32(b);
if (j == 0 || (i == (s32)0x80000000 && j == -1))
{
const u32 result = i < 0 ? 0xFFFFFFFF : 0x00000000;
gpr.SetImmediate32(d, result);
if (inst.OE)
GenerateConstantOverflow(true);
}
else
{
gpr.SetImmediate32(d, i / j);
if (inst.OE)
GenerateConstantOverflow(false);
}
}
else if (gpr.IsImm(a))
if (gpr.IsImm(a))
{
// Constant dividend
const u32 dividend = gpr.Imm32(a);
@ -1879,16 +1736,7 @@ void Jit64::addx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
bool carry = !(inst.SUBOP10 & (1 << 8));
if (gpr.IsImm(a, b))
{
const s32 i = gpr.SImm32(a), j = gpr.SImm32(b);
gpr.SetImmediate32(d, i + j);
if (carry)
FinalizeCarry(Interpreter::Helper_Carry(i, j));
if (inst.OE)
GenerateConstantOverflow((s64)i + (s64)j);
}
else if (gpr.IsImm(a) || gpr.IsImm(b))
if (gpr.IsImm(a) || gpr.IsImm(b))
{
const auto [i, j] = gpr.IsImm(a) ? std::pair(a, b) : std::pair(b, a);
const s32 imm = gpr.SImm32(i);
@ -2046,112 +1894,99 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
int a = inst.RA;
int s = inst.RS;
if (gpr.IsImm(s))
const bool left_shift = inst.SH && inst.MB == 0 && inst.ME == 31 - inst.SH;
const bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH;
const bool field_extract = inst.SH && inst.ME == 31 && inst.MB > 32 - inst.SH;
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
const u32 prerotate_mask = std::rotr(mask, inst.SH);
const bool simple_mask = mask == 0xff || mask == 0xffff;
const bool simple_prerotate_mask = prerotate_mask == 0xff || prerotate_mask == 0xffff;
// In case of a merged branch, track whether or not we've set flags.
// If not, we need to do a test later to get them.
bool needs_test = true;
// If we know the high bit can't be set, we can avoid doing a sign extend for flag storage.
bool needs_sext = true;
int mask_size = inst.ME - inst.MB + 1;
if (simple_mask && !(inst.SH & (mask_size - 1)) && !gpr.IsBound(s) && !gpr.IsImm(s))
{
u32 result = gpr.Imm32(s);
if (inst.SH != 0)
result = std::rotl(result, inst.SH);
result &= MakeRotationMask(inst.MB, inst.ME);
gpr.SetImmediate32(a, result);
if (inst.Rc)
ComputeRC(a);
// optimized case: byte/word extract from m_ppc_state
// Note: If a == s, calling Realize(Ra) will allocate a host register for Rs,
// so we have to get mem_source from Rs before calling Realize(Ra)
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RegCache::Realize(Rs);
OpArg mem_source = Rs.Location();
if (inst.SH)
mem_source.AddMemOffset((32 - inst.SH) >> 3);
Rs.Unlock();
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Ra);
MOVZX(32, mask_size, Ra, mem_source);
needs_sext = false;
}
else
{
const bool left_shift = inst.SH && inst.MB == 0 && inst.ME == 31 - inst.SH;
const bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH;
const bool field_extract = inst.SH && inst.ME == 31 && inst.MB > 32 - inst.SH;
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
const u32 prerotate_mask = std::rotr(mask, inst.SH);
const bool simple_mask = mask == 0xff || mask == 0xffff;
const bool simple_prerotate_mask = prerotate_mask == 0xff || prerotate_mask == 0xffff;
// In case of a merged branch, track whether or not we've set flags.
// If not, we need to do a test later to get them.
bool needs_test = true;
// If we know the high bit can't be set, we can avoid doing a sign extend for flag storage.
bool needs_sext = true;
int mask_size = inst.ME - inst.MB + 1;
RCOpArg Rs = gpr.UseNoImm(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Rs, Ra);
if (simple_mask && !(inst.SH & (mask_size - 1)) && !gpr.IsBound(s))
if (a != s && left_shift && Rs.IsSimpleReg() && inst.SH <= 3)
{
// optimized case: byte/word extract from m_ppc_state
// Note: If a == s, calling Realize(Ra) will allocate a host register for Rs,
// so we have to get mem_source from Rs before calling Realize(Ra)
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RegCache::Realize(Rs);
OpArg mem_source = Rs.Location();
LEA(32, Ra, MScaled(Rs.GetSimpleReg(), SCALE_1 << inst.SH, 0));
}
// optimized case: byte/word extract plus rotate
else if (simple_prerotate_mask && !left_shift)
{
MOVZX(32, prerotate_mask == 0xff ? 8 : 16, Ra, Rs);
if (inst.SH)
mem_source.AddMemOffset((32 - inst.SH) >> 3);
Rs.Unlock();
ROL(32, Ra, Imm8(inst.SH));
needs_sext = (mask & 0x80000000) != 0;
}
// Use BEXTR where possible: Only AMD implements this in one uop
else if (field_extract && cpu_info.bBMI1 && cpu_info.vendor == CPUVendor::AMD)
{
MOV(32, R(RSCRATCH), Imm32((mask_size << 8) | (32 - inst.SH)));
BEXTR(32, Ra, Rs, RSCRATCH);
needs_sext = false;
}
else if (left_shift)
{
if (a != s)
MOV(32, Ra, Rs);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Ra);
MOVZX(32, mask_size, Ra, mem_source);
SHL(32, Ra, Imm8(inst.SH));
}
else if (right_shift)
{
if (a != s)
MOV(32, Ra, Rs);
SHR(32, Ra, Imm8(inst.MB));
needs_sext = false;
}
else
{
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Rs, Ra);
RotateLeft(32, Ra, Rs, inst.SH);
if (a != s && left_shift && Rs.IsSimpleReg() && inst.SH <= 3)
if (!(inst.MB == 0 && inst.ME == 31))
{
LEA(32, Ra, MScaled(Rs.GetSimpleReg(), SCALE_1 << inst.SH, 0));
}
// optimized case: byte/word extract plus rotate
else if (simple_prerotate_mask && !left_shift)
{
MOVZX(32, prerotate_mask == 0xff ? 8 : 16, Ra, Rs);
if (inst.SH)
ROL(32, Ra, Imm8(inst.SH));
needs_sext = (mask & 0x80000000) != 0;
}
// Use BEXTR where possible: Only AMD implements this in one uop
else if (field_extract && cpu_info.bBMI1 && cpu_info.vendor == CPUVendor::AMD)
{
MOV(32, R(RSCRATCH), Imm32((mask_size << 8) | (32 - inst.SH)));
BEXTR(32, Ra, Rs, RSCRATCH);
needs_sext = false;
}
else if (left_shift)
{
if (a != s)
MOV(32, Ra, Rs);
SHL(32, Ra, Imm8(inst.SH));
}
else if (right_shift)
{
if (a != s)
MOV(32, Ra, Rs);
SHR(32, Ra, Imm8(inst.MB));
needs_sext = false;
}
else
{
RotateLeft(32, Ra, Rs, inst.SH);
if (!(inst.MB == 0 && inst.ME == 31))
{
// we need flags if we're merging the branch
if (inst.Rc && CheckMergedBranch(0))
AND(32, Ra, Imm32(mask));
else
AndWithMask(Ra, mask);
needs_sext = inst.MB == 0;
needs_test = false;
}
// we need flags if we're merging the branch
if (inst.Rc && CheckMergedBranch(0))
AND(32, Ra, Imm32(mask));
else
AndWithMask(Ra, mask);
needs_sext = inst.MB == 0;
needs_test = false;
}
}
if (inst.Rc)
ComputeRC(a, needs_test, needs_sext);
}
if (inst.Rc)
ComputeRC(a, needs_test, needs_sext);
}
void Jit64::rlwimix(UGeckoInstruction inst)
@ -2162,135 +1997,118 @@ void Jit64::rlwimix(UGeckoInstruction inst)
int s = inst.RS;
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
const bool left_shift = mask == 0U - (1U << inst.SH);
const bool right_shift = mask == (1U << inst.SH) - 1;
bool needs_test = false;
if (gpr.IsImm(a, s))
if (mask == 0 || (a == s && inst.SH == 0))
{
gpr.SetImmediate32(a, (gpr.Imm32(a) & ~mask) | (std::rotl(gpr.Imm32(s), inst.SH) & mask));
if (inst.Rc)
ComputeRC(a);
needs_test = true;
}
else if (gpr.IsImm(s) && mask == 0xFFFFFFFF)
else if (mask == 0xFFFFFFFF)
{
gpr.SetImmediate32(a, std::rotl(gpr.Imm32(s), inst.SH));
if (inst.Rc)
ComputeRC(a);
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Rs, Ra);
RotateLeft(32, Ra, Rs, inst.SH);
needs_test = true;
}
else
else if (gpr.IsImm(s))
{
const bool left_shift = mask == 0U - (1U << inst.SH);
const bool right_shift = mask == (1U << inst.SH) - 1;
bool needs_test = false;
RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
RegCache::Realize(Ra);
AndWithMask(Ra, ~mask);
OR(32, Ra, Imm32(std::rotl(gpr.Imm32(s), inst.SH) & mask));
}
else if (gpr.IsImm(a))
{
const u32 maskA = gpr.Imm32(a) & ~mask;
if (mask == 0 || (a == s && inst.SH == 0))
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Rs, Ra);
if (inst.SH == 0)
{
needs_test = true;
MOV(32, Ra, Rs);
AndWithMask(Ra, mask);
}
else if (mask == 0xFFFFFFFF)
else if (left_shift)
{
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Rs, Ra);
RotateLeft(32, Ra, Rs, inst.SH);
needs_test = true;
MOV(32, Ra, Rs);
SHL(32, Ra, Imm8(inst.SH));
}
else if (gpr.IsImm(s))
else if (right_shift)
{
RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
RegCache::Realize(Ra);
AndWithMask(Ra, ~mask);
OR(32, Ra, Imm32(std::rotl(gpr.Imm32(s), inst.SH) & mask));
}
else if (gpr.IsImm(a))
{
const u32 maskA = gpr.Imm32(a) & ~mask;
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RegCache::Realize(Rs, Ra);
if (inst.SH == 0)
{
MOV(32, Ra, Rs);
AndWithMask(Ra, mask);
}
else if (left_shift)
{
MOV(32, Ra, Rs);
SHL(32, Ra, Imm8(inst.SH));
}
else if (right_shift)
{
MOV(32, Ra, Rs);
SHR(32, Ra, Imm8(32 - inst.SH));
}
else
{
RotateLeft(32, Ra, Rs, inst.SH);
AndWithMask(Ra, mask);
}
if (maskA)
OR(32, Ra, Imm32(maskA));
else
needs_test = true;
}
else if (inst.SH)
{
// TODO: perhaps consider pinsrb or abuse of AH
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
RegCache::Realize(Rs, Ra);
if (left_shift)
{
MOV(32, R(RSCRATCH), Rs);
SHL(32, R(RSCRATCH), Imm8(inst.SH));
}
else if (right_shift)
{
MOV(32, R(RSCRATCH), Rs);
SHR(32, R(RSCRATCH), Imm8(32 - inst.SH));
}
else
{
RotateLeft(32, RSCRATCH, Rs, inst.SH);
}
if (mask == 0xFF || mask == 0xFFFF)
{
MOV(mask == 0xFF ? 8 : 16, Ra, R(RSCRATCH));
needs_test = true;
}
else
{
if (!left_shift && !right_shift)
AndWithMask(RSCRATCH, mask);
AndWithMask(Ra, ~mask);
OR(32, Ra, R(RSCRATCH));
}
MOV(32, Ra, Rs);
SHR(32, Ra, Imm8(32 - inst.SH));
}
else
{
RCX64Reg Rs = gpr.Bind(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
RegCache::Realize(Rs, Ra);
if (mask == 0xFF || mask == 0xFFFF)
{
MOV(mask == 0xFF ? 8 : 16, Ra, Rs);
needs_test = true;
}
else
{
XOR(32, Ra, Rs);
AndWithMask(Ra, ~mask);
XOR(32, Ra, Rs);
}
RotateLeft(32, Ra, Rs, inst.SH);
AndWithMask(Ra, mask);
}
if (inst.Rc)
ComputeRC(a, needs_test);
if (maskA)
OR(32, Ra, Imm32(maskA));
else
needs_test = true;
}
else if (inst.SH)
{
// TODO: perhaps consider pinsrb or abuse of AH
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
RegCache::Realize(Rs, Ra);
if (left_shift)
{
MOV(32, R(RSCRATCH), Rs);
SHL(32, R(RSCRATCH), Imm8(inst.SH));
}
else if (right_shift)
{
MOV(32, R(RSCRATCH), Rs);
SHR(32, R(RSCRATCH), Imm8(32 - inst.SH));
}
else
{
RotateLeft(32, RSCRATCH, Rs, inst.SH);
}
if (mask == 0xFF || mask == 0xFFFF)
{
MOV(mask == 0xFF ? 8 : 16, Ra, R(RSCRATCH));
needs_test = true;
}
else
{
if (!left_shift && !right_shift)
AndWithMask(RSCRATCH, mask);
AndWithMask(Ra, ~mask);
OR(32, Ra, R(RSCRATCH));
}
}
else
{
RCX64Reg Rs = gpr.Bind(s, RCMode::Read);
RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite);
RegCache::Realize(Rs, Ra);
if (mask == 0xFF || mask == 0xFFFF)
{
MOV(mask == 0xFF ? 8 : 16, Ra, Rs);
needs_test = true;
}
else
{
XOR(32, Ra, Rs);
AndWithMask(Ra, ~mask);
XOR(32, Ra, Rs);
}
}
if (inst.Rc)
ComputeRC(a, needs_test);
}
void Jit64::rlwnmx(UGeckoInstruction inst)
@ -2300,11 +2118,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, s = inst.RS;
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
if (gpr.IsImm(b, s))
{
gpr.SetImmediate32(a, std::rotl(gpr.Imm32(s), gpr.Imm32(b) & 0x1F) & mask);
}
else if (gpr.IsImm(b))
if (gpr.IsImm(b))
{
u32 amount = gpr.Imm32(b) & 0x1f;
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
@ -2350,15 +2164,8 @@ void Jit64::negx(UGeckoInstruction inst)
int a = inst.RA;
int d = inst.RD;
if (gpr.IsImm(a))
{
gpr.SetImmediate32(d, ~(gpr.Imm32(a)) + 1);
if (inst.OE)
GenerateConstantOverflow(gpr.Imm32(d) == 0x80000000);
}
else
{
RCOpArg Ra = gpr.Use(a, RCMode::Read);
RCOpArg Ra = gpr.UseNoImm(a, RCMode::Read);
RCX64Reg Rd = gpr.Bind(d, RCMode::Write);
RegCache::Realize(Ra, Rd);
@ -2380,12 +2187,7 @@ void Jit64::srwx(UGeckoInstruction inst)
int b = inst.RB;
int s = inst.RS;
if (gpr.IsImm(b, s))
{
u32 amount = gpr.Imm32(b);
gpr.SetImmediate32(a, (amount & 0x20) ? 0 : (gpr.Imm32(s) >> (amount & 0x1f)));
}
else if (gpr.IsImm(b))
if (gpr.IsImm(b))
{
u32 amount = gpr.Imm32(b);
if (amount & 0x20)
@ -2442,14 +2244,7 @@ void Jit64::slwx(UGeckoInstruction inst)
int b = inst.RB;
int s = inst.RS;
if (gpr.IsImm(b, s))
{
u32 amount = gpr.Imm32(b);
gpr.SetImmediate32(a, (amount & 0x20) ? 0 : gpr.Imm32(s) << (amount & 0x1f));
if (inst.Rc)
ComputeRC(a);
}
else if (gpr.IsImm(b))
if (gpr.IsImm(b))
{
u32 amount = gpr.Imm32(b);
if (amount & 0x20)
@ -2473,12 +2268,6 @@ void Jit64::slwx(UGeckoInstruction inst)
if (inst.Rc)
ComputeRC(a);
}
else if (gpr.IsImm(s) && gpr.Imm32(s) == 0)
{
gpr.SetImmediate32(a, 0);
if (inst.Rc)
ComputeRC(a);
}
else if (cpu_info.bBMI2)
{
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
@ -2532,22 +2321,7 @@ void Jit64::srawx(UGeckoInstruction inst)
int b = inst.RB;
int s = inst.RS;
if (gpr.IsImm(b, s))
{
s32 i = gpr.SImm32(s), amount = gpr.SImm32(b);
if (amount & 0x20)
{
gpr.SetImmediate32(a, i & 0x80000000 ? 0xFFFFFFFF : 0);
FinalizeCarry(i & 0x80000000 ? true : false);
}
else
{
amount &= 0x1F;
gpr.SetImmediate32(a, i >> amount);
FinalizeCarry(amount != 0 && i < 0 && (u32(i) << (32 - amount)));
}
}
else if (gpr.IsImm(b))
if (gpr.IsImm(b))
{
u32 amount = gpr.Imm32(b);
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
@ -2583,11 +2357,6 @@ void Jit64::srawx(UGeckoInstruction inst)
FinalizeCarry(CC_NZ);
}
}
else if (gpr.IsImm(s) && gpr.Imm32(s) == 0)
{
gpr.SetImmediate32(a, 0);
FinalizeCarry(false);
}
else if (cpu_info.bBMI2)
{
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
@ -2652,13 +2421,7 @@ void Jit64::srawix(UGeckoInstruction inst)
int s = inst.RS;
int amount = inst.SH;
if (gpr.IsImm(s))
{
s32 imm = gpr.SImm32(s);
gpr.SetImmediate32(a, imm >> amount);
FinalizeCarry(amount != 0 && imm < 0 && (u32(imm) << (32 - amount)));
}
else if (amount != 0)
if (amount != 0)
{
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RCOpArg Rs = gpr.Use(s, RCMode::Read);
@ -2716,14 +2479,9 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
int s = inst.RS;
bool needs_test = false;
if (gpr.IsImm(s))
{
gpr.SetImmediate32(a, static_cast<u32>(std::countl_zero(gpr.Imm32(s))));
}
else
{
RCX64Reg Ra = gpr.Bind(a, RCMode::Write);
RCOpArg Rs = gpr.Use(s, RCMode::Read);
RCOpArg Rs = gpr.UseNoImm(s, RCMode::Read);
RegCache::Realize(Ra, Rs);
if (cpu_info.bLZCNT)

View File

@ -16,111 +16,79 @@ using preg_t = size_t;
class PPCCachedReg
{
public:
enum class LocationType
{
/// Value is currently at its default location
Default,
/// Value is not stored anywhere because we know it won't be read before the next write
Discarded,
/// Value is currently bound to a x64 register
Bound,
/// Value is known as an immediate and has not been written back to its default location
Immediate,
/// Value is known as an immediate and is already present at its default location
SpeculativeImmediate,
};
PPCCachedReg() = default;
explicit PPCCachedReg(Gen::OpArg default_location_)
: default_location(default_location_), location(default_location_)
explicit PPCCachedReg(Gen::OpArg default_location) : m_default_location(default_location) {}
Gen::OpArg GetDefaultLocation() const { return m_default_location; }
Gen::X64Reg GetHostRegister() const
{
ASSERT(m_in_host_register);
return m_host_register;
}
const std::optional<Gen::OpArg>& Location() const { return location; }
bool IsInDefaultLocation() const { return m_in_default_location; }
bool IsInHostRegister() const { return m_in_host_register; }
LocationType GetLocationType() const
void SetFlushed(bool maintain_host_register)
{
if (!location.has_value())
return LocationType::Discarded;
if (!away)
{
ASSERT(!revertable);
if (location->IsImm())
return LocationType::SpeculativeImmediate;
ASSERT(*location == default_location);
return LocationType::Default;
}
ASSERT(location->IsImm() || location->IsSimpleReg());
return location->IsImm() ? LocationType::Immediate : LocationType::Bound;
ASSERT(!m_revertable);
if (!maintain_host_register)
m_in_host_register = false;
m_in_default_location = true;
}
bool IsAway() const { return away; }
bool IsDiscarded() const { return !location.has_value(); }
bool IsBound() const { return GetLocationType() == LocationType::Bound; }
void SetBoundTo(Gen::X64Reg xreg)
void SetInHostRegister(Gen::X64Reg xreg, bool dirty)
{
away = true;
location = Gen::R(xreg);
if (dirty)
m_in_default_location = false;
m_in_host_register = true;
m_host_register = xreg;
}
void SetDirty() { m_in_default_location = false; }
void SetDiscarded()
{
ASSERT(!revertable);
away = false;
location = std::nullopt;
ASSERT(!m_revertable);
m_in_default_location = false;
m_in_host_register = false;
}
void SetFlushed()
{
ASSERT(!revertable);
away = false;
location = default_location;
}
void SetToImm32(u32 imm32, bool dirty = true)
{
away |= dirty;
location = Gen::Imm32(imm32);
}
bool IsRevertable() const { return revertable; }
bool IsRevertable() const { return m_revertable; }
void SetRevertable()
{
ASSERT(IsBound());
revertable = true;
ASSERT(m_in_host_register);
m_revertable = true;
}
void SetRevert()
{
ASSERT(revertable);
revertable = false;
SetFlushed();
ASSERT(m_revertable);
m_revertable = false;
SetFlushed(false);
}
void SetCommit()
{
ASSERT(revertable);
revertable = false;
ASSERT(m_revertable);
m_revertable = false;
}
bool IsLocked() const { return locked > 0; }
void Lock() { locked++; }
bool IsLocked() const { return m_locked > 0; }
void Lock() { m_locked++; }
void Unlock()
{
ASSERT(IsLocked());
locked--;
m_locked--;
}
private:
Gen::OpArg default_location{};
std::optional<Gen::OpArg> location{};
bool away = false; // value not in source register
bool revertable = false;
size_t locked = 0;
Gen::OpArg m_default_location{};
Gen::X64Reg m_host_register{};
bool m_in_default_location = true;
bool m_in_host_register = false;
bool m_revertable = false;
size_t m_locked = 0;
};
class X64CachedReg
@ -128,25 +96,20 @@ class X64CachedReg
public:
preg_t Contents() const { return ppcReg; }
void SetBoundTo(preg_t ppcReg_, bool dirty_)
void SetBoundTo(preg_t ppcReg_)
{
free = false;
ppcReg = ppcReg_;
dirty = dirty_;
}
void Unbind()
{
ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
free = true;
dirty = false;
}
bool IsFree() const { return free && !locked; }
bool IsDirty() const { return dirty; }
void MakeDirty() { dirty = true; }
bool IsLocked() const { return locked > 0; }
void Lock() { locked++; }
void Unlock()
@ -158,7 +121,6 @@ public:
private:
preg_t ppcReg = static_cast<preg_t>(Gen::INVALID_REG);
bool free = true;
bool dirty = false;
size_t locked = 0;
};

View File

@ -13,16 +13,59 @@ FPURegCache::FPURegCache(Jit64& jit) : RegCache{jit}
{
}
void FPURegCache::StoreRegister(preg_t preg, const OpArg& new_loc)
bool FPURegCache::IsImm(preg_t preg) const
{
ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - {}", preg);
m_emitter->MOVAPD(new_loc, m_regs[preg].Location()->GetSimpleReg());
return false;
}
u32 FPURegCache::Imm32(preg_t preg) const
{
ASSERT_MSG(DYNA_REC, false, "FPURegCache doesn't support immediates");
return 0;
}
s32 FPURegCache::SImm32(preg_t preg) const
{
ASSERT_MSG(DYNA_REC, false, "FPURegCache doesn't support immediates");
return 0;
}
OpArg FPURegCache::R(preg_t preg) const
{
if (m_regs[preg].IsInHostRegister())
{
return ::Gen::R(m_regs[preg].GetHostRegister());
}
else
{
ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "FPR {} missing!", preg);
return m_regs[preg].GetDefaultLocation();
}
}
void FPURegCache::StoreRegister(preg_t preg, const OpArg& new_loc,
IgnoreDiscardedRegisters ignore_discarded_registers)
{
if (m_regs[preg].IsInHostRegister())
{
m_emitter->MOVAPD(new_loc, m_regs[preg].GetHostRegister());
}
else
{
ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No,
"FPR {} not in host register", preg);
}
}
void FPURegCache::LoadRegister(preg_t preg, X64Reg new_loc)
{
ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg);
m_emitter->MOVAPD(new_loc, m_regs[preg].Location().value());
ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "FPR {} not in default location", preg);
m_emitter->MOVAPD(new_loc, m_regs[preg].GetDefaultLocation());
}
void FPURegCache::DiscardImm(preg_t preg)
{
// FPURegCache doesn't support immediates, so no need to do anything
}
std::span<const X64Reg> FPURegCache::GetAllocationOrder() const

View File

@ -12,10 +12,17 @@ class FPURegCache final : public RegCache
public:
explicit FPURegCache(Jit64& jit);
bool IsImm(preg_t preg) const override;
u32 Imm32(preg_t preg) const override;
s32 SImm32(preg_t preg) const override;
protected:
Gen::OpArg R(preg_t preg) const override;
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void StoreRegister(preg_t preg, const Gen::OpArg& newLoc) override;
void StoreRegister(preg_t preg, const Gen::OpArg& newLoc,
IgnoreDiscardedRegisters ignore_discarded_registers) override;
void LoadRegister(preg_t preg, Gen::X64Reg newLoc) override;
void DiscardImm(preg_t preg) override;
std::span<const Gen::X64Reg> GetAllocationOrder() const override;
BitSet32 GetRegUtilization() const override;
BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const override;

View File

@ -13,16 +13,76 @@ GPRRegCache::GPRRegCache(Jit64& jit) : RegCache{jit}
{
}
void GPRRegCache::StoreRegister(preg_t preg, const OpArg& new_loc)
bool GPRRegCache::IsImm(preg_t preg) const
{
ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg);
m_emitter->MOV(32, new_loc, m_regs[preg].Location().value());
return m_jit.GetConstantPropagation().HasGPR(preg);
}
u32 GPRRegCache::Imm32(preg_t preg) const
{
ASSERT(m_jit.GetConstantPropagation().HasGPR(preg));
return m_jit.GetConstantPropagation().GetGPR(preg);
}
s32 GPRRegCache::SImm32(preg_t preg) const
{
ASSERT(m_jit.GetConstantPropagation().HasGPR(preg));
return m_jit.GetConstantPropagation().GetGPR(preg);
}
OpArg GPRRegCache::R(preg_t preg) const
{
if (m_regs[preg].IsInHostRegister())
{
return ::Gen::R(m_regs[preg].GetHostRegister());
}
else if (m_jit.GetConstantPropagation().HasGPR(preg))
{
return ::Gen::Imm32(m_jit.GetConstantPropagation().GetGPR(preg));
}
else
{
ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "GPR {} missing!", preg);
return m_regs[preg].GetDefaultLocation();
}
}
void GPRRegCache::StoreRegister(preg_t preg, const OpArg& new_loc,
IgnoreDiscardedRegisters ignore_discarded_registers)
{
if (m_regs[preg].IsInHostRegister())
{
m_emitter->MOV(32, new_loc, ::Gen::R(m_regs[preg].GetHostRegister()));
}
else if (m_jit.GetConstantPropagation().HasGPR(preg))
{
m_emitter->MOV(32, new_loc, ::Gen::Imm32(m_jit.GetConstantPropagation().GetGPR(preg)));
}
else
{
ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No,
"GPR {} not in host register or constant propagation", preg);
}
}
void GPRRegCache::LoadRegister(preg_t preg, X64Reg new_loc)
{
ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg);
m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].Location().value());
const JitCommon::ConstantPropagation& constant_propagation = m_jit.GetConstantPropagation();
if (constant_propagation.HasGPR(preg))
{
m_emitter->MOV(32, ::Gen::R(new_loc), ::Gen::Imm32(constant_propagation.GetGPR(preg)));
}
else
{
ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "GPR {} not in default location",
preg);
m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].GetDefaultLocation());
}
}
void GPRRegCache::DiscardImm(preg_t preg)
{
m_jit.GetConstantPropagation().ClearGPR(preg);
}
OpArg GPRRegCache::GetDefaultLocation(preg_t preg) const
@ -48,8 +108,9 @@ void GPRRegCache::SetImmediate32(preg_t preg, u32 imm_value, bool dirty)
{
// "dirty" can be false to avoid redundantly flushing an immediate when
// processing speculative constants.
DiscardRegContentsIfCached(preg);
m_regs[preg].SetToImm32(imm_value, dirty);
if (dirty)
DiscardRegister(preg);
m_jit.GetConstantPropagation().SetGPR(preg, imm_value);
}
BitSet32 GPRRegCache::GetRegUtilization() const

View File

@ -11,12 +11,20 @@ class GPRRegCache final : public RegCache
{
public:
explicit GPRRegCache(Jit64& jit);
bool IsImm(preg_t preg) const override;
u32 Imm32(preg_t preg) const override;
s32 SImm32(preg_t preg) const override;
void SetImmediate32(preg_t preg, u32 imm_value, bool dirty = true);
protected:
Gen::OpArg R(preg_t preg) const override;
Gen::OpArg GetDefaultLocation(preg_t preg) const override;
void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) override;
void StoreRegister(preg_t preg, const Gen::OpArg& new_loc,
IgnoreDiscardedRegisters ignore_discarded_registers) override;
void LoadRegister(preg_t preg, Gen::X64Reg new_loc) override;
void DiscardImm(preg_t preg) override;
std::span<const Gen::X64Reg> GetAllocationOrder() const override;
BitSet32 GetRegUtilization() const override;
BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const override;

View File

@ -136,7 +136,7 @@ bool RCOpArg::IsImm() const
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
return rc->R(*preg).IsImm();
return rc->IsImm(*preg);
}
else if (std::holds_alternative<u32>(contents))
{
@ -149,7 +149,7 @@ s32 RCOpArg::SImm32() const
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
return rc->R(*preg).SImm32();
return rc->SImm32(*preg);
}
else if (const u32* imm = std::get_if<u32>(&contents))
{
@ -163,7 +163,7 @@ u32 RCOpArg::Imm32() const
{
if (const preg_t* preg = std::get_if<preg_t>(&contents))
{
return rc->R(*preg).Imm32();
return rc->Imm32(*preg);
}
else if (const u32* imm = std::get_if<u32>(&contents))
{
@ -297,25 +297,16 @@ bool RegCache::SanityCheck() const
{
for (size_t i = 0; i < m_regs.size(); i++)
{
switch (m_regs[i].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
case PPCCachedReg::LocationType::Discarded:
case PPCCachedReg::LocationType::SpeculativeImmediate:
case PPCCachedReg::LocationType::Immediate:
break;
case PPCCachedReg::LocationType::Bound:
if (m_regs[i].IsInHostRegister())
{
if (m_regs[i].IsLocked() || m_regs[i].IsRevertable())
return false;
Gen::X64Reg xr = m_regs[i].Location()->GetSimpleReg();
Gen::X64Reg xr = m_regs[i].GetHostRegister();
if (m_xregs[xr].IsLocked())
return false;
if (m_xregs[xr].Contents() != i)
return false;
break;
}
}
}
return true;
@ -379,13 +370,7 @@ void RegCache::Discard(BitSet32 pregs)
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress for {}!",
i);
if (m_regs[i].IsBound())
{
X64Reg xr = RX(i);
m_xregs[xr].Unbind();
}
m_regs[i].SetDiscarded();
DiscardRegister(i);
}
}
@ -401,25 +386,7 @@ void RegCache::Flush(BitSet32 pregs, IgnoreDiscardedRegisters ignore_discarded_r
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress for {}!",
i);
switch (m_regs[i].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
break;
case PPCCachedReg::LocationType::Discarded:
ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No,
"Attempted to flush discarded PPC reg {}", i);
break;
case PPCCachedReg::LocationType::SpeculativeImmediate:
// We can have a cached value without a host register through speculative constants.
// It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE,
// if PPCSTATE is modified externally (e.g. fallback to interpreter).
m_regs[i].SetFlushed();
break;
case PPCCachedReg::LocationType::Bound:
case PPCCachedReg::LocationType::Immediate:
StoreFromRegister(i);
break;
}
StoreFromRegister(i, FlushMode::Full, ignore_discarded_registers);
}
}
@ -427,9 +394,9 @@ void RegCache::Reset(BitSet32 pregs)
{
for (preg_t i : pregs)
{
ASSERT_MSG(DYNA_REC, !m_regs[i].IsAway(),
ASSERT_MSG(DYNA_REC, !m_regs[i].IsInHostRegister(),
"Attempted to reset a loaded register (did you mean to flush it?)");
m_regs[i].SetFlushed();
m_regs[i].SetFlushed(false);
}
}
@ -465,7 +432,7 @@ void RegCache::PreloadRegisters(BitSet32 to_preload)
{
if (NumFreeRegisters() < 2)
return;
if (!R(preg).IsImm())
if (!IsImm(preg))
BindToRegister(preg, true, false);
}
}
@ -492,84 +459,68 @@ void RegCache::FlushX(X64Reg reg)
}
}
void RegCache::DiscardRegContentsIfCached(preg_t preg)
void RegCache::DiscardRegister(preg_t preg)
{
if (m_regs[preg].IsBound())
if (m_regs[preg].IsInHostRegister())
{
X64Reg xr = m_regs[preg].Location()->GetSimpleReg();
X64Reg xr = m_regs[preg].GetHostRegister();
m_xregs[xr].Unbind();
m_regs[preg].SetFlushed();
}
m_regs[preg].SetDiscarded();
}
void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty)
{
if (!m_regs[i].IsBound())
if (!m_regs[i].IsInHostRegister())
{
X64Reg xr = GetFreeXReg();
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg {} already dirty", Common::ToUnderlying(xr));
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register");
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Invalid transaction state");
m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway());
m_xregs[xr].SetBoundTo(i);
if (doLoad)
{
ASSERT_MSG(DYNA_REC, !m_regs[i].IsDiscarded(), "Attempted to load a discarded value");
LoadRegister(i, xr);
}
ASSERT_MSG(DYNA_REC,
std::ranges::none_of(
m_regs, [xr](const auto& l) { return l.has_value() && l->IsSimpleReg(xr); },
&PPCCachedReg::Location),
std::ranges::none_of(m_regs,
[xr](const auto& r) {
return r.IsInHostRegister() && r.GetHostRegister() == xr;
}),
"Xreg {} already bound", Common::ToUnderlying(xr));
m_regs[i].SetBoundTo(xr);
m_regs[i].SetInHostRegister(xr, makeDirty);
}
else
{
// reg location must be simplereg; memory locations
// and immediates are taken care of above.
if (makeDirty)
m_xregs[RX(i)].MakeDirty();
m_regs[i].SetDirty();
}
if (makeDirty)
DiscardImm(i);
ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(),
"WTF, this reg ({} -> {}) should have been flushed", i, Common::ToUnderlying(RX(i)));
}
void RegCache::StoreFromRegister(preg_t i, FlushMode mode)
void RegCache::StoreFromRegister(preg_t i, FlushMode mode,
IgnoreDiscardedRegisters ignore_discarded_registers)
{
// When a transaction is in progress, allowing the store would overwrite the old value.
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction on {} is in progress!", i);
bool doStore = false;
if (!m_regs[i].IsInDefaultLocation())
StoreRegister(i, GetDefaultLocation(i), ignore_discarded_registers);
switch (m_regs[i].GetLocationType())
{
case PPCCachedReg::LocationType::Default:
case PPCCachedReg::LocationType::Discarded:
case PPCCachedReg::LocationType::SpeculativeImmediate:
return;
case PPCCachedReg::LocationType::Bound:
{
X64Reg xr = RX(i);
doStore = m_xregs[xr].IsDirty();
if (mode == FlushMode::Full)
m_xregs[xr].Unbind();
break;
}
case PPCCachedReg::LocationType::Immediate:
doStore = true;
break;
}
if (mode == FlushMode::Full && m_regs[i].IsInHostRegister())
m_xregs[m_regs[i].GetHostRegister()].Unbind();
if (doStore)
StoreRegister(i, GetDefaultLocation(i));
if (mode == FlushMode::Full)
m_regs[i].SetFlushed();
m_regs[i].SetFlushed(mode != FlushMode::Full);
}
X64Reg RegCache::GetFreeXReg()
@ -634,7 +585,7 @@ float RegCache::ScoreRegister(X64Reg xreg) const
// bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly
// right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative
// to the number of extra stores it causes.
if (m_xregs[xreg].IsDirty())
if (!m_regs[preg].IsInDefaultLocation())
score += 2;
// If the register isn't actually needed in a physical register for a later instruction,
@ -655,16 +606,10 @@ float RegCache::ScoreRegister(X64Reg xreg) const
return score;
}
const OpArg& RegCache::R(preg_t preg) const
{
ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg);
return m_regs[preg].Location().value();
}
X64Reg RegCache::RX(preg_t preg) const
{
ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - {}", preg);
return m_regs[preg].Location()->GetSimpleReg();
ASSERT_MSG(DYNA_REC, m_regs[preg].IsInHostRegister(), "Not in host register - {}", preg);
return m_regs[preg].GetHostRegister();
}
void RegCache::Lock(preg_t preg)
@ -720,29 +665,23 @@ void RegCache::Realize(preg_t preg)
return;
}
switch (m_regs[preg].GetLocationType())
if (IsImm(preg))
{
case PPCCachedReg::LocationType::Default:
if (kill_mem)
{
do_bind();
return;
}
m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem);
return;
case PPCCachedReg::LocationType::Discarded:
case PPCCachedReg::LocationType::Bound:
do_bind();
return;
case PPCCachedReg::LocationType::Immediate:
case PPCCachedReg::LocationType::SpeculativeImmediate:
if (dirty || kill_imm)
{
do_bind();
return;
}
m_constraints[preg].Realized(RCConstraint::RealizedLoc::Imm);
break;
else
m_constraints[preg].Realized(RCConstraint::RealizedLoc::Imm);
}
else if (!m_regs[preg].IsInHostRegister())
{
if (kill_mem)
do_bind();
else
m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem);
}
else
{
do_bind();
}
}

View File

@ -157,12 +157,14 @@ public:
bool IsImm(Args... pregs) const
{
static_assert(sizeof...(pregs) > 0);
return (R(pregs).IsImm() && ...);
return (IsImm(preg_t(pregs)) && ...);
}
u32 Imm32(preg_t preg) const { return R(preg).Imm32(); }
s32 SImm32(preg_t preg) const { return R(preg).SImm32(); }
bool IsBound(preg_t preg) const { return m_regs[preg].IsBound(); }
virtual bool IsImm(preg_t preg) const = 0;
virtual u32 Imm32(preg_t preg) const = 0;
virtual s32 SImm32(preg_t preg) const = 0;
bool IsBound(preg_t preg) const { return m_regs[preg].IsInHostRegister(); }
RCOpArg Use(preg_t preg, RCMode mode);
RCOpArg UseNoImm(preg_t preg, RCMode mode);
@ -191,8 +193,10 @@ protected:
friend class RCForkGuard;
virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0;
virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0;
virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc,
IgnoreDiscardedRegisters ignore_discarded_registers) = 0;
virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0;
virtual void DiscardImm(preg_t preg) = 0;
virtual std::span<const Gen::X64Reg> GetAllocationOrder() const = 0;
@ -200,16 +204,18 @@ protected:
virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0;
void FlushX(Gen::X64Reg reg);
void DiscardRegContentsIfCached(preg_t preg);
void DiscardRegister(preg_t preg);
void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true);
void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::Full);
void StoreFromRegister(
preg_t preg, FlushMode mode = FlushMode::Full,
IgnoreDiscardedRegisters ignore_discarded_registers = IgnoreDiscardedRegisters::No);
Gen::X64Reg GetFreeXReg();
int NumFreeRegisters() const;
float ScoreRegister(Gen::X64Reg xreg) const;
const Gen::OpArg& R(preg_t preg) const;
virtual Gen::OpArg R(preg_t preg) const = 0;
Gen::X64Reg RX(preg_t preg) const;
void Lock(preg_t preg);

View File

@ -33,6 +33,7 @@
#include "Core/PatchEngine.h"
#include "Core/PowerPC/Interpreter/Interpreter.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitCommon/ConstantPropagation.h"
#include "Core/PowerPC/JitInterface.h"
#include "Core/PowerPC/PowerPC.h"
#include "Core/System.h"
@ -278,6 +279,9 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
fpr.ResetRegisters(js.op->GetFregsOut());
gpr.ResetCRRegisters(js.op->crOut);
// We must also update constant propagation
m_constant_propagation.ClearGPRs(js.op->regsOut);
if (js.op->opinfo->flags & FL_SET_MSR)
EmitUpdateMembase();
@ -1169,6 +1173,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
gpr.Start(js.gpa);
fpr.Start(js.fpa);
m_constant_propagation.Clear();
if (!js.noSpeculativeConstantsAddresses.contains(js.blockStart))
{
IntializeSpeculativeConstants();
@ -1341,9 +1347,38 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
FlushCarry();
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
}
m_constant_propagation.Clear();
CompileInstruction(op);
CompileInstruction(op);
}
else
{
const JitCommon::ConstantPropagationResult constant_propagation_result =
m_constant_propagation.EvaluateInstruction(op.inst, opinfo->flags);
if (!constant_propagation_result.instruction_fully_executed)
CompileInstruction(op);
m_constant_propagation.Apply(constant_propagation_result);
if (constant_propagation_result.gpr >= 0)
{
// Mark the GPR as dirty in the register cache
gpr.SetImmediate(constant_propagation_result.gpr, constant_propagation_result.gpr_value);
}
if (constant_propagation_result.instruction_fully_executed)
{
if (constant_propagation_result.carry)
ComputeCarry(*constant_propagation_result.carry);
if (constant_propagation_result.overflow)
GenerateConstantOverflow(*constant_propagation_result.overflow);
if (constant_propagation_result.compute_rc)
ComputeRC0(constant_propagation_result.gpr_value);
}
}
js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst;

View File

@ -16,6 +16,7 @@
#include "Core/PowerPC/JitArm64/JitArm64Cache.h"
#include "Core/PowerPC/JitArm64/JitArm64_RegCache.h"
#include "Core/PowerPC/JitArmCommon/BackPatch.h"
#include "Core/PowerPC/JitCommon/ConstantPropagation.h"
#include "Core/PowerPC/JitCommon/JitAsmCommon.h"
#include "Core/PowerPC/JitCommon/JitBase.h"
#include "Core/PowerPC/PPCAnalyst.h"
@ -35,6 +36,8 @@ public:
void Init() override;
void Shutdown() override;
JitCommon::ConstantPropagation& GetConstantPropagation() { return m_constant_propagation; }
JitBaseBlockCache* GetBlockCache() override { return &blocks; }
bool IsInCodeSpace(const u8* ptr) const { return IsInSpace(ptr); }
bool HandleFault(uintptr_t access_address, SContext* ctx) override;
@ -376,13 +379,14 @@ protected:
void ComputeRC0(Arm64Gen::ARM64Reg reg);
void ComputeRC0(u32 imm);
void GenerateConstantOverflow(bool overflow);
void ComputeCarry(Arm64Gen::ARM64Reg reg); // reg must contain 0 or 1
void ComputeCarry(bool carry);
void ComputeCarry();
void LoadCarry();
void FlushCarry();
void reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
void reg_imm(u32 d, u32 a, u32 value,
void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, u64,
Arm64Gen::ARM64Reg),
bool Rc = false);
@ -396,6 +400,8 @@ protected:
Arm64GPRCache gpr;
Arm64FPRCache fpr;
JitCommon::ConstantPropagation m_constant_propagation;
JitArm64BlockCache blocks{*this};
Arm64Gen::ARM64FloatEmitter m_float_emit;

View File

@ -39,6 +39,25 @@ void JitArm64::ComputeRC0(u32 imm)
MOVI2R(gpr.CR(0), s64(s32(imm)));
}
void JitArm64::GenerateConstantOverflow(bool overflow)
{
ARM64Reg WA = gpr.GetReg();
if (overflow)
{
MOVI2R(WA, XER_OV_MASK | XER_SO_MASK);
STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov));
}
else
{
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov));
AND(WA, WA, LogicalImm(~XER_OV_MASK, GPRSize::B32));
STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov));
}
gpr.Unlock(WA);
}
void JitArm64::ComputeCarry(ARM64Reg reg)
{
js.carryFlag = CarryFlag::InPPCState;
@ -136,41 +155,17 @@ void JitArm64::FlushCarry()
js.carryFlag = CarryFlag::InPPCState;
}
void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
void JitArm64::reg_imm(u32 d, u32 a, u32 value,
void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc)
{
if (gpr.IsImm(a))
gpr.BindToRegister(d, d == a);
{
gpr.SetImmediate(d, do_op(gpr.GetImm(a), value));
if (Rc)
ComputeRC0(gpr.GetImm(d));
auto WA = gpr.GetScopedReg();
(this->*op)(gpr.R(d), gpr.R(a), value, WA);
}
else
{
gpr.BindToRegister(d, d == a);
{
auto WA = gpr.GetScopedReg();
(this->*op)(gpr.R(d), gpr.R(a), value, WA);
}
if (Rc)
ComputeRC0(gpr.R(d));
}
}
static constexpr u32 BitOR(u32 a, u32 b)
{
return a | b;
}
static constexpr u32 BitAND(u32 a, u32 b)
{
return a & b;
}
static constexpr u32 BitXOR(u32 a, u32 b)
{
return a ^ b;
if (Rc)
ComputeRC0(gpr.R(d));
}
void JitArm64::arith_imm(UGeckoInstruction inst)
@ -184,34 +179,21 @@ void JitArm64::arith_imm(UGeckoInstruction inst)
case 24: // ori
case 25: // oris
{
// check for nop
if (a == s && inst.UIMM == 0)
{
// NOP
return;
}
const u32 immediate = inst.OPCD == 24 ? inst.UIMM : inst.UIMM << 16;
reg_imm(a, s, immediate, BitOR, &ARM64XEmitter::ORRI2R);
reg_imm(a, s, immediate, &ARM64XEmitter::ORRI2R);
break;
}
case 28: // andi
reg_imm(a, s, inst.UIMM, BitAND, &ARM64XEmitter::ANDI2R, true);
reg_imm(a, s, inst.UIMM, &ARM64XEmitter::ANDI2R, true);
break;
case 29: // andis
reg_imm(a, s, inst.UIMM << 16, BitAND, &ARM64XEmitter::ANDI2R, true);
reg_imm(a, s, inst.UIMM << 16, &ARM64XEmitter::ANDI2R, true);
break;
case 26: // xori
case 27: // xoris
{
if (a == s && inst.UIMM == 0)
{
// NOP
return;
}
const u32 immediate = inst.OPCD == 26 ? inst.UIMM : inst.UIMM << 16;
reg_imm(a, s, immediate, BitXOR, &ARM64XEmitter::EORI2R);
reg_imm(a, s, immediate, &ARM64XEmitter::EORI2R);
break;
}
}
@ -231,17 +213,10 @@ void JitArm64::addix(UGeckoInstruction inst)
if (a)
{
if (gpr.IsImm(a))
{
gpr.SetImmediate(d, gpr.GetImm(a) + imm);
}
else
{
gpr.BindToRegister(d, d == a);
gpr.BindToRegister(d, d == a);
auto WA = gpr.GetScopedReg();
ADDI2R(gpr.R(d), gpr.R(a), imm, WA);
}
auto WA = gpr.GetScopedReg();
ADDI2R(gpr.R(d), gpr.R(a), imm, WA);
}
else
{
@ -256,29 +231,7 @@ void JitArm64::boolX(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff);
int a = inst.RA, s = inst.RS, b = inst.RB;
if (gpr.IsImm(s) && gpr.IsImm(b))
{
if (inst.SUBOP10 == 28) // andx
gpr.SetImmediate(a, (u32)gpr.GetImm(s) & (u32)gpr.GetImm(b));
else if (inst.SUBOP10 == 476) // nandx
gpr.SetImmediate(a, ~((u32)gpr.GetImm(s) & (u32)gpr.GetImm(b)));
else if (inst.SUBOP10 == 60) // andcx
gpr.SetImmediate(a, (u32)gpr.GetImm(s) & (~(u32)gpr.GetImm(b)));
else if (inst.SUBOP10 == 444) // orx
gpr.SetImmediate(a, (u32)gpr.GetImm(s) | (u32)gpr.GetImm(b));
else if (inst.SUBOP10 == 124) // norx
gpr.SetImmediate(a, ~((u32)gpr.GetImm(s) | (u32)gpr.GetImm(b)));
else if (inst.SUBOP10 == 412) // orcx
gpr.SetImmediate(a, (u32)gpr.GetImm(s) | (~(u32)gpr.GetImm(b)));
else if (inst.SUBOP10 == 316) // xorx
gpr.SetImmediate(a, (u32)gpr.GetImm(s) ^ (u32)gpr.GetImm(b));
else if (inst.SUBOP10 == 284) // eqvx
gpr.SetImmediate(a, ~((u32)gpr.GetImm(s) ^ (u32)gpr.GetImm(b)));
if (inst.Rc)
ComputeRC0(gpr.GetImm(a));
}
else if (s == b)
if (s == b)
{
if ((inst.SUBOP10 == 28 /* andx */) || (inst.SUBOP10 == 444 /* orx */))
{
@ -523,14 +476,7 @@ void JitArm64::addx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b);
gpr.SetImmediate(d, i + j);
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else if (gpr.IsImm(a) || gpr.IsImm(b))
if (gpr.IsImm(a) || gpr.IsImm(b))
{
int imm_reg = gpr.IsImm(a) ? a : b;
int in_reg = gpr.IsImm(a) ? b : a;
@ -560,19 +506,10 @@ void JitArm64::extsXx(UGeckoInstruction inst)
int a = inst.RA, s = inst.RS;
int size = inst.SUBOP10 == 922 ? 16 : 8;
if (gpr.IsImm(s))
{
gpr.SetImmediate(a, (u32)(s32)(size == 16 ? (s16)gpr.GetImm(s) : (s8)gpr.GetImm(s)));
if (inst.Rc)
ComputeRC0(gpr.GetImm(a));
}
else
{
gpr.BindToRegister(a, a == s);
SBFM(gpr.R(a), gpr.R(s), 0, size - 1);
if (inst.Rc)
ComputeRC0(gpr.R(a));
}
gpr.BindToRegister(a, a == s);
SBFM(gpr.R(a), gpr.R(s), 0, size - 1);
if (inst.Rc)
ComputeRC0(gpr.R(a));
}
void JitArm64::cntlzwx(UGeckoInstruction inst)
@ -582,19 +519,10 @@ void JitArm64::cntlzwx(UGeckoInstruction inst)
int a = inst.RA;
int s = inst.RS;
if (gpr.IsImm(s))
{
gpr.SetImmediate(a, static_cast<u32>(std::countl_zero(gpr.GetImm(s))));
if (inst.Rc)
ComputeRC0(gpr.GetImm(a));
}
else
{
gpr.BindToRegister(a, a == s);
CLZ(gpr.R(a), gpr.R(s));
if (inst.Rc)
ComputeRC0(gpr.R(a));
}
gpr.BindToRegister(a, a == s);
CLZ(gpr.R(a), gpr.R(s));
if (inst.Rc)
ComputeRC0(gpr.R(a));
}
void JitArm64::negx(UGeckoInstruction inst)
@ -606,19 +534,10 @@ void JitArm64::negx(UGeckoInstruction inst)
FALLBACK_IF(inst.OE);
if (gpr.IsImm(a))
{
gpr.SetImmediate(d, ~((u32)gpr.GetImm(a)) + 1);
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else
{
gpr.BindToRegister(d, d == a);
SUB(gpr.R(d), ARM64Reg::WSP, gpr.R(a));
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
gpr.BindToRegister(d, d == a);
SUB(gpr.R(d), ARM64Reg::WSP, gpr.R(a));
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
void JitArm64::cmp(UGeckoInstruction inst)
@ -800,15 +719,7 @@ void JitArm64::cmpli(UGeckoInstruction inst)
void JitArm64::rlwinmx_internal(UGeckoInstruction inst, u32 sh)
{
u32 a = inst.RA, s = inst.RS;
const u32 mask = MakeRotationMask(inst.MB, inst.ME);
if (gpr.IsImm(inst.RS))
{
gpr.SetImmediate(a, std::rotl(gpr.GetImm(s), sh) & mask);
if (inst.Rc)
ComputeRC0(gpr.GetImm(a));
return;
}
if (mask == 0)
{
@ -899,17 +810,7 @@ void JitArm64::srawix(UGeckoInstruction inst)
int amount = inst.SH;
bool inplace_carry = CanMergeNextInstructions(1) && js.op[1].wantsCAInFlags;
if (gpr.IsImm(s))
{
s32 imm = (s32)gpr.GetImm(s);
gpr.SetImmediate(a, imm >> amount);
ComputeCarry(amount != 0 && (imm < 0) && (u32(imm) << (32 - amount)));
if (inst.Rc)
ComputeRC0(gpr.GetImm(a));
}
else if (amount == 0)
if (amount == 0)
{
gpr.BindToRegister(a, a == s);
ARM64Reg RA = gpr.R(a);
@ -970,42 +871,21 @@ void JitArm64::addic(UGeckoInstruction inst)
int a = inst.RA, d = inst.RD;
bool rc = inst.OPCD == 13;
s32 simm = inst.SIMM_16;
u32 imm = (u32)simm;
if (gpr.IsImm(a))
gpr.BindToRegister(d, d == a);
{
u32 i = gpr.GetImm(a);
gpr.SetImmediate(d, i + imm);
bool has_carry = Interpreter::Helper_Carry(i, imm);
ComputeCarry(has_carry);
if (rc)
ComputeRC0(gpr.GetImm(d));
auto WA = gpr.GetScopedReg();
CARRY_IF_NEEDED(ADDI2R, ADDSI2R, gpr.R(d), gpr.R(a), simm, WA);
}
else
{
gpr.BindToRegister(d, d == a);
{
auto WA = gpr.GetScopedReg();
CARRY_IF_NEEDED(ADDI2R, ADDSI2R, gpr.R(d), gpr.R(a), simm, WA);
}
ComputeCarry();
if (rc)
ComputeRC0(gpr.R(d));
}
ComputeCarry();
if (rc)
ComputeRC0(gpr.R(d));
}
bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc)
{
if (imm == 0)
{
// Multiplication by zero (0).
gpr.SetImmediate(d, 0);
if (rc)
ComputeRC0(gpr.GetImm(d));
}
else if (imm == 1)
if (imm == 1)
{
// Multiplication by one (1).
if (d != a)
@ -1072,12 +952,7 @@ void JitArm64::mulli(UGeckoInstruction inst)
int a = inst.RA, d = inst.RD;
if (gpr.IsImm(a))
{
s32 i = (s32)gpr.GetImm(a);
gpr.SetImmediate(d, i * inst.SIMM_16);
}
else if (MultiplyImmediate((u32)(s32)inst.SIMM_16, a, d, false))
if (MultiplyImmediate((u32)(s32)inst.SIMM_16, a, d, false))
{
// Code is generated inside MultiplyImmediate, nothing to be done here.
}
@ -1102,15 +977,8 @@ void JitArm64::mullwx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b);
gpr.SetImmediate(d, i * j);
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else if ((gpr.IsImm(a) && MultiplyImmediate(gpr.GetImm(a), b, d, inst.Rc)) ||
(gpr.IsImm(b) && MultiplyImmediate(gpr.GetImm(b), a, d, inst.Rc)))
if ((gpr.IsImm(a) && MultiplyImmediate(gpr.GetImm(a), b, d, inst.Rc)) ||
(gpr.IsImm(b) && MultiplyImmediate(gpr.GetImm(b), a, d, inst.Rc)))
{
// Code is generated inside MultiplyImmediate, nothing to be done here.
}
@ -1130,22 +998,12 @@ void JitArm64::mulhwx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b);
gpr.SetImmediate(d, (u32)((u64)(((s64)i * (s64)j)) >> 32));
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else
{
gpr.BindToRegister(d, d == a || d == b);
SMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b));
LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32);
gpr.BindToRegister(d, d == a || d == b);
SMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b));
LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32);
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
void JitArm64::mulhwux(UGeckoInstruction inst)
@ -1155,22 +1013,12 @@ void JitArm64::mulhwux(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
gpr.SetImmediate(d, (u32)(((u64)i * (u64)j) >> 32));
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else
{
gpr.BindToRegister(d, d == a || d == b);
UMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b));
LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32);
gpr.BindToRegister(d, d == a || d == b);
UMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b));
LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32);
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
void JitArm64::addzex(UGeckoInstruction inst)
@ -1274,26 +1122,10 @@ void JitArm64::subfx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (a == b)
{
gpr.SetImmediate(d, 0);
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else if (gpr.IsImm(a) && gpr.IsImm(b))
{
u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
gpr.SetImmediate(d, j - i);
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else
{
gpr.BindToRegister(d, d == a || d == b);
SUB(gpr.R(d), gpr.R(b), gpr.R(a));
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
gpr.BindToRegister(d, d == a || d == b);
SUB(gpr.R(d), gpr.R(b), gpr.R(a));
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
void JitArm64::subfex(UGeckoInstruction inst)
@ -1435,17 +1267,7 @@ void JitArm64::subfcx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
u32 a_imm = gpr.GetImm(a), b_imm = gpr.GetImm(b);
gpr.SetImmediate(d, b_imm - a_imm);
ComputeCarry(a_imm == 0 || Interpreter::Helper_Carry(b_imm, 0u - a_imm));
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else if (gpr.IsImm(a, 0))
if (gpr.IsImm(a, 0))
{
if (d != b)
{
@ -1559,44 +1381,34 @@ void JitArm64::subfic(UGeckoInstruction inst)
int a = inst.RA, d = inst.RD;
s32 imm = inst.SIMM_16;
if (gpr.IsImm(a))
{
u32 a_imm = gpr.GetImm(a);
const bool will_read = d == a;
gpr.BindToRegister(d, will_read);
ARM64Reg RD = gpr.R(d);
gpr.SetImmediate(d, imm - a_imm);
ComputeCarry(a_imm == 0 || Interpreter::Helper_Carry(imm, 0u - a_imm));
if (imm == -1)
{
// d = -1 - a = ~a
MVN(RD, gpr.R(a));
// CA is always set in this case
ComputeCarry(true);
}
else
{
const bool will_read = d == a;
gpr.BindToRegister(d, will_read);
ARM64Reg RD = gpr.R(d);
const bool is_zero = imm == 0;
if (imm == -1)
// d = imm - a
{
// d = -1 - a = ~a
MVN(RD, gpr.R(a));
// CA is always set in this case
ComputeCarry(true);
}
else
{
const bool is_zero = imm == 0;
// d = imm - a
Arm64GPRCache::ScopedARM64Reg WA(ARM64Reg::WZR);
if (!is_zero)
{
Arm64GPRCache::ScopedARM64Reg WA(ARM64Reg::WZR);
if (!is_zero)
{
WA = will_read ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD);
MOVI2R(WA, imm);
}
CARRY_IF_NEEDED(SUB, SUBS, RD, WA, gpr.R(a));
WA = will_read ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD);
MOVI2R(WA, imm);
}
ComputeCarry();
CARRY_IF_NEEDED(SUB, SUBS, RD, WA, gpr.R(a));
}
ComputeCarry();
}
}
@ -1726,25 +1538,12 @@ void JitArm64::addcx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
gpr.SetImmediate(d, i + j);
gpr.BindToRegister(d, d == a || d == b);
CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), gpr.R(b));
bool has_carry = Interpreter::Helper_Carry(i, j);
ComputeCarry(has_carry);
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else
{
gpr.BindToRegister(d, d == a || d == b);
CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), gpr.R(b));
ComputeCarry();
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
ComputeCarry();
if (inst.Rc)
ComputeRC0(gpr.R(d));
}
void JitArm64::divwux(UGeckoInstruction inst)
@ -1755,15 +1554,7 @@ void JitArm64::divwux(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
u32 i = gpr.GetImm(a), j = gpr.GetImm(b);
gpr.SetImmediate(d, j == 0 ? 0 : i / j);
if (inst.Rc)
ComputeRC0(gpr.GetImm(d));
}
else if (gpr.IsImm(b))
if (gpr.IsImm(b))
{
const u32 divisor = gpr.GetImm(b);
@ -1834,28 +1625,7 @@ void JitArm64::divwx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, d = inst.RD;
if (gpr.IsImm(a) && gpr.IsImm(b))
{
s32 imm_a = gpr.GetImm(a);
s32 imm_b = gpr.GetImm(b);
u32 imm_d;
if (imm_b == 0 || (static_cast<u32>(imm_a) == 0x80000000 && imm_b == -1))
{
if (imm_a < 0)
imm_d = 0xFFFFFFFF;
else
imm_d = 0;
}
else
{
imm_d = static_cast<u32>(imm_a / imm_b);
}
gpr.SetImmediate(d, imm_d);
if (inst.Rc)
ComputeRC0(imm_d);
}
else if (gpr.IsImm(a, 0))
if (gpr.IsImm(a, 0))
{
// Zero divided by anything is always zero
gpr.SetImmediate(d, 0);
@ -2029,21 +1799,7 @@ void JitArm64::slwx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, s = inst.RS;
if (gpr.IsImm(b) && gpr.IsImm(s))
{
u32 i = gpr.GetImm(s), j = gpr.GetImm(b);
gpr.SetImmediate(a, (j & 0x20) ? 0 : i << (j & 0x1F));
if (inst.Rc)
ComputeRC0(gpr.GetImm(a));
}
else if (gpr.IsImm(s, 0))
{
gpr.SetImmediate(a, 0);
if (inst.Rc)
ComputeRC0(0);
}
else if (gpr.IsImm(b))
if (gpr.IsImm(b))
{
u32 i = gpr.GetImm(b);
if (i & 0x20)
@ -2080,15 +1836,7 @@ void JitArm64::srwx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, s = inst.RS;
if (gpr.IsImm(b) && gpr.IsImm(s))
{
u32 i = gpr.GetImm(s), amount = gpr.GetImm(b);
gpr.SetImmediate(a, (amount & 0x20) ? 0 : i >> (amount & 0x1F));
if (inst.Rc)
ComputeRC0(gpr.GetImm(a));
}
else if (gpr.IsImm(b))
if (gpr.IsImm(b))
{
u32 amount = gpr.GetImm(b);
if (amount & 0x20)
@ -2123,34 +1871,7 @@ void JitArm64::srawx(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB, s = inst.RS;
if (gpr.IsImm(b) && gpr.IsImm(s))
{
s32 i = gpr.GetImm(s), amount = gpr.GetImm(b);
if (amount & 0x20)
{
gpr.SetImmediate(a, i & 0x80000000 ? 0xFFFFFFFF : 0);
ComputeCarry(i & 0x80000000 ? true : false);
}
else
{
amount &= 0x1F;
gpr.SetImmediate(a, i >> amount);
ComputeCarry(amount != 0 && i < 0 && (u32(i) << (32 - amount)));
}
if (inst.Rc)
ComputeRC0(gpr.GetImm(a));
return;
}
else if (gpr.IsImm(s, 0))
{
gpr.SetImmediate(a, 0);
ComputeCarry(false);
if (inst.Rc)
ComputeRC0(0);
return;
}
else if (gpr.IsImm(b))
if (gpr.IsImm(b))
{
int amount = gpr.GetImm(b);
@ -2254,74 +1975,64 @@ void JitArm64::rlwimix(UGeckoInstruction inst)
const u32 width = inst.ME - inst.MB + 1;
const u32 rot_dist = inst.SH ? 32 - inst.SH : 0;
if (gpr.IsImm(a) && gpr.IsImm(s))
if (mask == 0 || (a == s && inst.SH == 0))
{
u32 res = (gpr.GetImm(a) & ~mask) | (std::rotl(gpr.GetImm(s), inst.SH) & mask);
gpr.SetImmediate(a, res);
if (inst.Rc)
ComputeRC0(res);
// Do Nothing
}
else
else if (mask == 0xFFFFFFFF)
{
if (mask == 0 || (a == s && inst.SH == 0))
{
// Do Nothing
}
else if (mask == 0xFFFFFFFF)
{
if (inst.SH || a != s)
gpr.BindToRegister(a, a == s);
if (inst.SH || a != s)
gpr.BindToRegister(a, a == s);
if (inst.SH)
ROR(gpr.R(a), gpr.R(s), rot_dist);
else if (a != s)
MOV(gpr.R(a), gpr.R(s));
}
else if (lsb == 0 && inst.MB <= inst.ME && rot_dist + width <= 32)
if (inst.SH)
ROR(gpr.R(a), gpr.R(s), rot_dist);
else if (a != s)
MOV(gpr.R(a), gpr.R(s));
}
else if (lsb == 0 && inst.MB <= inst.ME && rot_dist + width <= 32)
{
// Destination is in least significant position
// No mask inversion
// Source field pre-rotation is contiguous
gpr.BindToRegister(a, true);
BFXIL(gpr.R(a), gpr.R(s), rot_dist, width);
}
else if (inst.SH == 0 && inst.MB <= inst.ME)
{
// No rotation
// No mask inversion
gpr.BindToRegister(a, true);
auto WA = gpr.GetScopedReg();
UBFX(WA, gpr.R(s), lsb, width);
BFI(gpr.R(a), WA, lsb, width);
}
else if (inst.SH && inst.MB <= inst.ME)
{
// No mask inversion
gpr.BindToRegister(a, true);
if ((rot_dist + lsb) % 32 == 0)
{
// Destination is in least significant position
// No mask inversion
// Source field pre-rotation is contiguous
gpr.BindToRegister(a, true);
BFXIL(gpr.R(a), gpr.R(s), rot_dist, width);
}
else if (inst.SH == 0 && inst.MB <= inst.ME)
{
// No rotation
// No mask inversion
gpr.BindToRegister(a, true);
auto WA = gpr.GetScopedReg();
UBFX(WA, gpr.R(s), lsb, width);
BFI(gpr.R(a), WA, lsb, width);
}
else if (inst.SH && inst.MB <= inst.ME)
{
// No mask inversion
gpr.BindToRegister(a, true);
if ((rot_dist + lsb) % 32 == 0)
{
BFI(gpr.R(a), gpr.R(s), lsb, width);
}
else
{
auto WA = gpr.GetScopedReg();
ROR(WA, gpr.R(s), (rot_dist + lsb) % 32);
BFI(gpr.R(a), WA, lsb, width);
}
BFI(gpr.R(a), gpr.R(s), lsb, width);
}
else
{
gpr.BindToRegister(a, true);
ARM64Reg RA = gpr.R(a);
auto WA = gpr.GetScopedReg();
const u32 inverted_mask = ~mask;
AND(WA, gpr.R(s), LogicalImm(std::rotl(mask, rot_dist), GPRSize::B32));
AND(RA, RA, LogicalImm(inverted_mask, GPRSize::B32));
ORR(RA, RA, WA, ArithOption(WA, ShiftType::ROR, rot_dist));
ROR(WA, gpr.R(s), (rot_dist + lsb) % 32);
BFI(gpr.R(a), WA, lsb, width);
}
if (inst.Rc)
ComputeRC0(gpr.R(a));
}
else
{
gpr.BindToRegister(a, true);
ARM64Reg RA = gpr.R(a);
auto WA = gpr.GetScopedReg();
const u32 inverted_mask = ~mask;
AND(WA, gpr.R(s), LogicalImm(std::rotl(mask, rot_dist), GPRSize::B32));
AND(RA, RA, LogicalImm(inverted_mask, GPRSize::B32));
ORR(RA, RA, WA, ArithOption(WA, ShiftType::ROR, rot_dist));
}
if (inst.Rc)
ComputeRC0(gpr.R(a));
}

View File

@ -114,8 +114,7 @@ void Arm64RegCache::FlushMostStaleRegister()
const auto& reg = m_guest_registers[i];
const u32 last_used = reg.GetLastUsed();
if (last_used > most_stale_amount && reg.GetType() != RegType::NotLoaded &&
reg.GetType() != RegType::Discarded && reg.GetType() != RegType::Immediate)
if (last_used > most_stale_amount && reg.IsInHostRegister())
{
most_stale_preg = i;
most_stale_amount = last_used;
@ -137,12 +136,6 @@ void Arm64RegCache::DiscardRegister(size_t preg)
UnlockRegister(host_reg);
}
// GPR Cache
constexpr size_t GUEST_GPR_COUNT = 32;
constexpr size_t GUEST_CR_COUNT = 8;
constexpr size_t GUEST_GPR_OFFSET = 0;
constexpr size_t GUEST_CR_OFFSET = GUEST_GPR_COUNT;
Arm64GPRCache::Arm64GPRCache() : Arm64RegCache(GUEST_GPR_COUNT + GUEST_CR_COUNT)
{
}
@ -151,6 +144,19 @@ void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats& stats)
{
}
// Returns if a register is set as an immediate. Only valid for guest GPRs.
bool Arm64GPRCache::IsImm(size_t preg) const
{
return m_jit->GetConstantPropagation().HasGPR(preg);
}
// Gets the immediate that a register is set to. Only valid for guest GPRs.
u32 Arm64GPRCache::GetImm(size_t preg) const
{
ASSERT(m_jit->GetConstantPropagation().HasGPR(preg));
return m_jit->GetConstantPropagation().GetGPR(preg);
}
bool Arm64GPRCache::IsCallerSaved(ARM64Reg reg) const
{
return ARM64XEmitter::CALLER_SAVED_GPRS[DecodeReg(reg)];
@ -192,11 +198,12 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg
GuestRegInfo guest_reg = GetGuestByIndex(index);
OpArg& reg = guest_reg.reg;
size_t bitsize = guest_reg.bitsize;
const bool is_gpr = index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT;
if (reg.GetType() == RegType::Register)
if (reg.IsInHostRegister())
{
ARM64Reg host_reg = reg.GetReg();
if (reg.IsDirty())
if (!reg.IsInPPCState())
m_emit->STR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
if (mode == FlushMode::All)
@ -205,11 +212,12 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg
reg.Flush();
}
}
else if (reg.GetType() == RegType::Immediate)
else if (is_gpr && IsImm(index - GUEST_GPR_OFFSET))
{
if (reg.IsDirty())
if (!reg.IsInPPCState())
{
if (!reg.GetImm())
const u32 imm = GetImm(index - GUEST_GPR_OFFSET);
if (imm == 0)
{
m_emit->STR(IndexType::Unsigned, bitsize == 64 ? ARM64Reg::ZR : ARM64Reg::WZR, PPC_REG,
u32(guest_reg.ppc_offset));
@ -231,7 +239,7 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg
const ARM64Reg encoded_tmp_reg = bitsize != 64 ? tmp_reg : EncodeRegTo64(tmp_reg);
m_emit->MOVI2R(encoded_tmp_reg, reg.GetImm());
m_emit->MOVI2R(encoded_tmp_reg, imm);
m_emit->STR(IndexType::Unsigned, encoded_tmp_reg, PPC_REG, u32(guest_reg.ppc_offset));
if (allocated_tmp_reg)
@ -250,10 +258,10 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r
for (auto iter = regs.begin(); iter != regs.end(); ++iter)
{
const int i = *iter;
OpArg& reg = m_guest_registers[GUEST_GPR_OFFSET + i];
ASSERT_MSG(DYNA_REC,
ignore_discarded_registers != IgnoreDiscardedRegisters::No ||
m_guest_registers[GUEST_GPR_OFFSET + i].GetType() != RegType::Discarded,
ignore_discarded_registers != IgnoreDiscardedRegisters::No || reg.IsInPPCState() ||
reg.IsInHostRegister() || IsImm(i),
"Attempted to flush discarded register");
if (i + 1 < int(GUEST_GPR_COUNT) && regs[i + 1])
@ -261,27 +269,27 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r
// We've got two guest registers in a row to store
OpArg& reg1 = m_guest_registers[GUEST_GPR_OFFSET + i];
OpArg& reg2 = m_guest_registers[GUEST_GPR_OFFSET + i + 1];
const bool reg1_imm = reg1.GetType() == RegType::Immediate;
const bool reg2_imm = reg2.GetType() == RegType::Immediate;
const bool reg1_zero = reg1_imm && reg1.GetImm() == 0;
const bool reg2_zero = reg2_imm && reg2.GetImm() == 0;
const bool reg1_imm = IsImm(i);
const bool reg2_imm = IsImm(i + 1);
const bool reg1_zero = reg1_imm && GetImm(i) == 0;
const bool reg2_zero = reg2_imm && GetImm(i + 1) == 0;
const bool flush_all = mode == FlushMode::All;
if (reg1.IsDirty() && reg2.IsDirty() &&
(reg1.GetType() == RegType::Register || (reg1_imm && (reg1_zero || flush_all))) &&
(reg2.GetType() == RegType::Register || (reg2_imm && (reg2_zero || flush_all))))
if (!reg1.IsInPPCState() && !reg2.IsInPPCState() &&
(reg1.IsInHostRegister() || (reg1_imm && (reg1_zero || flush_all))) &&
(reg2.IsInHostRegister() || (reg2_imm && (reg2_zero || flush_all))))
{
const size_t ppc_offset = GetGuestByIndex(i).ppc_offset;
if (ppc_offset <= 252)
{
ARM64Reg RX1 = reg1_zero ? ARM64Reg::WZR : R(GetGuestByIndex(i));
ARM64Reg RX2 = reg2_zero ? ARM64Reg::WZR : R(GetGuestByIndex(i + 1));
ARM64Reg RX1 = reg1_zero ? ARM64Reg::WZR : BindForRead(i);
ARM64Reg RX2 = reg2_zero ? ARM64Reg::WZR : BindForRead(i + 1);
m_emit->STP(IndexType::Signed, RX1, RX2, PPC_REG, u32(ppc_offset));
if (flush_all)
{
if (!reg1_zero)
UnlockRegister(EncodeRegTo32(RX1));
if (!reg2_zero)
UnlockRegister(EncodeRegTo32(RX2));
if (reg1.IsInHostRegister())
UnlockRegister(reg1.GetReg());
if (reg2.IsInHostRegister())
UnlockRegister(reg2.GetReg());
reg1.Flush();
reg2.Flush();
}
@ -300,9 +308,10 @@ void Arm64GPRCache::FlushCRRegisters(BitSet8 regs, FlushMode mode, ARM64Reg tmp_
{
for (int i : regs)
{
OpArg& reg = m_guest_registers[GUEST_CR_OFFSET + i];
ASSERT_MSG(DYNA_REC,
ignore_discarded_registers != IgnoreDiscardedRegisters::No ||
m_guest_registers[GUEST_CR_OFFSET + i].GetType() != RegType::Discarded,
ignore_discarded_registers != IgnoreDiscardedRegisters::No || reg.IsInPPCState() ||
reg.IsInHostRegister(),
"Attempted to flush discarded register");
FlushRegister(GUEST_CR_OFFSET + i, mode, tmp_reg);
@ -335,94 +344,89 @@ void Arm64GPRCache::Flush(FlushMode mode, ARM64Reg tmp_reg,
FlushCRRegisters(BitSet8(0xFF), mode, tmp_reg, ignore_discarded_registers);
}
ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg)
ARM64Reg Arm64GPRCache::BindForRead(size_t index)
{
GuestRegInfo guest_reg = GetGuestByIndex(index);
OpArg& reg = guest_reg.reg;
size_t bitsize = guest_reg.bitsize;
const bool is_gpr = index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT;
IncrementAllUsed();
reg.ResetLastUsed();
switch (reg.GetType())
if (reg.IsInHostRegister())
{
case RegType::Register: // already in a reg
return reg.GetReg();
case RegType::Immediate: // Is an immediate
}
else if (is_gpr && IsImm(index - GUEST_GPR_OFFSET))
{
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
m_emit->MOVI2R(host_reg, reg.GetImm());
m_emit->MOVI2R(host_reg, GetImm(index - GUEST_GPR_OFFSET));
reg.Load(host_reg);
return host_reg;
}
break;
case RegType::Discarded:
ASSERT_MSG(DYNA_REC, false, "Attempted to read discarded register");
break;
case RegType::NotLoaded: // Register isn't loaded at /all/
else // Register isn't loaded at /all/
{
// This is a bit annoying. We try to keep these preloaded as much as possible
// This can also happen on cases where PPCAnalyst isn't feeing us proper register usage
// statistics
ASSERT_MSG(DYNA_REC, reg.IsInPPCState(), "Attempted to read discarded register");
ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
reg.Load(host_reg);
reg.SetDirty(false);
m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
return host_reg;
}
break;
default:
ERROR_LOG_FMT(DYNA_REC, "Invalid OpArg Type!");
break;
}
// We've got an issue if we end up here
return ARM64Reg::INVALID_REG;
}
void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm, bool dirty)
void Arm64GPRCache::SetImmediateInternal(size_t index, u32 imm, bool dirty)
{
GuestRegInfo guest_reg = GetGuestByIndex(index);
OpArg& reg = guest_reg.reg;
if (reg.GetType() == RegType::Register)
if (reg.IsInHostRegister())
UnlockRegister(EncodeRegTo32(reg.GetReg()));
reg.LoadToImm(imm);
reg.Discard();
reg.SetDirty(dirty);
m_jit->GetConstantPropagation().SetGPR(index - GUEST_GPR_OFFSET, imm);
}
void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool will_read, bool will_write)
void Arm64GPRCache::BindForWrite(size_t index, bool will_read, bool will_write)
{
GuestRegInfo guest_reg = GetGuestByIndex(index);
OpArg& reg = guest_reg.reg;
const size_t bitsize = guest_reg.bitsize;
const bool is_gpr = index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT;
reg.ResetLastUsed();
const RegType reg_type = reg.GetType();
if (reg_type == RegType::NotLoaded || reg_type == RegType::Discarded)
if (!reg.IsInHostRegister())
{
const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
reg.Load(host_reg);
reg.SetDirty(will_write);
if (will_read)
if (is_gpr && IsImm(index - GUEST_GPR_OFFSET))
{
ASSERT_MSG(DYNA_REC, reg_type != RegType::Discarded, "Attempted to load a discarded value");
m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
if (will_read || !will_write)
{
// TODO: Emitting this instruction when (!will_read && !will_write) would be unnecessary if
// we had some way to indicate to Flush that the immediate value should be written to
// ppcState even though there is a host register allocated
m_emit->MOVI2R(host_reg, GetImm(index - GUEST_GPR_OFFSET));
}
reg.Load(host_reg);
}
else
{
ASSERT_MSG(DYNA_REC, !will_read || reg.IsInPPCState(), "Attempted to load a discarded value");
const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
reg.Load(host_reg);
reg.SetDirty(will_write);
if (will_read)
m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset));
return;
}
}
else if (reg_type == RegType::Immediate)
{
const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg());
if (will_read || !will_write)
{
// TODO: Emitting this instruction when (!will_read && !will_write) would be unnecessary if we
// had some way to indicate to Flush that the immediate value should be written to ppcState
// even though there is a host register allocated
m_emit->MOVI2R(host_reg, reg.GetImm());
}
reg.Load(host_reg);
if (will_write)
reg.SetDirty(true);
}
else if (will_write)
if (will_write)
{
reg.SetDirty(true);
if (is_gpr)
m_jit->GetConstantPropagation().ClearGPR(index - GUEST_GPR_OFFSET);
}
}
@ -484,7 +488,7 @@ BitSet32 Arm64GPRCache::GetDirtyGPRs() const
for (size_t i = 0; i < GUEST_GPR_COUNT; ++i)
{
const OpArg& arg = m_guest_registers[GUEST_GPR_OFFSET + i];
registers[i] = arg.GetType() != RegType::NotLoaded && arg.IsDirty();
registers[i] = !arg.IsInPPCState();
}
return registers;
}
@ -494,7 +498,7 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg)
for (size_t i = 0; i < m_guest_registers.size(); ++i)
{
const OpArg& reg = m_guest_registers[i];
if (reg.GetType() == RegType::Register && DecodeReg(reg.GetReg()) == DecodeReg(host_reg))
if (reg.IsInHostRegister() && DecodeReg(reg.GetReg()) == DecodeReg(host_reg))
{
FlushRegister(i, FlushMode::All, tmp_reg);
return;
@ -514,17 +518,17 @@ void Arm64FPRCache::Flush(FlushMode mode, ARM64Reg tmp_reg,
{
for (size_t i = 0; i < m_guest_registers.size(); ++i)
{
const RegType reg_type = m_guest_registers[i].GetType();
if (reg_type == RegType::Discarded)
{
ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No,
"Attempted to flush discarded register");
}
else if (reg_type != RegType::NotLoaded && reg_type != RegType::Immediate)
if (m_guest_registers[i].IsInHostRegister())
{
FlushRegister(i, mode, tmp_reg);
}
else
{
ASSERT_MSG(DYNA_REC,
ignore_discarded_registers != IgnoreDiscardedRegisters::No ||
m_guest_registers[i].IsInPPCState(),
"Attempted to flush discarded register");
}
}
}
@ -533,9 +537,32 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
OpArg& reg = m_guest_registers[preg];
IncrementAllUsed();
reg.ResetLastUsed();
if (!reg.IsInHostRegister())
{
ASSERT_MSG(DYNA_REC, reg.IsInPPCState(), "Attempted to read discarded register");
ARM64Reg host_reg = GetReg();
u32 load_size;
if (type == RegType::Register)
{
load_size = 128;
reg.Load(host_reg, RegType::Register);
}
else
{
load_size = 64;
reg.Load(host_reg, RegType::LowerPair);
}
reg.SetDirty(false);
m_float_emit->LDR(load_size, IndexType::Unsigned, host_reg, PPC_REG,
static_cast<s32>(PPCSTATE_OFF_PS0(preg)));
return host_reg;
}
ARM64Reg host_reg = reg.GetReg();
switch (reg.GetType())
switch (reg.GetFPRType())
{
case RegType::Single:
{
@ -618,28 +645,6 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type)
}
return host_reg;
}
case RegType::Discarded:
ASSERT_MSG(DYNA_REC, false, "Attempted to read discarded register");
break;
case RegType::NotLoaded: // Register isn't loaded at /all/
{
host_reg = GetReg();
u32 load_size;
if (type == RegType::Register)
{
load_size = 128;
reg.Load(host_reg, RegType::Register);
}
else
{
load_size = 64;
reg.Load(host_reg, RegType::LowerPair);
}
reg.SetDirty(false);
m_float_emit->LDR(load_size, IndexType::Unsigned, host_reg, PPC_REG,
static_cast<s32>(PPCSTATE_OFF_PS0(preg)));
return host_reg;
}
default:
DEBUG_ASSERT_MSG(DYNA_REC, false, "Invalid OpArg Type!");
break;
@ -655,16 +660,17 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty)
IncrementAllUsed();
reg.ResetLastUsed();
// Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty.
if (reg.IsDirty() && (type == RegType::LowerPair || type == RegType::LowerPairSingle))
// If PS1 is dirty, but the caller wants a RegType with only PS0, we must write PS1 to m_ppc_state
// now so the contents of PS1 aren't lost.
if (!reg.IsInPPCState() && (type == RegType::LowerPair || type == RegType::LowerPairSingle))
{
// We must *not* change host_reg as this register might still be in use. So it's fine to
// store this register, but it's *not* fine to convert it to double. So for double conversion,
// a temporary register needs to be used.
// We must *not* modify host_reg, as the current guest instruction might want to read its old
// value before overwriting it. So it's fine to store this register, but it's *not* fine to
// convert it to double in place. For double conversion, a temporary register needs to be used.
ARM64Reg host_reg = reg.GetReg();
ARM64Reg flush_reg = host_reg;
switch (reg.GetType())
switch (reg.GetFPRType())
{
case RegType::Single:
// For a store-safe register, conversion is just one instruction regardless of whether
@ -706,8 +712,8 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty)
// Store PSR1 (which is equal to PSR0) in memory.
m_float_emit->STR(64, IndexType::Unsigned, flush_reg, PPC_REG,
static_cast<s32>(PPCSTATE_OFF_PS1(preg)));
reg.Load(host_reg, reg.GetType() == RegType::DuplicatedSingle ? RegType::LowerPairSingle :
RegType::LowerPair);
reg.Load(host_reg, reg.GetFPRType() == RegType::DuplicatedSingle ? RegType::LowerPairSingle :
RegType::LowerPair);
break;
default:
// All other types doesn't store anything in PSR1.
@ -718,7 +724,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty)
Unlock(flush_reg);
}
if (reg.GetType() == RegType::NotLoaded || reg.GetType() == RegType::Discarded)
if (!reg.IsInHostRegister())
{
// If not loaded at all, just alloc a new one.
reg.Load(GetReg(), type);
@ -782,10 +788,8 @@ void Arm64FPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg)
for (size_t i = 0; i < m_guest_registers.size(); ++i)
{
const OpArg& reg = m_guest_registers[i];
const RegType reg_type = reg.GetType();
if (reg_type != RegType::NotLoaded && reg_type != RegType::Discarded &&
reg_type != RegType::Immediate && reg.GetReg() == host_reg)
if (reg.IsInHostRegister() && reg.GetReg() == host_reg)
{
FlushRegister(i, FlushMode::All, tmp_reg);
return;
@ -802,8 +806,8 @@ bool Arm64FPRCache::IsTopHalfUsed(ARM64Reg reg) const
{
for (const OpArg& r : m_guest_registers)
{
if (r.GetReg() != ARM64Reg::INVALID_REG && DecodeReg(r.GetReg()) == DecodeReg(reg))
return r.GetType() == RegType::Register;
if (r.IsInHostRegister() && DecodeReg(r.GetReg()) == DecodeReg(reg))
return r.GetFPRType() == RegType::Register;
}
return false;
@ -813,8 +817,8 @@ void Arm64FPRCache::FlushRegister(size_t preg, FlushMode mode, ARM64Reg tmp_reg)
{
OpArg& reg = m_guest_registers[preg];
const ARM64Reg host_reg = reg.GetReg();
const bool dirty = reg.IsDirty();
RegType type = reg.GetType();
const bool dirty = !reg.IsInPPCState();
RegType type = reg.GetFPRType();
bool allocated_tmp_reg = false;
if (tmp_reg != ARM64Reg::INVALID_REG)
@ -921,7 +925,7 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed() const
bool Arm64FPRCache::IsSingle(size_t preg, bool lower_only) const
{
const RegType type = m_guest_registers[preg].GetType();
const RegType type = m_guest_registers[preg].GetFPRType();
return type == RegType::Single || type == RegType::DuplicatedSingle ||
(lower_only && type == RegType::LowerPairSingle);
}
@ -929,18 +933,18 @@ bool Arm64FPRCache::IsSingle(size_t preg, bool lower_only) const
void Arm64FPRCache::FixSinglePrecision(size_t preg)
{
OpArg& reg = m_guest_registers[preg];
if (!reg.IsInHostRegister())
return;
ARM64Reg host_reg = reg.GetReg();
switch (reg.GetType())
if (reg.GetFPRType() == RegType::Duplicated) // only PS0 needs to be converted
{
case RegType::Duplicated: // only PS0 needs to be converted
m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
reg.Load(host_reg, RegType::DuplicatedSingle);
break;
case RegType::Register: // PS0 and PS1 need to be converted
}
else if (reg.GetFPRType() == RegType::Register) // PS0 and PS1 need to be converted
{
m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
reg.Load(host_reg, RegType::Single);
break;
default:
break;
}
}

View File

@ -60,16 +60,12 @@ static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!");
enum class RegType
{
NotLoaded,
Discarded, // Reg is not loaded because we know it won't be read before the next write
Register, // Reg type is register
Immediate, // Reg is really a IMM
LowerPair, // Only the lower pair of a paired register
Duplicated, // The lower reg is the same as the upper one (physical upper doesn't actually have
// the duplicated value)
Single, // Both registers are loaded as single
LowerPairSingle, // Only the lower pair of a paired register, as single
DuplicatedSingle, // The lower one contains both registers, as single
Register, // PS0 and PS1, each 64-bit
LowerPair, // PS0 only, 64-bit
Duplicated, // PS0 and PS1 are identical, host register only stores one lane (64-bit)
Single, // PS0 and PS1, each 32-bit
LowerPairSingle, // PS0 only, 32-bit
DuplicatedSingle, // PS0 and PS1 are identical, host register only stores one lane (32-bit)
};
enum class FlushMode : bool
@ -92,26 +88,21 @@ class OpArg
public:
OpArg() = default;
RegType GetType() const { return m_type; }
RegType GetFPRType() const { return m_fpr_type; }
Arm64Gen::ARM64Reg GetReg() const { return m_reg; }
u32 GetImm() const { return m_value; }
void Load(Arm64Gen::ARM64Reg reg, RegType type = RegType::Register)
void Load(Arm64Gen::ARM64Reg reg, RegType format = RegType::Register)
{
m_type = type;
m_reg = reg;
}
void LoadToImm(u32 imm)
{
m_type = RegType::Immediate;
m_value = imm;
m_reg = Arm64Gen::ARM64Reg::INVALID_REG;
m_fpr_type = format;
m_in_host_register = true;
}
void Discard()
{
// Invalidate any previous information
m_type = RegType::Discarded;
m_reg = Arm64Gen::ARM64Reg::INVALID_REG;
m_fpr_type = RegType::Register;
m_in_ppc_state = false;
m_in_host_register = false;
// Arbitrarily large value that won't roll over on a lot of increments
m_last_used = 0xFFFF;
@ -119,8 +110,10 @@ public:
void Flush()
{
// Invalidate any previous information
m_type = RegType::NotLoaded;
m_reg = Arm64Gen::ARM64Reg::INVALID_REG;
m_fpr_type = RegType::Register;
m_in_ppc_state = true;
m_in_host_register = false;
// Arbitrarily large value that won't roll over on a lot of increments
m_last_used = 0xFFFF;
@ -129,20 +122,18 @@ public:
u32 GetLastUsed() const { return m_last_used; }
void ResetLastUsed() { m_last_used = 0; }
void IncrementLastUsed() { ++m_last_used; }
void SetDirty(bool dirty) { m_dirty = dirty; }
bool IsDirty() const { return m_dirty; }
void SetDirty(bool dirty) { m_in_ppc_state = !dirty; }
bool IsInPPCState() const { return m_in_ppc_state; }
bool IsInHostRegister() const { return m_in_host_register; }
private:
// For REG_REG
RegType m_type = RegType::NotLoaded; // store type
Arm64Gen::ARM64Reg m_reg = Arm64Gen::ARM64Reg::INVALID_REG; // host register we are in
// For REG_IMM
u32 m_value = 0; // IMM value
RegType m_fpr_type = RegType::Register; // for FPRs only
u32 m_last_used = 0;
bool m_dirty = false;
bool m_in_ppc_state = true;
bool m_in_host_register = false;
};
class HostReg
@ -328,22 +319,22 @@ public:
// Returns a guest GPR inside of a host register.
// Will dump an immediate to the host register as well.
Arm64Gen::ARM64Reg R(size_t preg) { return R(GetGuestGPR(preg)); }
Arm64Gen::ARM64Reg R(size_t preg) { return BindForRead(GUEST_GPR_OFFSET + preg); }
// Returns a guest CR inside of a host register.
Arm64Gen::ARM64Reg CR(size_t preg) { return R(GetGuestCR(preg)); }
Arm64Gen::ARM64Reg CR(size_t preg) { return BindForRead(GUEST_CR_OFFSET + preg); }
// Set a register to an immediate. Only valid for guest GPRs.
void SetImmediate(size_t preg, u32 imm, bool dirty = true)
{
SetImmediate(GetGuestGPR(preg), imm, dirty);
SetImmediateInternal(GUEST_GPR_OFFSET + preg, imm, dirty);
}
// Returns if a register is set as an immediate. Only valid for guest GPRs.
bool IsImm(size_t preg) const { return GetGuestGPROpArg(preg).GetType() == RegType::Immediate; }
// Returns whether a register is set as an immediate. Only valid for guest GPRs.
bool IsImm(size_t preg) const;
// Gets the immediate that a register is set to. Only valid for guest GPRs.
u32 GetImm(size_t preg) const { return GetGuestGPROpArg(preg).GetImm(); }
u32 GetImm(size_t preg) const;
bool IsImm(size_t preg, u32 imm) const { return IsImm(preg) && GetImm(preg) == imm; }
@ -374,14 +365,14 @@ public:
// flushed. Just remember to call this function again with will_write = true after the Flush call.
void BindToRegister(size_t preg, bool will_read, bool will_write = true)
{
BindToRegister(GetGuestGPR(preg), will_read, will_write);
BindForWrite(GUEST_GPR_OFFSET + preg, will_read, will_write);
}
// Binds a guest CR to a host register, optionally loading its value.
// The description of BindToRegister above applies to this function as well.
void BindCRToRegister(size_t preg, bool will_read, bool will_write = true)
{
BindToRegister(GetGuestCR(preg), will_read, will_write);
BindForWrite(GUEST_CR_OFFSET + preg, will_read, will_write);
}
BitSet32 GetCallerSavedUsed() const override;
@ -428,14 +419,19 @@ private:
GuestRegInfo GetGuestCR(size_t preg);
GuestRegInfo GetGuestByIndex(size_t index);
Arm64Gen::ARM64Reg R(const GuestRegInfo& guest_reg);
void SetImmediate(const GuestRegInfo& guest_reg, u32 imm, bool dirty);
void BindToRegister(const GuestRegInfo& guest_reg, bool will_read, bool will_write = true);
Arm64Gen::ARM64Reg BindForRead(size_t index);
void SetImmediateInternal(size_t index, u32 imm, bool dirty);
void BindForWrite(size_t index, bool will_read, bool will_write = true);
void FlushRegisters(BitSet32 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg,
IgnoreDiscardedRegisters ignore_discarded_registers);
void FlushCRRegisters(BitSet8 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg,
IgnoreDiscardedRegisters ignore_discarded_registers);
static constexpr size_t GUEST_GPR_COUNT = 32;
static constexpr size_t GUEST_CR_COUNT = 8;
static constexpr size_t GUEST_GPR_OFFSET = 0;
static constexpr size_t GUEST_CR_OFFSET = GUEST_GPR_COUNT;
};
class Arm64FPRCache : public Arm64RegCache
@ -451,9 +447,9 @@ public:
// Returns a guest register inside of a host register
// Will dump an immediate to the host register as well
Arm64Gen::ARM64Reg R(size_t preg, RegType type);
Arm64Gen::ARM64Reg R(size_t preg, RegType format);
Arm64Gen::ARM64Reg RW(size_t preg, RegType type, bool set_dirty = true);
Arm64Gen::ARM64Reg RW(size_t preg, RegType format, bool set_dirty = true);
BitSet32 GetCallerSavedUsed() const override;

View File

@ -0,0 +1,538 @@
// Copyright 2023 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "Core/PowerPC/JitCommon/ConstantPropagation.h"
#include <bit>
#include "Core/PowerPC/Gekko.h"
#include "Core/PowerPC/Interpreter/Interpreter.h"
#include "Core/PowerPC/PPCTables.h"
namespace JitCommon
{
// Bitwise OR as a named function, so it can be handed to EvaluateBitwiseImm
// as the operation for ori/oris.
static constexpr u32 BitOR(u32 a, u32 b)
{
  const u32 combined = a | b;
  return combined;
}
// Bitwise AND as a named function, so it can be handed to EvaluateBitwiseImm
// as the operation for andi/andis.
static constexpr u32 BitAND(u32 a, u32 b)
{
  const u32 masked = a & b;
  return masked;
}
// Bitwise XOR callback for EvaluateBitwiseImm (xori/xoris).
static constexpr u32 BitXOR(u32 a, u32 b)
{
  const u32 combined = a ^ b;
  return combined;
}
// Attempts to evaluate a single PowerPC instruction using the currently known
// GPR values. flags is the PPCTables flag set for the instruction (FL_*).
// Returns a default-constructed (empty) result if nothing could be determined.
ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruction inst,
                                                                   u64 flags) const
{
  switch (inst.OPCD)
  {
  case 7:  // mulli
    return EvaluateMulImm(inst);
  case 8:  // subfic
    return EvaluateSubImmCarry(inst);
  case 12:  // addic
  case 13:  // addic.
    return EvaluateAddImmCarry(inst);
  case 14:  // addi
  case 15:  // addis
    return EvaluateAddImm(inst);
  case 20:  // rlwimix
    return EvaluateRlwimix(inst);
  case 21:  // rlwinmx
    return EvaluateRlwinmxRlwnmx(inst, inst.SH);
  case 23:  // rlwnmx
    // Unlike rlwinmx, the rotation amount comes from rB, so rB must be known.
    if (HasGPR(inst.RB))
      return EvaluateRlwinmxRlwnmx(inst, GetGPR(inst.RB) & 0x1F);
    else
      return {};
  case 24:  // ori
  case 25:  // oris
    return EvaluateBitwiseImm(inst, BitOR);
  case 26:  // xori
  case 27:  // xoris
    return EvaluateBitwiseImm(inst, BitXOR);
  case 28:  // andi
  case 29:  // andis
    return EvaluateBitwiseImm(inst, BitAND);
  case 31:
    // Opcode 31 covers many integer ops; dispatch further on SUBOP10.
    return EvaluateTable31(inst, flags);
  default:
    return {};
  }
}
// mulli: rD = rA * SIMM.
ConstantPropagationResult ConstantPropagation::EvaluateMulImm(UGeckoInstruction inst) const
{
  // A zero immediate forces a zero result even when rA is unknown.
  if (inst.SIMM_16 == 0)
    return ConstantPropagationResult(inst.RD, 0);

  if (!HasGPR(inst.RA))
    return {};

  return ConstantPropagationResult(inst.RD, GetGPR(inst.RA) * inst.SIMM_16);
}
// subfic: rD = SIMM - rA, always updating CA.
ConstantPropagationResult ConstantPropagation::EvaluateSubImmCarry(UGeckoInstruction inst) const
{
  if (!HasGPR(inst.RA))
    return {};

  const u32 minuend = u32(s32(inst.SIMM_16));
  const u32 subtrahend = GetGPR(inst.RA);

  ConstantPropagationResult result(inst.RD, minuend - subtrahend);
  // CA is the carry out of ~rA + SIMM + 1, which is equivalent to SIMM >= rA.
  result.carry = minuend >= subtrahend;
  return result;
}
// addi/addis: rD = (rA | 0) + SIMM, with addis shifting the immediate left 16.
ConstantPropagationResult ConstantPropagation::EvaluateAddImm(UGeckoInstruction inst) const
{
  const bool shifted = (inst.OPCD & 1) != 0;  // addis is the odd opcode (15)
  const s32 immediate = shifted ? inst.SIMM_16 << 16 : inst.SIMM_16;

  // rA = 0 means "use the literal value 0", so these become li/lis.
  if (inst.RA == 0)
    return ConstantPropagationResult(inst.RD, immediate);

  if (!HasGPR(inst.RA))
    return {};

  return ConstantPropagationResult(inst.RD, GetGPR(inst.RA) + immediate);
}
// addic/addic.: rD = rA + SIMM, always updating CA (addic. also sets CR0).
ConstantPropagationResult ConstantPropagation::EvaluateAddImmCarry(UGeckoInstruction inst) const
{
  if (!HasGPR(inst.RA))
    return {};

  const u32 addend = GetGPR(inst.RA);
  const bool record = (inst.OPCD & 1) != 0;  // addic. is the odd opcode (13)

  ConstantPropagationResult result(inst.RD, addend + inst.SIMM_16, record);
  result.carry = Interpreter::Helper_Carry(addend, inst.SIMM_16);
  return result;
}
// rlwimix: rA = (rotl(rS, SH) & mask) | (rA & ~mask).
ConstantPropagationResult ConstantPropagation::EvaluateRlwimix(UGeckoInstruction inst) const
{
  if (!HasGPR(inst.RS))
    return {};

  const u32 mask = MakeRotationMask(inst.MB, inst.ME);
  const u32 rotated = std::rotl(GetGPR(inst.RS), inst.SH);

  // A full mask reduces this to a plain rotate; rA's old value is irrelevant.
  if (mask == 0xFFFFFFFF)
    return ConstantPropagationResult(inst.RA, rotated, inst.Rc);

  // Otherwise bits outside the mask come from rA, so rA must be known as well.
  if (!HasGPR(inst.RA))
    return {};

  const u32 merged = (GetGPR(inst.RA) & ~mask) | (rotated & mask);
  return ConstantPropagationResult(inst.RA, merged, inst.Rc);
}
// rlwinmx/rlwnmx: rA = rotl(rS, shift) & mask. The caller supplies the shift
// (SH for rlwinmx, rB & 0x1F for rlwnmx).
ConstantPropagationResult ConstantPropagation::EvaluateRlwinmxRlwnmx(UGeckoInstruction inst,
                                                                     u32 shift) const
{
  if (!HasGPR(inst.RS))
    return {};

  const u32 rotated = std::rotl(GetGPR(inst.RS), shift);
  const u32 mask = MakeRotationMask(inst.MB, inst.ME);
  return ConstantPropagationResult(inst.RA, rotated & mask, inst.Rc);
}
// ori/oris, xori/xoris, andi./andis.: rA = rS op immediate, where the "s"
// (odd-opcode) variants shift the immediate left by 16. do_op selects the
// bitwise operation; the and variants request CR0 computation.
ConstantPropagationResult ConstantPropagation::EvaluateBitwiseImm(UGeckoInstruction inst,
                                                                  u32 (*do_op)(u32, u32)) const
{
  const bool is_and = do_op == &BitAND;
  const u32 immediate = inst.OPCD & 1 ? inst.UIMM << 16 : inst.UIMM;

  // ori/xori rA, rA, 0 leaves everything unchanged (the common PowerPC nop
  // encoding), so the JIT can skip it even when rS is unknown.
  if (inst.UIMM == 0 && !is_and && inst.RA == inst.RS)
    return DO_NOTHING;

  if (!HasGPR(inst.RS))
    return {};

  return ConstantPropagationResult(inst.RA, do_op(GetGPR(inst.RS), immediate), is_and);
}
// Dispatches opcode-31 instructions based on their register usage pattern,
// which is described by the instruction's PPCTables flags.
ConstantPropagationResult ConstantPropagation::EvaluateTable31(UGeckoInstruction inst,
                                                               u64 flags) const
{
  if (flags & FL_IN_B)
  {
    // input a, b -> output d
    if (flags & FL_OUT_D)
      return EvaluateTable31AB(inst, flags);

    // input s, b -> output a
    return EvaluateTable31SB(inst);
  }

  // negx (104) and negox (616) are the only handled single-input instructions
  // here that read a and write d.
  if (inst.SUBOP10 == 104 || inst.SUBOP10 == 616)
    return EvaluateTable31Negx(inst, flags);

  // input s -> output a
  return EvaluateTable31S(inst);
}
// negx/negox: rD = -rA.
ConstantPropagationResult ConstantPropagation::EvaluateTable31Negx(UGeckoInstruction inst,
                                                                   u64 flags) const
{
  if (!HasGPR(inst.RA))
    return {};

  const s64 negated = -s64(s32(GetGPR(inst.RA)));

  ConstantPropagationResult result(inst.RD, u32(negated), inst.Rc);
  if (flags & FL_SET_OE)
  {
    // OV is set iff the exact result doesn't fit in 32 bits
    // (only possible when rA was 0x80000000).
    result.overflow = (negated != s64(s32(negated)));
  }
  return result;
}
// Evaluates opcode-31 instructions that read rS and write rA with no second
// register input: cntlzwx, srawix, extshx, extsbx.
ConstantPropagationResult ConstantPropagation::EvaluateTable31S(UGeckoInstruction inst) const
{
  if (!HasGPR(inst.RS))
    return {};

  std::optional<bool> carry;
  u32 a;
  const u32 s = GetGPR(inst.RS);

  switch (inst.SUBOP10)
  {
  case 26:  // cntlzwx
    a = std::countl_zero(s);
    break;
  case 824:  // srawix
    a = s32(s) >> inst.SH;
    // CA is set iff the input was negative and any 1 bits were shifted out.
    carry = inst.SH != 0 && s32(s) < 0 && (s << (32 - inst.SH));
    break;
  case 922:  // extshx
    a = s32(s16(s));
    break;
  case 954:  // extsbx
    a = s32(s8(s));
    break;
  default:
    return {};
  }

  // Construct the result directly instead of copy-constructing it from a
  // redundant nested temporary.
  ConstantPropagationResult result(inst.RA, a, inst.Rc);
  result.carry = carry;
  return result;
}
// Evaluates opcode-31 instructions that read rA and rB and write rD
// (add/subf/mulh/mull/divw families). When only one input (or neither) is
// known, falls back to the partial-evaluation helpers.
ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstruction inst,
                                                                 u64 flags) const
{
  const bool has_a = HasGPR(inst.RA);
  const bool has_b = HasGPR(inst.RB);
  if (!has_a || !has_b)
  {
    if (has_a)
      return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RA), false);
    else if (has_b)
      return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RB), true);
    else if (inst.RA == inst.RB)
      return EvaluateTable31ABIdenticalRegisters(inst, flags);
    else
      return {};
  }

  // d holds the result with bit 32 acting as the carry out; d_overflow holds
  // the exact signed result, and OV is set when it doesn't fit in 32 bits.
  u64 d;
  s64 d_overflow;
  const u32 a = GetGPR(inst.RA);
  const u32 b = GetGPR(inst.RB);
  switch (inst.SUBOP10)
  {
  case 8:    // subfcx
  case 40:   // subfx
  case 520:  // subfcox
  case 552:  // subfox
    // subf computes b - a, expressed as ~a + b + 1 so bit 32 of d matches CA.
    d = u64(u32(~a)) + u64(b) + 1;
    d_overflow = s64(s32(b)) - s64(s32(a));
    break;
  case 10:   // addcx
  case 522:  // addcox
  case 266:  // addx
  case 778:  // addox
    d = u64(a) + u64(b);
    d_overflow = s64(s32(a)) + s64(s32(b));
    break;
  case 11:  // mulhwux
    d = d_overflow = (u64(a) * u64(b)) >> 32;
    break;
  case 75:  // mulhwx
    d = d_overflow = u64(s64(s32(a)) * s64(s32(b))) >> 32;
    break;
  case 235:  // mullwx
  case 747:  // mullwox
    d = d_overflow = s64(s32(a)) * s64(s32(b));
    break;
  case 459:  // divwux
  case 971:  // divwuox
    // Division by zero: 0x1'0000'0000 truncates to 0 in the GPR while failing
    // the 32-bit fit check below, so OV gets set.
    d = d_overflow = b == 0 ? 0x1'0000'0000 : u64(a / b);
    break;
  case 491:   // divwx
  case 1003:  // divwox
    // Division by zero and INT_MIN / -1: both chosen sentinel values fail the
    // 32-bit fit check below (setting OV); the GPR gets 0xFFFFFFFF for a
    // negative dividend and 0 otherwise.
    d = d_overflow = b == 0 || (a == 0x80000000 && b == 0xFFFFFFFF) ?
                         (s32(a) < 0 ? 0xFFFFFFFF : 0x1'0000'0000) :
                         s32(a) / s32(b);
    break;
  default:
    return {};
  }

  ConstantPropagationResult result(inst.RD, u32(d), inst.Rc);
  if (flags & FL_SET_CA)
    result.carry = (d >> 32 != 0);
  if (flags & FL_SET_OE)
    result.overflow = (d_overflow != s64(s32(d_overflow)));
  return result;
}
// Partial evaluation for rA/rB -> rD instructions when only one input register
// has a known value. Only cases where that single value fully determines the
// result (e.g. multiplying by zero) can be folded.
ConstantPropagationResult
ConstantPropagation::EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u64 flags, u32 value,
                                                       bool known_reg_is_b) const
{
  switch (inst.SUBOP10)
  {
  case 11:   // mulhwux
  case 75:   // mulhwx
  case 235:  // mullwx
  case 747:  // mullwox
    // Multiplying by zero is zero regardless of the other operand, and a zero
    // product can never overflow.
    if (value == 0)
    {
      ConstantPropagationResult result(inst.RD, 0, inst.Rc);
      if (flags & FL_SET_OE)
        result.overflow = false;
      return result;
    }
    break;
  case 459:  // divwux
  case 971:  // divwuox
    // Unsigned division by a known-zero divisor: the GPR gets 0 and OV is set.
    if (known_reg_is_b && value == 0)
    {
      ConstantPropagationResult result(inst.RD, 0, inst.Rc);
      if (flags & FL_SET_OE)
        result.overflow = true;
      return result;
    }
    // Fall through for the known-zero-dividend case shared with signed divide.
    [[fallthrough]];
  case 491:   // divwx
  case 1003:  // divwox
    // A zero dividend gives a zero quotient, but only when OV doesn't need to
    // be computed — whether the division overflows still depends on the
    // unknown divisor.
    if (!known_reg_is_b && value == 0 && !(flags & FL_SET_OE))
    {
      return ConstantPropagationResult(inst.RD, 0, inst.Rc);
    }
    break;
  }
  return {};
}
// Handles rA/rB -> rD instructions where rA and rB name the same (unknown)
// register: subtracting a register from itself is always zero.
ConstantPropagationResult
ConstantPropagation::EvaluateTable31ABIdenticalRegisters(UGeckoInstruction inst, u64 flags) const
{
  switch (inst.SUBOP10)
  {
  case 8:    // subfcx
  case 40:   // subfx
  case 520:  // subfcox
  case 552:  // subfox
    break;
  default:
    return {};
  }

  ConstantPropagationResult result(inst.RD, 0, inst.Rc);
  // ~x + x + 1 always carries out, and x - x can never overflow.
  if (flags & FL_SET_CA)
    result.carry = true;
  if (flags & FL_SET_OE)
    result.overflow = false;
  return result;
}
// Evaluates opcode-31 instructions that read rS and rB and write rA
// (shifts and bitwise logic). When only one input (or neither) is known,
// falls back to the partial-evaluation helpers.
ConstantPropagationResult ConstantPropagation::EvaluateTable31SB(UGeckoInstruction inst) const
{
  const bool has_s = HasGPR(inst.RS);
  const bool has_b = HasGPR(inst.RB);
  if (!has_s || !has_b)
  {
    if (has_s)
      return EvaluateTable31SBOneRegisterKnown(inst, GetGPR(inst.RS), false);
    else if (has_b)
      return EvaluateTable31SBOneRegisterKnown(inst, GetGPR(inst.RB), true);
    else if (inst.RS == inst.RB)
      return EvaluateTable31SBIdenticalRegisters(inst);
    else
      return {};
  }

  u32 a;
  const u32 s = GetGPR(inst.RS);
  const u32 b = GetGPR(inst.RB);
  switch (inst.SUBOP10)
  {
  case 24:  // slwx
    // Shift amounts of 32-63 must give 0; going through u64 handles that.
    a = u32(u64(s) << (b & 0x3f));
    break;
  case 28:  // andx
    a = s & b;
    break;
  case 60:  // andcx
    a = s & (~b);
    break;
  case 124:  // norx
    a = ~(s | b);
    break;
  case 284:  // eqvx
    a = ~(s ^ b);
    break;
  case 316:  // xorx
    a = s ^ b;
    break;
  case 412:  // orcx
    a = s | (~b);
    break;
  case 444:  // orx
    a = s | b;
    break;
  case 476:  // nandx
    a = ~(s & b);
    break;
  case 536:  // srwx
    // As with slwx, the u64 detour makes shift amounts of 32-63 yield 0.
    a = u32(u64(s) >> (b & 0x3f));
    break;
  case 792:  // srawx
  {
    // 64-bit arithmetic shift keeps the result in the high half of temp and
    // the shifted-out bits in the low half. CA must be set iff the result is
    // negative and a 1 bit was shifted out; temp & a is nonzero exactly then
    // (for a non-negative result the result bits and discarded bits can't
    // overlap).
    const u64 temp = (s64(s32(s)) << 32) >> (b & 0x3f);
    a = u32(temp >> 32);
    ConstantPropagationResult result(inst.RA, a, inst.Rc);
    result.carry = (temp & a) != 0;
    return result;
  }
  default:
    return {};
  }
  return ConstantPropagationResult(inst.RA, a, inst.Rc);
}
// Partial evaluation for rS/rB -> rA instructions when only one input register
// has a known value. Folds cases where that value alone forces the result
// (absorbing operands like AND 0 or OR ~0, or a shift amount >= 32).
ConstantPropagationResult
ConstantPropagation::EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u32 value,
                                                       bool known_reg_is_b) const
{
  u32 a;
  switch (inst.SUBOP10)
  {
  case 24:   // slwx
  case 536:  // srwx
    // Shifting a zero value, or shifting by an amount with bit 5 set (>= 32),
    // always yields 0.
    if (!known_reg_is_b && value == 0)
      a = 0;
    else if (known_reg_is_b && (value & 0x20))
      a = 0;
    else
      return {};
    break;
  case 60:  // andcx
    // andc complements rB, so normalize value to make the check below an AND.
    if (known_reg_is_b)
      value = ~value;
    [[fallthrough]];
  case 28:  // andx
    // AND with zero is zero.
    if (value == 0)
      a = 0;
    else
      return {};
    break;
  case 124:  // norx
    // NOR with all ones is zero.
    if (value == 0xFFFFFFFF)
      a = 0;
    else
      return {};
    break;
  case 412:  // orcx
    // orc complements rB, so normalize value to make the check below an OR.
    if (known_reg_is_b)
      value = ~value;
    [[fallthrough]];
  case 444:  // orx
    // OR with all ones is all ones.
    if (value == 0xFFFFFFFF)
      a = 0xFFFFFFFF;
    else
      return {};
    break;
  case 476:  // nandx
    // NAND with zero is all ones.
    if (value == 0)
      a = 0xFFFFFFFF;
    else
      return {};
    break;
  case 792:  // srawx
    // Arithmetically shifting zero yields zero and clears CA regardless of the
    // shift amount.
    if (!known_reg_is_b && value == 0)
    {
      ConstantPropagationResult result(inst.RA, 0, inst.Rc);
      result.carry = false;
      return result;
    }
    else
    {
      return {};
    }
    break;
  default:
    return {};
  }
  return ConstantPropagationResult(inst.RA, a, inst.Rc);
}
// Handles rS/rB -> rA logic ops where both inputs name the same (unknown)
// register, so the result is a constant regardless of its value.
ConstantPropagationResult
ConstantPropagation::EvaluateTable31SBIdenticalRegisters(UGeckoInstruction inst) const
{
  switch (inst.SUBOP10)
  {
  case 60:   // andcx: x & ~x
  case 316:  // xorx: x ^ x
    return ConstantPropagationResult(inst.RA, 0, inst.Rc);
  case 284:  // eqvx: ~(x ^ x)
  case 412:  // orcx: x | ~x
    return ConstantPropagationResult(inst.RA, 0xFFFFFFFF, inst.Rc);
  default:
    return {};
  }
}
// Records the GPR write (if any) of an evaluated result into the known state.
void ConstantPropagation::Apply(ConstantPropagationResult result)
{
  const bool writes_gpr = result.gpr >= 0;
  if (writes_gpr)
    SetGPR(result.gpr, result.gpr_value);
}
} // namespace JitCommon

View File

@ -0,0 +1,113 @@
// Copyright 2023 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Core/PowerPC/PowerPC.h"
#include <array>
#include <cstddef>
#include <optional>
namespace JitCommon
{
// Describes what ConstantPropagation::EvaluateInstruction was able to
// determine about one instruction's effects.
struct ConstantPropagationResult final
{
  // An empty result: nothing is known and code must be emitted as usual.
  constexpr ConstantPropagationResult() = default;

  // A result that writes gpr_value_ to GPR gpr_ (optionally computing CR0).
  constexpr ConstantPropagationResult(s8 gpr_, u32 gpr_value_, bool compute_rc_ = false)
      : gpr_value(gpr_value_), gpr(gpr_), instruction_fully_executed(true), compute_rc(compute_rc_)
  {
  }

  // If gpr is non-negative, this is the value the instruction writes to that GPR.
  u32 gpr_value = 0;

  // If the instruction couldn't be evaluated or doesn't output to a GPR, this is -1.
  // Otherwise, this is the GPR that the instruction writes to.
  s8 gpr = -1;

  // Whether the instruction was able to be fully evaluated with no side effects unaccounted for,
  // or in other words, whether the JIT can skip emitting code for this instruction.
  bool instruction_fully_executed = false;

  // If true, CR0 needs to be set based on gpr_value.
  bool compute_rc = false;

  // If not std::nullopt, the instruction writes this to the carry flag.
  std::optional<bool> carry = std::nullopt;

  // If not std::nullopt, the instruction writes this to the overflow flag.
  std::optional<bool> overflow = std::nullopt;
};
// Tracks which guest GPRs currently hold compile-time-known values and
// evaluates instructions against that state, letting a JIT fold constant
// computations. Lives in the JitCommon namespace for use by the JIT backends.
class ConstantPropagation final
{
public:
  // Tries to evaluate inst using the currently known GPR values. flags is the
  // instruction's PPCTables flag set (FL_*).
  ConstantPropagationResult EvaluateInstruction(UGeckoInstruction inst, u64 flags) const;

  // Records the GPR write (if any) of an evaluated result into the known state.
  void Apply(ConstantPropagationResult result);

  // Returns whether every one of the given GPRs has a known value.
  template <typename... Args>
  bool HasGPR(Args... gprs) const
  {
    return HasGPRs(BitSet32{static_cast<int>(gprs)...});
  }

  bool HasGPRs(BitSet32 gprs) const { return (m_gpr_values_known & gprs) == gprs; }

  // Only meaningful if the GPR is known (see HasGPR/HasGPRs).
  u32 GetGPR(size_t gpr) const { return m_gpr_values[gpr]; }

  void SetGPR(size_t gpr, u32 value)
  {
    m_gpr_values_known[gpr] = true;
    m_gpr_values[gpr] = value;
  }

  // Marks the given GPRs as no longer having known values.
  template <typename... Args>
  void ClearGPR(Args... gprs)
  {
    ClearGPRs(BitSet32{static_cast<int>(gprs)...});
  }

  void ClearGPRs(BitSet32 gprs) { m_gpr_values_known &= ~gprs; }

  // Forgets all known values (e.g. at the start of a block or after a flush).
  void Clear() { m_gpr_values_known = BitSet32{}; }

private:
  ConstantPropagationResult EvaluateMulImm(UGeckoInstruction inst) const;
  ConstantPropagationResult EvaluateSubImmCarry(UGeckoInstruction inst) const;
  ConstantPropagationResult EvaluateAddImm(UGeckoInstruction inst) const;
  ConstantPropagationResult EvaluateAddImmCarry(UGeckoInstruction inst) const;
  ConstantPropagationResult EvaluateRlwimix(UGeckoInstruction inst) const;
  ConstantPropagationResult EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, u32 shift) const;
  ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst,
                                               u32 (*do_op)(u32, u32)) const;
  ConstantPropagationResult EvaluateTable31(UGeckoInstruction inst, u64 flags) const;
  ConstantPropagationResult EvaluateTable31Negx(UGeckoInstruction inst, u64 flags) const;
  ConstantPropagationResult EvaluateTable31S(UGeckoInstruction inst) const;
  ConstantPropagationResult EvaluateTable31AB(UGeckoInstruction inst, u64 flags) const;
  ConstantPropagationResult EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u64 flags,
                                                              u32 value, bool known_reg_is_b) const;
  ConstantPropagationResult EvaluateTable31ABIdenticalRegisters(UGeckoInstruction inst,
                                                                u64 flags) const;
  ConstantPropagationResult EvaluateTable31SB(UGeckoInstruction inst) const;
  ConstantPropagationResult EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u32 value,
                                                              bool known_reg_is_b) const;
  ConstantPropagationResult EvaluateTable31SBIdenticalRegisters(UGeckoInstruction inst) const;

  // A successfully "evaluated" result that writes no GPR: the instruction is a
  // no-op and can be skipped entirely.
  static constexpr ConstantPropagationResult DO_NOTHING = [] {
    ConstantPropagationResult result;
    result.instruction_fully_executed = true;
    return result;
  }();

  static constexpr size_t GPR_COUNT = 32;

  // Entries are only meaningful for GPRs whose bit is set in m_gpr_values_known.
  std::array<u32, GPR_COUNT> m_gpr_values;
  BitSet32 m_gpr_values_known{};
};
} // namespace JitCommon

View File

@ -455,6 +455,7 @@
<ClInclude Include="Core\PowerPC\Interpreter\ExceptionUtils.h" />
<ClInclude Include="Core\PowerPC\Interpreter\Interpreter_FPUtils.h" />
<ClInclude Include="Core\PowerPC\Interpreter\Interpreter.h" />
<ClInclude Include="Core\PowerPC\JitCommon\ConstantPropagation.h" />
<ClInclude Include="Core\PowerPC\JitCommon\DivUtils.h" />
<ClInclude Include="Core\PowerPC\JitCommon\JitAsmCommon.h" />
<ClInclude Include="Core\PowerPC\JitCommon\JitBase.h" />
@ -1139,6 +1140,7 @@
<ClCompile Include="Core\PowerPC\Interpreter\Interpreter_SystemRegisters.cpp" />
<ClCompile Include="Core\PowerPC\Interpreter\Interpreter_Tables.cpp" />
<ClCompile Include="Core\PowerPC\Interpreter\Interpreter.cpp" />
<ClCompile Include="Core\PowerPC\JitCommon\ConstantPropagation.cpp" />
<ClCompile Include="Core\PowerPC\JitCommon\DivUtils.cpp" />
<ClCompile Include="Core\PowerPC\JitCommon\JitAsmCommon.cpp" />
<ClCompile Include="Core\PowerPC\JitCommon\JitBase.cpp" />