From 2dead5009b593bc17f23eabf62e717be0d7bd01d Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 22 Aug 2023 18:49:28 +0200 Subject: [PATCH 01/30] Jit64: Extract handling of immediate Rc --- Source/Core/Core/PowerPC/Jit64/Jit.h | 1 + .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 40 ++++++++++--------- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index e72397ccc52..5ce409cbaf1 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -122,6 +122,7 @@ public: void FinalizeCarry(Gen::CCFlags cond); void FinalizeCarry(bool ca); void ComputeRC(preg_t preg, bool needs_test = true, bool needs_sext = true); + void FinalizeImmediateRC(s32 value); void AndWithMask(Gen::X64Reg reg, u32 mask); void RotateLeft(int bits, Gen::X64Reg regOp, const Gen::OpArg& arg, u8 rotate); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 65d2a4296b2..03d8e2af7a8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -150,7 +150,10 @@ void Jit64::ComputeRC(preg_t preg, bool needs_test, bool needs_sext) if (arg.IsImm()) { - MOV(64, PPCSTATE_CR(0), Imm32(arg.SImm32())); + const s32 value = arg.SImm32(); + arg.Unlock(); + FinalizeImmediateRC(value); + return; } else if (needs_sext) { @@ -164,33 +167,32 @@ void Jit64::ComputeRC(preg_t preg, bool needs_test, bool needs_sext) if (CheckMergedBranch(0)) { - if (arg.IsImm()) + if (needs_test) { - s32 offset = arg.SImm32(); + TEST(32, arg, arg); arg.Unlock(); - DoMergedBranchImmediate(offset); } else { - if (needs_test) - { - TEST(32, arg, arg); - arg.Unlock(); - } - else - { - // If an operand to the cmp/rc op we're merging with the branch isn't used anymore, it'd be - // better to flush it here so that we don't have to flush it on both sides of the branch. 
- // We don't want to do this if a test is needed though, because it would interrupt macro-op - // fusion. - arg.Unlock(); - gpr.Flush(~js.op->gprInUse); - } - DoMergedBranchCondition(); + // If an operand to the cmp/rc op we're merging with the branch isn't used anymore, it'd be + // better to flush it here so that we don't have to flush it on both sides of the branch. + // We don't want to do this if a test is needed though, because it would interrupt macro-op + // fusion. + arg.Unlock(); + gpr.Flush(~js.op->gprInUse); } + DoMergedBranchCondition(); } } +void Jit64::FinalizeImmediateRC(s32 value) +{ + MOV(64, PPCSTATE_CR(0), Imm32(value)); + + if (CheckMergedBranch(0)) + DoMergedBranchImmediate(value); +} + // we can't do this optimization in the emitter because MOVZX and AND have different effects on // flags. void Jit64::AndWithMask(X64Reg reg, u32 mask) From e8060bd169b6ef17f3a4e197f1b7296af328ecb5 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 23 Aug 2023 22:41:03 +0200 Subject: [PATCH 02/30] JitArm64: Add function for setting constant overflow --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 1 + .../PowerPC/JitArm64/JitArm64_Integer.cpp | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 77ba6d0bb35..2d14d634dfa 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -376,6 +376,7 @@ protected: void ComputeRC0(Arm64Gen::ARM64Reg reg); void ComputeRC0(u32 imm); + void GenerateConstantOverflow(bool overflow); void ComputeCarry(Arm64Gen::ARM64Reg reg); // reg must contain 0 or 1 void ComputeCarry(bool carry); void ComputeCarry(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 087a4cd9077..fc12676043b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp 
@@ -39,6 +39,25 @@ void JitArm64::ComputeRC0(u32 imm) MOVI2R(gpr.CR(0), s64(s32(imm))); } +void JitArm64::GenerateConstantOverflow(bool overflow) +{ + ARM64Reg WA = gpr.GetReg(); + + if (overflow) + { + MOVI2R(WA, XER_OV_MASK | XER_SO_MASK); + STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov)); + } + else + { + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov)); + AND(WA, WA, LogicalImm(~XER_OV_MASK, GPRSize::B32)); + STRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_so_ov)); + } + + gpr.Unlock(WA); +} + void JitArm64::ComputeCarry(ARM64Reg reg) { js.carryFlag = CarryFlag::InPPCState; From f9601dc38c78151fe1a8f95128a07fc821e50664 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 22 Aug 2023 17:44:35 +0200 Subject: [PATCH 03/30] Jit: Extract immediate handling to separate ConstantPropagation class Restructuring things in this way brings two immediate benefits: * Code is deduplicated between Jit64 and JitArm64. * Materializing an immediate value in a register no longer results in us forgetting what the immediate value was. As a more long-term benefit, this lets us also run constant propagation as part of PPCAnalyst, which could let us do cool stuff in the future like statically determining whether a conditional branch will be taken. But I have nothing concrete planned for that right now. 
--- Source/Core/Core/CMakeLists.txt | 2 + Source/Core/Core/PowerPC/Jit64/Jit.cpp | 60 ++++++++++--- Source/Core/Core/PowerPC/Jit64/Jit.h | 3 + Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 37 +++++++- Source/Core/Core/PowerPC/JitArm64/Jit.h | 3 + .../PowerPC/JitCommon/ConstantPropagation.cpp | 19 ++++ .../PowerPC/JitCommon/ConstantPropagation.h | 86 +++++++++++++++++++ Source/Core/DolphinLib.props | 2 + 8 files changed, 199 insertions(+), 13 deletions(-) create mode 100644 Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp create mode 100644 Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index c898c8f693e..7fc503e2f97 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -508,6 +508,8 @@ add_library(core PowerPC/Interpreter/Interpreter_Tables.cpp PowerPC/Interpreter/Interpreter.cpp PowerPC/Interpreter/Interpreter.h + PowerPC/JitCommon/ConstantPropagation.cpp + PowerPC/JitCommon/ConstantPropagation.h PowerPC/JitCommon/DivUtils.cpp PowerPC/JitCommon/DivUtils.h PowerPC/JitCommon/JitAsmCommon.cpp diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 377b4388fb4..6c161bd8b4f 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -42,6 +42,7 @@ #include "Core/PowerPC/Jit64Common/Jit64Constants.h" #include "Core/PowerPC/Jit64Common/Jit64PowerPCState.h" #include "Core/PowerPC/Jit64Common/TrampolineCache.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/MMU.h" #include "Core/PowerPC/PPCAnalyst.h" @@ -921,6 +922,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) gpr.Start(); fpr.Start(); + m_constant_propagation.Clear(); + js.downcountAmount = 0; js.skipInstructions = 0; js.carryFlag = CarryFlag::InPPCState; @@ -1105,21 +1108,56 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, 
u32 nextPC) { gpr.Flush(); fpr.Flush(); + m_constant_propagation.Clear(); + + CompileInstruction(op); } else { - // If we have an input register that is going to be used again, load it pre-emptively, - // even if the instruction doesn't strictly need it in a register, to avoid redundant - // loads later. Of course, don't do this if we're already out of registers. - // As a bit of a heuristic, make sure we have at least one register left over for the - // output, which needs to be bound in the actual instruction compilation. - // TODO: make this smarter in the case that we're actually register-starved, i.e. - // prioritize the more important registers. - gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable); - fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable); - } + const JitCommon::ConstantPropagationResult constant_propagation_result = + m_constant_propagation.EvaluateInstruction(op.inst); - CompileInstruction(op); + if (!constant_propagation_result.instruction_fully_executed) + { + if (!bJITRegisterCacheOff) + { + // If we have an input register that is going to be used again, load it pre-emptively, + // even if the instruction doesn't strictly need it in a register, to avoid redundant + // loads later. Of course, don't do this if we're already out of registers. + // As a bit of a heuristic, make sure we have at least one register left over for the + // output, which needs to be bound in the actual instruction compilation. + // TODO: make this smarter in the case that we're actually register-starved, i.e. + // prioritize the more important registers. 
+ gpr.PreloadRegisters(op.regsIn & op.gprInUse & ~op.gprDiscardable); + fpr.PreloadRegisters(op.fregsIn & op.fprInXmm & ~op.fprDiscardable); + } + + CompileInstruction(op); + + m_constant_propagation.ClearGPRs(op.regsOut); + } + + m_constant_propagation.Apply(constant_propagation_result); + + if (constant_propagation_result.gpr >= 0) + { + gpr.SetImmediate32(constant_propagation_result.gpr, + constant_propagation_result.gpr_value); + } + + if (constant_propagation_result.instruction_fully_executed) + { + if (constant_propagation_result.carry) + FinalizeCarry(*constant_propagation_result.carry); + + if (constant_propagation_result.overflow) + GenerateConstantOverflow(*constant_propagation_result.overflow); + + // FinalizeImmediateRC is called last, because it may trigger branch merging + if (constant_propagation_result.compute_rc) + FinalizeImmediateRC(constant_propagation_result.gpr_value); + } + } js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 5ce409cbaf1..189f0c2b4bb 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -31,6 +31,7 @@ #include "Core/PowerPC/Jit64Common/BlockCache.h" #include "Core/PowerPC/Jit64Common/Jit64AsmCommon.h" #include "Core/PowerPC/Jit64Common/TrampolineCache.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/JitCommon/JitCache.h" @@ -289,6 +290,8 @@ private: GPRRegCache gpr{*this}; FPURegCache fpr{*this}; + JitCommon::ConstantPropagation m_constant_propagation; + Jit64AsmRoutineManager asm_routines{*this}; HyoutaUtilities::RangeSizeSet m_free_ranges_near; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 1eda45c58e3..f968ef5bdf1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -33,6 +33,7 @@ #include 
"Core/PatchEngine.h" #include "Core/PowerPC/Interpreter/Interpreter.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitInterface.h" #include "Core/PowerPC/PowerPC.h" #include "Core/System.h" @@ -1169,6 +1170,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) gpr.Start(js.gpa); fpr.Start(js.fpa); + m_constant_propagation.Clear(); + if (!js.noSpeculativeConstantsAddresses.contains(js.blockStart)) { IntializeSpeculativeConstants(); @@ -1341,9 +1344,39 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) FlushCarry(); gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); - } + m_constant_propagation.Clear(); - CompileInstruction(op); + CompileInstruction(op); + } + else + { + const JitCommon::ConstantPropagationResult constant_propagation_result = + m_constant_propagation.EvaluateInstruction(op.inst); + + if (!constant_propagation_result.instruction_fully_executed) + { + CompileInstruction(op); + + m_constant_propagation.ClearGPRs(op.regsOut); + } + + m_constant_propagation.Apply(constant_propagation_result); + + if (constant_propagation_result.gpr >= 0) + gpr.SetImmediate(constant_propagation_result.gpr, constant_propagation_result.gpr_value); + + if (constant_propagation_result.instruction_fully_executed) + { + if (constant_propagation_result.carry) + ComputeCarry(*constant_propagation_result.carry); + + if (constant_propagation_result.overflow) + GenerateConstantOverflow(*constant_propagation_result.overflow); + + if (constant_propagation_result.compute_rc) + ComputeRC0(constant_propagation_result.gpr_value); + } + } js.fpr_is_store_safe = op.fprIsStoreSafeAfterInst; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 2d14d634dfa..df44b9b6793 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -16,6 +16,7 @@ 
#include "Core/PowerPC/JitArm64/JitArm64Cache.h" #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitArmCommon/BackPatch.h" +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" #include "Core/PowerPC/JitCommon/JitAsmCommon.h" #include "Core/PowerPC/JitCommon/JitBase.h" #include "Core/PowerPC/PPCAnalyst.h" @@ -397,6 +398,8 @@ protected: Arm64GPRCache gpr; Arm64FPRCache fpr; + JitCommon::ConstantPropagation m_constant_propagation; + JitArm64BlockCache blocks{*this}; Arm64Gen::ARM64FloatEmitter m_float_emit; diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp new file mode 100644 index 00000000000..b4afeed5b7d --- /dev/null +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -0,0 +1,19 @@ +// Copyright 2023 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "Core/PowerPC/JitCommon/ConstantPropagation.h" + +namespace JitCommon +{ +ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruction inst) const +{ + return {}; +} + +void ConstantPropagation::Apply(ConstantPropagationResult result) +{ + if (result.gpr >= 0) + SetGPR(result.gpr, result.gpr_value); +} + +} // namespace JitCommon diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h new file mode 100644 index 00000000000..2a24b9e7105 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -0,0 +1,86 @@ +// Copyright 2023 Dolphin Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "Common/BitSet.h" +#include "Common/CommonTypes.h" +#include "Core/PowerPC/PowerPC.h" + +#include <array> +#include <cstddef> +#include <optional> + +namespace JitCommon +{ +struct ConstantPropagationResult final +{ + constexpr ConstantPropagationResult() = default; + + constexpr ConstantPropagationResult(s8 gpr_, u32 gpr_value_, bool compute_rc_ =
false) + : gpr_value(gpr_value_), gpr(gpr_), instruction_fully_executed(true), compute_rc(compute_rc_) + { + } + + // If gpr is non-negative, this is the value the instruction writes to that GPR. + u32 gpr_value = 0; + + // If the instruction couldn't be evaluated or doesn't output to a GPR, this is -1. + // Otherwise, this is the GPR that the instruction writes to. + s8 gpr = -1; + + // Whether the instruction was able to be fully evaluated with no side effects unaccounted for, + // or in other words, whether the JIT can skip emitting code for this instruction. + bool instruction_fully_executed = false; + + // If true, CR0 needs to be set based on gpr_value. + bool compute_rc = false; + + // If not std::nullopt, the instruction writes this to the carry flag. + std::optional<bool> carry = std::nullopt; + + // If not std::nullopt, the instruction writes this to the overflow flag. + std::optional<bool> overflow = std::nullopt; +}; + +class ConstantPropagation final +{ +public: + ConstantPropagationResult EvaluateInstruction(UGeckoInstruction inst) const; + + void Apply(ConstantPropagationResult result); + + template <typename... Args> + bool HasGPR(Args... gprs) const + { + return HasGPRs(BitSet32{static_cast<int>(gprs)...}); + } + + bool HasGPRs(BitSet32 gprs) const { return (m_gpr_values_known & gprs) == gprs; } + + u32 GetGPR(size_t gpr) const { return m_gpr_values[gpr]; } + + void SetGPR(size_t gpr, u32 value) + { + m_gpr_values_known[gpr] = true; + m_gpr_values[gpr] = value; + } + + template <typename... Args> + void ClearGPR(Args... 
gprs) + { + ClearGPRs(BitSet32{static_cast<int>(gprs)...}); + } + + void ClearGPRs(BitSet32 gprs) { m_gpr_values_known &= ~gprs; } + + void Clear() { m_gpr_values_known = BitSet32{}; } + +private: + static constexpr size_t GPR_COUNT = 32; + + std::array<u32, GPR_COUNT> m_gpr_values; + BitSet32 m_gpr_values_known{}; +}; + +} // namespace JitCommon diff --git a/Source/Core/DolphinLib.props b/Source/Core/DolphinLib.props index 17e44675ac2..3cfac5838c4 100644 --- a/Source/Core/DolphinLib.props +++ b/Source/Core/DolphinLib.props @@ -455,6 +455,7 @@ + @@ -1139,6 +1140,7 @@ + From 20332f441b7ff5ea799bf9ffc55e1ec438fd513f Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 22 Aug 2023 18:40:55 +0200 Subject: [PATCH 04/30] Jit: Move reg_imm to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 15 ----- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 57 ++++--------------- .../PowerPC/JitCommon/ConstantPropagation.cpp | 45 ++++++++++++++- .../PowerPC/JitCommon/ConstantPropagation.h | 9 +++ 5 files changed, 64 insertions(+), 64 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 03d8e2af7a8..ce53909f8da 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -327,14 +327,6 @@ void Jit64::reg_imm(UGeckoInstruction inst) case 24: // ori case 25: // oris { - // check for nop - if (a == s && inst.UIMM == 0) - { - // Make the nop visible in the generated code. not much use but interesting if we see one. - NOP(); - return; - } - const u32 immediate = inst.OPCD == 24 ? inst.UIMM : inst.UIMM << 16; regimmop(a, s, true, immediate, Or, &XEmitter::OR); break; @@ -348,13 +340,6 @@ void Jit64::reg_imm(UGeckoInstruction inst) case 26: // xori case 27: // xoris { - if (s == a && inst.UIMM == 0) - { - // Make the nop visible in the generated code. - NOP(); - return; - } - const u32 immediate = inst.OPCD == 26 ? 
inst.UIMM : inst.UIMM << 16; regimmop(a, s, true, immediate, Xor, &XEmitter::XOR, false); break; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index df44b9b6793..1b738061b22 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -384,7 +384,7 @@ protected: void LoadCarry(); void FlushCarry(); - void reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32), + void reg_imm(u32 d, u32 a, u32 value, void (ARM64XEmitter::*op)(Arm64Gen::ARM64Reg, Arm64Gen::ARM64Reg, u64, Arm64Gen::ARM64Reg), bool Rc = false); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index fc12676043b..25d6ecbb631 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -155,41 +155,17 @@ void JitArm64::FlushCarry() js.carryFlag = CarryFlag::InPPCState; } -void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32), +void JitArm64::reg_imm(u32 d, u32 a, u32 value, void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc) { - if (gpr.IsImm(a)) + gpr.BindToRegister(d, d == a); { - gpr.SetImmediate(d, do_op(gpr.GetImm(a), value)); - if (Rc) - ComputeRC0(gpr.GetImm(d)); + auto WA = gpr.GetScopedReg(); + (this->*op)(gpr.R(d), gpr.R(a), value, WA); } - else - { - gpr.BindToRegister(d, d == a); - { - auto WA = gpr.GetScopedReg(); - (this->*op)(gpr.R(d), gpr.R(a), value, WA); - } - if (Rc) - ComputeRC0(gpr.R(d)); - } -} - -static constexpr u32 BitOR(u32 a, u32 b) -{ - return a | b; -} - -static constexpr u32 BitAND(u32 a, u32 b) -{ - return a & b; -} - -static constexpr u32 BitXOR(u32 a, u32 b) -{ - return a ^ b; + if (Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::arith_imm(UGeckoInstruction inst) @@ -203,34 +179,21 @@ void JitArm64::arith_imm(UGeckoInstruction inst) case 24: // ori case 25: // oris { - // check for nop - if (a == s && 
inst.UIMM == 0) - { - // NOP - return; - } - const u32 immediate = inst.OPCD == 24 ? inst.UIMM : inst.UIMM << 16; - reg_imm(a, s, immediate, BitOR, &ARM64XEmitter::ORRI2R); + reg_imm(a, s, immediate, &ARM64XEmitter::ORRI2R); break; } case 28: // andi - reg_imm(a, s, inst.UIMM, BitAND, &ARM64XEmitter::ANDI2R, true); + reg_imm(a, s, inst.UIMM, &ARM64XEmitter::ANDI2R, true); break; case 29: // andis - reg_imm(a, s, inst.UIMM << 16, BitAND, &ARM64XEmitter::ANDI2R, true); + reg_imm(a, s, inst.UIMM << 16, &ARM64XEmitter::ANDI2R, true); break; case 26: // xori case 27: // xoris { - if (a == s && inst.UIMM == 0) - { - // NOP - return; - } - const u32 immediate = inst.OPCD == 26 ? inst.UIMM : inst.UIMM << 16; - reg_imm(a, s, immediate, BitXOR, &ARM64XEmitter::EORI2R); + reg_imm(a, s, immediate, &ARM64XEmitter::EORI2R); break; } } diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index b4afeed5b7d..1b09f42c802 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -5,9 +5,52 @@ namespace JitCommon { +static constexpr u32 BitOR(u32 a, u32 b) +{ + return a | b; +} + +static constexpr u32 BitAND(u32 a, u32 b) +{ + return a & b; +} + +static constexpr u32 BitXOR(u32 a, u32 b) +{ + return a ^ b; +} + ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruction inst) const { - return {}; + switch (inst.OPCD) + { + case 24: // ori + case 25: // oris + return EvaluateBitwiseImm(inst, BitOR); + case 26: // xori + case 27: // xoris + return EvaluateBitwiseImm(inst, BitXOR); + case 28: // andi + case 29: // andis + return EvaluateBitwiseImm(inst, BitAND); + default: + return {}; + } +} + +ConstantPropagationResult ConstantPropagation::EvaluateBitwiseImm(UGeckoInstruction inst, + u32 (*do_op)(u32, u32)) const +{ + const bool is_and = do_op == &BitAND; + const u32 immediate = inst.OPCD & 1 ? 
inst.UIMM << 16 : inst.UIMM; + + if (inst.UIMM == 0 && !is_and && inst.RA == inst.RS) + return DO_NOTHING; + + if (!HasGPR(inst.RS)) + return {}; + + return ConstantPropagationResult(inst.RA, do_op(m_gpr_values[inst.RS], immediate), is_and); } void ConstantPropagation::Apply(ConstantPropagationResult result) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 2a24b9e7105..176ee3d513b 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -77,6 +77,15 @@ public: void Clear() { m_gpr_values_known = BitSet32{}; } private: + ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst, + u32 (*do_op)(u32, u32)) const; + + static constexpr ConstantPropagationResult DO_NOTHING = [] { + ConstantPropagationResult result; + result.instruction_fully_executed = true; + return result; + }(); + static constexpr size_t GPR_COUNT = 32; std::array m_gpr_values; From 3a6eea74dd1fd345d409455a7471764abc4207e6 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Tue, 22 Aug 2023 19:50:38 +0200 Subject: [PATCH 05/30] Jit: Move addix to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 31 ++++++------------- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 13 ++------ .../PowerPC/JitCommon/ConstantPropagation.cpp | 16 ++++++++++ .../PowerPC/JitCommon/ConstantPropagation.h | 1 + 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index ce53909f8da..c8aea9863e9 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -260,25 +260,18 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, if (a || binary || carry) { carry &= js.op->wantsCA; - if (gpr.IsImm(a) && !carry) + RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCX64Reg Rd = 
gpr.Bind(d, RCMode::Write); + RegCache::Realize(Ra, Rd); + if (doop == Add && Ra.IsSimpleReg() && !carry && d != a) { - gpr.SetImmediate32(d, doop(gpr.Imm32(a), value)); + LEA(32, Rd, MDisp(Ra.GetSimpleReg(), value)); } else { - RCOpArg Ra = gpr.Use(a, RCMode::Read); - RCX64Reg Rd = gpr.Bind(d, RCMode::Write); - RegCache::Realize(Ra, Rd); - if (doop == Add && Ra.IsSimpleReg() && !carry && d != a) - { - LEA(32, Rd, MDisp(Ra.GetSimpleReg(), value)); - } - else - { - if (d != a) - MOV(32, Rd, Ra); - (this->*op)(32, Rd, Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; - } + if (d != a) + MOV(32, Rd, Ra); + (this->*op)(32, Rd, Imm32(value)); // m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; } if (carry) FinalizeCarry(CC_C); @@ -304,12 +297,8 @@ void Jit64::reg_imm(UGeckoInstruction inst) switch (inst.OPCD) { case 14: // addi - // occasionally used as MOV - emulate, with immediate propagation - if (a != 0 && d != a && gpr.IsImm(a)) - { - gpr.SetImmediate32(d, gpr.Imm32(a) + (u32)(s32)inst.SIMM_16); - } - else if (a != 0 && d != a && inst.SIMM_16 == 0) + // occasionally used as MOV + if (a != 0 && d != a && inst.SIMM_16 == 0) { RCOpArg Ra = gpr.Use(a, RCMode::Read); RCX64Reg Rd = gpr.Bind(d, RCMode::Write); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 25d6ecbb631..1628fe04c4d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -213,17 +213,10 @@ void JitArm64::addix(UGeckoInstruction inst) if (a) { - if (gpr.IsImm(a)) - { - gpr.SetImmediate(d, gpr.GetImm(a) + imm); - } - else - { - gpr.BindToRegister(d, d == a); + gpr.BindToRegister(d, d == a); - auto WA = gpr.GetScopedReg(); - ADDI2R(gpr.R(d), gpr.R(a), imm, WA); - } + auto WA = gpr.GetScopedReg(); + ADDI2R(gpr.R(d), gpr.R(a), imm, WA); } else { diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp 
b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 1b09f42c802..34771366f3d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -24,6 +24,9 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc { switch (inst.OPCD) { + case 14: // addi + case 15: // addis + return EvaluateAddImm(inst); case 24: // ori case 25: // oris return EvaluateBitwiseImm(inst, BitOR); @@ -38,6 +41,19 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc } } +ConstantPropagationResult ConstantPropagation::EvaluateAddImm(UGeckoInstruction inst) const +{ + const s32 immediate = inst.OPCD & 1 ? inst.SIMM_16 << 16 : inst.SIMM_16; + + if (inst.RA == 0) + return ConstantPropagationResult(inst.RD, immediate); + + if (!HasGPR(inst.RA)) + return {}; + + return ConstantPropagationResult(inst.RD, m_gpr_values[inst.RA] + immediate); +} + ConstantPropagationResult ConstantPropagation::EvaluateBitwiseImm(UGeckoInstruction inst, u32 (*do_op)(u32, u32)) const { diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 176ee3d513b..797c7839254 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -77,6 +77,7 @@ public: void Clear() { m_gpr_values_known = BitSet32{}; } private: + ConstantPropagationResult EvaluateAddImm(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst, u32 (*do_op)(u32, u32)) const; From 77b46c30aded7b562566483ab8d94c24c3461499 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 23 Aug 2023 15:22:34 +0200 Subject: [PATCH 06/30] Jit: Move boolX to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 87 ++++-------- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 24 +--- .../PowerPC/JitCommon/ConstantPropagation.cpp | 
128 ++++++++++++++++++ .../PowerPC/JitCommon/ConstantPropagation.h | 4 + 4 files changed, 162 insertions(+), 81 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index c8aea9863e9..d9b5cb360b3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -680,29 +680,7 @@ void Jit64::boolX(UGeckoInstruction inst) bool needs_test = false; DEBUG_ASSERT_MSG(DYNA_REC, inst.OPCD == 31, "Invalid boolX"); - if (gpr.IsImm(s, b)) - { - const u32 rs_offset = gpr.Imm32(s); - const u32 rb_offset = gpr.Imm32(b); - - if (inst.SUBOP10 == 28) // andx - gpr.SetImmediate32(a, rs_offset & rb_offset); - else if (inst.SUBOP10 == 476) // nandx - gpr.SetImmediate32(a, ~(rs_offset & rb_offset)); - else if (inst.SUBOP10 == 60) // andcx - gpr.SetImmediate32(a, rs_offset & (~rb_offset)); - else if (inst.SUBOP10 == 444) // orx - gpr.SetImmediate32(a, rs_offset | rb_offset); - else if (inst.SUBOP10 == 124) // norx - gpr.SetImmediate32(a, ~(rs_offset | rb_offset)); - else if (inst.SUBOP10 == 412) // orcx - gpr.SetImmediate32(a, rs_offset | (~rb_offset)); - else if (inst.SUBOP10 == 316) // xorx - gpr.SetImmediate32(a, rs_offset ^ rb_offset); - else if (inst.SUBOP10 == 284) // eqvx - gpr.SetImmediate32(a, ~(rs_offset ^ rb_offset)); - } - else if (gpr.IsImm(s) || gpr.IsImm(b)) + if (gpr.IsImm(s) || gpr.IsImm(b)) { const auto [i, j] = gpr.IsImm(s) ? std::pair(s, b) : std::pair(b, s); u32 imm = gpr.Imm32(i); @@ -756,53 +734,46 @@ void Jit64::boolX(UGeckoInstruction inst) } else if (is_and) { - if (imm == 0) + RCOpArg Rj = gpr.Use(j, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rj, Ra); + + if (imm == 0xFFFFFFFF) { - gpr.SetImmediate32(a, final_not ? 
0xFFFFFFFF : 0); + if (a != j) + MOV(32, Ra, Rj); + if (final_not || complement_b) + NOT(32, Ra); + needs_test = true; + } + else if (complement_b) + { + if (a != j) + MOV(32, Ra, Rj); + NOT(32, Ra); + AND(32, Ra, Imm32(imm)); } else { - RCOpArg Rj = gpr.Use(j, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Rj, Ra); - - if (imm == 0xFFFFFFFF) + if (a == j) { - if (a != j) - MOV(32, Ra, Rj); - if (final_not || complement_b) - NOT(32, Ra); - needs_test = true; + AND(32, Ra, Imm32(imm)); } - else if (complement_b) + else if (s32(imm) >= -128 && s32(imm) <= 127) { - if (a != j) - MOV(32, Ra, Rj); - NOT(32, Ra); + MOV(32, Ra, Rj); AND(32, Ra, Imm32(imm)); } else { - if (a == j) - { - AND(32, Ra, Imm32(imm)); - } - else if (s32(imm) >= -128 && s32(imm) <= 127) - { - MOV(32, Ra, Rj); - AND(32, Ra, Imm32(imm)); - } - else - { - MOV(32, Ra, Imm32(imm)); - AND(32, Ra, Rj); - } + MOV(32, Ra, Imm32(imm)); + AND(32, Ra, Rj); + } - if (final_not) - { - NOT(32, Ra); - needs_test = true; - } + if (final_not) + { + NOT(32, Ra); + needs_test = true; } } } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 1628fe04c4d..9ab44b66aed 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -231,29 +231,7 @@ void JitArm64::boolX(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, s = inst.RS, b = inst.RB; - if (gpr.IsImm(s) && gpr.IsImm(b)) - { - if (inst.SUBOP10 == 28) // andx - gpr.SetImmediate(a, (u32)gpr.GetImm(s) & (u32)gpr.GetImm(b)); - else if (inst.SUBOP10 == 476) // nandx - gpr.SetImmediate(a, ~((u32)gpr.GetImm(s) & (u32)gpr.GetImm(b))); - else if (inst.SUBOP10 == 60) // andcx - gpr.SetImmediate(a, (u32)gpr.GetImm(s) & (~(u32)gpr.GetImm(b))); - else if (inst.SUBOP10 == 444) // orx - gpr.SetImmediate(a, (u32)gpr.GetImm(s) | (u32)gpr.GetImm(b)); - else if (inst.SUBOP10 == 124) // 
norx - gpr.SetImmediate(a, ~((u32)gpr.GetImm(s) | (u32)gpr.GetImm(b))); - else if (inst.SUBOP10 == 412) // orcx - gpr.SetImmediate(a, (u32)gpr.GetImm(s) | (~(u32)gpr.GetImm(b))); - else if (inst.SUBOP10 == 316) // xorx - gpr.SetImmediate(a, (u32)gpr.GetImm(s) ^ (u32)gpr.GetImm(b)); - else if (inst.SUBOP10 == 284) // eqvx - gpr.SetImmediate(a, ~((u32)gpr.GetImm(s) ^ (u32)gpr.GetImm(b))); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else if (s == b) + if (s == b) { if ((inst.SUBOP10 == 28 /* andx */) || (inst.SUBOP10 == 444 /* orx */)) { diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 34771366f3d..f29ce2a759e 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -36,6 +36,8 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc case 28: // andi case 29: // andis return EvaluateBitwiseImm(inst, BitAND); + case 31: + return EvaluateTable31(inst); default: return {}; } @@ -69,6 +71,132 @@ ConstantPropagationResult ConstantPropagation::EvaluateBitwiseImm(UGeckoInstruct return ConstantPropagationResult(inst.RA, do_op(m_gpr_values[inst.RS], immediate), is_and); } +ConstantPropagationResult ConstantPropagation::EvaluateTable31(UGeckoInstruction inst) const +{ + const bool has_s = HasGPR(inst.RS); + const bool has_b = HasGPR(inst.RB); + if (!has_s || !has_b) + { + if (has_s) + return EvaluateTable31OneRegisterKnown(inst, GetGPR(inst.RS), false); + else if (has_b) + return EvaluateTable31OneRegisterKnown(inst, GetGPR(inst.RB), true); + else if (inst.RS == inst.RB) + return EvaluateTable31IdenticalRegisters(inst); + else + return {}; + } + + u32 a; + const u32 s = GetGPR(inst.RS); + const u32 b = GetGPR(inst.RB); + + switch (inst.SUBOP10) + { + case 28: // andx + a = s & b; + break; + case 60: // andcx + a = s & (~b); + break; + case 124: // norx + a = ~(s | b); + 
break; + case 284: // eqvx + a = ~(s ^ b); + break; + case 316: // xorx + a = s ^ b; + break; + case 412: // orcx + a = s | (~b); + break; + case 444: // orx + a = s | b; + break; + case 476: // nandx + a = ~(s & b); + break; + default: + return {}; + } + + return ConstantPropagationResult(inst.RA, a, inst.Rc); +} + +ConstantPropagationResult +ConstantPropagation::EvaluateTable31OneRegisterKnown(UGeckoInstruction inst, u32 value, + bool known_reg_is_b) const +{ + u32 a; + + switch (inst.SUBOP10) + { + case 60: // andcx + if (known_reg_is_b) + value = ~value; + [[fallthrough]]; + case 28: // andx + if (value == 0) + a = 0; + else + return {}; + break; + case 124: // norx + if (value == 0xFFFFFFFF) + a = 0; + else + return {}; + break; + case 412: // orcx + if (known_reg_is_b) + value = ~value; + [[fallthrough]]; + case 444: // orx + if (value == 0xFFFFFFFF) + a = 0xFFFFFFFF; + else + return {}; + break; + case 476: // nandx + if (value == 0) + a = 0xFFFFFFFF; + else + return {}; + break; + default: + return {}; + } + + return ConstantPropagationResult(inst.RA, a, inst.Rc); +} + +ConstantPropagationResult +ConstantPropagation::EvaluateTable31IdenticalRegisters(UGeckoInstruction inst) const +{ + u32 a; + + switch (inst.SUBOP10) + { + case 60: // andcx + a = 0; + break; + case 284: // eqvx + a = 0xFFFFFFFF; + break; + case 316: // xorx + a = 0; + break; + case 412: // orcx + a = 0xFFFFFFFF; + break; + default: + return {}; + } + + return ConstantPropagationResult(inst.RA, a, inst.Rc); +} + void ConstantPropagation::Apply(ConstantPropagationResult result) { if (result.gpr >= 0) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 797c7839254..5560ffdb808 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -80,6 +80,10 @@ private: ConstantPropagationResult EvaluateAddImm(UGeckoInstruction inst) const; 
ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst, u32 (*do_op)(u32, u32)) const; + ConstantPropagationResult EvaluateTable31(UGeckoInstruction inst) const; + ConstantPropagationResult EvaluateTable31OneRegisterKnown(UGeckoInstruction inst, u32 value, + bool known_reg_is_b) const; + ConstantPropagationResult EvaluateTable31IdenticalRegisters(UGeckoInstruction inst) const; static constexpr ConstantPropagationResult DO_NOTHING = [] { ConstantPropagationResult result; From f04417eb5a117dfe6552f3203d39697b4ef0d7d5 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Thu, 24 Aug 2023 13:16:08 +0200 Subject: [PATCH 07/30] Jit: Move addx to ConstantPropagation --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 2 +- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 11 +-- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 2 +- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 32 ++------- .../PowerPC/JitCommon/ConstantPropagation.cpp | 68 ++++++++++++++++--- .../PowerPC/JitCommon/ConstantPropagation.h | 12 ++-- 6 files changed, 75 insertions(+), 52 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 6c161bd8b4f..024b4c6e4af 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -1115,7 +1115,7 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) else { const JitCommon::ConstantPropagationResult constant_propagation_result = - m_constant_propagation.EvaluateInstruction(op.inst); + m_constant_propagation.EvaluateInstruction(op.inst, opinfo->flags); if (!constant_propagation_result.instruction_fully_executed) { diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index d9b5cb360b3..1b5674f65b9 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1826,16 +1826,7 @@ void Jit64::addx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; bool carry 
= !(inst.SUBOP10 & (1 << 8)); - if (gpr.IsImm(a, b)) - { - const s32 i = gpr.SImm32(a), j = gpr.SImm32(b); - gpr.SetImmediate32(d, i + j); - if (carry) - FinalizeCarry(Interpreter::Helper_Carry(i, j)); - if (inst.OE) - GenerateConstantOverflow((s64)i + (s64)j); - } - else if (gpr.IsImm(a) || gpr.IsImm(b)) + if (gpr.IsImm(a) || gpr.IsImm(b)) { const auto [i, j] = gpr.IsImm(a) ? std::pair(a, b) : std::pair(b, a); const s32 imm = gpr.SImm32(i); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index f968ef5bdf1..ba30b0bc818 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -1351,7 +1351,7 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) else { const JitCommon::ConstantPropagationResult constant_propagation_result = - m_constant_propagation.EvaluateInstruction(op.inst); + m_constant_propagation.EvaluateInstruction(op.inst, opinfo->flags); if (!constant_propagation_result.instruction_fully_executed) { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 9ab44b66aed..6739de44399 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -476,14 +476,7 @@ void JitArm64::addx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b); - gpr.SetImmediate(d, i + j); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else if (gpr.IsImm(a) || gpr.IsImm(b)) + if (gpr.IsImm(a) || gpr.IsImm(b)) { int imm_reg = gpr.IsImm(a) ? a : b; int in_reg = gpr.IsImm(a) ? 
b : a; @@ -1679,25 +1672,12 @@ void JitArm64::addcx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - u32 i = gpr.GetImm(a), j = gpr.GetImm(b); - gpr.SetImmediate(d, i + j); + gpr.BindToRegister(d, d == a || d == b); + CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), gpr.R(b)); - bool has_carry = Interpreter::Helper_Carry(i, j); - ComputeCarry(has_carry); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else - { - gpr.BindToRegister(d, d == a || d == b); - CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), gpr.R(b)); - - ComputeCarry(); - if (inst.Rc) - ComputeRC0(gpr.R(d)); - } + ComputeCarry(); + if (inst.Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::divwux(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index f29ce2a759e..84a37efa16d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -3,6 +3,8 @@ #include "Core/PowerPC/JitCommon/ConstantPropagation.h" +#include "Core/PowerPC/PPCTables.h" + namespace JitCommon { static constexpr u32 BitOR(u32 a, u32 b) @@ -20,7 +22,8 @@ static constexpr u32 BitXOR(u32 a, u32 b) return a ^ b; } -ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruction inst) const +ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruction inst, + u64 flags) const { switch (inst.OPCD) { @@ -37,7 +40,7 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc case 29: // andis return EvaluateBitwiseImm(inst, BitAND); case 31: - return EvaluateTable31(inst); + return EvaluateTable31(inst, flags); default: return {}; } @@ -71,18 +74,65 @@ ConstantPropagationResult ConstantPropagation::EvaluateBitwiseImm(UGeckoInstruct return ConstantPropagationResult(inst.RA, do_op(m_gpr_values[inst.RS], immediate), is_and); } 
-ConstantPropagationResult ConstantPropagation::EvaluateTable31(UGeckoInstruction inst) const +ConstantPropagationResult ConstantPropagation::EvaluateTable31(UGeckoInstruction inst, + u64 flags) const +{ + if (flags & FL_OUT_D) + { + // input a, b -> output d + return EvaluateTable31AB(inst, flags); + } + else + { + // input s, b -> output a + return EvaluateTable31SB(inst); + } +} + +ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstruction inst, + u64 flags) const +{ + if (!HasGPR(inst.RA, inst.RB)) + return {}; + + u64 d; + s64 d_overflow; + const u32 a = GetGPR(inst.RA); + const u32 b = GetGPR(inst.RB); + + switch (inst.SUBOP10) + { + case 10: // addcx + case 522: // addcox + case 266: // addx + case 778: // addox + d = u64(a) + u64(b); + d_overflow = s64(s32(a)) + s64(s32(b)); + break; + default: + return {}; + } + + ConstantPropagationResult result(inst.RD, u32(d), inst.Rc); + if (flags & FL_SET_CA) + result.carry = (d >> 32 != 0); + if (flags & FL_SET_OE) + result.overflow = (d_overflow != s64(s32(d_overflow))); + return result; +} + +ConstantPropagationResult ConstantPropagation::EvaluateTable31SB(UGeckoInstruction inst) const { const bool has_s = HasGPR(inst.RS); const bool has_b = HasGPR(inst.RB); if (!has_s || !has_b) { if (has_s) - return EvaluateTable31OneRegisterKnown(inst, GetGPR(inst.RS), false); + return EvaluateTable31SBOneRegisterKnown(inst, GetGPR(inst.RS), false); else if (has_b) - return EvaluateTable31OneRegisterKnown(inst, GetGPR(inst.RB), true); + return EvaluateTable31SBOneRegisterKnown(inst, GetGPR(inst.RB), true); else if (inst.RS == inst.RB) - return EvaluateTable31IdenticalRegisters(inst); + return EvaluateTable31SBIdenticalRegisters(inst); else return {}; } @@ -125,8 +175,8 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31(UGeckoInstruction } ConstantPropagationResult -ConstantPropagation::EvaluateTable31OneRegisterKnown(UGeckoInstruction inst, u32 value, - bool known_reg_is_b) const 
+ConstantPropagation::EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u32 value, + bool known_reg_is_b) const { u32 a; @@ -172,7 +222,7 @@ ConstantPropagation::EvaluateTable31OneRegisterKnown(UGeckoInstruction inst, u32 } ConstantPropagationResult -ConstantPropagation::EvaluateTable31IdenticalRegisters(UGeckoInstruction inst) const +ConstantPropagation::EvaluateTable31SBIdenticalRegisters(UGeckoInstruction inst) const { u32 a; diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 5560ffdb808..a7f5b27fd30 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -46,7 +46,7 @@ struct ConstantPropagationResult final class ConstantPropagation final { public: - ConstantPropagationResult EvaluateInstruction(UGeckoInstruction inst) const; + ConstantPropagationResult EvaluateInstruction(UGeckoInstruction inst, u64 flags) const; void Apply(ConstantPropagationResult result); @@ -80,10 +80,12 @@ private: ConstantPropagationResult EvaluateAddImm(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst, u32 (*do_op)(u32, u32)) const; - ConstantPropagationResult EvaluateTable31(UGeckoInstruction inst) const; - ConstantPropagationResult EvaluateTable31OneRegisterKnown(UGeckoInstruction inst, u32 value, - bool known_reg_is_b) const; - ConstantPropagationResult EvaluateTable31IdenticalRegisters(UGeckoInstruction inst) const; + ConstantPropagationResult EvaluateTable31(UGeckoInstruction inst, u64 flags) const; + ConstantPropagationResult EvaluateTable31AB(UGeckoInstruction inst, u64 flags) const; + ConstantPropagationResult EvaluateTable31SB(UGeckoInstruction inst) const; + ConstantPropagationResult EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u32 value, + bool known_reg_is_b) const; + ConstantPropagationResult 
EvaluateTable31SBIdenticalRegisters(UGeckoInstruction inst) const; static constexpr ConstantPropagationResult DO_NOTHING = [] { ConstantPropagationResult result; From b506cb2ad878a0f2560cb6a3ad1b35c2e60f9e67 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Thu, 24 Aug 2023 14:17:36 +0200 Subject: [PATCH 08/30] Jit: Move extsXx to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 7 +--- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 17 ++------ .../PowerPC/JitCommon/ConstantPropagation.cpp | 41 ++++++++++++++++--- .../PowerPC/JitCommon/ConstantPropagation.h | 1 + 4 files changed, 42 insertions(+), 24 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 1b5674f65b9..91e879fd69d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1026,13 +1026,8 @@ void Jit64::extsXx(UGeckoInstruction inst) int a = inst.RA, s = inst.RS; int size = inst.SUBOP10 == 922 ? 16 : 8; - if (gpr.IsImm(s)) { - gpr.SetImmediate32(a, (u32)(s32)(size == 16 ? (s16)gpr.Imm32(s) : (s8)gpr.Imm32(s))); - } - else - { - RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCOpArg Rs = gpr.UseNoImm(s, RCMode::Read); RCX64Reg Ra = gpr.Bind(a, RCMode::Write); RegCache::Realize(Rs, Ra); MOVSX(32, size, Ra, Rs); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 6739de44399..bfab2217121 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -506,19 +506,10 @@ void JitArm64::extsXx(UGeckoInstruction inst) int a = inst.RA, s = inst.RS; int size = inst.SUBOP10 == 922 ? 16 : 8; - if (gpr.IsImm(s)) - { - gpr.SetImmediate(a, (u32)(s32)(size == 16 ? 
(s16)gpr.GetImm(s) : (s8)gpr.GetImm(s))); - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else - { - gpr.BindToRegister(a, a == s); - SBFM(gpr.R(a), gpr.R(s), 0, size - 1); - if (inst.Rc) - ComputeRC0(gpr.R(a)); - } + gpr.BindToRegister(a, a == s); + SBFM(gpr.R(a), gpr.R(s), 0, size - 1); + if (inst.Rc) + ComputeRC0(gpr.R(a)); } void JitArm64::cntlzwx(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 84a37efa16d..3aecc0f5316 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -77,18 +77,49 @@ ConstantPropagationResult ConstantPropagation::EvaluateBitwiseImm(UGeckoInstruct ConstantPropagationResult ConstantPropagation::EvaluateTable31(UGeckoInstruction inst, u64 flags) const { - if (flags & FL_OUT_D) + if (flags & FL_IN_B) { - // input a, b -> output d - return EvaluateTable31AB(inst, flags); + if (flags & FL_OUT_D) + { + // input a, b -> output d + return EvaluateTable31AB(inst, flags); + } + else + { + // input s, b -> output a + return EvaluateTable31SB(inst); + } } else { - // input s, b -> output a - return EvaluateTable31SB(inst); + // input s -> output a + return EvaluateTable31S(inst); } } +ConstantPropagationResult ConstantPropagation::EvaluateTable31S(UGeckoInstruction inst) const +{ + if (!HasGPR(inst.RS)) + return {}; + + u32 a; + const u32 s = GetGPR(inst.RS); + + switch (inst.SUBOP10) + { + case 922: // extshx + a = s32(s16(s)); + break; + case 954: // extsbx + a = s32(s8(s)); + break; + default: + return {}; + } + + return ConstantPropagationResult(inst.RA, a, inst.Rc); +} + ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstruction inst, u64 flags) const { diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index a7f5b27fd30..9dfc6e602e8 100644 --- 
a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -81,6 +81,7 @@ private: ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst, u32 (*do_op)(u32, u32)) const; ConstantPropagationResult EvaluateTable31(UGeckoInstruction inst, u64 flags) const; + ConstantPropagationResult EvaluateTable31S(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateTable31AB(UGeckoInstruction inst, u64 flags) const; ConstantPropagationResult EvaluateTable31SB(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u32 value, From 92a5a46b2c6ac8849b22cb931d5e37ecce036503 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Thu, 24 Aug 2023 14:31:42 +0200 Subject: [PATCH 09/30] Jit: Move cntlzwx to ConstantPropagation --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 7 +------ .../Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 17 ++++------------- .../PowerPC/JitCommon/ConstantPropagation.cpp | 5 +++++ 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 91e879fd69d..040444c7211 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -2649,14 +2649,9 @@ void Jit64::cntlzwx(UGeckoInstruction inst) int s = inst.RS; bool needs_test = false; - if (gpr.IsImm(s)) - { - gpr.SetImmediate32(a, static_cast<u32>(std::countl_zero(gpr.Imm32(s)))); - } - else { RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCOpArg Rs = gpr.UseNoImm(s, RCMode::Read); RegCache::Realize(Ra, Rs); if (cpu_info.bLZCNT) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index bfab2217121..1e080817a34 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ 
b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -519,19 +519,10 @@ void JitArm64::cntlzwx(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; - if (gpr.IsImm(s)) - { - gpr.SetImmediate(a, static_cast<u32>(std::countl_zero(gpr.GetImm(s)))); - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else - { - gpr.BindToRegister(a, a == s); - CLZ(gpr.R(a), gpr.R(s)); - if (inst.Rc) - ComputeRC0(gpr.R(a)); - } + gpr.BindToRegister(a, a == s); + CLZ(gpr.R(a), gpr.R(s)); + if (inst.Rc) + ComputeRC0(gpr.R(a)); } void JitArm64::negx(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 3aecc0f5316..630929efbcf 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -3,6 +3,8 @@ #include "Core/PowerPC/JitCommon/ConstantPropagation.h" +#include <bit> + #include "Core/PowerPC/PPCTables.h" namespace JitCommon @@ -107,6 +109,9 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31S(UGeckoInstructio switch (inst.SUBOP10) { + case 26: // cntlzwx + a = std::countl_zero(s); + break; case 922: // extshx a = s32(s16(s)); break; From 4c8995fae565357979e40576d7ece73b264afecd Mon Sep 17 00:00:00 2001 From: JosJuice Date: Thu, 24 Aug 2023 14:45:39 +0200 Subject: [PATCH 10/30] Jit: Move negx to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 9 +------ .../PowerPC/JitArm64/JitArm64_Integer.cpp | 17 +++--------- .../PowerPC/JitCommon/ConstantPropagation.cpp | 26 +++++++++++++++++-- .../PowerPC/JitCommon/ConstantPropagation.h | 1 + 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 040444c7211..5f3b5e17992 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -2283,15 +2283,8 @@ void 
Jit64::negx(UGeckoInstruction inst) int a = inst.RA; int d = inst.RD; - if (gpr.IsImm(a)) { - gpr.SetImmediate32(d, ~(gpr.Imm32(a)) + 1); - if (inst.OE) - GenerateConstantOverflow(gpr.Imm32(d) == 0x80000000); - } - else - { - RCOpArg Ra = gpr.Use(a, RCMode::Read); + RCOpArg Ra = gpr.UseNoImm(a, RCMode::Read); RCX64Reg Rd = gpr.Bind(d, RCMode::Write); RegCache::Realize(Ra, Rd); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 1e080817a34..a0d13c6a267 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -534,19 +534,10 @@ void JitArm64::negx(UGeckoInstruction inst) FALLBACK_IF(inst.OE); - if (gpr.IsImm(a)) - { - gpr.SetImmediate(d, ~((u32)gpr.GetImm(a)) + 1); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else - { - gpr.BindToRegister(d, d == a); - SUB(gpr.R(d), ARM64Reg::WSP, gpr.R(a)); - if (inst.Rc) - ComputeRC0(gpr.R(d)); - } + gpr.BindToRegister(d, d == a); + SUB(gpr.R(d), ARM64Reg::WSP, gpr.R(a)); + if (inst.Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::cmp(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 630929efbcf..a79d8f00fa1 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -94,11 +94,33 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31(UGeckoInstruction } else { - // input s -> output a - return EvaluateTable31S(inst); + switch (inst.SUBOP10) + { + case 104: // negx + case 616: // negox + // input a -> output d + return EvaluateTable31Negx(inst, flags); + default: + // input s -> output a + return EvaluateTable31S(inst); + } } } +ConstantPropagationResult ConstantPropagation::EvaluateTable31Negx(UGeckoInstruction inst, + u64 flags) const +{ + if (!HasGPR(inst.RA)) + return {}; + 
+ const s64 out = -s64(s32(GetGPR(inst.RA))); + + ConstantPropagationResult result(inst.RD, u32(out), inst.Rc); + if (flags & FL_SET_OE) + result.overflow = (out != s64(s32(out))); + return result; +} + ConstantPropagationResult ConstantPropagation::EvaluateTable31S(UGeckoInstruction inst) const { if (!HasGPR(inst.RS)) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 9dfc6e602e8..097932e1bbc 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -81,6 +81,7 @@ private: ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst, u32 (*do_op)(u32, u32)) const; ConstantPropagationResult EvaluateTable31(UGeckoInstruction inst, u64 flags) const; + ConstantPropagationResult EvaluateTable31Negx(UGeckoInstruction inst, u64 flags) const; ConstantPropagationResult EvaluateTable31S(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateTable31AB(UGeckoInstruction inst, u64 flags) const; ConstantPropagationResult EvaluateTable31SB(UGeckoInstruction inst) const; From 1a22bda0a7bb3a0cc5a7335ce3756d7eed346851 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 24 May 2024 19:49:53 +0200 Subject: [PATCH 11/30] Jit: Move rlwinmx and rlwnmx to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 169 ++++++++---------- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 8 - .../PowerPC/JitCommon/ConstantPropagation.cpp | 18 ++ .../PowerPC/JitCommon/ConstantPropagation.h | 1 + 4 files changed, 95 insertions(+), 101 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 5f3b5e17992..206e62cc482 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1979,112 +1979,99 @@ void Jit64::rlwinmx(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; - if 
(gpr.IsImm(s)) + const bool left_shift = inst.SH && inst.MB == 0 && inst.ME == 31 - inst.SH; + const bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH; + const bool field_extract = inst.SH && inst.ME == 31 && inst.MB > 32 - inst.SH; + const u32 mask = MakeRotationMask(inst.MB, inst.ME); + const u32 prerotate_mask = std::rotr(mask, inst.SH); + const bool simple_mask = mask == 0xff || mask == 0xffff; + const bool simple_prerotate_mask = prerotate_mask == 0xff || prerotate_mask == 0xffff; + // In case of a merged branch, track whether or not we've set flags. + // If not, we need to do a test later to get them. + bool needs_test = true; + // If we know the high bit can't be set, we can avoid doing a sign extend for flag storage. + bool needs_sext = true; + int mask_size = inst.ME - inst.MB + 1; + + if (simple_mask && !(inst.SH & (mask_size - 1)) && !gpr.IsBound(s) && !gpr.IsImm(s)) { - u32 result = gpr.Imm32(s); - if (inst.SH != 0) - result = std::rotl(result, inst.SH); - result &= MakeRotationMask(inst.MB, inst.ME); - gpr.SetImmediate32(a, result); - if (inst.Rc) - ComputeRC(a); + // optimized case: byte/word extract from m_ppc_state + + // Note: If a == s, calling Realize(Ra) will allocate a host register for Rs, + // so we have to get mem_source from Rs before calling Realize(Ra) + + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RegCache::Realize(Rs); + OpArg mem_source = Rs.Location(); + if (inst.SH) + mem_source.AddMemOffset((32 - inst.SH) >> 3); + Rs.Unlock(); + + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Ra); + MOVZX(32, mask_size, Ra, mem_source); + + needs_sext = false; } else { - const bool left_shift = inst.SH && inst.MB == 0 && inst.ME == 31 - inst.SH; - const bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH; - const bool field_extract = inst.SH && inst.ME == 31 && inst.MB > 32 - inst.SH; - const u32 mask = MakeRotationMask(inst.MB, inst.ME); - const u32 prerotate_mask = std::rotr(mask, inst.SH); - 
const bool simple_mask = mask == 0xff || mask == 0xffff; - const bool simple_prerotate_mask = prerotate_mask == 0xff || prerotate_mask == 0xffff; - // In case of a merged branch, track whether or not we've set flags. - // If not, we need to do a test later to get them. - bool needs_test = true; - // If we know the high bit can't be set, we can avoid doing a sign extend for flag storage. - bool needs_sext = true; - int mask_size = inst.ME - inst.MB + 1; + RCOpArg Rs = gpr.UseNoImm(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); - if (simple_mask && !(inst.SH & (mask_size - 1)) && !gpr.IsBound(s)) + if (a != s && left_shift && Rs.IsSimpleReg() && inst.SH <= 3) { - // optimized case: byte/word extract from m_ppc_state - - // Note: If a == s, calling Realize(Ra) will allocate a host register for Rs, - // so we have to get mem_source from Rs before calling Realize(Ra) - - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RegCache::Realize(Rs); - OpArg mem_source = Rs.Location(); + LEA(32, Ra, MScaled(Rs.GetSimpleReg(), SCALE_1 << inst.SH, 0)); + } + // optimized case: byte/word extract plus rotate + else if (simple_prerotate_mask && !left_shift) + { + MOVZX(32, prerotate_mask == 0xff ? 
8 : 16, Ra, Rs); if (inst.SH) - mem_source.AddMemOffset((32 - inst.SH) >> 3); - Rs.Unlock(); + ROL(32, Ra, Imm8(inst.SH)); + needs_sext = (mask & 0x80000000) != 0; + } + // Use BEXTR where possible: Only AMD implements this in one uop + else if (field_extract && cpu_info.bBMI1 && cpu_info.vendor == CPUVendor::AMD) + { + MOV(32, R(RSCRATCH), Imm32((mask_size << 8) | (32 - inst.SH))); + BEXTR(32, Ra, Rs, RSCRATCH); + needs_sext = false; + } + else if (left_shift) + { + if (a != s) + MOV(32, Ra, Rs); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Ra); - MOVZX(32, mask_size, Ra, mem_source); + SHL(32, Ra, Imm8(inst.SH)); + } + else if (right_shift) + { + if (a != s) + MOV(32, Ra, Rs); + SHR(32, Ra, Imm8(inst.MB)); needs_sext = false; } else { - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Rs, Ra); + RotateLeft(32, Ra, Rs, inst.SH); - if (a != s && left_shift && Rs.IsSimpleReg() && inst.SH <= 3) + if (!(inst.MB == 0 && inst.ME == 31)) { - LEA(32, Ra, MScaled(Rs.GetSimpleReg(), SCALE_1 << inst.SH, 0)); - } - // optimized case: byte/word extract plus rotate - else if (simple_prerotate_mask && !left_shift) - { - MOVZX(32, prerotate_mask == 0xff ? 
8 : 16, Ra, Rs); - if (inst.SH) - ROL(32, Ra, Imm8(inst.SH)); - needs_sext = (mask & 0x80000000) != 0; - } - // Use BEXTR where possible: Only AMD implements this in one uop - else if (field_extract && cpu_info.bBMI1 && cpu_info.vendor == CPUVendor::AMD) - { - MOV(32, R(RSCRATCH), Imm32((mask_size << 8) | (32 - inst.SH))); - BEXTR(32, Ra, Rs, RSCRATCH); - needs_sext = false; - } - else if (left_shift) - { - if (a != s) - MOV(32, Ra, Rs); - - SHL(32, Ra, Imm8(inst.SH)); - } - else if (right_shift) - { - if (a != s) - MOV(32, Ra, Rs); - - SHR(32, Ra, Imm8(inst.MB)); - needs_sext = false; - } - else - { - RotateLeft(32, Ra, Rs, inst.SH); - - if (!(inst.MB == 0 && inst.ME == 31)) - { - // we need flags if we're merging the branch - if (inst.Rc && CheckMergedBranch(0)) - AND(32, Ra, Imm32(mask)); - else - AndWithMask(Ra, mask); - needs_sext = inst.MB == 0; - needs_test = false; - } + // we need flags if we're merging the branch + if (inst.Rc && CheckMergedBranch(0)) + AND(32, Ra, Imm32(mask)); + else + AndWithMask(Ra, mask); + needs_sext = inst.MB == 0; + needs_test = false; } } - - if (inst.Rc) - ComputeRC(a, needs_test, needs_sext); } + + if (inst.Rc) + ComputeRC(a, needs_test, needs_sext); } void Jit64::rlwimix(UGeckoInstruction inst) @@ -2233,11 +2220,7 @@ void Jit64::rlwnmx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, s = inst.RS; const u32 mask = MakeRotationMask(inst.MB, inst.ME); - if (gpr.IsImm(b, s)) - { - gpr.SetImmediate32(a, std::rotl(gpr.Imm32(s), gpr.Imm32(b) & 0x1F) & mask); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 amount = gpr.Imm32(b) & 0x1f; RCX64Reg Ra = gpr.Bind(a, RCMode::Write); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index a0d13c6a267..b1d1994ac4e 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -719,15 +719,7 @@ void JitArm64::cmpli(UGeckoInstruction inst) 
void JitArm64::rlwinmx_internal(UGeckoInstruction inst, u32 sh) { u32 a = inst.RA, s = inst.RS; - const u32 mask = MakeRotationMask(inst.MB, inst.ME); - if (gpr.IsImm(inst.RS)) - { - gpr.SetImmediate(a, std::rotl(gpr.GetImm(s), sh) & mask); - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - return; - } if (mask == 0) { diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index a79d8f00fa1..833ff1b0402 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -5,6 +5,7 @@ #include <bit> +#include "Core/PowerPC/Gekko.h" #include "Core/PowerPC/PPCTables.h" namespace JitCommon @@ -32,6 +33,13 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc case 14: // addi case 15: // addis return EvaluateAddImm(inst); + case 21: // rlwinmx + return EvaluateRlwinmxRlwnmx(inst, inst.SH); + case 23: // rlwnmx + if (HasGPR(inst.RB)) + return EvaluateRlwinmxRlwnmx(inst, GetGPR(inst.RB) & 0x1F); + else + return {}; case 24: // ori case 25: // oris return EvaluateBitwiseImm(inst, BitOR); @@ -61,6 +69,16 @@ ConstantPropagationResult ConstantPropagation::EvaluateAddImm(UGeckoInstruction return ConstantPropagationResult(inst.RD, m_gpr_values[inst.RA] + immediate); } +ConstantPropagationResult ConstantPropagation::EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, + u32 shift) const +{ + if (!HasGPR(inst.RS)) + return {}; + + const u32 mask = MakeRotationMask(inst.MB, inst.ME); + return ConstantPropagationResult(inst.RA, std::rotl(GetGPR(inst.RS), shift) & mask, inst.Rc); +} + ConstantPropagationResult ConstantPropagation::EvaluateBitwiseImm(UGeckoInstruction inst, u32 (*do_op)(u32, u32)) const { diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 097932e1bbc..e003d2e4195 100644 --- 
a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -78,6 +78,7 @@ public: private: ConstantPropagationResult EvaluateAddImm(UGeckoInstruction inst) const; + ConstantPropagationResult EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, u32 shift) const; ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst, u32 (*do_op)(u32, u32)) const; ConstantPropagationResult EvaluateTable31(UGeckoInstruction inst, u64 flags) const; From a3797778ffac9c6be5780e0cebc976239fbca74f Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 31 Aug 2024 09:55:26 +0200 Subject: [PATCH 12/30] Jit: Move srawix to ConstantPropagation --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 8 +------- .../Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 12 +----------- .../Core/PowerPC/JitCommon/ConstantPropagation.cpp | 9 ++++++++- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 206e62cc482..935c212bbc3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -2561,13 +2561,7 @@ void Jit64::srawix(UGeckoInstruction inst) int s = inst.RS; int amount = inst.SH; - if (gpr.IsImm(s)) - { - s32 imm = gpr.SImm32(s); - gpr.SetImmediate32(a, imm >> amount); - FinalizeCarry(amount != 0 && imm < 0 && (u32(imm) << (32 - amount))); - } - else if (amount != 0) + if (amount != 0) { RCX64Reg Ra = gpr.Bind(a, RCMode::Write); RCOpArg Rs = gpr.Use(s, RCMode::Read); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index b1d1994ac4e..229bb9c4713 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -810,17 +810,7 @@ void JitArm64::srawix(UGeckoInstruction inst) int amount = inst.SH; bool inplace_carry = 
CanMergeNextInstructions(1) && js.op[1].wantsCAInFlags; - if (gpr.IsImm(s)) - { - s32 imm = (s32)gpr.GetImm(s); - gpr.SetImmediate(a, imm >> amount); - - ComputeCarry(amount != 0 && (imm < 0) && (u32(imm) << (32 - amount))); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else if (amount == 0) + if (amount == 0) { gpr.BindToRegister(a, a == s); ARM64Reg RA = gpr.R(a); diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 833ff1b0402..cffa4f77901 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -144,6 +144,7 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31S(UGeckoInstructio if (!HasGPR(inst.RS)) return {}; + std::optional<bool> carry; u32 a; const u32 s = GetGPR(inst.RS); @@ -152,6 +153,10 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31S(UGeckoInstructio case 26: // cntlzwx a = std::countl_zero(s); break; + case 824: // srawix + a = s32(s) >> inst.SH; + carry = inst.SH != 0 && s32(s) < 0 && (s << (32 - inst.SH)); + break; case 922: // extshx a = s32(s16(s)); break; @@ -162,7 +167,9 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31S(UGeckoInstructio return {}; } - return ConstantPropagationResult(inst.RA, a, inst.Rc); + ConstantPropagationResult result(ConstantPropagationResult(inst.RA, a, inst.Rc)); + result.carry = carry; + return result; } ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstruction inst, From 1eea6103755c98a5c398e210067a9e053d66823e Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 31 Aug 2024 10:23:14 +0200 Subject: [PATCH 13/30] Jit: Move addicx to ConstantPropagation Note: Jit64 didn't support immediate handling for addic before. 
--- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 26 +++++-------------- .../PowerPC/JitCommon/ConstantPropagation.cpp | 17 ++++++++++++ .../PowerPC/JitCommon/ConstantPropagation.h | 1 + 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 229bb9c4713..b78f1a838b5 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -871,30 +871,16 @@ void JitArm64::addic(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; bool rc = inst.OPCD == 13; s32 simm = inst.SIMM_16; - u32 imm = (u32)simm; - if (gpr.IsImm(a)) + gpr.BindToRegister(d, d == a); { - u32 i = gpr.GetImm(a); - gpr.SetImmediate(d, i + imm); - - bool has_carry = Interpreter::Helper_Carry(i, imm); - ComputeCarry(has_carry); - if (rc) - ComputeRC0(gpr.GetImm(d)); + auto WA = gpr.GetScopedReg(); + CARRY_IF_NEEDED(ADDI2R, ADDSI2R, gpr.R(d), gpr.R(a), simm, WA); } - else - { - gpr.BindToRegister(d, d == a); - { - auto WA = gpr.GetScopedReg(); - CARRY_IF_NEEDED(ADDI2R, ADDSI2R, gpr.R(d), gpr.R(a), simm, WA); - } - ComputeCarry(); - if (rc) - ComputeRC0(gpr.R(d)); - } + ComputeCarry(); + if (rc) + ComputeRC0(gpr.R(d)); } bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index cffa4f77901..d5f05b6f2b0 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -6,6 +6,7 @@ #include <bit> #include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/Interpreter/Interpreter.h" #include "Core/PowerPC/PPCTables.h" namespace JitCommon @@ -30,6 +31,9 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc { switch (inst.OPCD) { + case 12: // addic + case 13: // addic. 
+ return EvaluateAddImmCarry(inst); case 14: // addi case 15: // addis return EvaluateAddImm(inst); @@ -69,6 +73,19 @@ ConstantPropagationResult ConstantPropagation::EvaluateAddImm(UGeckoInstruction return ConstantPropagationResult(inst.RD, m_gpr_values[inst.RA] + immediate); } +ConstantPropagationResult ConstantPropagation::EvaluateAddImmCarry(UGeckoInstruction inst) const +{ + if (!HasGPR(inst.RA)) + return {}; + + const u32 a = m_gpr_values[inst.RA]; + const bool rc = inst.OPCD & 1; + + ConstantPropagationResult result(inst.RD, a + inst.SIMM_16, rc); + result.carry = Interpreter::Helper_Carry(a, inst.SIMM_16); + return result; +} + ConstantPropagationResult ConstantPropagation::EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, u32 shift) const { diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index e003d2e4195..b0718688ce5 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -78,6 +78,7 @@ public: private: ConstantPropagationResult EvaluateAddImm(UGeckoInstruction inst) const; + ConstantPropagationResult EvaluateAddImmCarry(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, u32 shift) const; ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst, u32 (*do_op)(u32, u32)) const; From b469981c7259a84608b951ca36d064eca6392667 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 31 Aug 2024 12:04:01 +0200 Subject: [PATCH 14/30] Jit: Move mulli to ConstantPropagation --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 9 +-------- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 7 +------ .../Core/PowerPC/JitCommon/ConstantPropagation.cpp | 10 ++++++++++ .../Core/Core/PowerPC/JitCommon/ConstantPropagation.h | 1 + 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp 
b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 935c212bbc3..b4ac8df181a 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1262,14 +1262,7 @@ void Jit64::mulli(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; u32 imm = inst.SIMM_16; - if (gpr.IsImm(a)) - { - gpr.SetImmediate32(d, gpr.Imm32(a) * imm); - } - else - { - MultiplyImmediate(imm, a, d, false); - } + MultiplyImmediate(imm, a, d, false); } void Jit64::mullwx(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index b78f1a838b5..96dc459057d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -959,12 +959,7 @@ void JitArm64::mulli(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; - if (gpr.IsImm(a)) - { - s32 i = (s32)gpr.GetImm(a); - gpr.SetImmediate(d, i * inst.SIMM_16); - } - else if (MultiplyImmediate((u32)(s32)inst.SIMM_16, a, d, false)) + if (MultiplyImmediate((u32)(s32)inst.SIMM_16, a, d, false)) { // Code is generated inside MultiplyImmediate, nothing to be done here. } diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index d5f05b6f2b0..f3de311dee6 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -31,6 +31,8 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc { switch (inst.OPCD) { + case 7: // mulli + return EvaluateMulImm(inst); case 12: // addic case 13: // addic. 
return EvaluateAddImmCarry(inst); @@ -60,6 +62,14 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc } } +ConstantPropagationResult ConstantPropagation::EvaluateMulImm(UGeckoInstruction inst) const +{ + if (!HasGPR(inst.RA)) + return {}; + + return ConstantPropagationResult(inst.RD, m_gpr_values[inst.RA] * inst.SIMM_16); +} + ConstantPropagationResult ConstantPropagation::EvaluateAddImm(UGeckoInstruction inst) const { const s32 immediate = inst.OPCD & 1 ? inst.SIMM_16 << 16 : inst.SIMM_16; diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index b0718688ce5..9b8070caf1b 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -77,6 +77,7 @@ public: void Clear() { m_gpr_values_known = BitSet32{}; } private: + ConstantPropagationResult EvaluateMulImm(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateAddImm(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateAddImmCarry(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, u32 shift) const; From 7456ba3d3dacedf4e1f14775d08e3f09488d4731 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 31 Aug 2024 11:33:23 +0200 Subject: [PATCH 15/30] Jit: Move mullwx, mulhwx, mulhwux to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 18 +------ .../PowerPC/JitArm64/JitArm64_Integer.cpp | 51 +++++-------------- .../PowerPC/JitCommon/ConstantPropagation.cpp | 10 ++++ 3 files changed, 24 insertions(+), 55 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index b4ac8df181a..af6723c8214 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1271,14 +1271,7 @@ void Jit64::mullwx(UGeckoInstruction inst) 
JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a, b)) - { - s32 i = gpr.SImm32(a), j = gpr.SImm32(b); - gpr.SetImmediate32(d, i * j); - if (inst.OE) - GenerateConstantOverflow((s64)i * (s64)j); - } - else if (gpr.IsImm(a) || gpr.IsImm(b)) + if (gpr.IsImm(a) || gpr.IsImm(b)) { u32 imm = gpr.IsImm(a) ? gpr.Imm32(a) : gpr.Imm32(b); int src = gpr.IsImm(a) ? b : a; @@ -1320,14 +1313,7 @@ void Jit64::mulhwXx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; bool sign = inst.SUBOP10 == 75; - if (gpr.IsImm(a, b)) - { - if (sign) - gpr.SetImmediate32(d, (u32)((u64)(((s64)gpr.SImm32(a) * (s64)gpr.SImm32(b))) >> 32)); - else - gpr.SetImmediate32(d, (u32)(((u64)gpr.Imm32(a) * (u64)gpr.Imm32(b)) >> 32)); - } - else if (sign) + if (sign) { RCOpArg Ra = gpr.Use(a, RCMode::Read); RCOpArg Rb = gpr.UseNoImm(b, RCMode::Read); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 96dc459057d..ebd3dd273aa 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -984,15 +984,8 @@ void JitArm64::mullwx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b); - gpr.SetImmediate(d, i * j); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else if ((gpr.IsImm(a) && MultiplyImmediate(gpr.GetImm(a), b, d, inst.Rc)) || - (gpr.IsImm(b) && MultiplyImmediate(gpr.GetImm(b), a, d, inst.Rc))) + if ((gpr.IsImm(a) && MultiplyImmediate(gpr.GetImm(a), b, d, inst.Rc)) || + (gpr.IsImm(b) && MultiplyImmediate(gpr.GetImm(b), a, d, inst.Rc))) { // Code is generated inside MultiplyImmediate, nothing to be done here. 
} @@ -1012,22 +1005,12 @@ void JitArm64::mulhwx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - s32 i = (s32)gpr.GetImm(a), j = (s32)gpr.GetImm(b); - gpr.SetImmediate(d, (u32)((u64)(((s64)i * (s64)j)) >> 32)); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else - { - gpr.BindToRegister(d, d == a || d == b); - SMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b)); - LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32); + gpr.BindToRegister(d, d == a || d == b); + SMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b)); + LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32); - if (inst.Rc) - ComputeRC0(gpr.R(d)); - } + if (inst.Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::mulhwux(UGeckoInstruction inst) @@ -1037,22 +1020,12 @@ void JitArm64::mulhwux(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - u32 i = gpr.GetImm(a), j = gpr.GetImm(b); - gpr.SetImmediate(d, (u32)(((u64)i * (u64)j) >> 32)); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else - { - gpr.BindToRegister(d, d == a || d == b); - UMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b)); - LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32); + gpr.BindToRegister(d, d == a || d == b); + UMULL(EncodeRegTo64(gpr.R(d)), gpr.R(a), gpr.R(b)); + LSR(EncodeRegTo64(gpr.R(d)), EncodeRegTo64(gpr.R(d)), 32); - if (inst.Rc) - ComputeRC0(gpr.R(d)); - } + if (inst.Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::addzex(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index f3de311dee6..78facf40bd2 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -219,6 +219,16 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstructi d = u64(a) + u64(b); d_overflow = s64(s32(a)) + s64(s32(b)); 
break; + case 11: // mulhwux + d = d_overflow = (u64(a) * u64(b)) >> 32; + break; + case 75: // mulhwx + d = d_overflow = u64(s64(s32(a)) * s64(s32(b))) >> 32; + break; + case 235: // mullwx + case 747: // mullwox + d = d_overflow = s64(s32(a)) * s64(s32(b)); + break; default: return {}; } From 2134991be8300eb454ab2e779973017ef29f6c46 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 31 Aug 2024 12:20:40 +0200 Subject: [PATCH 16/30] Jit: Move multiplication by 0 optimization to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 7 ---- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 9 +---- .../PowerPC/JitCommon/ConstantPropagation.cpp | 39 ++++++++++++++++++- .../PowerPC/JitCommon/ConstantPropagation.h | 2 + 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index af6723c8214..cdca3274af0 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1201,13 +1201,6 @@ void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow) RCX64Reg Rd = gpr.Bind(d, RCMode::Write); RegCache::Realize(Ra, Rd); - // simplest cases first - if (imm == 0) - { - XOR(32, Rd, Rd); - return; - } - if (imm == (u32)-1) { if (d != a) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index ebd3dd273aa..1a4027059cd 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -885,14 +885,7 @@ void JitArm64::addic(UGeckoInstruction inst) bool JitArm64::MultiplyImmediate(u32 imm, int a, int d, bool rc) { - if (imm == 0) - { - // Multiplication by zero (0). - gpr.SetImmediate(d, 0); - if (rc) - ComputeRC0(gpr.GetImm(d)); - } - else if (imm == 1) + if (imm == 1) { // Multiplication by one (1). 
if (d != a) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 78facf40bd2..980347062b4 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -64,6 +64,9 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc ConstantPropagationResult ConstantPropagation::EvaluateMulImm(UGeckoInstruction inst) const { + if (inst.SIMM_16 == 0) + return ConstantPropagationResult(inst.RD, 0); + if (!HasGPR(inst.RA)) return {}; @@ -202,8 +205,17 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31S(UGeckoInstructio ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstruction inst, u64 flags) const { - if (!HasGPR(inst.RA, inst.RB)) - return {}; + const bool has_a = HasGPR(inst.RA); + const bool has_b = HasGPR(inst.RB); + if (!has_a || !has_b) + { + if (has_a) + return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RA)); + else if (has_b) + return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RB)); + else + return {}; + } u64 d; s64 d_overflow; @@ -241,6 +253,29 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstructi return result; } +ConstantPropagationResult +ConstantPropagation::EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u64 flags, + u32 value) const +{ + switch (inst.SUBOP10) + { + case 11: // mulhwux + case 75: // mulhwx + case 235: // mullwx + case 747: // mullwox + if (value == 0) + { + ConstantPropagationResult result(inst.RD, 0, inst.Rc); + if (flags & FL_SET_OE) + result.overflow = false; + return result; + } + break; + } + + return {}; +} + ConstantPropagationResult ConstantPropagation::EvaluateTable31SB(UGeckoInstruction inst) const { const bool has_s = HasGPR(inst.RS); diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h 
b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 9b8070caf1b..4ff7823aa44 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -87,6 +87,8 @@ private: ConstantPropagationResult EvaluateTable31Negx(UGeckoInstruction inst, u64 flags) const; ConstantPropagationResult EvaluateTable31S(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateTable31AB(UGeckoInstruction inst, u64 flags) const; + ConstantPropagationResult EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u64 flags, + u32 value) const; ConstantPropagationResult EvaluateTable31SB(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u32 value, bool known_reg_is_b) const; From 204a8fbd5348ae2b072bca63697a72adb4c357ae Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 31 Aug 2024 16:03:02 +0200 Subject: [PATCH 17/30] Jit: Move subfx to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 19 +--------- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 36 +++---------------- .../PowerPC/JitCommon/ConstantPropagation.cpp | 31 ++++++++++++++++ .../PowerPC/JitCommon/ConstantPropagation.h | 2 ++ 4 files changed, 39 insertions(+), 49 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index cdca3274af0..f10bca3cf3d 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1097,24 +1097,7 @@ void Jit64::subfx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; const bool carry = !(inst.SUBOP10 & (1 << 5)); - if (a == b) - { - gpr.SetImmediate32(d, 0); - if (carry) - FinalizeCarry(true); - if (inst.OE) - GenerateConstantOverflow(false); - } - else if (gpr.IsImm(a, b)) - { - s32 i = gpr.SImm32(b), j = gpr.SImm32(a); - gpr.SetImmediate32(d, i - j); - if (carry) - FinalizeCarry(j == 0 || 
Interpreter::Helper_Carry((u32)i, 0u - (u32)j)); - if (inst.OE) - GenerateConstantOverflow((s64)i - (s64)j); - } - else if (gpr.IsImm(a)) + if (gpr.IsImm(a)) { s32 j = gpr.SImm32(a); RCOpArg Rb = gpr.Use(b, RCMode::Read); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 1a4027059cd..fe5a401b6a9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1122,26 +1122,10 @@ void JitArm64::subfx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (a == b) - { - gpr.SetImmediate(d, 0); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else if (gpr.IsImm(a) && gpr.IsImm(b)) - { - u32 i = gpr.GetImm(a), j = gpr.GetImm(b); - gpr.SetImmediate(d, j - i); - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else - { - gpr.BindToRegister(d, d == a || d == b); - SUB(gpr.R(d), gpr.R(b), gpr.R(a)); - if (inst.Rc) - ComputeRC0(gpr.R(d)); - } + gpr.BindToRegister(d, d == a || d == b); + SUB(gpr.R(d), gpr.R(b), gpr.R(a)); + if (inst.Rc) + ComputeRC0(gpr.R(d)); } void JitArm64::subfex(UGeckoInstruction inst) @@ -1283,17 +1267,7 @@ void JitArm64::subfcx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - u32 a_imm = gpr.GetImm(a), b_imm = gpr.GetImm(b); - - gpr.SetImmediate(d, b_imm - a_imm); - ComputeCarry(a_imm == 0 || Interpreter::Helper_Carry(b_imm, 0u - a_imm)); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else if (gpr.IsImm(a, 0)) + if (gpr.IsImm(a, 0)) { if (d != b) { diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 980347062b4..f794af1653c 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -213,6 +213,8 @@ ConstantPropagationResult 
ConstantPropagation::EvaluateTable31AB(UGeckoInstructi return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RA)); else if (has_b) return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RB)); + else if (inst.RA == inst.RB) + return EvaluateTable31ABIdenticalRegisters(inst, flags); else return {}; } @@ -224,6 +226,13 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstructi switch (inst.SUBOP10) { + case 8: // subfcx + case 40: // subfx + case 520: // subfcox + case 552: // subfox + d = u64(u32(~a)) + u64(b) + 1; + d_overflow = s64(s32(b)) - s64(s32(a)); + break; case 10: // addcx case 522: // addcox case 266: // addx @@ -276,6 +285,28 @@ ConstantPropagation::EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u return {}; } +ConstantPropagationResult +ConstantPropagation::EvaluateTable31ABIdenticalRegisters(UGeckoInstruction inst, u64 flags) const +{ + switch (inst.SUBOP10) + { + case 8: // subfcx + case 40: // subfx + case 520: // subfcox + case 552: // subfox + { + ConstantPropagationResult result(inst.RD, 0, inst.Rc); + if (flags & FL_SET_CA) + result.carry = true; + if (flags & FL_SET_OE) + result.overflow = false; + return result; + } + default: + return {}; + } +} + ConstantPropagationResult ConstantPropagation::EvaluateTable31SB(UGeckoInstruction inst) const { const bool has_s = HasGPR(inst.RS); diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 4ff7823aa44..7b4759c0aeb 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -89,6 +89,8 @@ private: ConstantPropagationResult EvaluateTable31AB(UGeckoInstruction inst, u64 flags) const; ConstantPropagationResult EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u64 flags, u32 value) const; + ConstantPropagationResult EvaluateTable31ABIdenticalRegisters(UGeckoInstruction inst, + u64 flags) 
const; ConstantPropagationResult EvaluateTable31SB(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u32 value, bool known_reg_is_b) const; From c7d8a0b2767849a7f18f78b22acfbb3e476c4dbc Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 31 Aug 2024 16:29:29 +0200 Subject: [PATCH 18/30] Jit: Move subfic to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 8 ---- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 46 ++++++++----------- .../PowerPC/JitCommon/ConstantPropagation.cpp | 15 ++++++ .../PowerPC/JitCommon/ConstantPropagation.h | 1 + 4 files changed, 34 insertions(+), 36 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index f10bca3cf3d..4187e9f3468 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1042,14 +1042,6 @@ void Jit64::subfic(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, d = inst.RD, imm = inst.SIMM_16; - if (gpr.IsImm(a)) - { - u32 i = imm, j = gpr.Imm32(a); - gpr.SetImmediate32(d, i - j); - FinalizeCarry(j == 0 || (i > j - 1)); - return; - } - RCOpArg Ra = gpr.Use(a, RCMode::Read); RCX64Reg Rd = gpr.Bind(d, RCMode::Write); RegCache::Realize(Ra, Rd); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index fe5a401b6a9..b36d35b2063 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1381,44 +1381,34 @@ void JitArm64::subfic(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; s32 imm = inst.SIMM_16; - if (gpr.IsImm(a)) - { - u32 a_imm = gpr.GetImm(a); + const bool will_read = d == a; + gpr.BindToRegister(d, will_read); + ARM64Reg RD = gpr.R(d); - gpr.SetImmediate(d, imm - a_imm); - ComputeCarry(a_imm == 0 || Interpreter::Helper_Carry(imm, 0u - a_imm)); + if 
(imm == -1) + { + // d = -1 - a = ~a + MVN(RD, gpr.R(a)); + // CA is always set in this case + ComputeCarry(true); } else { - const bool will_read = d == a; - gpr.BindToRegister(d, will_read); - ARM64Reg RD = gpr.R(d); + const bool is_zero = imm == 0; - if (imm == -1) + // d = imm - a { - // d = -1 - a = ~a - MVN(RD, gpr.R(a)); - // CA is always set in this case - ComputeCarry(true); - } - else - { - const bool is_zero = imm == 0; - - // d = imm - a + Arm64GPRCache::ScopedARM64Reg WA(ARM64Reg::WZR); + if (!is_zero) { - Arm64GPRCache::ScopedARM64Reg WA(ARM64Reg::WZR); - if (!is_zero) - { - WA = will_read ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD); - MOVI2R(WA, imm); - } - - CARRY_IF_NEEDED(SUB, SUBS, RD, WA, gpr.R(a)); + WA = will_read ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD); + MOVI2R(WA, imm); } - ComputeCarry(); + CARRY_IF_NEEDED(SUB, SUBS, RD, WA, gpr.R(a)); } + + ComputeCarry(); } } diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index f794af1653c..1f652667967 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -33,6 +33,8 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc { case 7: // mulli return EvaluateMulImm(inst); + case 8: // subfic + return EvaluateSubImmCarry(inst); case 12: // addic case 13: // addic. 
return EvaluateAddImmCarry(inst); @@ -73,6 +75,19 @@ ConstantPropagationResult ConstantPropagation::EvaluateMulImm(UGeckoInstruction return ConstantPropagationResult(inst.RD, m_gpr_values[inst.RA] * inst.SIMM_16); } +ConstantPropagationResult ConstantPropagation::EvaluateSubImmCarry(UGeckoInstruction inst) const +{ + if (!HasGPR(inst.RA)) + return {}; + + const u32 a = GetGPR(inst.RA); + const u32 imm = s32(inst.SIMM_16); + + ConstantPropagationResult result(inst.RD, imm - a); + result.carry = imm >= a; + return result; +} + ConstantPropagationResult ConstantPropagation::EvaluateAddImm(UGeckoInstruction inst) const { const s32 immediate = inst.OPCD & 1 ? inst.SIMM_16 << 16 : inst.SIMM_16; diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 7b4759c0aeb..50b1de58ae5 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -78,6 +78,7 @@ public: private: ConstantPropagationResult EvaluateMulImm(UGeckoInstruction inst) const; + ConstantPropagationResult EvaluateSubImmCarry(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateAddImm(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateAddImmCarry(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, u32 shift) const; From fc6c278007f4412cd14473dbea5e985e3f18d621 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 1 Sep 2024 09:31:34 +0200 Subject: [PATCH 19/30] Jit: Move divwux to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 17 +----------- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 10 +------ .../PowerPC/JitCommon/ConstantPropagation.cpp | 26 ++++++++++++++++--- .../PowerPC/JitCommon/ConstantPropagation.h | 2 +- 4 files changed, 25 insertions(+), 30 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp 
b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 4187e9f3468..d73b20bc9e8 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1321,22 +1321,7 @@ void Jit64::divwux(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a, b)) - { - if (gpr.Imm32(b) == 0) - { - gpr.SetImmediate32(d, 0); - if (inst.OE) - GenerateConstantOverflow(true); - } - else - { - gpr.SetImmediate32(d, gpr.Imm32(a) / gpr.Imm32(b)); - if (inst.OE) - GenerateConstantOverflow(false); - } - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 divisor = gpr.Imm32(b); if (divisor == 0) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index b36d35b2063..129445fff94 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1554,15 +1554,7 @@ void JitArm64::divwux(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - u32 i = gpr.GetImm(a), j = gpr.GetImm(b); - gpr.SetImmediate(d, j == 0 ? 
0 : i / j); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(d)); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { const u32 divisor = gpr.GetImm(b); diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 1f652667967..e67e247b43c 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -225,9 +225,9 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstructi if (!has_a || !has_b) { if (has_a) - return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RA)); + return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RA), false); else if (has_b) - return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RB)); + return EvaluateTable31ABOneRegisterKnown(inst, flags, GetGPR(inst.RB), true); else if (inst.RA == inst.RB) return EvaluateTable31ABIdenticalRegisters(inst, flags); else @@ -265,6 +265,10 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstructi case 747: // mullwox d = d_overflow = s64(s32(a)) * s64(s32(b)); break; + case 459: // divwux + case 971: // divwuox + d = d_overflow = b == 0 ? 
0x1'0000'0000 : u64(a / b); + break; default: return {}; } @@ -278,8 +282,8 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstructi } ConstantPropagationResult -ConstantPropagation::EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u64 flags, - u32 value) const +ConstantPropagation::EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u64 flags, u32 value, + bool known_reg_is_b) const { switch (inst.SUBOP10) { @@ -295,6 +299,20 @@ ConstantPropagation::EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u return result; } break; + case 459: // divwux + case 971: // divwuox + if (known_reg_is_b && value == 0) + { + ConstantPropagationResult result(inst.RD, 0, inst.Rc); + if (flags & FL_SET_OE) + result.overflow = true; + return result; + } + if (!known_reg_is_b && value == 0 && !(flags & FL_SET_OE)) + { + return ConstantPropagationResult(inst.RD, 0, inst.Rc); + } + break; } return {}; diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 50b1de58ae5..467944a658c 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -89,7 +89,7 @@ private: ConstantPropagationResult EvaluateTable31S(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateTable31AB(UGeckoInstruction inst, u64 flags) const; ConstantPropagationResult EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u64 flags, - u32 value) const; + u32 value, bool known_reg_is_b) const; ConstantPropagationResult EvaluateTable31ABIdenticalRegisters(UGeckoInstruction inst, u64 flags) const; ConstantPropagationResult EvaluateTable31SB(UGeckoInstruction inst) const; From 45760841b21f96fcedc325c6121b4446a55f5cb7 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 1 Sep 2024 10:13:57 +0200 Subject: [PATCH 20/30] Jit: Move divwx to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 19 
+-------------- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 23 +------------------ .../PowerPC/JitCommon/ConstantPropagation.cpp | 9 ++++++++ 3 files changed, 11 insertions(+), 40 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index d73b20bc9e8..458ecdc2e55 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1433,24 +1433,7 @@ void Jit64::divwx(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a, b)) - { - s32 i = gpr.SImm32(a), j = gpr.SImm32(b); - if (j == 0 || (i == (s32)0x80000000 && j == -1)) - { - const u32 result = i < 0 ? 0xFFFFFFFF : 0x00000000; - gpr.SetImmediate32(d, result); - if (inst.OE) - GenerateConstantOverflow(true); - } - else - { - gpr.SetImmediate32(d, i / j); - if (inst.OE) - GenerateConstantOverflow(false); - } - } - else if (gpr.IsImm(a)) + if (gpr.IsImm(a)) { // Constant dividend const u32 dividend = gpr.Imm32(a); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 129445fff94..a2b4297738b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1625,28 +1625,7 @@ void JitArm64::divwx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, d = inst.RD; - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - s32 imm_a = gpr.GetImm(a); - s32 imm_b = gpr.GetImm(b); - u32 imm_d; - if (imm_b == 0 || (static_cast<u32>(imm_a) == 0x80000000 && imm_b == -1)) - { - if (imm_a < 0) - imm_d = 0xFFFFFFFF; - else - imm_d = 0; - } - else - { - imm_d = static_cast<u32>(imm_a / imm_b); - } - gpr.SetImmediate(d, imm_d); - - if (inst.Rc) - ComputeRC0(imm_d); - } - else if (gpr.IsImm(a, 0)) + if (gpr.IsImm(a, 0)) { // Zero divided by anything is always zero gpr.SetImmediate(d, 0); diff --git
a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index e67e247b43c..4484fd8c329 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -269,6 +269,12 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31AB(UGeckoInstructi case 971: // divwuox d = d_overflow = b == 0 ? 0x1'0000'0000 : u64(a / b); break; + case 491: // divwx + case 1003: // divwox + d = d_overflow = b == 0 || (a == 0x80000000 && b == 0xFFFFFFFF) ? + (s32(a) < 0 ? 0xFFFFFFFF : 0x1'0000'0000) : + s32(a) / s32(b); + break; default: return {}; } @@ -308,6 +314,9 @@ ConstantPropagation::EvaluateTable31ABOneRegisterKnown(UGeckoInstruction inst, u result.overflow = true; return result; } + [[fallthrough]]; + case 491: // divwx + case 1003: // divwox if (!known_reg_is_b && value == 0 && !(flags & FL_SET_OE)) { return ConstantPropagationResult(inst.RD, 0, inst.Rc); From bb645e6cbbeece0669e23f82872b1f0f0f36ecfd Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 1 Sep 2024 10:49:25 +0200 Subject: [PATCH 21/30] Jit: Move slwx to ConstantPropagation --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 15 +-------------- .../Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 16 +--------------- .../PowerPC/JitCommon/ConstantPropagation.cpp | 11 +++++++++++ 3 files changed, 13 insertions(+), 29 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 458ecdc2e55..e38c9e63c31 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -2266,14 +2266,7 @@ void Jit64::slwx(UGeckoInstruction inst) int b = inst.RB; int s = inst.RS; - if (gpr.IsImm(b, s)) - { - u32 amount = gpr.Imm32(b); - gpr.SetImmediate32(a, (amount & 0x20) ? 
0 : gpr.Imm32(s) << (amount & 0x1f)); - if (inst.Rc) - ComputeRC(a); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 amount = gpr.Imm32(b); if (amount & 0x20) @@ -2297,12 +2290,6 @@ void Jit64::slwx(UGeckoInstruction inst) if (inst.Rc) ComputeRC(a); } - else if (gpr.IsImm(s) && gpr.Imm32(s) == 0) - { - gpr.SetImmediate32(a, 0); - if (inst.Rc) - ComputeRC(a); - } else if (cpu_info.bBMI2) { RCX64Reg Ra = gpr.Bind(a, RCMode::Write); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index a2b4297738b..6ee99a898cb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1799,21 +1799,7 @@ void JitArm64::slwx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, s = inst.RS; - if (gpr.IsImm(b) && gpr.IsImm(s)) - { - u32 i = gpr.GetImm(s), j = gpr.GetImm(b); - gpr.SetImmediate(a, (j & 0x20) ? 0 : i << (j & 0x1F)); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else if (gpr.IsImm(s, 0)) - { - gpr.SetImmediate(a, 0); - if (inst.Rc) - ComputeRC0(0); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 i = gpr.GetImm(b); if (i & 0x20) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 4484fd8c329..615141273ed 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -371,6 +371,9 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31SB(UGeckoInstructi switch (inst.SUBOP10) { + case 24: // slwx + a = u32(u64(s) << (b & 0x3f)); + break; case 28: // andx a = s & b; break; @@ -410,6 +413,14 @@ ConstantPropagation::EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u switch (inst.SUBOP10) { + case 24: // slwx + if (!known_reg_is_b && value == 0) + a = 0; + else if (known_reg_is_b && (value & 0x20)) + a = 0; + else + return {}; + 
break; case 60: // andcx if (known_reg_is_b) value = ~value; From c136fd9807bb68a008fc69261259efbf461729d6 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 1 Sep 2024 10:54:12 +0200 Subject: [PATCH 22/30] Jit: Move srwx to ConstantPropagation --- Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 7 +------ Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 10 +--------- .../Core/PowerPC/JitCommon/ConstantPropagation.cpp | 6 +++++- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index e38c9e63c31..d0d29b5e816 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -2204,12 +2204,7 @@ void Jit64::srwx(UGeckoInstruction inst) int b = inst.RB; int s = inst.RS; - if (gpr.IsImm(b, s)) - { - u32 amount = gpr.Imm32(b); - gpr.SetImmediate32(a, (amount & 0x20) ? 0 : (gpr.Imm32(s) >> (amount & 0x1f))); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 amount = gpr.Imm32(b); if (amount & 0x20) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 6ee99a898cb..8bed1f548be 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1836,15 +1836,7 @@ void JitArm64::srwx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, s = inst.RS; - if (gpr.IsImm(b) && gpr.IsImm(s)) - { - u32 i = gpr.GetImm(s), amount = gpr.GetImm(b); - gpr.SetImmediate(a, (amount & 0x20) ? 
0 : i >> (amount & 0x1F)); - - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 amount = gpr.GetImm(b); if (amount & 0x20) diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 615141273ed..2aea6c887e1 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -398,6 +398,9 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31SB(UGeckoInstructi case 476: // nandx a = ~(s & b); break; + case 536: // srwx + a = u32(u64(s) >> (b & 0x3f)); + break; default: return {}; } @@ -413,7 +416,8 @@ ConstantPropagation::EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u switch (inst.SUBOP10) { - case 24: // slwx + case 24: // slwx + case 536: // srwx if (!known_reg_is_b && value == 0) a = 0; else if (known_reg_is_b && (value & 0x20)) From bac911aac472f5c1b8cd793769915754772447db Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 1 Sep 2024 11:28:37 +0200 Subject: [PATCH 23/30] Jit: Move srawx to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 22 +------------- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 29 +------------------ .../PowerPC/JitCommon/ConstantPropagation.cpp | 21 ++++++++++++++ 3 files changed, 23 insertions(+), 49 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index d0d29b5e816..eb0a6086a01 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -2338,22 +2338,7 @@ void Jit64::srawx(UGeckoInstruction inst) int b = inst.RB; int s = inst.RS; - if (gpr.IsImm(b, s)) - { - s32 i = gpr.SImm32(s), amount = gpr.SImm32(b); - if (amount & 0x20) - { - gpr.SetImmediate32(a, i & 0x80000000 ? 0xFFFFFFFF : 0); - FinalizeCarry(i & 0x80000000 ? 
true : false); - } - else - { - amount &= 0x1F; - gpr.SetImmediate32(a, i >> amount); - FinalizeCarry(amount != 0 && i < 0 && (u32(i) << (32 - amount))); - } - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { u32 amount = gpr.Imm32(b); RCX64Reg Ra = gpr.Bind(a, RCMode::Write); @@ -2389,11 +2374,6 @@ void Jit64::srawx(UGeckoInstruction inst) FinalizeCarry(CC_NZ); } } - else if (gpr.IsImm(s) && gpr.Imm32(s) == 0) - { - gpr.SetImmediate32(a, 0); - FinalizeCarry(false); - } else if (cpu_info.bBMI2) { RCX64Reg Ra = gpr.Bind(a, RCMode::Write); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 8bed1f548be..55eddf11c03 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1871,34 +1871,7 @@ void JitArm64::srawx(UGeckoInstruction inst) int a = inst.RA, b = inst.RB, s = inst.RS; - if (gpr.IsImm(b) && gpr.IsImm(s)) - { - s32 i = gpr.GetImm(s), amount = gpr.GetImm(b); - if (amount & 0x20) - { - gpr.SetImmediate(a, i & 0x80000000 ? 0xFFFFFFFF : 0); - ComputeCarry(i & 0x80000000 ? 
true : false); - } - else - { - amount &= 0x1F; - gpr.SetImmediate(a, i >> amount); - ComputeCarry(amount != 0 && i < 0 && (u32(i) << (32 - amount))); - } - - if (inst.Rc) - ComputeRC0(gpr.GetImm(a)); - return; - } - else if (gpr.IsImm(s, 0)) - { - gpr.SetImmediate(a, 0); - ComputeCarry(false); - if (inst.Rc) - ComputeRC0(0); - return; - } - else if (gpr.IsImm(b)) + if (gpr.IsImm(b)) { int amount = gpr.GetImm(b); diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 2aea6c887e1..5efd3b55e14 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -401,6 +401,15 @@ ConstantPropagationResult ConstantPropagation::EvaluateTable31SB(UGeckoInstructi case 536: // srwx a = u32(u64(s) >> (b & 0x3f)); break; + case 792: // srawx + { + const u64 temp = (s64(s32(s)) << 32) >> (b & 0x3f); + a = u32(temp >> 32); + + ConstantPropagationResult result(inst.RA, a, inst.Rc); + result.carry = (temp & a) != 0; + return result; + } default: return {}; } @@ -457,6 +466,18 @@ ConstantPropagation::EvaluateTable31SBOneRegisterKnown(UGeckoInstruction inst, u else return {}; break; + case 792: // srawx + if (!known_reg_is_b && value == 0) + { + ConstantPropagationResult result(inst.RA, 0, inst.Rc); + result.carry = false; + return result; + } + else + { + return {}; + } + break; default: return {}; } From 502317a4851141a53cc0d94cfecff4c578dafb79 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 1 Sep 2024 11:44:34 +0200 Subject: [PATCH 24/30] Jit: Move rlwimix to ConstantPropagation --- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 207 ++++++++---------- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 108 +++++---- .../PowerPC/JitCommon/ConstantPropagation.cpp | 18 ++ .../PowerPC/JitCommon/ConstantPropagation.h | 1 + 4 files changed, 163 insertions(+), 171 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp 
b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index eb0a6086a01..03bf2fc7867 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -1997,135 +1997,118 @@ void Jit64::rlwimix(UGeckoInstruction inst) int s = inst.RS; const u32 mask = MakeRotationMask(inst.MB, inst.ME); + const bool left_shift = mask == 0U - (1U << inst.SH); + const bool right_shift = mask == (1U << inst.SH) - 1; + bool needs_test = false; - if (gpr.IsImm(a, s)) + if (mask == 0 || (a == s && inst.SH == 0)) { - gpr.SetImmediate32(a, (gpr.Imm32(a) & ~mask) | (std::rotl(gpr.Imm32(s), inst.SH) & mask)); - if (inst.Rc) - ComputeRC(a); + needs_test = true; } - else if (gpr.IsImm(s) && mask == 0xFFFFFFFF) + else if (mask == 0xFFFFFFFF) { - gpr.SetImmediate32(a, std::rotl(gpr.Imm32(s), inst.SH)); - - if (inst.Rc) - ComputeRC(a); + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); + RotateLeft(32, Ra, Rs, inst.SH); + needs_test = true; } - else + else if (gpr.IsImm(s)) { - const bool left_shift = mask == 0U - (1U << inst.SH); - const bool right_shift = mask == (1U << inst.SH) - 1; - bool needs_test = false; + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Ra); + AndWithMask(Ra, ~mask); + OR(32, Ra, Imm32(std::rotl(gpr.Imm32(s), inst.SH) & mask)); + } + else if (gpr.IsImm(a)) + { + const u32 maskA = gpr.Imm32(a) & ~mask; - if (mask == 0 || (a == s && inst.SH == 0)) + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::Write); + RegCache::Realize(Rs, Ra); + + if (inst.SH == 0) { - needs_test = true; + MOV(32, Ra, Rs); + AndWithMask(Ra, mask); } - else if (mask == 0xFFFFFFFF) + else if (left_shift) { - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Rs, Ra); - RotateLeft(32, Ra, Rs, inst.SH); - needs_test = true; + MOV(32, Ra, Rs); + SHL(32, Ra, Imm8(inst.SH)); } - else if (gpr.IsImm(s)) + 
else if (right_shift) { - RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); - RegCache::Realize(Ra); - AndWithMask(Ra, ~mask); - OR(32, Ra, Imm32(std::rotl(gpr.Imm32(s), inst.SH) & mask)); - } - else if (gpr.IsImm(a)) - { - const u32 maskA = gpr.Imm32(a) & ~mask; - - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::Write); - RegCache::Realize(Rs, Ra); - - if (inst.SH == 0) - { - MOV(32, Ra, Rs); - AndWithMask(Ra, mask); - } - else if (left_shift) - { - MOV(32, Ra, Rs); - SHL(32, Ra, Imm8(inst.SH)); - } - else if (right_shift) - { - MOV(32, Ra, Rs); - SHR(32, Ra, Imm8(32 - inst.SH)); - } - else - { - RotateLeft(32, Ra, Rs, inst.SH); - AndWithMask(Ra, mask); - } - - if (maskA) - OR(32, Ra, Imm32(maskA)); - else - needs_test = true; - } - else if (inst.SH) - { - // TODO: perhaps consider pinsrb or abuse of AH - RCOpArg Rs = gpr.Use(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); - RegCache::Realize(Rs, Ra); - - if (left_shift) - { - MOV(32, R(RSCRATCH), Rs); - SHL(32, R(RSCRATCH), Imm8(inst.SH)); - } - else if (right_shift) - { - MOV(32, R(RSCRATCH), Rs); - SHR(32, R(RSCRATCH), Imm8(32 - inst.SH)); - } - else - { - RotateLeft(32, RSCRATCH, Rs, inst.SH); - } - - if (mask == 0xFF || mask == 0xFFFF) - { - MOV(mask == 0xFF ? 8 : 16, Ra, R(RSCRATCH)); - needs_test = true; - } - else - { - if (!left_shift && !right_shift) - AndWithMask(RSCRATCH, mask); - AndWithMask(Ra, ~mask); - OR(32, Ra, R(RSCRATCH)); - } + MOV(32, Ra, Rs); + SHR(32, Ra, Imm8(32 - inst.SH)); } else { - RCX64Reg Rs = gpr.Bind(s, RCMode::Read); - RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); - RegCache::Realize(Rs, Ra); - - if (mask == 0xFF || mask == 0xFFFF) - { - MOV(mask == 0xFF ? 
8 : 16, Ra, Rs); - needs_test = true; - } - else - { - XOR(32, Ra, Rs); - AndWithMask(Ra, ~mask); - XOR(32, Ra, Rs); - } + RotateLeft(32, Ra, Rs, inst.SH); + AndWithMask(Ra, mask); } - if (inst.Rc) - ComputeRC(a, needs_test); + + if (maskA) + OR(32, Ra, Imm32(maskA)); + else + needs_test = true; } + else if (inst.SH) + { + // TODO: perhaps consider pinsrb or abuse of AH + RCOpArg Rs = gpr.Use(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Rs, Ra); + + if (left_shift) + { + MOV(32, R(RSCRATCH), Rs); + SHL(32, R(RSCRATCH), Imm8(inst.SH)); + } + else if (right_shift) + { + MOV(32, R(RSCRATCH), Rs); + SHR(32, R(RSCRATCH), Imm8(32 - inst.SH)); + } + else + { + RotateLeft(32, RSCRATCH, Rs, inst.SH); + } + + if (mask == 0xFF || mask == 0xFFFF) + { + MOV(mask == 0xFF ? 8 : 16, Ra, R(RSCRATCH)); + needs_test = true; + } + else + { + if (!left_shift && !right_shift) + AndWithMask(RSCRATCH, mask); + AndWithMask(Ra, ~mask); + OR(32, Ra, R(RSCRATCH)); + } + } + else + { + RCX64Reg Rs = gpr.Bind(s, RCMode::Read); + RCX64Reg Ra = gpr.Bind(a, RCMode::ReadWrite); + RegCache::Realize(Rs, Ra); + + if (mask == 0xFF || mask == 0xFFFF) + { + MOV(mask == 0xFF ? 8 : 16, Ra, Rs); + needs_test = true; + } + else + { + XOR(32, Ra, Rs); + AndWithMask(Ra, ~mask); + XOR(32, Ra, Rs); + } + } + if (inst.Rc) + ComputeRC(a, needs_test); } void Jit64::rlwnmx(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 55eddf11c03..dd01d6dbddb 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1975,74 +1975,64 @@ void JitArm64::rlwimix(UGeckoInstruction inst) const u32 width = inst.ME - inst.MB + 1; const u32 rot_dist = inst.SH ? 
32 - inst.SH : 0; - if (gpr.IsImm(a) && gpr.IsImm(s)) + if (mask == 0 || (a == s && inst.SH == 0)) { - u32 res = (gpr.GetImm(a) & ~mask) | (std::rotl(gpr.GetImm(s), inst.SH) & mask); - gpr.SetImmediate(a, res); - if (inst.Rc) - ComputeRC0(res); + // Do Nothing } - else + else if (mask == 0xFFFFFFFF) { - if (mask == 0 || (a == s && inst.SH == 0)) - { - // Do Nothing - } - else if (mask == 0xFFFFFFFF) - { - if (inst.SH || a != s) - gpr.BindToRegister(a, a == s); + if (inst.SH || a != s) + gpr.BindToRegister(a, a == s); - if (inst.SH) - ROR(gpr.R(a), gpr.R(s), rot_dist); - else if (a != s) - MOV(gpr.R(a), gpr.R(s)); - } - else if (lsb == 0 && inst.MB <= inst.ME && rot_dist + width <= 32) + if (inst.SH) + ROR(gpr.R(a), gpr.R(s), rot_dist); + else if (a != s) + MOV(gpr.R(a), gpr.R(s)); + } + else if (lsb == 0 && inst.MB <= inst.ME && rot_dist + width <= 32) + { + // Destination is in least significant position + // No mask inversion + // Source field pre-rotation is contiguous + gpr.BindToRegister(a, true); + BFXIL(gpr.R(a), gpr.R(s), rot_dist, width); + } + else if (inst.SH == 0 && inst.MB <= inst.ME) + { + // No rotation + // No mask inversion + gpr.BindToRegister(a, true); + auto WA = gpr.GetScopedReg(); + UBFX(WA, gpr.R(s), lsb, width); + BFI(gpr.R(a), WA, lsb, width); + } + else if (inst.SH && inst.MB <= inst.ME) + { + // No mask inversion + gpr.BindToRegister(a, true); + if ((rot_dist + lsb) % 32 == 0) { - // Destination is in least significant position - // No mask inversion - // Source field pre-rotation is contiguous - gpr.BindToRegister(a, true); - BFXIL(gpr.R(a), gpr.R(s), rot_dist, width); - } - else if (inst.SH == 0 && inst.MB <= inst.ME) - { - // No rotation - // No mask inversion - gpr.BindToRegister(a, true); - auto WA = gpr.GetScopedReg(); - UBFX(WA, gpr.R(s), lsb, width); - BFI(gpr.R(a), WA, lsb, width); - } - else if (inst.SH && inst.MB <= inst.ME) - { - // No mask inversion - gpr.BindToRegister(a, true); - if ((rot_dist + lsb) % 32 == 0) - { - 
BFI(gpr.R(a), gpr.R(s), lsb, width); - } - else - { - auto WA = gpr.GetScopedReg(); - ROR(WA, gpr.R(s), (rot_dist + lsb) % 32); - BFI(gpr.R(a), WA, lsb, width); - } + BFI(gpr.R(a), gpr.R(s), lsb, width); } else { - gpr.BindToRegister(a, true); - ARM64Reg RA = gpr.R(a); auto WA = gpr.GetScopedReg(); - const u32 inverted_mask = ~mask; - - AND(WA, gpr.R(s), LogicalImm(std::rotl(mask, rot_dist), GPRSize::B32)); - AND(RA, RA, LogicalImm(inverted_mask, GPRSize::B32)); - ORR(RA, RA, WA, ArithOption(WA, ShiftType::ROR, rot_dist)); + ROR(WA, gpr.R(s), (rot_dist + lsb) % 32); + BFI(gpr.R(a), WA, lsb, width); } - - if (inst.Rc) - ComputeRC0(gpr.R(a)); } + else + { + gpr.BindToRegister(a, true); + ARM64Reg RA = gpr.R(a); + auto WA = gpr.GetScopedReg(); + const u32 inverted_mask = ~mask; + + AND(WA, gpr.R(s), LogicalImm(std::rotl(mask, rot_dist), GPRSize::B32)); + AND(RA, RA, LogicalImm(inverted_mask, GPRSize::B32)); + ORR(RA, RA, WA, ArithOption(WA, ShiftType::ROR, rot_dist)); + } + + if (inst.Rc) + ComputeRC0(gpr.R(a)); } diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp index 5efd3b55e14..b632cd3fc4e 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.cpp @@ -41,6 +41,8 @@ ConstantPropagationResult ConstantPropagation::EvaluateInstruction(UGeckoInstruc case 14: // addi case 15: // addis return EvaluateAddImm(inst); + case 20: // rlwimix + return EvaluateRlwimix(inst); case 21: // rlwinmx return EvaluateRlwinmxRlwnmx(inst, inst.SH); case 23: // rlwnmx @@ -114,6 +116,22 @@ ConstantPropagationResult ConstantPropagation::EvaluateAddImmCarry(UGeckoInstruc return result; } +ConstantPropagationResult ConstantPropagation::EvaluateRlwimix(UGeckoInstruction inst) const +{ + if (!HasGPR(inst.RS)) + return {}; + + const u32 mask = MakeRotationMask(inst.MB, inst.ME); + if (mask == 0xFFFFFFFF) + return 
ConstantPropagationResult(inst.RA, std::rotl(GetGPR(inst.RS), inst.SH), inst.Rc); + + if (!HasGPR(inst.RA)) + return {}; + + return ConstantPropagationResult( + inst.RA, (GetGPR(inst.RA) & ~mask) | (std::rotl(GetGPR(inst.RS), inst.SH) & mask), inst.Rc); +} + ConstantPropagationResult ConstantPropagation::EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, u32 shift) const { diff --git a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h index 467944a658c..ffbf543ed6d 100644 --- a/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h +++ b/Source/Core/Core/PowerPC/JitCommon/ConstantPropagation.h @@ -81,6 +81,7 @@ private: ConstantPropagationResult EvaluateSubImmCarry(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateAddImm(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateAddImmCarry(UGeckoInstruction inst) const; + ConstantPropagationResult EvaluateRlwimix(UGeckoInstruction inst) const; ConstantPropagationResult EvaluateRlwinmxRlwnmx(UGeckoInstruction inst, u32 shift) const; ConstantPropagationResult EvaluateBitwiseImm(UGeckoInstruction inst, u32 (*do_op)(u32, u32)) const; From 7065b93ba557513ecf9365eff8e1db9aaa41c77e Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 1 Sep 2024 15:45:23 +0200 Subject: [PATCH 25/30] JitArm64: Pass index to more Arm64GPRCache functions This refactorization is needed for upcoming commits. 
--- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 19 +++++++---------- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 21 ++++++++++++------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 20a86c0389d..43631b2d9af 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -137,12 +137,6 @@ void Arm64RegCache::DiscardRegister(size_t preg) UnlockRegister(host_reg); } -// GPR Cache -constexpr size_t GUEST_GPR_COUNT = 32; -constexpr size_t GUEST_CR_COUNT = 8; -constexpr size_t GUEST_GPR_OFFSET = 0; -constexpr size_t GUEST_CR_OFFSET = GUEST_GPR_COUNT; - Arm64GPRCache::Arm64GPRCache() : Arm64RegCache(GUEST_GPR_COUNT + GUEST_CR_COUNT) { } @@ -273,8 +267,8 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r const size_t ppc_offset = GetGuestByIndex(i).ppc_offset; if (ppc_offset <= 252) { - ARM64Reg RX1 = reg1_zero ? ARM64Reg::WZR : R(GetGuestByIndex(i)); - ARM64Reg RX2 = reg2_zero ? ARM64Reg::WZR : R(GetGuestByIndex(i + 1)); + ARM64Reg RX1 = reg1_zero ? ARM64Reg::WZR : BindForRead(i); + ARM64Reg RX2 = reg2_zero ? 
ARM64Reg::WZR : BindForRead(i + 1); m_emit->STP(IndexType::Signed, RX1, RX2, PPC_REG, u32(ppc_offset)); if (flush_all) { @@ -335,8 +329,9 @@ void Arm64GPRCache::Flush(FlushMode mode, ARM64Reg tmp_reg, FlushCRRegisters(BitSet8(0xFF), mode, tmp_reg, ignore_discarded_registers); } -ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg) +ARM64Reg Arm64GPRCache::BindForRead(size_t index) { + GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; size_t bitsize = guest_reg.bitsize; @@ -378,8 +373,9 @@ ARM64Reg Arm64GPRCache::R(const GuestRegInfo& guest_reg) return ARM64Reg::INVALID_REG; } -void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm, bool dirty) +void Arm64GPRCache::SetImmediateInternal(size_t index, u32 imm, bool dirty) { + GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; if (reg.GetType() == RegType::Register) UnlockRegister(EncodeRegTo32(reg.GetReg())); @@ -387,8 +383,9 @@ void Arm64GPRCache::SetImmediate(const GuestRegInfo& guest_reg, u32 imm, bool di reg.SetDirty(dirty); } -void Arm64GPRCache::BindToRegister(const GuestRegInfo& guest_reg, bool will_read, bool will_write) +void Arm64GPRCache::BindForWrite(size_t index, bool will_read, bool will_write) { + GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; const size_t bitsize = guest_reg.bitsize; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 990b2dcee79..c973e3bc285 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -328,15 +328,15 @@ public: // Returns a guest GPR inside of a host register. // Will dump an immediate to the host register as well. - Arm64Gen::ARM64Reg R(size_t preg) { return R(GetGuestGPR(preg)); } + Arm64Gen::ARM64Reg R(size_t preg) { return BindForRead(GUEST_GPR_OFFSET + preg); } // Returns a guest CR inside of a host register. 
- Arm64Gen::ARM64Reg CR(size_t preg) { return R(GetGuestCR(preg)); } + Arm64Gen::ARM64Reg CR(size_t preg) { return BindForRead(GUEST_CR_OFFSET + preg); } // Set a register to an immediate. Only valid for guest GPRs. void SetImmediate(size_t preg, u32 imm, bool dirty = true) { - SetImmediate(GetGuestGPR(preg), imm, dirty); + SetImmediateInternal(GUEST_GPR_OFFSET + preg, imm, dirty); } // Returns if a register is set as an immediate. Only valid for guest GPRs. @@ -374,14 +374,14 @@ public: // flushed. Just remember to call this function again with will_write = true after the Flush call. void BindToRegister(size_t preg, bool will_read, bool will_write = true) { - BindToRegister(GetGuestGPR(preg), will_read, will_write); + BindForWrite(GUEST_GPR_OFFSET + preg, will_read, will_write); } // Binds a guest CR to a host register, optionally loading its value. // The description of BindToRegister above applies to this function as well. void BindCRToRegister(size_t preg, bool will_read, bool will_write = true) { - BindToRegister(GetGuestCR(preg), will_read, will_write); + BindForWrite(GUEST_CR_OFFSET + preg, will_read, will_write); } BitSet32 GetCallerSavedUsed() const override; @@ -428,14 +428,19 @@ private: GuestRegInfo GetGuestCR(size_t preg); GuestRegInfo GetGuestByIndex(size_t index); - Arm64Gen::ARM64Reg R(const GuestRegInfo& guest_reg); - void SetImmediate(const GuestRegInfo& guest_reg, u32 imm, bool dirty); - void BindToRegister(const GuestRegInfo& guest_reg, bool will_read, bool will_write = true); + Arm64Gen::ARM64Reg BindForRead(size_t index); + void SetImmediateInternal(size_t index, u32 imm, bool dirty); + void BindForWrite(size_t index, bool will_read, bool will_write = true); void FlushRegisters(BitSet32 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg, IgnoreDiscardedRegisters ignore_discarded_registers); void FlushCRRegisters(BitSet8 regs, FlushMode mode, Arm64Gen::ARM64Reg tmp_reg, IgnoreDiscardedRegisters ignore_discarded_registers); + + static constexpr 
size_t GUEST_GPR_COUNT = 32; + static constexpr size_t GUEST_CR_COUNT = 8; + static constexpr size_t GUEST_GPR_OFFSET = 0; + static constexpr size_t GUEST_CR_OFFSET = GUEST_GPR_COUNT; }; class Arm64FPRCache : public Arm64RegCache From 502b48a690ece557c1c2c6eeff746d11ab0f6534 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 2 Nov 2024 15:46:40 +0100 Subject: [PATCH 26/30] JitArm64: Make FlushRegisters unlock condition more robust To find out whether a host register needs to be unlocked, FlushRegisters checks if the guest register is known to be a zero immediate. This works right now, but it will stop working correctly once we gain the ability to have a guest register be a known immediate and be in a host register at the same time, because a register that's known to be a zero immediate may have had a host register allocated prior to the call to FlushRegisters. Instead, we should check whether the register is RegType::Register after we're done calling BindForRead. --- Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 43631b2d9af..786f5be4a32 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -272,10 +272,10 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r m_emit->STP(IndexType::Signed, RX1, RX2, PPC_REG, u32(ppc_offset)); if (flush_all) { - if (!reg1_zero) - UnlockRegister(EncodeRegTo32(RX1)); - if (!reg2_zero) - UnlockRegister(EncodeRegTo32(RX2)); + if (reg1.GetType() == RegType::Register) + UnlockRegister(reg1.GetReg()); + if (reg2.GetType() == RegType::Register) + UnlockRegister(reg2.GetReg()); reg1.Flush(); reg2.Flush(); } From 4114a0b50660e0821f4f54ddd597372a99c75a48 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 1 Sep 2024 16:10:17 +0200 
Subject: [PATCH 27/30] Jit: Update constant propagation during instruction This commit makes the JIT set/clear the individual registers of ConstantPropagation immediately instead of at the end of the instruction. This is needed to prevent Jit64::ComputeRC, which reads from a register written to earlier during the same instruction, from reading back stale register values from ConstantPropagation in the next commit. --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 5 +++-- Source/Core/Core/PowerPC/Jit64/Jit.h | 2 ++ Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp | 5 +++++ Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h | 1 + Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp | 6 ++++++ Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h | 1 + Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp | 3 +++ Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h | 1 + Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 7 +++---- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 ++ Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp | 9 ++++++--- 11 files changed, 33 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 024b4c6e4af..bdbb023c258 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -370,6 +370,9 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst) gpr.Reset(js.op->regsOut); fpr.Reset(js.op->GetFregsOut()); + // We must also update constant propagation + m_constant_propagation.ClearGPRs(js.op->regsOut); + if (js.op->opinfo->flags & FL_SET_MSR) EmitUpdateMembase(); @@ -1133,8 +1136,6 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) } CompileInstruction(op); - - m_constant_propagation.ClearGPRs(op.regsOut); } m_constant_propagation.Apply(constant_propagation_result); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index 189f0c2b4bb..8b96fda107a 100644 --- 
a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -84,6 +84,8 @@ public: void FlushRegistersBeforeSlowAccess(); + JitCommon::ConstantPropagation& GetConstantPropagation() { return m_constant_propagation; } + JitBlockCache* GetBlockCache() override { return &blocks; } void Trace(); diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp index df210f80c5b..64870ec026a 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp @@ -25,6 +25,11 @@ void FPURegCache::LoadRegister(preg_t preg, X64Reg new_loc) m_emitter->MOVAPD(new_loc, m_regs[preg].Location().value()); } +void FPURegCache::DiscardImm(preg_t preg) +{ + // FPURegCache doesn't support immediates, so no need to do anything +} + std::span FPURegCache::GetAllocationOrder() const { static constexpr X64Reg allocation_order[] = {XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h index f7d81663b6d..f34db9d0886 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h @@ -16,6 +16,7 @@ protected: Gen::OpArg GetDefaultLocation(preg_t preg) const override; void StoreRegister(preg_t preg, const Gen::OpArg& newLoc) override; void LoadRegister(preg_t preg, Gen::X64Reg newLoc) override; + void DiscardImm(preg_t preg) override; std::span GetAllocationOrder() const override; BitSet32 GetRegUtilization() const override; BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const override; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp index ca30e15784a..b44382ba447 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp 
@@ -25,6 +25,11 @@ void GPRRegCache::LoadRegister(preg_t preg, X64Reg new_loc) m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].Location().value()); } +void GPRRegCache::DiscardImm(preg_t preg) +{ + m_jit.GetConstantPropagation().ClearGPR(preg); +} + OpArg GPRRegCache::GetDefaultLocation(preg_t preg) const { return PPCSTATE_GPR(preg); @@ -50,6 +55,7 @@ void GPRRegCache::SetImmediate32(preg_t preg, u32 imm_value, bool dirty) // processing speculative constants. DiscardRegContentsIfCached(preg); m_regs[preg].SetToImm32(imm_value, dirty); + m_jit.GetConstantPropagation().SetGPR(preg, imm_value); } BitSet32 GPRRegCache::GetRegUtilization() const diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h index 60985e19607..a5bf5242694 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h @@ -17,6 +17,7 @@ protected: Gen::OpArg GetDefaultLocation(preg_t preg) const override; void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) override; void LoadRegister(preg_t preg, Gen::X64Reg new_loc) override; + void DiscardImm(preg_t preg) override; std::span GetAllocationOrder() const override; BitSet32 GetRegUtilization() const override; BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const override; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp index d59d4f0a01d..d1e06661709 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp @@ -536,6 +536,9 @@ void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty) m_xregs[RX(i)].MakeDirty(); } + if (makeDirty) + DiscardImm(i); + ASSERT_MSG(DYNA_REC, !m_xregs[RX(i)].IsLocked(), "WTF, this reg ({} -> {}) should have been flushed", i, Common::ToUnderlying(RX(i))); } diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h 
b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h index 3677d2b42b7..1ff4e27ea78 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h @@ -193,6 +193,7 @@ protected: virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0; virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0; virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0; + virtual void DiscardImm(preg_t preg) = 0; virtual std::span GetAllocationOrder() const = 0; diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index ba30b0bc818..975f5f95f7d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -279,6 +279,9 @@ void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) fpr.ResetRegisters(js.op->GetFregsOut()); gpr.ResetCRRegisters(js.op->crOut); + // We must also update constant propagation + m_constant_propagation.ClearGPRs(js.op->regsOut); + if (js.op->opinfo->flags & FL_SET_MSR) EmitUpdateMembase(); @@ -1354,12 +1357,8 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) m_constant_propagation.EvaluateInstruction(op.inst, opinfo->flags); if (!constant_propagation_result.instruction_fully_executed) - { CompileInstruction(op); - m_constant_propagation.ClearGPRs(op.regsOut); - } - m_constant_propagation.Apply(constant_propagation_result); if (constant_propagation_result.gpr >= 0) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 1b738061b22..e98f950d42f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -36,6 +36,8 @@ public: void Init() override; void Shutdown() override; + JitCommon::ConstantPropagation& GetConstantPropagation() { return m_constant_propagation; } + JitBaseBlockCache* GetBlockCache() override { return &blocks; } bool IsInCodeSpace(const u8* ptr) const { return 
IsInSpace(ptr); } bool HandleFault(uintptr_t access_address, SContext* ctx) override; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 786f5be4a32..d0c9ac71bde 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -381,6 +381,7 @@ void Arm64GPRCache::SetImmediateInternal(size_t index, u32 imm, bool dirty) UnlockRegister(EncodeRegTo32(reg.GetReg())); reg.LoadToImm(imm); reg.SetDirty(dirty); + m_jit->GetConstantPropagation().SetGPR(index - GUEST_GPR_OFFSET, imm); } void Arm64GPRCache::BindForWrite(size_t index, bool will_read, bool will_write) @@ -388,6 +389,7 @@ void Arm64GPRCache::BindForWrite(size_t index, bool will_read, bool will_write) GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; const size_t bitsize = guest_reg.bitsize; + const bool is_gpr = index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT; reg.ResetLastUsed(); @@ -414,12 +416,13 @@ void Arm64GPRCache::BindForWrite(size_t index, bool will_read, bool will_write) m_emit->MOVI2R(host_reg, reg.GetImm()); } reg.Load(host_reg); - if (will_write) - reg.SetDirty(true); } - else if (will_write) + + if (will_write) { reg.SetDirty(true); + if (is_gpr) + m_jit->GetConstantPropagation().ClearGPR(index - GUEST_GPR_OFFSET); } } From 817bb9d94c43ca6be8862e2a09159f9b67836807 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 25 Aug 2023 15:28:07 +0200 Subject: [PATCH 28/30] Jit64: Don't store immediate values in register cache They're now stored in ConstantPropagation instead. I've also removed the LocationType enum. The location of each guest register is now tracked using three booleans: Whether it is in ppcState, whether it is in a host register, and whether it is a known immediate. The first two of these booleans are stored in the register cache, and the last one is stored in ConstantPropagation. 
This new model allows us to handle the combination of a value simultaneously being in a host register and being a known immediate. It also keeps track of which registers are dirty, which was previously kept track of in X64CachedReg. The old model maps to the new model as follows: default host_reg immediate Default true false false Discarded false false false Bound (!dirty) true false Immediate false false true SpeculativeImmediate true false true [previously unrepresentable] (!dirty) true true --- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 1 + .../Core/PowerPC/Jit64/RegCache/CachedReg.h | 122 +++++--------- .../PowerPC/Jit64/RegCache/FPURegCache.cpp | 48 +++++- .../Core/PowerPC/Jit64/RegCache/FPURegCache.h | 8 +- .../PowerPC/Jit64/RegCache/GPRRegCache.cpp | 69 +++++++- .../Core/PowerPC/Jit64/RegCache/GPRRegCache.h | 9 +- .../PowerPC/Jit64/RegCache/JitRegCache.cpp | 158 ++++++------------ .../Core/PowerPC/Jit64/RegCache/JitRegCache.h | 21 ++- 8 files changed, 223 insertions(+), 213 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index bdbb023c258..6cee5b971e3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -1142,6 +1142,7 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) if (constant_propagation_result.gpr >= 0) { + // Mark the GPR as dirty in the register cache gpr.SetImmediate32(constant_propagation_result.gpr, constant_propagation_result.gpr_value); } diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h index acf5480abb7..99a6e472254 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/CachedReg.h @@ -16,111 +16,79 @@ using preg_t = size_t; class PPCCachedReg { public: - enum class LocationType - { - /// Value is currently at its default location - Default, - /// Value is not stored anywhere because we know it won't be read 
before the next write - Discarded, - /// Value is currently bound to a x64 register - Bound, - /// Value is known as an immediate and has not been written back to its default location - Immediate, - /// Value is known as an immediate and is already present at its default location - SpeculativeImmediate, - }; - PPCCachedReg() = default; - explicit PPCCachedReg(Gen::OpArg default_location_) - : default_location(default_location_), location(default_location_) + explicit PPCCachedReg(Gen::OpArg default_location) : m_default_location(default_location) {} + + Gen::OpArg GetDefaultLocation() const { return m_default_location; } + + Gen::X64Reg GetHostRegister() const { + ASSERT(m_in_host_register); + return m_host_register; } - const std::optional& Location() const { return location; } + bool IsInDefaultLocation() const { return m_in_default_location; } + bool IsInHostRegister() const { return m_in_host_register; } - LocationType GetLocationType() const + void SetFlushed(bool maintain_host_register) { - if (!location.has_value()) - return LocationType::Discarded; - - if (!away) - { - ASSERT(!revertable); - - if (location->IsImm()) - return LocationType::SpeculativeImmediate; - - ASSERT(*location == default_location); - return LocationType::Default; - } - - ASSERT(location->IsImm() || location->IsSimpleReg()); - return location->IsImm() ? 
LocationType::Immediate : LocationType::Bound; + ASSERT(!m_revertable); + if (!maintain_host_register) + m_in_host_register = false; + m_in_default_location = true; } - bool IsAway() const { return away; } - bool IsDiscarded() const { return !location.has_value(); } - bool IsBound() const { return GetLocationType() == LocationType::Bound; } - - void SetBoundTo(Gen::X64Reg xreg) + void SetInHostRegister(Gen::X64Reg xreg, bool dirty) { - away = true; - location = Gen::R(xreg); + if (dirty) + m_in_default_location = false; + m_in_host_register = true; + m_host_register = xreg; } + void SetDirty() { m_in_default_location = false; } + void SetDiscarded() { - ASSERT(!revertable); - away = false; - location = std::nullopt; + ASSERT(!m_revertable); + m_in_default_location = false; + m_in_host_register = false; } - void SetFlushed() - { - ASSERT(!revertable); - away = false; - location = default_location; - } - - void SetToImm32(u32 imm32, bool dirty = true) - { - away |= dirty; - location = Gen::Imm32(imm32); - } - - bool IsRevertable() const { return revertable; } + bool IsRevertable() const { return m_revertable; } void SetRevertable() { - ASSERT(IsBound()); - revertable = true; + ASSERT(m_in_host_register); + m_revertable = true; } void SetRevert() { - ASSERT(revertable); - revertable = false; - SetFlushed(); + ASSERT(m_revertable); + m_revertable = false; + SetFlushed(false); } void SetCommit() { - ASSERT(revertable); - revertable = false; + ASSERT(m_revertable); + m_revertable = false; } - bool IsLocked() const { return locked > 0; } - void Lock() { locked++; } + bool IsLocked() const { return m_locked > 0; } + void Lock() { m_locked++; } void Unlock() { ASSERT(IsLocked()); - locked--; + m_locked--; } private: - Gen::OpArg default_location{}; - std::optional location{}; - bool away = false; // value not in source register - bool revertable = false; - size_t locked = 0; + Gen::OpArg m_default_location{}; + Gen::X64Reg m_host_register{}; + bool m_in_default_location = 
true; + bool m_in_host_register = false; + bool m_revertable = false; + size_t m_locked = 0; }; class X64CachedReg @@ -128,25 +96,20 @@ class X64CachedReg public: preg_t Contents() const { return ppcReg; } - void SetBoundTo(preg_t ppcReg_, bool dirty_) + void SetBoundTo(preg_t ppcReg_) { free = false; ppcReg = ppcReg_; - dirty = dirty_; } void Unbind() { ppcReg = static_cast(Gen::INVALID_REG); free = true; - dirty = false; } bool IsFree() const { return free && !locked; } - bool IsDirty() const { return dirty; } - void MakeDirty() { dirty = true; } - bool IsLocked() const { return locked > 0; } void Lock() { locked++; } void Unlock() @@ -158,7 +121,6 @@ public: private: preg_t ppcReg = static_cast(Gen::INVALID_REG); bool free = true; - bool dirty = false; size_t locked = 0; }; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp index 64870ec026a..3e089ffe7dd 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.cpp @@ -13,16 +13,54 @@ FPURegCache::FPURegCache(Jit64& jit) : RegCache{jit} { } -void FPURegCache::StoreRegister(preg_t preg, const OpArg& new_loc) +bool FPURegCache::IsImm(preg_t preg) const { - ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - {}", preg); - m_emitter->MOVAPD(new_loc, m_regs[preg].Location()->GetSimpleReg()); + return false; +} + +u32 FPURegCache::Imm32(preg_t preg) const +{ + ASSERT_MSG(DYNA_REC, false, "FPURegCache doesn't support immediates"); + return 0; +} + +s32 FPURegCache::SImm32(preg_t preg) const +{ + ASSERT_MSG(DYNA_REC, false, "FPURegCache doesn't support immediates"); + return 0; +} + +OpArg FPURegCache::R(preg_t preg) const +{ + if (m_regs[preg].IsInHostRegister()) + { + return ::Gen::R(m_regs[preg].GetHostRegister()); + } + else + { + ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "FPR {} missing!", preg); + return m_regs[preg].GetDefaultLocation(); + } 
+} + +void FPURegCache::StoreRegister(preg_t preg, const OpArg& new_loc, + IgnoreDiscardedRegisters ignore_discarded_registers) +{ + if (m_regs[preg].IsInHostRegister()) + { + m_emitter->MOVAPD(new_loc, m_regs[preg].GetHostRegister()); + } + else + { + ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No, + "FPR {} not in host register", preg); + } } void FPURegCache::LoadRegister(preg_t preg, X64Reg new_loc) { - ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg); - m_emitter->MOVAPD(new_loc, m_regs[preg].Location().value()); + ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "FPR {} not in default location", preg); + m_emitter->MOVAPD(new_loc, m_regs[preg].GetDefaultLocation()); } void FPURegCache::DiscardImm(preg_t preg) diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h index f34db9d0886..76cad940aec 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/FPURegCache.h @@ -12,9 +12,15 @@ class FPURegCache final : public RegCache public: explicit FPURegCache(Jit64& jit); + bool IsImm(preg_t preg) const override; + u32 Imm32(preg_t preg) const override; + s32 SImm32(preg_t preg) const override; + protected: + Gen::OpArg R(preg_t preg) const override; Gen::OpArg GetDefaultLocation(preg_t preg) const override; - void StoreRegister(preg_t preg, const Gen::OpArg& newLoc) override; + void StoreRegister(preg_t preg, const Gen::OpArg& newLoc, + IgnoreDiscardedRegisters ignore_discarded_registers) override; void LoadRegister(preg_t preg, Gen::X64Reg newLoc) override; void DiscardImm(preg_t preg) override; std::span GetAllocationOrder() const override; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp index b44382ba447..a740d76e3dc 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp +++ 
b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.cpp @@ -13,16 +13,71 @@ GPRRegCache::GPRRegCache(Jit64& jit) : RegCache{jit} { } -void GPRRegCache::StoreRegister(preg_t preg, const OpArg& new_loc) +bool GPRRegCache::IsImm(preg_t preg) const { - ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg); - m_emitter->MOV(32, new_loc, m_regs[preg].Location().value()); + return m_jit.GetConstantPropagation().HasGPR(preg); +} + +u32 GPRRegCache::Imm32(preg_t preg) const +{ + ASSERT(m_jit.GetConstantPropagation().HasGPR(preg)); + return m_jit.GetConstantPropagation().GetGPR(preg); +} + +s32 GPRRegCache::SImm32(preg_t preg) const +{ + ASSERT(m_jit.GetConstantPropagation().HasGPR(preg)); + return m_jit.GetConstantPropagation().GetGPR(preg); +} + +OpArg GPRRegCache::R(preg_t preg) const +{ + if (m_regs[preg].IsInHostRegister()) + { + return ::Gen::R(m_regs[preg].GetHostRegister()); + } + else if (m_jit.GetConstantPropagation().HasGPR(preg)) + { + return ::Gen::Imm32(m_jit.GetConstantPropagation().GetGPR(preg)); + } + else + { + ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "GPR {} missing!", preg); + return m_regs[preg].GetDefaultLocation(); + } +} + +void GPRRegCache::StoreRegister(preg_t preg, const OpArg& new_loc, + IgnoreDiscardedRegisters ignore_discarded_registers) +{ + if (m_regs[preg].IsInHostRegister()) + { + m_emitter->MOV(32, new_loc, ::Gen::R(m_regs[preg].GetHostRegister())); + } + else if (m_jit.GetConstantPropagation().HasGPR(preg)) + { + m_emitter->MOV(32, new_loc, ::Gen::Imm32(m_jit.GetConstantPropagation().GetGPR(preg))); + } + else + { + ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No, + "GPR {} not in host register or constant propagation", preg); + } } void GPRRegCache::LoadRegister(preg_t preg, X64Reg new_loc) { - ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg); - m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].Location().value()); + const 
JitCommon::ConstantPropagation& constant_propagation = m_jit.GetConstantPropagation(); + if (constant_propagation.HasGPR(preg)) + { + m_emitter->MOV(32, ::Gen::R(new_loc), ::Gen::Imm32(constant_propagation.GetGPR(preg))); + } + else + { + ASSERT_MSG(DYNA_REC, m_regs[preg].IsInDefaultLocation(), "GPR {} not in default location", + preg); + m_emitter->MOV(32, ::Gen::R(new_loc), m_regs[preg].GetDefaultLocation()); + } } void GPRRegCache::DiscardImm(preg_t preg) @@ -53,8 +108,8 @@ void GPRRegCache::SetImmediate32(preg_t preg, u32 imm_value, bool dirty) { // "dirty" can be false to avoid redundantly flushing an immediate when // processing speculative constants. - DiscardRegContentsIfCached(preg); - m_regs[preg].SetToImm32(imm_value, dirty); + if (dirty) + DiscardRegister(preg); m_jit.GetConstantPropagation().SetGPR(preg, imm_value); } diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h index a5bf5242694..9c0b394bad9 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/GPRRegCache.h @@ -11,11 +11,18 @@ class GPRRegCache final : public RegCache { public: explicit GPRRegCache(Jit64& jit); + + bool IsImm(preg_t preg) const override; + u32 Imm32(preg_t preg) const override; + s32 SImm32(preg_t preg) const override; + void SetImmediate32(preg_t preg, u32 imm_value, bool dirty = true); protected: + Gen::OpArg R(preg_t preg) const override; Gen::OpArg GetDefaultLocation(preg_t preg) const override; - void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) override; + void StoreRegister(preg_t preg, const Gen::OpArg& new_loc, + IgnoreDiscardedRegisters ignore_discarded_registers) override; void LoadRegister(preg_t preg, Gen::X64Reg new_loc) override; void DiscardImm(preg_t preg) override; std::span GetAllocationOrder() const override; diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp 
b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp index d1e06661709..2a787f31207 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.cpp @@ -136,7 +136,7 @@ bool RCOpArg::IsImm() const { if (const preg_t* preg = std::get_if(&contents)) { - return rc->R(*preg).IsImm(); + return rc->IsImm(*preg); } else if (std::holds_alternative(contents)) { @@ -149,7 +149,7 @@ s32 RCOpArg::SImm32() const { if (const preg_t* preg = std::get_if(&contents)) { - return rc->R(*preg).SImm32(); + return rc->SImm32(*preg); } else if (const u32* imm = std::get_if(&contents)) { @@ -163,7 +163,7 @@ u32 RCOpArg::Imm32() const { if (const preg_t* preg = std::get_if(&contents)) { - return rc->R(*preg).Imm32(); + return rc->Imm32(*preg); } else if (const u32* imm = std::get_if(&contents)) { @@ -297,25 +297,16 @@ bool RegCache::SanityCheck() const { for (size_t i = 0; i < m_regs.size(); i++) { - switch (m_regs[i].GetLocationType()) - { - case PPCCachedReg::LocationType::Default: - case PPCCachedReg::LocationType::Discarded: - case PPCCachedReg::LocationType::SpeculativeImmediate: - case PPCCachedReg::LocationType::Immediate: - break; - case PPCCachedReg::LocationType::Bound: + if (m_regs[i].IsInHostRegister()) { if (m_regs[i].IsLocked() || m_regs[i].IsRevertable()) return false; - Gen::X64Reg xr = m_regs[i].Location()->GetSimpleReg(); + Gen::X64Reg xr = m_regs[i].GetHostRegister(); if (m_xregs[xr].IsLocked()) return false; if (m_xregs[xr].Contents() != i) return false; - break; - } } } return true; @@ -379,13 +370,7 @@ void RegCache::Discard(BitSet32 pregs) ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction is in progress for {}!", i); - if (m_regs[i].IsBound()) - { - X64Reg xr = RX(i); - m_xregs[xr].Unbind(); - } - - m_regs[i].SetDiscarded(); + DiscardRegister(i); } } @@ -401,25 +386,7 @@ void RegCache::Flush(BitSet32 pregs, IgnoreDiscardedRegisters ignore_discarded_r ASSERT_MSG(DYNA_REC, 
!m_regs[i].IsRevertable(), "Register transaction is in progress for {}!", i); - switch (m_regs[i].GetLocationType()) - { - case PPCCachedReg::LocationType::Default: - break; - case PPCCachedReg::LocationType::Discarded: - ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No, - "Attempted to flush discarded PPC reg {}", i); - break; - case PPCCachedReg::LocationType::SpeculativeImmediate: - // We can have a cached value without a host register through speculative constants. - // It must be cleared when flushing, otherwise it may be out of sync with PPCSTATE, - // if PPCSTATE is modified externally (e.g. fallback to interpreter). - m_regs[i].SetFlushed(); - break; - case PPCCachedReg::LocationType::Bound: - case PPCCachedReg::LocationType::Immediate: - StoreFromRegister(i); - break; - } + StoreFromRegister(i, FlushMode::Full, ignore_discarded_registers); } } @@ -427,9 +394,9 @@ void RegCache::Reset(BitSet32 pregs) { for (preg_t i : pregs) { - ASSERT_MSG(DYNA_REC, !m_regs[i].IsAway(), + ASSERT_MSG(DYNA_REC, !m_regs[i].IsInHostRegister(), "Attempted to reset a loaded register (did you mean to flush it?)"); - m_regs[i].SetFlushed(); + m_regs[i].SetFlushed(false); } } @@ -465,7 +432,7 @@ void RegCache::PreloadRegisters(BitSet32 to_preload) { if (NumFreeRegisters() < 2) return; - if (!R(preg).IsImm()) + if (!IsImm(preg)) BindToRegister(preg, true, false); } } @@ -492,48 +459,46 @@ void RegCache::FlushX(X64Reg reg) } } -void RegCache::DiscardRegContentsIfCached(preg_t preg) +void RegCache::DiscardRegister(preg_t preg) { - if (m_regs[preg].IsBound()) + if (m_regs[preg].IsInHostRegister()) { - X64Reg xr = m_regs[preg].Location()->GetSimpleReg(); + X64Reg xr = m_regs[preg].GetHostRegister(); m_xregs[xr].Unbind(); - m_regs[preg].SetFlushed(); } + + m_regs[preg].SetDiscarded(); } void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty) { - if (!m_regs[i].IsBound()) + if (!m_regs[i].IsInHostRegister()) { X64Reg xr = GetFreeXReg(); - 
ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsDirty(), "Xreg {} already dirty", Common::ToUnderlying(xr)); ASSERT_MSG(DYNA_REC, !m_xregs[xr].IsLocked(), "GetFreeXReg returned locked register"); ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Invalid transaction state"); - m_xregs[xr].SetBoundTo(i, makeDirty || m_regs[i].IsAway()); + m_xregs[xr].SetBoundTo(i); if (doLoad) - { - ASSERT_MSG(DYNA_REC, !m_regs[i].IsDiscarded(), "Attempted to load a discarded value"); LoadRegister(i, xr); - } ASSERT_MSG(DYNA_REC, - std::ranges::none_of( - m_regs, [xr](const auto& l) { return l.has_value() && l->IsSimpleReg(xr); }, - &PPCCachedReg::Location), + std::ranges::none_of(m_regs, + [xr](const auto& r) { + return r.IsInHostRegister() && r.GetHostRegister() == xr; + }), "Xreg {} already bound", Common::ToUnderlying(xr)); - m_regs[i].SetBoundTo(xr); + m_regs[i].SetInHostRegister(xr, makeDirty); } else { // reg location must be simplereg; memory locations // and immediates are taken care of above. if (makeDirty) - m_xregs[RX(i)].MakeDirty(); + m_regs[i].SetDirty(); } if (makeDirty) @@ -543,36 +508,19 @@ void RegCache::BindToRegister(preg_t i, bool doLoad, bool makeDirty) "WTF, this reg ({} -> {}) should have been flushed", i, Common::ToUnderlying(RX(i))); } -void RegCache::StoreFromRegister(preg_t i, FlushMode mode) +void RegCache::StoreFromRegister(preg_t i, FlushMode mode, + IgnoreDiscardedRegisters ignore_discarded_registers) { // When a transaction is in progress, allowing the store would overwrite the old value. 
ASSERT_MSG(DYNA_REC, !m_regs[i].IsRevertable(), "Register transaction on {} is in progress!", i); - bool doStore = false; + if (!m_regs[i].IsInDefaultLocation()) + StoreRegister(i, GetDefaultLocation(i), ignore_discarded_registers); - switch (m_regs[i].GetLocationType()) - { - case PPCCachedReg::LocationType::Default: - case PPCCachedReg::LocationType::Discarded: - case PPCCachedReg::LocationType::SpeculativeImmediate: - return; - case PPCCachedReg::LocationType::Bound: - { - X64Reg xr = RX(i); - doStore = m_xregs[xr].IsDirty(); - if (mode == FlushMode::Full) - m_xregs[xr].Unbind(); - break; - } - case PPCCachedReg::LocationType::Immediate: - doStore = true; - break; - } + if (mode == FlushMode::Full && m_regs[i].IsInHostRegister()) + m_xregs[m_regs[i].GetHostRegister()].Unbind(); - if (doStore) - StoreRegister(i, GetDefaultLocation(i)); - if (mode == FlushMode::Full) - m_regs[i].SetFlushed(); + m_regs[i].SetFlushed(mode != FlushMode::Full); } X64Reg RegCache::GetFreeXReg() @@ -637,7 +585,7 @@ float RegCache::ScoreRegister(X64Reg xreg) const // bias a bit against dirty registers. Testing shows that a bias of 2 seems roughly // right: 3 causes too many extra clobbers, while 1 saves very few clobbers relative // to the number of extra stores it causes. 
- if (m_xregs[xreg].IsDirty()) + if (!m_regs[preg].IsInDefaultLocation()) score += 2; // If the register isn't actually needed in a physical register for a later instruction, @@ -658,16 +606,10 @@ float RegCache::ScoreRegister(X64Reg xreg) const return score; } -const OpArg& RegCache::R(preg_t preg) const -{ - ASSERT_MSG(DYNA_REC, !m_regs[preg].IsDiscarded(), "Discarded register - {}", preg); - return m_regs[preg].Location().value(); -} - X64Reg RegCache::RX(preg_t preg) const { - ASSERT_MSG(DYNA_REC, m_regs[preg].IsBound(), "Unbound register - {}", preg); - return m_regs[preg].Location()->GetSimpleReg(); + ASSERT_MSG(DYNA_REC, m_regs[preg].IsInHostRegister(), "Not in host register - {}", preg); + return m_regs[preg].GetHostRegister(); } void RegCache::Lock(preg_t preg) @@ -723,29 +665,23 @@ void RegCache::Realize(preg_t preg) return; } - switch (m_regs[preg].GetLocationType()) + if (IsImm(preg)) { - case PPCCachedReg::LocationType::Default: - if (kill_mem) - { - do_bind(); - return; - } - m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem); - return; - case PPCCachedReg::LocationType::Discarded: - case PPCCachedReg::LocationType::Bound: - do_bind(); - return; - case PPCCachedReg::LocationType::Immediate: - case PPCCachedReg::LocationType::SpeculativeImmediate: if (dirty || kill_imm) - { do_bind(); - return; - } - m_constraints[preg].Realized(RCConstraint::RealizedLoc::Imm); - break; + else + m_constraints[preg].Realized(RCConstraint::RealizedLoc::Imm); + } + else if (!m_regs[preg].IsInHostRegister()) + { + if (kill_mem) + do_bind(); + else + m_constraints[preg].Realized(RCConstraint::RealizedLoc::Mem); + } + else + { + do_bind(); } } diff --git a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h index 1ff4e27ea78..0a7ab3836d2 100644 --- a/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h +++ b/Source/Core/Core/PowerPC/Jit64/RegCache/JitRegCache.h @@ -157,12 +157,14 @@ public: bool 
IsImm(Args... pregs) const { static_assert(sizeof...(pregs) > 0); - return (R(pregs).IsImm() && ...); + return (IsImm(preg_t(pregs)) && ...); } - u32 Imm32(preg_t preg) const { return R(preg).Imm32(); } - s32 SImm32(preg_t preg) const { return R(preg).SImm32(); } - bool IsBound(preg_t preg) const { return m_regs[preg].IsBound(); } + virtual bool IsImm(preg_t preg) const = 0; + virtual u32 Imm32(preg_t preg) const = 0; + virtual s32 SImm32(preg_t preg) const = 0; + + bool IsBound(preg_t preg) const { return m_regs[preg].IsInHostRegister(); } RCOpArg Use(preg_t preg, RCMode mode); RCOpArg UseNoImm(preg_t preg, RCMode mode); @@ -191,7 +193,8 @@ protected: friend class RCForkGuard; virtual Gen::OpArg GetDefaultLocation(preg_t preg) const = 0; - virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc) = 0; + virtual void StoreRegister(preg_t preg, const Gen::OpArg& new_loc, + IgnoreDiscardedRegisters ignore_discarded_registers) = 0; virtual void LoadRegister(preg_t preg, Gen::X64Reg new_loc) = 0; virtual void DiscardImm(preg_t preg) = 0; @@ -201,16 +204,18 @@ protected: virtual BitSet32 CountRegsIn(preg_t preg, u32 lookahead) const = 0; void FlushX(Gen::X64Reg reg); - void DiscardRegContentsIfCached(preg_t preg); + void DiscardRegister(preg_t preg); void BindToRegister(preg_t preg, bool doLoad = true, bool makeDirty = true); - void StoreFromRegister(preg_t preg, FlushMode mode = FlushMode::Full); + void StoreFromRegister( + preg_t preg, FlushMode mode = FlushMode::Full, + IgnoreDiscardedRegisters ignore_discarded_registers = IgnoreDiscardedRegisters::No); Gen::X64Reg GetFreeXReg(); int NumFreeRegisters() const; float ScoreRegister(Gen::X64Reg xreg) const; - const Gen::OpArg& R(preg_t preg) const; + virtual Gen::OpArg R(preg_t preg) const = 0; Gen::X64Reg RX(preg_t preg) const; void Lock(preg_t preg); From 2995aa5be4e09aa880c0e2303084b81d1effa62e Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 1 Sep 2024 15:34:17 +0200 Subject: [PATCH 29/30] JitArm64: Don't 
store immediate values in register cache Like the previous commit did for Jit64, JitArm64 can now handle the combination of a value simultaneously being in a host register and being a known immediate. Unlike with Jit64, I've put the codegen-affecting changes in this commit and the move away from the RegType enum in a follow-up commit. This is in part because the design of JitArm64 made it easy to implement the codegen-affecting changes without combining it with a big bang refactorization, and in part because we need to keep RegType around for keeping track of different float formats in Arm64FPRCache, complicating the refactorization a bit. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 3 + .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 90 +++++++++++-------- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 22 ++--- 3 files changed, 63 insertions(+), 52 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 975f5f95f7d..37bfc4c24ac 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -1362,7 +1362,10 @@ bool JitArm64::DoJit(u32 em_address, JitBlock* b, u32 nextPC) m_constant_propagation.Apply(constant_propagation_result); if (constant_propagation_result.gpr >= 0) + { + // Mark the GPR as dirty in the register cache gpr.SetImmediate(constant_propagation_result.gpr, constant_propagation_result.gpr_value); + } if (constant_propagation_result.instruction_fully_executed) { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index d0c9ac71bde..161a1e01e3b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -115,7 +115,7 @@ void Arm64RegCache::FlushMostStaleRegister() const u32 last_used = reg.GetLastUsed(); if (last_used > most_stale_amount && reg.GetType() != RegType::NotLoaded && - reg.GetType() != 
RegType::Discarded && reg.GetType() != RegType::Immediate) + reg.GetType() != RegType::Discarded) { most_stale_preg = i; most_stale_amount = last_used; @@ -145,6 +145,19 @@ void Arm64GPRCache::Start(PPCAnalyst::BlockRegStats& stats) { } +// Returns if a register is set as an immediate. Only valid for guest GPRs. +bool Arm64GPRCache::IsImm(size_t preg) const +{ + return m_jit->GetConstantPropagation().HasGPR(preg); +} + +// Gets the immediate that a register is set to. Only valid for guest GPRs. +u32 Arm64GPRCache::GetImm(size_t preg) const +{ + ASSERT(m_jit->GetConstantPropagation().HasGPR(preg)); + return m_jit->GetConstantPropagation().GetGPR(preg); +} + bool Arm64GPRCache::IsCallerSaved(ARM64Reg reg) const { return ARM64XEmitter::CALLER_SAVED_GPRS[DecodeReg(reg)]; @@ -186,6 +199,7 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; size_t bitsize = guest_reg.bitsize; + const bool is_gpr = index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT; if (reg.GetType() == RegType::Register) { @@ -199,11 +213,12 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg reg.Flush(); } } - else if (reg.GetType() == RegType::Immediate) + else if (is_gpr && IsImm(index - GUEST_GPR_OFFSET)) { if (reg.IsDirty()) { - if (!reg.GetImm()) + const u32 imm = GetImm(index - GUEST_GPR_OFFSET); + if (imm == 0) { m_emit->STR(IndexType::Unsigned, bitsize == 64 ? ARM64Reg::ZR : ARM64Reg::WZR, PPC_REG, u32(guest_reg.ppc_offset)); @@ -225,7 +240,7 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg const ARM64Reg encoded_tmp_reg = bitsize != 64 ? 
tmp_reg : EncodeRegTo64(tmp_reg); - m_emit->MOVI2R(encoded_tmp_reg, reg.GetImm()); + m_emit->MOVI2R(encoded_tmp_reg, imm); m_emit->STR(IndexType::Unsigned, encoded_tmp_reg, PPC_REG, u32(guest_reg.ppc_offset)); if (allocated_tmp_reg) @@ -244,10 +259,10 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r for (auto iter = regs.begin(); iter != regs.end(); ++iter) { const int i = *iter; - ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No || - m_guest_registers[GUEST_GPR_OFFSET + i].GetType() != RegType::Discarded, + m_guest_registers[GUEST_GPR_OFFSET + i].GetType() != RegType::Discarded || + IsImm(i), "Attempted to flush discarded register"); if (i + 1 < int(GUEST_GPR_COUNT) && regs[i + 1]) @@ -255,10 +270,10 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r // We've got two guest registers in a row to store OpArg& reg1 = m_guest_registers[GUEST_GPR_OFFSET + i]; OpArg& reg2 = m_guest_registers[GUEST_GPR_OFFSET + i + 1]; - const bool reg1_imm = reg1.GetType() == RegType::Immediate; - const bool reg2_imm = reg2.GetType() == RegType::Immediate; - const bool reg1_zero = reg1_imm && reg1.GetImm() == 0; - const bool reg2_zero = reg2_imm && reg2.GetImm() == 0; + const bool reg1_imm = IsImm(i); + const bool reg2_imm = IsImm(i + 1); + const bool reg1_zero = reg1_imm && GetImm(i) == 0; + const bool reg2_zero = reg2_imm && GetImm(i + 1) == 0; const bool flush_all = mode == FlushMode::All; if (reg1.IsDirty() && reg2.IsDirty() && (reg1.GetType() == RegType::Register || (reg1_imm && (reg1_zero || flush_all))) && @@ -334,6 +349,7 @@ ARM64Reg Arm64GPRCache::BindForRead(size_t index) GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; size_t bitsize = guest_reg.bitsize; + const bool is_gpr = index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT; IncrementAllUsed(); reg.ResetLastUsed(); @@ -342,17 +358,15 @@ ARM64Reg Arm64GPRCache::BindForRead(size_t 
index) { case RegType::Register: // already in a reg return reg.GetReg(); - case RegType::Immediate: // Is an immediate + case RegType::Discarded: // Is an immediate or discarded { + ASSERT_MSG(DYNA_REC, is_gpr && IsImm(index - GUEST_GPR_OFFSET), + "Attempted to read discarded register"); ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); - m_emit->MOVI2R(host_reg, reg.GetImm()); + m_emit->MOVI2R(host_reg, GetImm(index - GUEST_GPR_OFFSET)); reg.Load(host_reg); return host_reg; } - break; - case RegType::Discarded: - ASSERT_MSG(DYNA_REC, false, "Attempted to read discarded register"); - break; case RegType::NotLoaded: // Register isn't loaded at /all/ { // This is a bit annoying. We try to keep these preloaded as much as possible @@ -379,7 +393,7 @@ void Arm64GPRCache::SetImmediateInternal(size_t index, u32 imm, bool dirty) OpArg& reg = guest_reg.reg; if (reg.GetType() == RegType::Register) UnlockRegister(EncodeRegTo32(reg.GetReg())); - reg.LoadToImm(imm); + reg.Discard(); reg.SetDirty(dirty); m_jit->GetConstantPropagation().SetGPR(index - GUEST_GPR_OFFSET, imm); } @@ -394,28 +408,32 @@ void Arm64GPRCache::BindForWrite(size_t index, bool will_read, bool will_write) reg.ResetLastUsed(); const RegType reg_type = reg.GetType(); - if (reg_type == RegType::NotLoaded || reg_type == RegType::Discarded) + if (reg_type != RegType::Register) { - const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); - reg.Load(host_reg); - reg.SetDirty(will_write); - if (will_read) + if (is_gpr && IsImm(index - GUEST_GPR_OFFSET)) { - ASSERT_MSG(DYNA_REC, reg_type != RegType::Discarded, "Attempted to load a discarded value"); - m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); + const ARM64Reg host_reg = bitsize != 64 ? 
GetReg() : EncodeRegTo64(GetReg()); + if (will_read || !will_write) + { + // TODO: Emitting this instruction when (!will_read && !will_write) would be unnecessary if + // we had some way to indicate to Flush that the immediate value should be written to + // ppcState even though there is a host register allocated + m_emit->MOVI2R(host_reg, GetImm(index - GUEST_GPR_OFFSET)); + } + reg.Load(host_reg); } - } - else if (reg_type == RegType::Immediate) - { - const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); - if (will_read || !will_write) + else { - // TODO: Emitting this instruction when (!will_read && !will_write) would be unnecessary if we - // had some way to indicate to Flush that the immediate value should be written to ppcState - // even though there is a host register allocated - m_emit->MOVI2R(host_reg, reg.GetImm()); + const ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); + reg.Load(host_reg); + reg.SetDirty(will_write); + if (will_read) + { + ASSERT_MSG(DYNA_REC, reg_type != RegType::Discarded, "Attempted to load a discarded value"); + m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); + } + return; } - reg.Load(host_reg); } if (will_write) @@ -521,7 +539,7 @@ void Arm64FPRCache::Flush(FlushMode mode, ARM64Reg tmp_reg, ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No, "Attempted to flush discarded register"); } - else if (reg_type != RegType::NotLoaded && reg_type != RegType::Immediate) + else if (reg_type != RegType::NotLoaded) { FlushRegister(i, mode, tmp_reg); } @@ -785,7 +803,7 @@ void Arm64FPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg) const RegType reg_type = reg.GetType(); if (reg_type != RegType::NotLoaded && reg_type != RegType::Discarded && - reg_type != RegType::Immediate && reg.GetReg() == host_reg) + reg.GetReg() == host_reg) { FlushRegister(i, FlushMode::All, tmp_reg); return; diff --git 
a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index c973e3bc285..82c0d941427 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -61,9 +61,8 @@ static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!"); enum class RegType { NotLoaded, - Discarded, // Reg is not loaded because we know it won't be read before the next write + Discarded, // Reg is in ConstantPropagation, or isn't loaded at all Register, // Reg type is register - Immediate, // Reg is really a IMM LowerPair, // Only the lower pair of a paired register Duplicated, // The lower reg is the same as the upper one (physical upper doesn't actually have // the duplicated value) @@ -94,24 +93,17 @@ public: RegType GetType() const { return m_type; } Arm64Gen::ARM64Reg GetReg() const { return m_reg; } - u32 GetImm() const { return m_value; } void Load(Arm64Gen::ARM64Reg reg, RegType type = RegType::Register) { m_type = type; m_reg = reg; } - void LoadToImm(u32 imm) - { - m_type = RegType::Immediate; - m_value = imm; - - m_reg = Arm64Gen::ARM64Reg::INVALID_REG; - } void Discard() { // Invalidate any previous information m_type = RegType::Discarded; m_reg = Arm64Gen::ARM64Reg::INVALID_REG; + m_dirty = true; // Arbitrarily large value that won't roll over on a lot of increments m_last_used = 0xFFFF; @@ -121,6 +113,7 @@ public: // Invalidate any previous information m_type = RegType::NotLoaded; m_reg = Arm64Gen::ARM64Reg::INVALID_REG; + m_dirty = false; // Arbitrarily large value that won't roll over on a lot of increments m_last_used = 0xFFFF; @@ -137,9 +130,6 @@ private: RegType m_type = RegType::NotLoaded; // store type Arm64Gen::ARM64Reg m_reg = Arm64Gen::ARM64Reg::INVALID_REG; // host register we are in - // For REG_IMM - u32 m_value = 0; // IMM value - u32 m_last_used = 0; bool m_dirty = false; @@ -339,11 +329,11 @@ public: 
SetImmediateInternal(GUEST_GPR_OFFSET + preg, imm, dirty); } - // Returns if a register is set as an immediate. Only valid for guest GPRs. - bool IsImm(size_t preg) const { return GetGuestGPROpArg(preg).GetType() == RegType::Immediate; } + // Returns whether a register is set as an immediate. Only valid for guest GPRs. + bool IsImm(size_t preg) const; // Gets the immediate that a register is set to. Only valid for guest GPRs. - u32 GetImm(size_t preg) const { return GetGuestGPROpArg(preg).GetImm(); } + u32 GetImm(size_t preg) const; bool IsImm(size_t preg, u32 imm) const { return IsImm(preg) && GetImm(preg) == imm; } From b9d9f36ce554e3af6f4f906047a3eaeebaa8030a Mon Sep 17 00:00:00 2001 From: JosJuice Date: Wed, 23 Oct 2024 20:55:51 +0200 Subject: [PATCH 30/30] JitArm64: Replace dirty flag and partially replace RegType enum Like Jit64, JitArm64 now keeps track of the location of a guest register using three booleans: Whether it is in ppcState, whether it is in a host register, and whether it is a known immediate. The RegType enum remains only for the purpose of keeping track of what format FPRs are stored in in host registers. 
--- .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 174 ++++++++---------- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 47 ++--- 2 files changed, 104 insertions(+), 117 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 161a1e01e3b..8ba7a6a3be4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -114,8 +114,7 @@ void Arm64RegCache::FlushMostStaleRegister() const auto& reg = m_guest_registers[i]; const u32 last_used = reg.GetLastUsed(); - if (last_used > most_stale_amount && reg.GetType() != RegType::NotLoaded && - reg.GetType() != RegType::Discarded) + if (last_used > most_stale_amount && reg.IsInHostRegister()) { most_stale_preg = i; most_stale_amount = last_used; @@ -201,10 +200,10 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg size_t bitsize = guest_reg.bitsize; const bool is_gpr = index >= GUEST_GPR_OFFSET && index < GUEST_GPR_OFFSET + GUEST_GPR_COUNT; - if (reg.GetType() == RegType::Register) + if (reg.IsInHostRegister()) { ARM64Reg host_reg = reg.GetReg(); - if (reg.IsDirty()) + if (!reg.IsInPPCState()) m_emit->STR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); if (mode == FlushMode::All) @@ -215,7 +214,7 @@ void Arm64GPRCache::FlushRegister(size_t index, FlushMode mode, ARM64Reg tmp_reg } else if (is_gpr && IsImm(index - GUEST_GPR_OFFSET)) { - if (reg.IsDirty()) + if (!reg.IsInPPCState()) { const u32 imm = GetImm(index - GUEST_GPR_OFFSET); if (imm == 0) @@ -259,10 +258,10 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r for (auto iter = regs.begin(); iter != regs.end(); ++iter) { const int i = *iter; + OpArg& reg = m_guest_registers[GUEST_GPR_OFFSET + i]; ASSERT_MSG(DYNA_REC, - ignore_discarded_registers != IgnoreDiscardedRegisters::No || - m_guest_registers[GUEST_GPR_OFFSET + i].GetType() != 
RegType::Discarded || - IsImm(i), + ignore_discarded_registers != IgnoreDiscardedRegisters::No || reg.IsInPPCState() || + reg.IsInHostRegister() || IsImm(i), "Attempted to flush discarded register"); if (i + 1 < int(GUEST_GPR_COUNT) && regs[i + 1]) @@ -275,9 +274,9 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r const bool reg1_zero = reg1_imm && GetImm(i) == 0; const bool reg2_zero = reg2_imm && GetImm(i + 1) == 0; const bool flush_all = mode == FlushMode::All; - if (reg1.IsDirty() && reg2.IsDirty() && - (reg1.GetType() == RegType::Register || (reg1_imm && (reg1_zero || flush_all))) && - (reg2.GetType() == RegType::Register || (reg2_imm && (reg2_zero || flush_all)))) + if (!reg1.IsInPPCState() && !reg2.IsInPPCState() && + (reg1.IsInHostRegister() || (reg1_imm && (reg1_zero || flush_all))) && + (reg2.IsInHostRegister() || (reg2_imm && (reg2_zero || flush_all)))) { const size_t ppc_offset = GetGuestByIndex(i).ppc_offset; if (ppc_offset <= 252) @@ -287,9 +286,9 @@ void Arm64GPRCache::FlushRegisters(BitSet32 regs, FlushMode mode, ARM64Reg tmp_r m_emit->STP(IndexType::Signed, RX1, RX2, PPC_REG, u32(ppc_offset)); if (flush_all) { - if (reg1.GetType() == RegType::Register) + if (reg1.IsInHostRegister()) UnlockRegister(reg1.GetReg()); - if (reg2.GetType() == RegType::Register) + if (reg2.IsInHostRegister()) UnlockRegister(reg2.GetReg()); reg1.Flush(); reg2.Flush(); @@ -309,9 +308,10 @@ void Arm64GPRCache::FlushCRRegisters(BitSet8 regs, FlushMode mode, ARM64Reg tmp_ { for (int i : regs) { + OpArg& reg = m_guest_registers[GUEST_CR_OFFSET + i]; ASSERT_MSG(DYNA_REC, - ignore_discarded_registers != IgnoreDiscardedRegisters::No || - m_guest_registers[GUEST_CR_OFFSET + i].GetType() != RegType::Discarded, + ignore_discarded_registers != IgnoreDiscardedRegisters::No || reg.IsInPPCState() || + reg.IsInHostRegister(), "Attempted to flush discarded register"); FlushRegister(GUEST_CR_OFFSET + i, mode, tmp_reg); @@ -354,44 +354,33 @@ ARM64Reg 
Arm64GPRCache::BindForRead(size_t index) IncrementAllUsed(); reg.ResetLastUsed(); - switch (reg.GetType()) + if (reg.IsInHostRegister()) { - case RegType::Register: // already in a reg return reg.GetReg(); - case RegType::Discarded: // Is an immediate or discarded + } + else if (is_gpr && IsImm(index - GUEST_GPR_OFFSET)) { - ASSERT_MSG(DYNA_REC, is_gpr && IsImm(index - GUEST_GPR_OFFSET), - "Attempted to read discarded register"); ARM64Reg host_reg = bitsize != 64 ? GetReg() : EncodeRegTo64(GetReg()); m_emit->MOVI2R(host_reg, GetImm(index - GUEST_GPR_OFFSET)); reg.Load(host_reg); return host_reg; } - case RegType::NotLoaded: // Register isn't loaded at /all/ + else // Register isn't loaded at /all/ { - // This is a bit annoying. We try to keep these preloaded as much as possible - // This can also happen on cases where PPCAnalyst isn't feeing us proper register usage - // statistics + ASSERT_MSG(DYNA_REC, reg.IsInPPCState(), "Attempted to read discarded register"); ARM64Reg host_reg = bitsize != 64 ? 
GetReg() : EncodeRegTo64(GetReg()); reg.Load(host_reg); reg.SetDirty(false); m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); return host_reg; } - break; - default: - ERROR_LOG_FMT(DYNA_REC, "Invalid OpArg Type!"); - break; - } - // We've got an issue if we end up here - return ARM64Reg::INVALID_REG; } void Arm64GPRCache::SetImmediateInternal(size_t index, u32 imm, bool dirty) { GuestRegInfo guest_reg = GetGuestByIndex(index); OpArg& reg = guest_reg.reg; - if (reg.GetType() == RegType::Register) + if (reg.IsInHostRegister()) UnlockRegister(EncodeRegTo32(reg.GetReg())); reg.Discard(); reg.SetDirty(dirty); @@ -407,8 +396,7 @@ void Arm64GPRCache::BindForWrite(size_t index, bool will_read, bool will_write) reg.ResetLastUsed(); - const RegType reg_type = reg.GetType(); - if (reg_type != RegType::Register) + if (!reg.IsInHostRegister()) { if (is_gpr && IsImm(index - GUEST_GPR_OFFSET)) { @@ -424,14 +412,12 @@ void Arm64GPRCache::BindForWrite(size_t index, bool will_read, bool will_write) } else { + ASSERT_MSG(DYNA_REC, !will_read || reg.IsInPPCState(), "Attempted to load a discarded value"); const ARM64Reg host_reg = bitsize != 64 ? 
GetReg() : EncodeRegTo64(GetReg()); reg.Load(host_reg); reg.SetDirty(will_write); if (will_read) - { - ASSERT_MSG(DYNA_REC, reg_type != RegType::Discarded, "Attempted to load a discarded value"); m_emit->LDR(IndexType::Unsigned, host_reg, PPC_REG, u32(guest_reg.ppc_offset)); - } return; } } @@ -502,7 +488,7 @@ BitSet32 Arm64GPRCache::GetDirtyGPRs() const for (size_t i = 0; i < GUEST_GPR_COUNT; ++i) { const OpArg& arg = m_guest_registers[GUEST_GPR_OFFSET + i]; - registers[i] = arg.GetType() != RegType::NotLoaded && arg.IsDirty(); + registers[i] = !arg.IsInPPCState(); } return registers; } @@ -512,7 +498,7 @@ void Arm64GPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg) for (size_t i = 0; i < m_guest_registers.size(); ++i) { const OpArg& reg = m_guest_registers[i]; - if (reg.GetType() == RegType::Register && DecodeReg(reg.GetReg()) == DecodeReg(host_reg)) + if (reg.IsInHostRegister() && DecodeReg(reg.GetReg()) == DecodeReg(host_reg)) { FlushRegister(i, FlushMode::All, tmp_reg); return; @@ -532,17 +518,17 @@ void Arm64FPRCache::Flush(FlushMode mode, ARM64Reg tmp_reg, { for (size_t i = 0; i < m_guest_registers.size(); ++i) { - const RegType reg_type = m_guest_registers[i].GetType(); - - if (reg_type == RegType::Discarded) - { - ASSERT_MSG(DYNA_REC, ignore_discarded_registers != IgnoreDiscardedRegisters::No, - "Attempted to flush discarded register"); - } - else if (reg_type != RegType::NotLoaded) + if (m_guest_registers[i].IsInHostRegister()) { FlushRegister(i, mode, tmp_reg); } + else + { + ASSERT_MSG(DYNA_REC, + ignore_discarded_registers != IgnoreDiscardedRegisters::No || + m_guest_registers[i].IsInPPCState(), + "Attempted to flush discarded register"); + } } } @@ -551,9 +537,32 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) OpArg& reg = m_guest_registers[preg]; IncrementAllUsed(); reg.ResetLastUsed(); + + if (!reg.IsInHostRegister()) + { + ASSERT_MSG(DYNA_REC, reg.IsInPPCState(), "Attempted to read discarded register"); + + ARM64Reg host_reg = 
GetReg(); + u32 load_size; + if (type == RegType::Register) + { + load_size = 128; + reg.Load(host_reg, RegType::Register); + } + else + { + load_size = 64; + reg.Load(host_reg, RegType::LowerPair); + } + reg.SetDirty(false); + m_float_emit->LDR(load_size, IndexType::Unsigned, host_reg, PPC_REG, + static_cast(PPCSTATE_OFF_PS0(preg))); + return host_reg; + } + ARM64Reg host_reg = reg.GetReg(); - switch (reg.GetType()) + switch (reg.GetFPRType()) { case RegType::Single: { @@ -636,28 +645,6 @@ ARM64Reg Arm64FPRCache::R(size_t preg, RegType type) } return host_reg; } - case RegType::Discarded: - ASSERT_MSG(DYNA_REC, false, "Attempted to read discarded register"); - break; - case RegType::NotLoaded: // Register isn't loaded at /all/ - { - host_reg = GetReg(); - u32 load_size; - if (type == RegType::Register) - { - load_size = 128; - reg.Load(host_reg, RegType::Register); - } - else - { - load_size = 64; - reg.Load(host_reg, RegType::LowerPair); - } - reg.SetDirty(false); - m_float_emit->LDR(load_size, IndexType::Unsigned, host_reg, PPC_REG, - static_cast(PPCSTATE_OFF_PS0(preg))); - return host_reg; - } default: DEBUG_ASSERT_MSG(DYNA_REC, false, "Invalid OpArg Type!"); break; @@ -673,16 +660,17 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty) IncrementAllUsed(); reg.ResetLastUsed(); - // Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty. - if (reg.IsDirty() && (type == RegType::LowerPair || type == RegType::LowerPairSingle)) + // If PS1 is dirty, but the caller wants a RegType with only PS0, we must write PS1 to m_ppc_state + // now so the contents of PS1 aren't lost. + if (!reg.IsInPPCState() && (type == RegType::LowerPair || type == RegType::LowerPairSingle)) { - // We must *not* change host_reg as this register might still be in use. So it's fine to - // store this register, but it's *not* fine to convert it to double. So for double conversion, - // a temporary register needs to be used. 
+ // We must *not* modify host_reg, as the current guest instruction might want to read its old + // value before overwriting it. So it's fine to store this register, but it's *not* fine to + // convert it to double in place. For double conversion, a temporary register needs to be used. ARM64Reg host_reg = reg.GetReg(); ARM64Reg flush_reg = host_reg; - switch (reg.GetType()) + switch (reg.GetFPRType()) { case RegType::Single: // For a store-safe register, conversion is just one instruction regardless of whether @@ -724,8 +712,8 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty) // Store PSR1 (which is equal to PSR0) in memory. m_float_emit->STR(64, IndexType::Unsigned, flush_reg, PPC_REG, static_cast(PPCSTATE_OFF_PS1(preg))); - reg.Load(host_reg, reg.GetType() == RegType::DuplicatedSingle ? RegType::LowerPairSingle : - RegType::LowerPair); + reg.Load(host_reg, reg.GetFPRType() == RegType::DuplicatedSingle ? RegType::LowerPairSingle : + RegType::LowerPair); break; default: // All other types doesn't store anything in PSR1. @@ -736,7 +724,7 @@ ARM64Reg Arm64FPRCache::RW(size_t preg, RegType type, bool set_dirty) Unlock(flush_reg); } - if (reg.GetType() == RegType::NotLoaded || reg.GetType() == RegType::Discarded) + if (!reg.IsInHostRegister()) { // If not loaded at all, just alloc a new one. 
reg.Load(GetReg(), type); @@ -800,10 +788,8 @@ void Arm64FPRCache::FlushByHost(ARM64Reg host_reg, ARM64Reg tmp_reg) for (size_t i = 0; i < m_guest_registers.size(); ++i) { const OpArg& reg = m_guest_registers[i]; - const RegType reg_type = reg.GetType(); - if (reg_type != RegType::NotLoaded && reg_type != RegType::Discarded && - reg.GetReg() == host_reg) + if (reg.IsInHostRegister() && reg.GetReg() == host_reg) { FlushRegister(i, FlushMode::All, tmp_reg); return; @@ -820,8 +806,8 @@ bool Arm64FPRCache::IsTopHalfUsed(ARM64Reg reg) const { for (const OpArg& r : m_guest_registers) { - if (r.GetReg() != ARM64Reg::INVALID_REG && DecodeReg(r.GetReg()) == DecodeReg(reg)) - return r.GetType() == RegType::Register; + if (r.IsInHostRegister() && DecodeReg(r.GetReg()) == DecodeReg(reg)) + return r.GetFPRType() == RegType::Register; } return false; @@ -831,8 +817,8 @@ void Arm64FPRCache::FlushRegister(size_t preg, FlushMode mode, ARM64Reg tmp_reg) { OpArg& reg = m_guest_registers[preg]; const ARM64Reg host_reg = reg.GetReg(); - const bool dirty = reg.IsDirty(); - RegType type = reg.GetType(); + const bool dirty = !reg.IsInPPCState(); + RegType type = reg.GetFPRType(); bool allocated_tmp_reg = false; if (tmp_reg != ARM64Reg::INVALID_REG) @@ -939,7 +925,7 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed() const bool Arm64FPRCache::IsSingle(size_t preg, bool lower_only) const { - const RegType type = m_guest_registers[preg].GetType(); + const RegType type = m_guest_registers[preg].GetFPRType(); return type == RegType::Single || type == RegType::DuplicatedSingle || (lower_only && type == RegType::LowerPairSingle); } @@ -947,18 +933,18 @@ bool Arm64FPRCache::IsSingle(size_t preg, bool lower_only) const void Arm64FPRCache::FixSinglePrecision(size_t preg) { OpArg& reg = m_guest_registers[preg]; + if (!reg.IsInHostRegister()) + return; + ARM64Reg host_reg = reg.GetReg(); - switch (reg.GetType()) + if (reg.GetFPRType() == RegType::Duplicated) // only PS0 needs to be converted { - case 
RegType::Duplicated: // only PS0 needs to be converted m_float_emit->FCVT(32, 64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); reg.Load(host_reg, RegType::DuplicatedSingle); - break; - case RegType::Register: // PS0 and PS1 need to be converted + } + else if (reg.GetFPRType() == RegType::Register) // PS0 and PS1 need to be converted + { m_float_emit->FCVTN(32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); reg.Load(host_reg, RegType::Single); - break; - default: - break; } } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 82c0d941427..d547ee9d4ee 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -60,15 +60,12 @@ static_assert(PPCSTATE_OFF(xer_so_ov) < 4096, "STRB can't store xer_so_ov!"); enum class RegType { - NotLoaded, - Discarded, // Reg is in ConstantPropagation, or isn't loaded at all - Register, // Reg type is register - LowerPair, // Only the lower pair of a paired register - Duplicated, // The lower reg is the same as the upper one (physical upper doesn't actually have - // the duplicated value) - Single, // Both registers are loaded as single - LowerPairSingle, // Only the lower pair of a paired register, as single - DuplicatedSingle, // The lower one contains both registers, as single + Register, // PS0 and PS1, each 64-bit + LowerPair, // PS0 only, 64-bit + Duplicated, // PS0 and PS1 are identical, host register only stores one lane (64-bit) + Single, // PS0 and PS1, each 32-bit + LowerPairSingle, // PS0 only, 32-bit + DuplicatedSingle, // PS0 and PS1 are identical, host register only stores one lane (32-bit) }; enum class FlushMode : bool @@ -91,19 +88,21 @@ class OpArg public: OpArg() = default; - RegType GetType() const { return m_type; } + RegType GetFPRType() const { return m_fpr_type; } Arm64Gen::ARM64Reg GetReg() const { return m_reg; } - void 
Load(Arm64Gen::ARM64Reg reg, RegType type = RegType::Register) + void Load(Arm64Gen::ARM64Reg reg, RegType format = RegType::Register) { - m_type = type; m_reg = reg; + m_fpr_type = format; + m_in_host_register = true; } void Discard() { // Invalidate any previous information - m_type = RegType::Discarded; m_reg = Arm64Gen::ARM64Reg::INVALID_REG; - m_dirty = true; + m_fpr_type = RegType::Register; + m_in_ppc_state = false; + m_in_host_register = false; // Arbitrarily large value that won't roll over on a lot of increments m_last_used = 0xFFFF; @@ -111,9 +110,10 @@ public: void Flush() { // Invalidate any previous information - m_type = RegType::NotLoaded; m_reg = Arm64Gen::ARM64Reg::INVALID_REG; - m_dirty = false; + m_fpr_type = RegType::Register; + m_in_ppc_state = true; + m_in_host_register = false; // Arbitrarily large value that won't roll over on a lot of increments m_last_used = 0xFFFF; @@ -122,17 +122,18 @@ public: u32 GetLastUsed() const { return m_last_used; } void ResetLastUsed() { m_last_used = 0; } void IncrementLastUsed() { ++m_last_used; } - void SetDirty(bool dirty) { m_dirty = dirty; } - bool IsDirty() const { return m_dirty; } + void SetDirty(bool dirty) { m_in_ppc_state = !dirty; } + bool IsInPPCState() const { return m_in_ppc_state; } + bool IsInHostRegister() const { return m_in_host_register; } private: - // For REG_REG - RegType m_type = RegType::NotLoaded; // store type Arm64Gen::ARM64Reg m_reg = Arm64Gen::ARM64Reg::INVALID_REG; // host register we are in + RegType m_fpr_type = RegType::Register; // for FPRs only u32 m_last_used = 0; - bool m_dirty = false; + bool m_in_ppc_state = true; + bool m_in_host_register = false; }; class HostReg @@ -446,9 +447,9 @@ public: // Returns a guest register inside of a host register // Will dump an immediate to the host register as well - Arm64Gen::ARM64Reg R(size_t preg, RegType type); + Arm64Gen::ARM64Reg R(size_t preg, RegType format); - Arm64Gen::ARM64Reg RW(size_t preg, RegType type, bool set_dirty = 
true); + Arm64Gen::ARM64Reg RW(size_t preg, RegType format, bool set_dirty = true); BitSet32 GetCallerSavedUsed() const override;