Jit64: Make ABI_CallFunction generic

This replaces all the hand-coded variants of ABI_CallFunction with a
single generic function. It handles all existing cases, plus many new
cases that would have been annoying to hand-code.

The implementation is based on our AArch64 ABI_CallFunction, but
adapted for x64. The most notable differences are support for memory
operands and a new way for callers to specify the sizes of operands.
JosJuice 2025-12-14 14:28:54 +01:00
parent 04f71e5e6d
commit a024fa9cca
21 changed files with 754 additions and 292 deletions

View File

@ -57,6 +57,10 @@ public:
class Iterator
{
public:
using value_type = int;
using difference_type = std::ptrdiff_t;
constexpr Iterator() = default;
constexpr Iterator(const Iterator& other) : m_val(other.m_val), m_bit(other.m_bit) {}
constexpr Iterator(IntTy val, int bit) : m_val(val), m_bit(bit) {}
Iterator& operator=(Iterator other)
@ -89,8 +93,8 @@ public:
constexpr bool operator!=(Iterator other) const { return m_bit != other.m_bit; }
private:
IntTy m_val;
int m_bit;
IntTy m_val = 0;
int m_bit = -1;
};
constexpr BitSet() = default;
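The new value_type/difference_type aliases and the defaulted members presumably exist so that BitSet32::Iterator satisfies the iterator requirements of the <algorithm> functions now applied to register sets, and so that a default-constructed iterator has a well-defined end-like state. A minimal sketch of the pattern this enables (used by ParallelMoves in x64ABI.cpp below):

    const BitSet32 candidates = ABI_ALL_CALLER_SAVED & ABI_ALL_GPRS;
    const auto it = std::find_if(candidates.begin(), candidates.end(),
                                 [](int reg) { return reg != RAX; });  // any predicate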

View File

@ -3,6 +3,11 @@
#include "Common/x64ABI.h"
#include <algorithm>
#include <array>
#include <optional>
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
@ -40,6 +45,74 @@ void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_
*xmm_offsetp = subtraction - xmm_base_subtraction;
}
// This uses a hand-rolled algorithm. The goal is zero memory allocations, not necessarily
// the best time complexity at JIT time. (The number of moves is usually very small.)
void XEmitter::ParallelMoves(RegisterMove* begin, RegisterMove* end,
std::array<u8, 16>* source_reg_uses,
std::array<u8, 16>* destination_reg_uses)
{
while (begin != end)
{
bool removed_moves_during_this_loop_iteration = false;
RegisterMove* move = end;
while (move != begin)
{
RegisterMove* prev_move = move;
--move;
if ((*source_reg_uses)[move->destination] == 0)
{
// TODO: Use sign extension instead of zero extension if the relevant type is signed.
if (move->bits < 32)
MOVZX(32, move->bits, move->destination, move->source);
else
MOV(move->bits, R(move->destination), move->source);
(*destination_reg_uses)[move->destination]++;
if (const std::optional<X64Reg> base_reg = move->source.GetBaseReg())
(*source_reg_uses)[*base_reg]--;
if (const std::optional<X64Reg> index_reg = move->source.GetIndexReg())
(*source_reg_uses)[*index_reg]--;
std::move(prev_move, end, move);
--end;
removed_moves_during_this_loop_iteration = true;
}
}
if (!removed_moves_during_this_loop_iteration)
{
// We need to break a cycle using a temporary register.
const BitSet32 temp_reg_candidates = ABI_ALL_CALLER_SAVED & ABI_ALL_GPRS;
const auto temp_reg =
std::find_if(temp_reg_candidates.begin(), temp_reg_candidates.end(), [&](int reg) {
return (*source_reg_uses)[reg] == 0 && (*destination_reg_uses)[reg] == 0;
});
ASSERT_MSG(COMMON, temp_reg != temp_reg_candidates.end(), "Out of registers");
const X64Reg source = begin->destination;
const X64Reg destination = static_cast<X64Reg>(*temp_reg);
const bool need_64_bit_move = std::any_of(begin, end, [source](const RegisterMove& m) {
return (m.source.GetBaseReg() == source || m.source.GetIndexReg() == source) &&
(m.source.scale != SCALE_NONE || m.bits == 64);
});
MOV(need_64_bit_move ? 64 : 32, R(destination), R(source));
(*source_reg_uses)[destination] = (*source_reg_uses)[source];
(*source_reg_uses)[source] = 0;
std::for_each(begin, end, [source, destination](RegisterMove& m) {
if (m.source.GetBaseReg() == source)
m.source.offsetOrBaseReg = destination;
if (m.source.GetIndexReg() == source)
m.source.indexReg = destination;
});
}
}
}
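A minimal sketch of the cycle-breaking path, assuming the System V parameter registers: if the pending moves are RDI <- RSI and RSI <- RDI, neither can be emitted first, so the first free caller-saved GPR (here RAX) breaks the cycle:

    MOV(32, R(RAX), R(RDI));  // stash one endpoint in the temporary
    MOV(32, R(RDI), R(RSI));  // RDI is no longer a pending source
    MOV(32, R(RSI), R(RAX));  // finish the swap from the temporary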
size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment,
size_t needed_frame_size)
{

View File

@ -3,6 +3,8 @@
#pragma once
#include <array>
#include "Common/BitSet.h"
#include "Common/x64Reg.h"
@ -24,34 +26,46 @@
// Callee-save: RBX RBP R12 R13 R14 R15
// Parameters: RDI RSI RDX RCX R8 R9
#define ABI_ALL_FPRS BitSet32(0xffff0000)
#define ABI_ALL_GPRS BitSet32(0x0000ffff)
namespace Gen
{
constexpr BitSet32 ABI_ALL_FPRS(0xffff0000);
constexpr BitSet32 ABI_ALL_GPRS(0x0000ffff);
#ifdef _WIN32 // 64-bit Windows - the really exotic calling convention
#define ABI_PARAM1 RCX
#define ABI_PARAM2 RDX
#define ABI_PARAM3 R8
#define ABI_PARAM4 R9
constexpr X64Reg ABI_PARAM1 = RCX;
constexpr X64Reg ABI_PARAM2 = RDX;
constexpr X64Reg ABI_PARAM3 = R8;
constexpr X64Reg ABI_PARAM4 = R9;
constexpr std::array<X64Reg, 4> ABI_PARAMS = {ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4};
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
#define ABI_ALL_CALLER_SAVED \
(BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16, \
XMM4 + 16, XMM5 + 16})
#else // 64-bit Unix / OS X
constexpr BitSet32 ABI_ALL_CALLER_SAVED{RAX, RCX, RDX, R8, R9,
R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16,
XMM3 + 16, XMM4 + 16, XMM5 + 16};
#define ABI_PARAM1 RDI
#define ABI_PARAM2 RSI
#define ABI_PARAM3 RDX
#define ABI_PARAM4 RCX
#define ABI_PARAM5 R8
#define ABI_PARAM6 R9
#else // 64-bit Unix / OS X
constexpr X64Reg ABI_PARAM1 = RDI;
constexpr X64Reg ABI_PARAM2 = RSI;
constexpr X64Reg ABI_PARAM3 = RDX;
constexpr X64Reg ABI_PARAM4 = RCX;
constexpr X64Reg ABI_PARAM5 = R8;
constexpr X64Reg ABI_PARAM6 = R9;
constexpr std::array<X64Reg, 6> ABI_PARAMS = {ABI_PARAM1, ABI_PARAM2, ABI_PARAM3,
ABI_PARAM4, ABI_PARAM5, ABI_PARAM6};
// FIXME: avoid pushing all 16 XMM registers when possible? most functions we call probably
// don't actually clobber them.
#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS)
constexpr BitSet32 ABI_ALL_CALLER_SAVED(BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} |
ABI_ALL_FPRS);
#endif // _WIN32
#define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED)
constexpr BitSet32 ABI_ALL_CALLEE_SAVED(~ABI_ALL_CALLER_SAVED);
#define ABI_RETURN RAX
constexpr X64Reg ABI_RETURN = RAX;
}; // namespace Gen
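Turning the old macros into typed constexpr values is what lets them be collected into the ABI_PARAMS array, which the generic ABI_CallFunction indexes positionally when placing arguments:

    const X64Reg destination_reg = ABI_PARAMS[i];  // register for the i-th integer argument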

View File

@ -5,9 +5,12 @@
#pragma once
#include <array>
#include <concepts>
#include <cstddef>
#include <cstring>
#include <functional>
#include <optional>
#include <tuple>
#include <type_traits>
@ -15,6 +18,7 @@
#include "Common/BitSet.h"
#include "Common/CodeBlock.h"
#include "Common/CommonTypes.h"
#include "Common/SmallVector.h"
#include "Common/x64ABI.h"
namespace Gen
@ -226,6 +230,39 @@ struct OpArg
}
private:
constexpr std::optional<X64Reg> GetBaseReg() const
{
switch (scale)
{
case SCALE_NONE:
case SCALE_1:
case SCALE_2:
case SCALE_4:
case SCALE_8:
case SCALE_ATREG:
return static_cast<X64Reg>(offsetOrBaseReg);
default:
return std::nullopt;
}
}
constexpr std::optional<X64Reg> GetIndexReg() const
{
switch (scale)
{
case SCALE_1:
case SCALE_2:
case SCALE_4:
case SCALE_8:
case SCALE_NOBASE_2:
case SCALE_NOBASE_4:
case SCALE_NOBASE_8:
return static_cast<X64Reg>(indexReg);
default:
return std::nullopt;
}
}
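As a sketch of what these helpers classify (MComplex builds base + index * scale + disp):

    const OpArg addr = MComplex(RBX, RCX, SCALE_4, 8);
    // addr.GetBaseReg()  returns RBX
    // addr.GetIndexReg() returns RCX
    // R(RBX) and MatR(RBX) both report RBX as the base register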
void WriteREX(XEmitter* emit, int opBits, int bits, int customOp = -1) const;
void WriteVEX(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm,
int W = 0) const;
@ -327,6 +364,13 @@ class XEmitter
{
friend struct OpArg; // for Write8 etc
private:
struct RegisterMove
{
int bits;
X64Reg destination;
OpArg source;
};
// Pointer to memory where code will be emitted to.
u8* code = nullptr;
@ -377,6 +421,11 @@ private:
void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size,
size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
// This function solves the "parallel moves" problem that's common in compilers.
// The arguments are mutated!
void ParallelMoves(RegisterMove* begin, RegisterMove* end, std::array<u8, 16>* source_reg_uses,
std::array<u8, 16>* destination_reg_uses);
protected:
void Write8(u8 value);
void Write16(u16 value);
@ -988,10 +1037,10 @@ public:
void RDTSC();
// Utility functions
// The difference between this and CALL is that this aligns the stack
// where appropriate.
// Calls a function without setting up arguments or aligning the stack.
template <typename FunctionPointer>
void ABI_CallFunction(FunctionPointer func)
void QuickCallFunction(FunctionPointer func)
{
static_assert(std::is_pointer<FunctionPointer>() &&
std::is_function<std::remove_pointer_t<FunctionPointer>>(),
@ -1013,167 +1062,140 @@ public:
}
}
template <typename FunctionPointer>
void ABI_CallFunctionC16(FunctionPointer func, u16 param1)
struct CallFunctionArg
{
MOV(32, R(ABI_PARAM1), Imm32(param1));
ABI_CallFunction(func);
}
template <std::integral T>
CallFunctionArg(T imm)
: bits(sizeof(T) > 4 ? 64 : 32),
op_arg(sizeof(T) > 4 ? Imm64(imm) : Imm32(std::is_signed_v<T> ? s32(imm) : u32(imm)))
{
}
template <typename FunctionPointer>
void ABI_CallFunctionCC16(FunctionPointer func, u32 param1, u16 param2)
{
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
ABI_CallFunction(func);
}
CallFunctionArg(const void* imm) : bits(64), op_arg(Imm64(reinterpret_cast<u64>(imm))) {}
template <typename FunctionPointer>
void ABI_CallFunctionC(FunctionPointer func, u32 param1)
{
MOV(32, R(ABI_PARAM1), Imm32(param1));
ABI_CallFunction(func);
}
CallFunctionArg(int bits_, const OpArg& op_arg_) : bits(bits_), op_arg(op_arg_) {}
template <typename FunctionPointer>
void ABI_CallFunctionCC(FunctionPointer func, u32 param1, u32 param2)
{
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
ABI_CallFunction(func);
}
CallFunctionArg(int bits_, const X64Reg reg) : bits(bits_), op_arg(R(reg)) {}
template <typename FunctionPointer>
void ABI_CallFunctionCP(FunctionPointer func, u32 param1, const void* param2)
{
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(64, R(ABI_PARAM2), Imm64(reinterpret_cast<u64>(param2)));
ABI_CallFunction(func);
}
int bits;
OpArg op_arg;
};
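A sketch of the forms a caller can pass (func and object are placeholders, not real names):

    ABI_CallFunction(func, 123);                            // integral immediate
    ABI_CallFunction(func, &object);                        // pointer immediate
    ABI_CallFunction(func, CallFunctionArg(32, EAX));       // register, explicit size
    ABI_CallFunction(func, CallFunctionArg(16, MatR(RAX))); // memory operand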
template <typename FunctionPointer>
void ABI_CallFunctionCCC(FunctionPointer func, u32 param1, u32 param2, u32 param3)
// Calls a function using the given arguments. This doesn't adjust the stack; you can do that
// using ABI_PushRegistersAndAdjustStack and ABI_PopRegistersAndAdjustStack.
template <typename FuncRet, typename... FuncArgs, std::convertible_to<CallFunctionArg>... Args>
void ABI_CallFunction(FuncRet (*func)(FuncArgs...), Args... args)
{
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
ABI_CallFunction(func);
}
static_assert(sizeof...(FuncArgs) == sizeof...(Args), "Wrong number of arguments");
static_assert(sizeof...(FuncArgs) <= ABI_PARAMS.size(),
"Passing arguments on the stack is not supported");
template <typename FunctionPointer>
void ABI_CallFunctionCCP(FunctionPointer func, u32 param1, u32 param2, const void* param3)
{
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(64, R(ABI_PARAM3), Imm64(reinterpret_cast<u64>(param3)));
ABI_CallFunction(func);
}
if constexpr (!std::is_void_v<FuncRet>)
static_assert(sizeof(FuncRet) <= 8, "Large return types are not supported");
template <typename FunctionPointer>
void ABI_CallFunctionCCCP(FunctionPointer func, u32 param1, u32 param2, u32 param3,
const void* param4)
{
MOV(32, R(ABI_PARAM1), Imm32(param1));
MOV(32, R(ABI_PARAM2), Imm32(param2));
MOV(32, R(ABI_PARAM3), Imm32(param3));
MOV(64, R(ABI_PARAM4), Imm64(reinterpret_cast<u64>(param4)));
ABI_CallFunction(func);
}
std::array<u8, 16> source_reg_uses{};
std::array<u8, 16> destination_reg_uses{};
template <typename FunctionPointer>
void ABI_CallFunctionP(FunctionPointer func, const void* param1)
{
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(param1)));
ABI_CallFunction(func);
}
auto check_argument = [&](const CallFunctionArg& arg) {
if (const std::optional<X64Reg> base_reg = arg.op_arg.GetBaseReg())
source_reg_uses[*base_reg]++;
if (const std::optional<X64Reg> index_reg = arg.op_arg.GetIndexReg())
source_reg_uses[*index_reg]++;
};
template <typename FunctionPointer>
void ABI_CallFunctionPP(FunctionPointer func, const void* param1, const void* param2)
{
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(param1)));
MOV(64, R(ABI_PARAM2), Imm64(reinterpret_cast<u64>(param2)));
ABI_CallFunction(func);
}
(check_argument(args), ...);
template <typename FunctionPointer>
void ABI_CallFunctionPC(FunctionPointer func, const void* param1, u32 param2)
{
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(param1)));
MOV(32, R(ABI_PARAM2), Imm32(param2));
ABI_CallFunction(func);
}
{
Common::SmallVector<RegisterMove, sizeof...(Args)> pending_moves;
template <typename FunctionPointer>
void ABI_CallFunctionPPC(FunctionPointer func, const void* param1, const void* param2, u32 param3)
{
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(param1)));
MOV(64, R(ABI_PARAM2), Imm64(reinterpret_cast<u64>(param2)));
MOV(32, R(ABI_PARAM3), Imm32(param3));
ABI_CallFunction(func);
}
size_t i = 0;
// Pass a register as a parameter.
template <typename FunctionPointer>
void ABI_CallFunctionR(FunctionPointer func, X64Reg reg1)
{
if (reg1 != ABI_PARAM1)
MOV(32, R(ABI_PARAM1), R(reg1));
ABI_CallFunction(func);
}
auto handle_non_imm_argument = [&](const CallFunctionArg& arg) {
if (!arg.op_arg.IsImm())
{
// When types that are 32 bits or smaller are stored in a register, bits 32-63 of the
// register can contain garbage. However, the System V ABI doesn't specify any rules
// for bits 8-31 and 16-31 for char and short respectively. (It does specify that _Bool
// must contain zeroes in bits 1-7 but can contain garbage in bits 8-31.) Apple's ABI
// documentation says that zero/sign extending is required for char and short, and GCC and
// Clang seem to assume that it's required. The situation on Windows is currently unknown.
// Let's assume that it's required on all platforms.
//
// TODO: Use sign extension instead of zero extension if the relevant type is signed.
const bool zero_extend = arg.bits < 32;
// Pass a pointer and register as a parameter.
template <typename FunctionPointer>
void ABI_CallFunctionPR(FunctionPointer func, const void* ptr, X64Reg reg1)
{
if (reg1 != ABI_PARAM2)
MOV(64, R(ABI_PARAM2), R(reg1));
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(ptr)));
ABI_CallFunction(func);
}
const X64Reg destination_reg = ABI_PARAMS[i];
// Pass two registers as parameters.
template <typename FunctionPointer>
void ABI_CallFunctionRR(FunctionPointer func, X64Reg reg1, X64Reg reg2)
{
MOVTwo(64, ABI_PARAM1, reg1, 0, ABI_PARAM2, reg2);
ABI_CallFunction(func);
}
if (arg.op_arg.IsSimpleReg(destination_reg) && !zero_extend)
{
// The value is already in the right register.
destination_reg_uses[destination_reg]++;
source_reg_uses[destination_reg]--;
}
else if (source_reg_uses[destination_reg] == 0)
{
// The destination register isn't used as the source of another move.
// We can go ahead and do the move right away.
if (zero_extend)
MOVZX(32, arg.bits, destination_reg, arg.op_arg);
else
MOV(arg.bits, R(destination_reg), arg.op_arg);
// Pass a pointer and two registers as parameters.
template <typename FunctionPointer>
void ABI_CallFunctionPRR(FunctionPointer func, const void* ptr, X64Reg reg1, X64Reg reg2)
{
MOVTwo(64, ABI_PARAM2, reg1, 0, ABI_PARAM3, reg2);
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(ptr)));
ABI_CallFunction(func);
}
destination_reg_uses[destination_reg]++;
if (const std::optional<X64Reg> base_reg = arg.op_arg.GetBaseReg())
source_reg_uses[*base_reg]--;
if (const std::optional<X64Reg> index_reg = arg.op_arg.GetIndexReg())
source_reg_uses[*index_reg]--;
}
else
{
// The destination register is used as the source of a move we haven't gotten to yet.
// Let's record that we need to deal with this move later.
pending_moves.emplace_back(arg.bits, destination_reg, arg.op_arg);
}
}
template <typename FunctionPointer>
void ABI_CallFunctionAC(int bits, FunctionPointer func, const Gen::OpArg& arg1, u32 param2)
{
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(bits, R(ABI_PARAM1), arg1);
MOV(32, R(ABI_PARAM2), Imm32(param2));
ABI_CallFunction(func);
}
++i;
};
template <typename FunctionPointer>
void ABI_CallFunctionPAC(int bits, FunctionPointer func, const void* ptr1, const Gen::OpArg& arg2,
u32 param3)
{
if (!arg2.IsSimpleReg(ABI_PARAM2))
MOV(bits, R(ABI_PARAM2), arg2);
MOV(32, R(ABI_PARAM3), Imm32(param3));
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(ptr1)));
ABI_CallFunction(func);
}
(handle_non_imm_argument(args), ...);
template <typename FunctionPointer>
void ABI_CallFunctionA(int bits, FunctionPointer func, const Gen::OpArg& arg1)
{
if (!arg1.IsSimpleReg(ABI_PARAM1))
MOV(bits, R(ABI_PARAM1), arg1);
ABI_CallFunction(func);
if (!pending_moves.empty())
{
ParallelMoves(pending_moves.data(), pending_moves.data() + pending_moves.size(),
&source_reg_uses, &destination_reg_uses);
}
}
{
size_t i = 0;
auto handle_imm_argument = [&](const CallFunctionArg& arg) {
switch (arg.op_arg.scale)
{
case SCALE_IMM8:
// TODO: Use sign extension instead of zero extension if the relevant type is signed.
MOV(32, R(ABI_PARAMS[i]), Imm32(arg.op_arg.Imm8()));
break;
case SCALE_IMM16:
// TODO: Use sign extension instead of zero extension if the relevant type is signed.
MOV(32, R(ABI_PARAMS[i]), Imm32(arg.op_arg.Imm16()));
break;
case SCALE_IMM32:
case SCALE_IMM64:
MOV(arg.bits, R(ABI_PARAMS[i]), arg.op_arg);
break;
default:
break;
}
++i;
};
(handle_imm_argument(args), ...);
}
QuickCallFunction(func);
}
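The ordering here matters: register and memory arguments are placed first, with conflicting moves deferred to ParallelMoves, and immediates are loaded last, since loading an immediate can never clobber a still-needed source. A sketch assuming the System V ABI (mirroring the TwoRegistersMixed test below):

    MOV(32, R(ABI_PARAM1), Imm32(20));
    ABI_CallFunction(&TwoParameterFunction, 10, CallFunctionArg(32, ABI_PARAM1));
    // emits roughly: MOV ESI, EDI  (register argument placed first)
    //                MOV EDI, 10   (immediate loaded last)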
// Helper method for ABI functions related to calling functions. May be used by itself as well.
@ -1192,17 +1214,18 @@ public:
// Unfortunately, calling operator() directly is undefined behavior in C++
// (this method might be a thunk in the case of multi-inheritance) so we
// have to go through a trampoline function.
template <typename T, typename... Args>
static T CallLambdaTrampoline(const std::function<T(Args...)>* f, Args... args)
template <typename FuncRet, typename... FuncArgs>
static FuncRet CallLambdaTrampoline(const std::function<FuncRet(FuncArgs...)>* f,
FuncArgs... args)
{
return (*f)(args...);
}
template <typename T, typename... Args>
void ABI_CallLambdaPC(const std::function<T(Args...)>* f, void* p1, u32 p2)
template <typename FuncRet, typename... FuncArgs, std::convertible_to<CallFunctionArg>... Args>
void ABI_CallLambda(const std::function<FuncRet(FuncArgs...)>* f, Args... args)
{
auto trampoline = &XEmitter::CallLambdaTrampoline<T, Args...>;
ABI_CallFunctionPPC(trampoline, reinterpret_cast<const void*>(f), p1, p2);
auto trampoline = &XEmitter::CallLambdaTrampoline<FuncRet, FuncArgs...>;
ABI_CallFunction(trampoline, reinterpret_cast<const void*>(f), args...);
}
}; // class XEmitter
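A usage sketch for ABI_CallLambda (taken from the MMU code below): only the std::function's address is baked into the emitted code, so the object must outlive that code:

    // 'lambda' must point to a long-lived std::function object:
    m_code->ABI_CallLambda(lambda, m_system, m_address);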

View File

@ -115,7 +115,7 @@ void DSPEmitter::checkExceptions(u32 retval)
DSPJitRegCache c(m_gpr);
m_gpr.SaveRegs();
ABI_CallFunctionP(CheckExceptionsThunk, &m_dsp_core);
ABI_CallFunction(CheckExceptionsThunk, &m_dsp_core);
TEST(32, R(ABI_RETURN), R(ABI_RETURN));
FixupBranch skip_return = J_CC(CC_Z, Jump::Short);
MOV(32, R(EAX), Imm32(retval));
@ -157,7 +157,7 @@ void DSPEmitter::FallBackToInterpreter(UDSPInstruction inst)
m_gpr.PushRegs();
ASSERT_MSG(DSPLLE, interpreter_function != nullptr, "No function for {:04x}", inst);
ABI_CallFunctionPC(FallbackThunk, &m_dsp_core.GetInterpreter(), inst);
ABI_CallFunction(FallbackThunk, &m_dsp_core.GetInterpreter(), inst);
m_gpr.PopRegs();
}
@ -190,7 +190,7 @@ void DSPEmitter::EmitInstruction(UDSPInstruction inst)
{
// Fall back to interpreter
m_gpr.PushRegs();
ABI_CallFunctionPC(FallbackExtThunk, &m_dsp_core.GetInterpreter(), inst);
ABI_CallFunction(FallbackExtThunk, &m_dsp_core.GetInterpreter(), inst);
m_gpr.PopRegs();
INFO_LOG_FMT(DSPLLE, "Instruction not JITed(ext part): {:04x}", inst);
ext_is_jit = false;
@ -217,7 +217,7 @@ void DSPEmitter::EmitInstruction(UDSPInstruction inst)
// need to call the online cleanup function because
// the writeBackLog gets populated at runtime
m_gpr.PushRegs();
ABI_CallFunctionP(ApplyWriteBackLogThunk, &m_dsp_core.GetInterpreter());
ABI_CallFunction(ApplyWriteBackLogThunk, &m_dsp_core.GetInterpreter());
m_gpr.PopRegs();
}
else
@ -420,8 +420,7 @@ void DSPEmitter::CompileCurrent(DSPEmitter& emitter)
const u8* DSPEmitter::CompileStub()
{
const u8* entryPoint = AlignCode16();
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(this)));
ABI_CallFunction(CompileCurrent);
ABI_CallFunction(CompileCurrent, this);
XOR(32, R(EAX), R(EAX)); // Return 0 cycles executed
JMP(m_return_dispatcher);
return entryPoint;

View File

@ -501,11 +501,6 @@ void DSPJitRegCache::PopRegs()
}
}
X64Reg DSPJitRegCache::MakeABICallSafe(X64Reg reg)
{
return reg;
}
void DSPJitRegCache::MovToHostReg(size_t reg, X64Reg host_reg, bool load)
{
ASSERT_MSG(DSPLLE, reg < m_regs.size(), "bad register name {}", reg);

View File

@ -108,10 +108,6 @@ public:
void PushRegs(); // Save registers before ABI call
void PopRegs(); // Restore registers after ABI call
// Returns a register with the same contents as reg that is safe
// to use through saveStaticRegs and for ABI-calls
Gen::X64Reg MakeABICallSafe(Gen::X64Reg reg);
// Gives no SCALE_RIP with abs(offset) >= 0x80000000
// 32/64 bit writes allowed when the register has a _64 or _32 suffix
// only 16 bit writes allowed without any suffix.

View File

@ -495,10 +495,9 @@ void DSPEmitter::dmem_write(X64Reg value)
// else if (saddr == 0xf)
SetJumpTarget(ifx);
DSPJitRegCache c(m_gpr);
X64Reg abisafereg = m_gpr.MakeABICallSafe(value);
MOVZX(32, 16, abisafereg, R(abisafereg));
m_gpr.PushRegs();
ABI_CallFunctionPRR(WriteIFXRegisterHelper, this, EAX, abisafereg);
ABI_CallFunction(WriteIFXRegisterHelper, this, CallFunctionArg(32, EAX),
CallFunctionArg(16, value));
m_gpr.PopRegs();
m_gpr.FlushRegs(c);
SetJumpTarget(end);
@ -515,10 +514,8 @@ void DSPEmitter::dmem_write_imm(u16 address, X64Reg value)
case 0xf: // Fxxx HW regs
{
MOV(16, R(EAX), Imm16(address));
X64Reg abisafereg = m_gpr.MakeABICallSafe(value);
m_gpr.PushRegs();
ABI_CallFunctionPRR(WriteIFXRegisterHelper, this, EAX, abisafereg);
ABI_CallFunction(WriteIFXRegisterHelper, this, address, CallFunctionArg(32, value));
m_gpr.PopRegs();
break;
}
@ -581,9 +578,8 @@ void DSPEmitter::dmem_read(X64Reg address)
// else if (saddr == 0xf)
// return gdsp_ifx_read(addr);
DSPJitRegCache c(m_gpr);
X64Reg abisafereg = m_gpr.MakeABICallSafe(address);
m_gpr.PushRegs();
ABI_CallFunctionPR(ReadIFXRegisterHelper, this, abisafereg);
ABI_CallFunction(ReadIFXRegisterHelper, this, CallFunctionArg(32, address));
m_gpr.PopRegs();
m_gpr.FlushRegs(c);
SetJumpTarget(end);
@ -607,7 +603,7 @@ void DSPEmitter::dmem_read_imm(u16 address)
case 0xf: // Fxxx HW regs
{
m_gpr.PushRegs();
ABI_CallFunctionPC(ReadIFXRegisterHelper, this, address);
ABI_CallFunction(ReadIFXRegisterHelper, this, address);
m_gpr.PopRegs();
break;
}

View File

@ -363,7 +363,7 @@ void Jit64::FallBackToInterpreter(UGeckoInstruction inst)
Interpreter::Instruction instr = Interpreter::GetInterpreterOp(inst);
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionPC(instr, &m_system.GetInterpreter(), inst.hex);
ABI_CallFunction(instr, &m_system.GetInterpreter(), inst.hex);
ABI_PopRegistersAndAdjustStack({}, 0);
// If the instruction wrote to any registers which were marked as discarded,
@ -420,7 +420,7 @@ void Jit64::HLEFunction(u32 hook_index)
gpr.Flush();
fpr.Flush();
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionCCP(HLE::ExecuteFromJIT, js.compilerPC, hook_index, &m_system);
ABI_CallFunction(HLE::ExecuteFromJIT, js.compilerPC, hook_index, &m_system);
ABI_PopRegistersAndAdjustStack({}, 0);
}
@ -463,7 +463,7 @@ bool Jit64::Cleanup()
CMP(64, R(RSCRATCH), Imm32(GPFifo::GATHER_PIPE_SIZE));
FixupBranch exit = J_CC(CC_L);
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(GPFifo::UpdateGatherPipe, &m_system.GetGPFifo());
ABI_CallFunction(GPFifo::UpdateGatherPipe, &m_system.GetGPFifo());
ABI_PopRegistersAndAdjustStack({}, 0);
SetJumpTarget(exit);
did_something = true;
@ -472,8 +472,8 @@ bool Jit64::Cleanup()
if (m_ppc_state.feature_flags & FEATURE_FLAG_PERFMON)
{
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionCCCP(PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst,
js.numFloatingPointInst, &m_ppc_state);
ABI_CallFunction(PowerPC::UpdatePerformanceMonitor, js.downcountAmount, js.numLoadStoreInst,
js.numFloatingPointInst, &m_ppc_state);
ABI_PopRegistersAndAdjustStack({}, 0);
did_something = true;
}
@ -481,8 +481,8 @@ bool Jit64::Cleanup()
if (IsProfilingEnabled())
{
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionPC(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(),
js.downcountAmount);
ABI_CallFunction(&JitBlock::ProfileData::EndProfiling, js.curBlock->profile_data.get(),
js.downcountAmount);
ABI_PopRegistersAndAdjustStack({}, 0);
did_something = true;
}
@ -667,7 +667,7 @@ void Jit64::WriteRfiExitDestInRSCRATCH()
MOV(32, PPCSTATE(npc), R(RSCRATCH));
Cleanup();
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(PowerPC::CheckExceptionsFromJIT, &m_system.GetPowerPC());
ABI_CallFunction(PowerPC::CheckExceptionsFromJIT, &m_system.GetPowerPC());
ABI_PopRegistersAndAdjustStack({}, 0);
EmitUpdateMembase();
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
@ -689,7 +689,7 @@ void Jit64::WriteExceptionExit()
MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, PPCSTATE(npc), R(RSCRATCH));
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(PowerPC::CheckExceptionsFromJIT, &m_system.GetPowerPC());
ABI_CallFunction(PowerPC::CheckExceptionsFromJIT, &m_system.GetPowerPC());
ABI_PopRegistersAndAdjustStack({}, 0);
EmitUpdateMembase();
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
@ -702,7 +702,7 @@ void Jit64::WriteExternalExceptionExit()
MOV(32, R(RSCRATCH), PPCSTATE(pc));
MOV(32, PPCSTATE(npc), R(RSCRATCH));
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(PowerPC::CheckExternalExceptionsFromJIT, &m_system.GetPowerPC());
ABI_CallFunction(PowerPC::CheckExternalExceptionsFromJIT, &m_system.GetPowerPC());
ABI_PopRegistersAndAdjustStack({}, 0);
EmitUpdateMembase();
SUB(32, PPCSTATE(downcount), Imm32(js.downcountAmount));
@ -908,13 +908,13 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
if (m_im_here_debug)
{
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(ImHere, this);
ABI_CallFunction(ImHere, this);
ABI_PopRegistersAndAdjustStack({}, 0);
}
// Conditionally add profiling code.
if (IsProfilingEnabled())
ABI_CallFunctionP(&JitBlock::ProfileData::BeginProfiling, b->profile_data.get());
ABI_CallFunction(&JitBlock::ProfileData::BeginProfiling, b->profile_data.get());
#if defined(_DEBUG) || defined(DEBUGFAST) || defined(NAN_CHECK)
// should help logged stack-traces become more accurate
@ -946,8 +946,8 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
const u8* target = GetCodePtr();
MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionPC(JitInterface::CompileExceptionCheckFromJIT, &m_system.GetJitInterface(),
static_cast<u32>(JitInterface::ExceptionType::PairedQuantize));
ABI_CallFunction(JitInterface::CompileExceptionCheckFromJIT, &m_system.GetJitInterface(),
static_cast<u32>(JitInterface::ExceptionType::PairedQuantize));
ABI_PopRegistersAndAdjustStack({}, 0);
JMP(asm_routines.dispatcher_no_check);
SwitchToNearCode();
@ -1003,7 +1003,7 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
js.mustCheckFifo = false;
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionP(GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
ABI_CallFunction(GPFifo::FastCheckGatherPipe, &m_system.GetGPFifo());
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
gatherPipeIntCheck = true;
}
@ -1068,7 +1068,7 @@ bool Jit64::DoJit(u32 em_address, JitBlock* b, u32 nextPC)
MOV(32, PPCSTATE(pc), Imm32(op.address));
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionP(PowerPC::CheckAndHandleBreakPointsFromJIT, &power_pc);
ABI_CallFunction(PowerPC::CheckAndHandleBreakPointsFromJIT, &power_pc);
ABI_PopRegistersAndAdjustStack({}, 0);
MOV(64, R(RSCRATCH), ImmPtr(cpu.GetStatePtr()));
CMP(32, MatR(RSCRATCH), Imm32(Common::ToUnderlying(CPU::State::Running)));
@ -1328,8 +1328,8 @@ void Jit64::IntializeSpeculativeConstants()
target = GetCodePtr();
MOV(32, PPCSTATE(pc), Imm32(js.blockStart));
ABI_PushRegistersAndAdjustStack({}, 0);
ABI_CallFunctionPC(JitInterface::CompileExceptionCheckFromJIT, &m_system.GetJitInterface(),
static_cast<u32>(JitInterface::ExceptionType::SpeculativeConstants));
ABI_CallFunction(JitInterface::CompileExceptionCheckFromJIT, &m_system.GetJitInterface(),
static_cast<u32>(JitInterface::ExceptionType::SpeculativeConstants));
ABI_PopRegistersAndAdjustStack({}, 0);
JMP(asm_routines.dispatcher_no_check);
SwitchToNearCode();

View File

@ -190,8 +190,7 @@ void Jit64AsmRoutineManager::Generate()
{
// Ok, no block, let's call the slow dispatcher
ABI_PushRegistersAndAdjustStack({}, 0);
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&m_jit)));
ABI_CallFunction(JitBase::Dispatch);
ABI_CallFunction(JitBase::Dispatch, &m_jit);
ABI_PopRegistersAndAdjustStack({}, 0);
TEST(64, R(ABI_RETURN), R(ABI_RETURN));
@ -210,9 +209,7 @@ void Jit64AsmRoutineManager::Generate()
ResetStack(*this);
ABI_PushRegistersAndAdjustStack({}, 0);
MOV(64, R(ABI_PARAM1), Imm64(reinterpret_cast<u64>(&m_jit)));
MOV(32, R(ABI_PARAM2), PPCSTATE(pc));
ABI_CallFunction(JitTrampoline);
ABI_CallFunction(JitTrampoline, &m_jit, CallFunctionArg(32, PPCSTATE(pc)));
ABI_PopRegistersAndAdjustStack({}, 0);
// If jitting triggered an ISI exception, MSR.DR may have changed

View File

@ -80,15 +80,12 @@ void Jit64::WriteBranchWatch(u32 origin, u32 destination, UGeckoInstruction inst
SetJumpTarget(branch_in);
ABI_PushRegistersAndAdjustStack(caller_save, 0);
// Some call sites have an optimization to use ABI_PARAM1 as a scratch register.
if (reg_a != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg_a));
MOV(64, R(ABI_PARAM2), Imm64(Core::FakeBranchWatchCollectionKey{origin, destination}));
MOV(32, R(ABI_PARAM3), Imm32(inst.hex));
ABI_CallFunction(m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
&Core::BranchWatch::HitVirtualFalse_fk) :
(condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
&Core::BranchWatch::HitPhysicalFalse_fk));
const auto function = m_ppc_state.msr.IR ? (condition ? &Core::BranchWatch::HitVirtualTrue_fk :
&Core::BranchWatch::HitVirtualFalse_fk) :
(condition ? &Core::BranchWatch::HitPhysicalTrue_fk :
&Core::BranchWatch::HitPhysicalFalse_fk);
ABI_CallFunction(function, CallFunctionArg(64, reg_a),
u64(Core::FakeBranchWatchCollectionKey{origin, destination}), inst.hex);
ABI_PopRegistersAndAdjustStack(caller_save, 0);
FixupBranch branch_out = J(Jump::Near);
@ -110,18 +107,11 @@ void Jit64::WriteBranchWatchDestInRSCRATCH(u32 origin, UGeckoInstruction inst, X
SwitchToFarCode();
SetJumpTarget(branch_in);
// Assert RSCRATCH won't be clobbered before it is moved from.
static_assert(ABI_PARAM1 != RSCRATCH);
ABI_PushRegistersAndAdjustStack(caller_save, 0);
// Some call sites have an optimization to use ABI_PARAM1 as a scratch register.
if (reg_a != ABI_PARAM1)
MOV(64, R(ABI_PARAM1), R(reg_a));
MOV(32, R(ABI_PARAM3), R(RSCRATCH));
MOV(32, R(ABI_PARAM2), Imm32(origin));
MOV(32, R(ABI_PARAM4), Imm32(inst.hex));
ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue :
&Core::BranchWatch::HitPhysicalTrue);
const auto function =
m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue : &Core::BranchWatch::HitPhysicalTrue;
ABI_CallFunction(function, CallFunctionArg(64, reg_a), origin, CallFunctionArg(32, RSCRATCH),
inst.hex);
ABI_PopRegistersAndAdjustStack(caller_save, 0);
FixupBranch branch_out = J(Jump::Near);

View File

@ -475,7 +475,7 @@ void Jit64::fmaddXX(UGeckoInstruction inst)
BitSet32 registers_in_use = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registers_in_use, 0);
ABI_CallFunction(static_cast<double (*)(double, double, double)>(&std::fma));
QuickCallFunction(static_cast<double (*)(double, double, double)>(&std::fma));
ABI_PopRegistersAndAdjustStack(registers_in_use, 0);
}

View File

@ -319,19 +319,13 @@ void Jit64::dcbx(UGeckoInstruction inst)
SwitchToFarCode();
SetJumpTarget(branch_in);
// Assert RSCRATCH2 won't be clobbered before it is moved from.
static_assert(RSCRATCH2 != ABI_PARAM1);
ABI_PushRegistersAndAdjustStack(bw_caller_save, 0);
MOV(64, R(ABI_PARAM1), R(bw_reg_a));
// RSCRATCH2 holds the number of faked branch watch hits. Move RSCRATCH2 first, because
// ABI_PARAM2 clobbers RSCRATCH2 on Windows and ABI_PARAM3 clobbers RSCRATCH2 on Linux!
MOV(32, R(ABI_PARAM4), R(RSCRATCH2));
const auto function = m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue_fk_n :
&Core::BranchWatch::HitPhysicalTrue_fk_n;
const PPCAnalyst::CodeOp& op = js.op[2];
MOV(64, R(ABI_PARAM2), Imm64(Core::FakeBranchWatchCollectionKey{op.address, op.branchTo}));
MOV(32, R(ABI_PARAM3), Imm32(op.inst.hex));
ABI_CallFunction(m_ppc_state.msr.IR ? &Core::BranchWatch::HitVirtualTrue_fk_n :
&Core::BranchWatch::HitPhysicalTrue_fk_n);
ABI_CallFunction(function, CallFunctionArg(64, bw_reg_a),
u64(Core::FakeBranchWatchCollectionKey{op.address, op.branchTo}),
op.inst.hex, CallFunctionArg(32, RSCRATCH2));
ABI_PopRegistersAndAdjustStack(bw_caller_save, 0);
FixupBranch branch_out = J(Jump::Near);
@ -398,13 +392,14 @@ void Jit64::dcbx(UGeckoInstruction inst)
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
if (make_loop)
{
ABI_CallFunctionPRR(JitInterface::InvalidateICacheLinesFromJIT, &m_system.GetJitInterface(),
effective_address, loop_counter);
ABI_CallFunction(JitInterface::InvalidateICacheLinesFromJIT, &m_system.GetJitInterface(),
CallFunctionArg(32, X64Reg(effective_address)),
CallFunctionArg(32, X64Reg(loop_counter)));
}
else
{
ABI_CallFunctionPR(JitInterface::InvalidateICacheLineFromJIT, &m_system.GetJitInterface(),
effective_address);
ABI_CallFunction(JitInterface::InvalidateICacheLineFromJIT, &m_system.GetJitInterface(),
CallFunctionArg(32, X64Reg(effective_address)));
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
asm_routines.ResetStack(*this);
@ -494,7 +489,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
FlushPCBeforeSlowAccess();
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit, &m_mmu, RSCRATCH);
ABI_CallFunction(PowerPC::ClearDCacheLineFromJit, &m_mmu, CallFunctionArg(32, RSCRATCH));
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
if (emit_fast_path)

View File

@ -287,7 +287,7 @@ void Jit64::mtspr(UGeckoInstruction inst)
FixupBranch dont_reset_icache = J_CC(CC_NC);
BitSet32 regs = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(regs, 0);
ABI_CallFunctionPP(DoICacheReset, &m_ppc_state, &m_system.GetJitInterface());
ABI_CallFunction(DoICacheReset, &m_ppc_state, &m_system.GetJitInterface());
ABI_PopRegistersAndAdjustStack(regs, 0);
SetJumpTarget(dont_reset_icache);
return;

View File

@ -280,7 +280,7 @@ private:
void CallLambda(int sbits, const std::function<T(Core::System&, u32)>* lambda)
{
m_code->ABI_PushRegistersAndAdjustStack(m_registers_in_use, 0);
m_code->ABI_CallLambdaPC(lambda, m_system, m_address);
m_code->ABI_CallLambda(lambda, m_system, m_address);
m_code->ABI_PopRegistersAndAdjustStack(m_registers_in_use, 0);
MoveOpArgToReg(sbits, R(ABI_RETURN));
}
@ -405,16 +405,16 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
switch (accessSize)
{
case 64:
ABI_CallFunctionPR(PowerPC::ReadFromJit<u64>, &m_jit.m_mmu, reg_addr);
ABI_CallFunction(PowerPC::ReadFromJit<u64>, &m_jit.m_mmu, CallFunctionArg(32, reg_addr));
break;
case 32:
ABI_CallFunctionPR(PowerPC::ReadFromJit<u32>, &m_jit.m_mmu, reg_addr);
ABI_CallFunction(PowerPC::ReadFromJit<u32>, &m_jit.m_mmu, CallFunctionArg(32, reg_addr));
break;
case 16:
ABI_CallFunctionPR(PowerPC::ReadFromJit<u16>, &m_jit.m_mmu, reg_addr);
ABI_CallFunction(PowerPC::ReadFromJit<u16>, &m_jit.m_mmu, CallFunctionArg(32, reg_addr));
break;
case 8:
ABI_CallFunctionPR(PowerPC::ReadFromJit<u8>, &m_jit.m_mmu, reg_addr);
ABI_CallFunction(PowerPC::ReadFromJit<u8>, &m_jit.m_mmu, CallFunctionArg(32, reg_addr));
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
@ -468,16 +468,16 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc
switch (accessSize)
{
case 64:
ABI_CallFunctionPC(PowerPC::ReadFromJit<u64>, &m_jit.m_mmu, address);
ABI_CallFunction(PowerPC::ReadFromJit<u64>, &m_jit.m_mmu, address);
break;
case 32:
ABI_CallFunctionPC(PowerPC::ReadFromJit<u32>, &m_jit.m_mmu, address);
ABI_CallFunction(PowerPC::ReadFromJit<u32>, &m_jit.m_mmu, address);
break;
case 16:
ABI_CallFunctionPC(PowerPC::ReadFromJit<u16>, &m_jit.m_mmu, address);
ABI_CallFunction(PowerPC::ReadFromJit<u16>, &m_jit.m_mmu, address);
break;
case 8:
ABI_CallFunctionPC(PowerPC::ReadFromJit<u8>, &m_jit.m_mmu, address);
ABI_CallFunction(PowerPC::ReadFromJit<u8>, &m_jit.m_mmu, address);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);
@ -573,34 +573,23 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
// If the input is an immediate, we need to put it in a register.
X64Reg reg;
if (reg_value.IsImm())
{
reg = reg_addr == ABI_PARAM1 ? RSCRATCH : ABI_PARAM1;
MOV(accessSize, R(reg), reg_value);
}
else
{
reg = reg_value.GetSimpleReg();
}
switch (accessSize)
{
case 64:
ABI_CallFunctionPRR(swap ? PowerPC::WriteFromJit<u64> : PowerPC::WriteU64SwapFromJit,
&m_jit.m_mmu, reg, reg_addr);
ABI_CallFunction(swap ? PowerPC::WriteFromJit<u64> : PowerPC::WriteU64SwapFromJit, &m_jit.m_mmu,
CallFunctionArg(accessSize, reg_value), CallFunctionArg(32, reg_addr));
break;
case 32:
ABI_CallFunctionPRR(swap ? PowerPC::WriteFromJit<u32> : PowerPC::WriteU32SwapFromJit,
&m_jit.m_mmu, reg, reg_addr);
ABI_CallFunction(swap ? PowerPC::WriteFromJit<u32> : PowerPC::WriteU32SwapFromJit, &m_jit.m_mmu,
CallFunctionArg(accessSize, reg_value), CallFunctionArg(32, reg_addr));
break;
case 16:
ABI_CallFunctionPRR(swap ? PowerPC::WriteFromJit<u16> : PowerPC::WriteU16SwapFromJit,
&m_jit.m_mmu, reg, reg_addr);
ABI_CallFunction(swap ? PowerPC::WriteFromJit<u16> : PowerPC::WriteU16SwapFromJit, &m_jit.m_mmu,
CallFunctionArg(accessSize, reg_value), CallFunctionArg(32, reg_addr));
break;
case 8:
ABI_CallFunctionPRR(PowerPC::WriteFromJit<u8>, &m_jit.m_mmu, reg, reg_addr);
ABI_CallFunction(PowerPC::WriteFromJit<u8>, &m_jit.m_mmu,
CallFunctionArg(accessSize, reg_value), CallFunctionArg(32, reg_addr));
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, rsp_alignment);
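Note that the old immediate-staging code (manually picking RSCRATCH or ABI_PARAM1) is gone: CallFunctionArg wraps any OpArg, so an immediate value can be passed straight through (0xDEADBEEF here is just an illustration):

    ABI_CallFunction(PowerPC::WriteFromJit<u32>, &m_jit.m_mmu,
                     CallFunctionArg(32, Imm32(0xDEADBEEF)), CallFunctionArg(32, reg_addr));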
@ -669,16 +658,20 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
switch (accessSize)
{
case 64:
ABI_CallFunctionPAC(64, PowerPC::WriteFromJit<u64>, &m_jit.m_mmu, arg, address);
ABI_CallFunction(PowerPC::WriteFromJit<u64>, &m_jit.m_mmu, CallFunctionArg(accessSize, arg),
address);
break;
case 32:
ABI_CallFunctionPAC(32, PowerPC::WriteFromJit<u32>, &m_jit.m_mmu, arg, address);
ABI_CallFunction(PowerPC::WriteFromJit<u32>, &m_jit.m_mmu, CallFunctionArg(accessSize, arg),
address);
break;
case 16:
ABI_CallFunctionPAC(16, PowerPC::WriteFromJit<u16>, &m_jit.m_mmu, arg, address);
ABI_CallFunction(PowerPC::WriteFromJit<u16>, &m_jit.m_mmu, CallFunctionArg(accessSize, arg),
address);
break;
case 8:
ABI_CallFunctionPAC(8, PowerPC::WriteFromJit<u8>, &m_jit.m_mmu, arg, address);
ABI_CallFunction(PowerPC::WriteFromJit<u8>, &m_jit.m_mmu, CallFunctionArg(accessSize, arg),
address);
break;
}
ABI_PopRegistersAndAdjustStack(registersInUse, 0);

View File

@ -207,7 +207,7 @@ void CommonAsmRoutines::GenFrsqrte()
SetJumpTarget(denormal);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
ABI_CallFunction(Common::ApproximateReciprocalSquareRoot);
QuickCallFunction(Common::ApproximateReciprocalSquareRoot);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
RET();
@ -278,7 +278,7 @@ void CommonAsmRoutines::GenFres()
SetJumpTarget(complex);
ABI_PushRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
ABI_CallFunction(Common::ApproximateReciprocal);
QuickCallFunction(Common::ApproximateReciprocal);
ABI_PopRegistersAndAdjustStack(QUANTIZED_REGS_TO_SAVE, 8);
RET();

View File

@ -23,6 +23,7 @@ add_dolphin_test(SwapTest SwapTest.cpp)
add_dolphin_test(WorkQueueThreadTest WorkQueueThreadTest.cpp)
if (_M_X86_64)
add_dolphin_test(x64ABITest x64ABITest.cpp)
add_dolphin_test(x64EmitterTest x64EmitterTest.cpp)
target_link_libraries(x64EmitterTest PRIVATE bdisasm)
elseif (_M_ARM_64)

View File

@ -0,0 +1,385 @@
// Copyright 2025 Dolphin Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <bit>
#include <concepts>
#include "Common/x64ABI.h"
#include "Common/x64Emitter.h"
#include <gtest/gtest.h>
using namespace Gen;
namespace
{
u32 ZeroParameterFunction()
{
return 123;
}
template <std::integral T>
u32 OneParameterFunction(T a)
{
return a + 23;
}
u32 TwoParameterFunction(u64 a, u64 b)
{
return a * 10 + b + 3;
}
u32 ThreeParameterFunction(u64 a, u64 b, u64 c)
{
return a * 10 + b + c / 10;
}
u32 FourParameterFunction(u64 a, u64 b, u32 c, u32 d)
{
return a == 0x0102030405060708 && b == 0x090a0b0c0d0e0f10 && c == 0x11121314 && d == 0x15161718 ?
123 :
4;
}
class TestCallFunction : public X64CodeBlock
{
public:
TestCallFunction() { AllocCodeSpace(4096); }
template <typename F>
void Emit(F f)
{
ResetCodePtr();
m_code_pointer = GetCodePtr();
{
const Common::ScopedJITPageWriteAndNoExecute enable_jit_page_writes;
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);
f();
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);
RET();
}
}
void Run()
{
const u64 actual = std::bit_cast<u64 (*)()>(m_code_pointer)();
constexpr u64 expected = 123;
EXPECT_EQ(expected, actual);
}
private:
const u8* m_code_pointer = nullptr;
};
} // namespace
TEST(x64Emitter, CallFunction_ZeroParameters)
{
TestCallFunction test;
test.Emit([&] { test.ABI_CallFunction(&ZeroParameterFunction); });
test.Run();
}
TEST(x64Emitter, CallFunction_OneConstantParameter)
{
TestCallFunction test;
test.Emit([&] { test.ABI_CallFunction(&OneParameterFunction<u64>, 100); });
test.Run();
}
TEST(x64Emitter, CallFunction_OneImm64Parameter)
{
TestCallFunction test;
test.Emit([&] {
test.ABI_CallFunction(&OneParameterFunction<u64>, XEmitter::CallFunctionArg(64, Imm64(100)));
});
test.Run();
}
TEST(x64Emitter, CallFunction_OneImm32Parameter)
{
TestCallFunction test;
test.Emit([&] {
test.ABI_CallFunction(&OneParameterFunction<u32>, XEmitter::CallFunctionArg(32, Imm32(100)));
});
test.Run();
}
TEST(x64Emitter, CallFunction_OneImm16Parameter)
{
TestCallFunction test;
test.Emit([&] {
test.ABI_CallFunction(&OneParameterFunction<u16>, XEmitter::CallFunctionArg(16, Imm16(100)));
});
test.Run();
}
TEST(x64Emitter, CallFunction_OneImm8Parameter)
{
TestCallFunction test;
test.Emit([&] {
test.ABI_CallFunction(&OneParameterFunction<u8>, XEmitter::CallFunctionArg(8, Imm8(100)));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One64BitRegisterParameterNoMov)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(64, R(ABI_PARAM1), Imm64(100));
test.ABI_CallFunction(&OneParameterFunction<u64>, XEmitter::CallFunctionArg(64, ABI_PARAM1));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One64BitRegisterParameterMov)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(64, R(ABI_PARAM2), Imm64(100));
test.ABI_CallFunction(&OneParameterFunction<u64>, XEmitter::CallFunctionArg(64, ABI_PARAM2));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One32BitRegisterParameterNoMov)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(64, R(ABI_PARAM1), Imm64(0x0101010100000064));
test.ABI_CallFunction(&OneParameterFunction<u32>, XEmitter::CallFunctionArg(32, ABI_PARAM1));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One32BitRegisterParameterMov)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(64, R(ABI_PARAM2), Imm64(0x0101010100000064));
test.ABI_CallFunction(&OneParameterFunction<u32>, XEmitter::CallFunctionArg(32, ABI_PARAM2));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One16BitRegisterParameterNoMov)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(64, R(ABI_PARAM1), Imm64(0x0101010101010064));
test.ABI_CallFunction(&OneParameterFunction<u16>, XEmitter::CallFunctionArg(16, ABI_PARAM1));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One16BitRegisterParameterMov)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(64, R(ABI_PARAM2), Imm64(0x0101010101010064));
test.ABI_CallFunction(&OneParameterFunction<u16>, XEmitter::CallFunctionArg(16, ABI_PARAM2));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One8BitRegisterParameterNoMov)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(64, R(ABI_PARAM1), Imm64(0x0101010101010164));
test.ABI_CallFunction(&OneParameterFunction<u8>, XEmitter::CallFunctionArg(8, ABI_PARAM1));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One8BitRegisterParameterMov)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(64, R(ABI_PARAM2), Imm64(0x0101010101010164));
test.ABI_CallFunction(&OneParameterFunction<u8>, XEmitter::CallFunctionArg(8, ABI_PARAM2));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One64BitMemoryParameter)
{
TestCallFunction test;
test.Emit([&] {
constexpr u64 value = 100;
test.MOV(64, R(RAX), ImmPtr(&value));
test.ABI_CallFunction(&OneParameterFunction<u64>, XEmitter::CallFunctionArg(64, MatR(RAX)));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One32BitMemoryParameter)
{
TestCallFunction test;
test.Emit([&] {
constexpr u32 value = 100;
test.MOV(64, R(RAX), ImmPtr(&value));
test.ABI_CallFunction(&OneParameterFunction<u32>, XEmitter::CallFunctionArg(32, MatR(RAX)));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One16BitMemoryParameter)
{
TestCallFunction test;
test.Emit([&] {
constexpr u16 value = 100;
test.MOV(64, R(RAX), ImmPtr(&value));
test.ABI_CallFunction(&OneParameterFunction<u16>, XEmitter::CallFunctionArg(16, MatR(RAX)));
});
test.Run();
}
TEST(x64Emitter, CallFunction_One8BitMemoryParameter)
{
TestCallFunction test;
test.Emit([&] {
constexpr u8 value = 100;
test.MOV(64, R(RAX), ImmPtr(&value));
test.ABI_CallFunction(&OneParameterFunction<u8>, XEmitter::CallFunctionArg(8, MatR(RAX)));
});
test.Run();
}
TEST(x64Emitter, CallFunction_TwoRegistersMixed)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(32, R(ABI_PARAM1), Imm32(20));
test.ABI_CallFunction(&TwoParameterFunction, 10, XEmitter::CallFunctionArg(32, ABI_PARAM1));
});
test.Run();
}
TEST(x64Emitter, CallFunction_TwoRegistersCycle)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(32, R(ABI_PARAM1), Imm32(20));
test.MOV(32, R(ABI_PARAM2), Imm32(10));
test.ABI_CallFunction(&TwoParameterFunction, XEmitter::CallFunctionArg(32, ABI_PARAM2),
XEmitter::CallFunctionArg(32, ABI_PARAM1));
});
test.Run();
}
TEST(x64Emitter, CallFunction_ThreeRegistersMixed)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(32, R(ABI_PARAM2), Imm32(10));
test.MOV(32, R(ABI_PARAM3), Imm32(20));
test.ABI_CallFunction(&ThreeParameterFunction, XEmitter::CallFunctionArg(32, ABI_PARAM2),
XEmitter::CallFunctionArg(32, ABI_PARAM3), 30);
});
test.Run();
}
TEST(x64Emitter, CallFunction_ThreeRegistersCycle1)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(32, R(ABI_PARAM1), Imm32(30));
test.MOV(32, R(ABI_PARAM2), Imm32(10));
test.MOV(32, R(ABI_PARAM3), Imm32(20));
test.ABI_CallFunction(&ThreeParameterFunction, XEmitter::CallFunctionArg(32, ABI_PARAM2),
XEmitter::CallFunctionArg(32, ABI_PARAM3),
XEmitter::CallFunctionArg(32, ABI_PARAM1));
});
test.Run();
}
TEST(x64Emitter, CallFunction_ThreeRegistersCycle2)
{
TestCallFunction test;
test.Emit([&] {
test.MOV(32, R(ABI_PARAM1), Imm32(20));
test.MOV(32, R(ABI_PARAM2), Imm32(30));
test.MOV(32, R(ABI_PARAM3), Imm32(10));
test.ABI_CallFunction(&ThreeParameterFunction, XEmitter::CallFunctionArg(32, ABI_PARAM3),
XEmitter::CallFunctionArg(32, ABI_PARAM1),
XEmitter::CallFunctionArg(32, ABI_PARAM2));
});
test.Run();
}
TEST(x64Emitter, CallFunction_ThreeMemoryRegistersCycle1)
{
TestCallFunction test;
test.Emit([&] {
constexpr u32 value1 = 10;
constexpr u32 value2 = 20;
constexpr u32 value3 = 30;
test.MOV(64, R(ABI_PARAM1), ImmPtr(&value3));
test.MOV(64, R(ABI_PARAM2), ImmPtr(&value1));
test.MOV(64, R(ABI_PARAM3), ImmPtr(&value2));
test.ABI_CallFunction(&ThreeParameterFunction, XEmitter::CallFunctionArg(32, MatR(ABI_PARAM2)),
XEmitter::CallFunctionArg(32, MatR(ABI_PARAM3)),
XEmitter::CallFunctionArg(32, MatR(ABI_PARAM1)));
});
test.Run();
}
TEST(x64Emitter, CallFunction_ThreeMemoryRegistersCycle2)
{
TestCallFunction test;
test.Emit([&] {
constexpr u32 value1 = 10;
constexpr u32 value2 = 20;
constexpr u32 value3 = 30;
test.MOV(64, R(ABI_PARAM1), ImmPtr(&value2));
test.MOV(64, R(ABI_PARAM2), ImmPtr(&value3));
test.MOV(64, R(ABI_PARAM3), ImmPtr(&value1));
test.ABI_CallFunction(&ThreeParameterFunction, XEmitter::CallFunctionArg(32, MatR(ABI_PARAM3)),
XEmitter::CallFunctionArg(32, MatR(ABI_PARAM1)),
XEmitter::CallFunctionArg(32, MatR(ABI_PARAM2)));
});
test.Run();
}
TEST(x64Emitter, CallFunction_FourRegistersMixed)
{
// Loosely based on an actual piece of code where we call Core::BranchWatch::HitVirtualTrue_fk_n
TestCallFunction test;
test.Emit([&] {
test.MOV(64, R(RAX), Imm64(0x0102030405060708));
test.MOV(32, R(RDX), Imm32(0x15161718));
test.ABI_CallFunction(&FourParameterFunction, XEmitter::CallFunctionArg(64, RAX),
0x090a0b0c0d0e0f10, 0x11121314, XEmitter::CallFunctionArg(32, RDX));
});
test.Run();
}
TEST(x64Emitter, CallFunction_FourRegistersComplexAddressing)
{
TestCallFunction test;
test.Emit([&] {
constexpr u64 value1 = 0x0102030405060708;
constexpr u64 value2 = 0x090a0b0c0d0e0f10;
constexpr u32 value3 = 0x11121314;
constexpr u32 value4 = 0x15161718;
test.MOV(64, R(ABI_PARAM1), Imm32(3));
test.MOV(64, R(ABI_PARAM2), Imm64(reinterpret_cast<uintptr_t>(&value1) - 3));
test.MOV(64, R(ABI_PARAM4), Imm64(reinterpret_cast<uintptr_t>(&value2) / 4 + 3));
test.MOV(64, R(ABI_PARAM3), Imm64(reinterpret_cast<uintptr_t>(&value3) - 30));
test.MOV(64, R(RAX), Imm32(15));
static_assert(std::count(ABI_PARAMS.begin(), ABI_PARAMS.end(), RAX) == 0);
test.ABI_CallFunction(
&FourParameterFunction, XEmitter::CallFunctionArg(64, MRegSum(ABI_PARAM1, ABI_PARAM2)),
XEmitter::CallFunctionArg(64, MScaled(ABI_PARAM4, SCALE_4, -12)),
XEmitter::CallFunctionArg(32, MComplex(ABI_PARAM3, ABI_PARAM1, SCALE_8, 6)),
XEmitter::CallFunctionArg(32, MComplex(ABI_PARAM3, RAX, SCALE_2,
reinterpret_cast<uintptr_t>(&value4) -
reinterpret_cast<uintptr_t>(&value3))));
});
test.Run();
}

View File

@ -40,7 +40,7 @@ public:
// Call
MOVQ_xmm(XMM0, R(ABI_PARAM1));
ABI_CallFunction(raw_cdts);
QuickCallFunction(raw_cdts);
MOV(32, R(ABI_RETURN), R(RSCRATCH));
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);

View File

@ -47,7 +47,7 @@ public:
// Call
MOVQ_xmm(XMM0, R(ABI_PARAM1));
ABI_CallFunction(raw_frsqrte);
QuickCallFunction(raw_frsqrte);
MOVQ_xmm(R(ABI_RETURN), XMM0);
ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8, 16);

View File

@ -79,6 +79,7 @@
</ItemGroup>
<!--Arch-specific tests-->
<ItemGroup Condition="'$(Platform)'=='x64'">
<ClCompile Include="Common\x64ABITest.cpp" />
<ClCompile Include="Common\x64EmitterTest.cpp" />
<ClCompile Include="Core\PowerPC\Jit64Common\ConvertDoubleToSingle.cpp" />
<ClCompile Include="Core\PowerPC\Jit64Common\Frsqrte.cpp" />