mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-05-12 16:19:44 -06:00
Merge 9bd65faa85 into 021f16f775
This commit is contained in:
commit
80970d092a
@ -543,9 +543,15 @@ public:
|
||||
// Add module (path to obj cache dir)
|
||||
void add(std::unique_ptr<llvm::Module> _module, const std::string& path);
|
||||
|
||||
// Returns false after LLVM fatal recovery. The compiler must be discarded.
|
||||
bool try_add(std::unique_ptr<llvm::Module> _module, const std::string& path, std::string& error);
|
||||
|
||||
// Add module (not cached)
|
||||
void add(std::unique_ptr<llvm::Module> _module);
|
||||
|
||||
// Returns false after LLVM fatal recovery. The compiler must be discarded.
|
||||
bool try_add(std::unique_ptr<llvm::Module> _module, std::string& error);
|
||||
|
||||
// Add object (path to obj file)
|
||||
bool add(const std::string& path);
|
||||
|
||||
@ -558,6 +564,9 @@ public:
|
||||
// Finalize
|
||||
void fin();
|
||||
|
||||
// Returns false after LLVM fatal recovery. The compiler must be discarded.
|
||||
bool try_fin(std::string& error);
|
||||
|
||||
// Get compiled function address
|
||||
u64 get(const std::string& name);
|
||||
|
||||
|
||||
@ -12,6 +12,10 @@
|
||||
|
||||
#include <charconv>
|
||||
|
||||
#if defined(__APPLE__)
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
LOG_CHANNEL(jit_log, "JIT");
|
||||
|
||||
#ifdef LLVM_AVAILABLE
|
||||
@ -50,6 +54,44 @@ LOG_CHANNEL(jit_log, "JIT");
|
||||
#include "Emu/CPU/Backends/AArch64/AArch64Common.h"
|
||||
#endif
|
||||
|
||||
namespace
|
||||
{
|
||||
thread_local std::string* g_llvm_fatal_message = nullptr;
|
||||
|
||||
template <typename F>
|
||||
bool run_recoverable_llvm(F&& func, std::string& error)
|
||||
{
|
||||
error.clear();
|
||||
|
||||
// Run LLVM codegen in a disposable thread. If LLVM invokes the fatal
|
||||
// handler, only this helper thread exits.
|
||||
named_thread worker("LLVM JIT", [&]()
|
||||
{
|
||||
#if defined(__APPLE__)
|
||||
pthread_jit_write_protect_np(false);
|
||||
#endif
|
||||
g_llvm_fatal_message = &error;
|
||||
|
||||
std::forward<F>(func)();
|
||||
|
||||
g_llvm_fatal_message = nullptr;
|
||||
#if defined(__APPLE__)
|
||||
pthread_jit_write_protect_np(true);
|
||||
#endif
|
||||
});
|
||||
|
||||
worker();
|
||||
const bool result = static_cast<thread_state>(worker) == thread_state::finished;
|
||||
|
||||
if (!result && error.empty())
|
||||
{
|
||||
error = "LLVM crash recovery invoked";
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
const bool jit_initialize = []() -> bool
|
||||
{
|
||||
llvm::InitializeNativeTarget();
|
||||
@ -649,6 +691,13 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
|
||||
llvm::install_fatal_error_handler([](void*, const char* msg, bool)
|
||||
{
|
||||
const std::string_view out = msg ? msg : "";
|
||||
|
||||
if (g_llvm_fatal_message)
|
||||
{
|
||||
*g_llvm_fatal_message = out;
|
||||
thread_ctrl::silent_exit();
|
||||
}
|
||||
|
||||
fmt::throw_exception("LLVM Emergency Exit Invoked: '%s'", out);
|
||||
}, nullptr);
|
||||
|
||||
@ -788,6 +837,33 @@ void jit_compiler::add(std::unique_ptr<llvm::Module> _module, const std::string&
|
||||
}
|
||||
}
|
||||
|
||||
// Recoverable variant of add(module, path): adds the module, generates its
// code on a helper thread with an object cache rooted at `path` installed,
// then strips the IR bodies.
//
// Returns false (with `error` filled in) when code generation did not
// complete; in that case the compiler must be discarded.
bool jit_compiler::try_add(std::unique_ptr<llvm::Module> _module, const std::string& path, std::string& error)
{
	// The cache object only lives for the duration of this call
	ObjectCache cache{path, this};
	m_engine->setObjectCache(&cache);

	llvm::Module* const mod = _module.get();
	m_engine->addModule(std::move(_module));

	const bool generated = run_recoverable_llvm([&]()
	{
		m_engine->generateCodeForModule(mod);
	}, error);

	if (!generated)
	{
		// The engine still refers to the local `cache` at this point.
		// NOTE(review): presumably left untouched on purpose because the
		// engine is not expected to be used again after a failure - confirm.
		return false;
	}

	m_engine->setObjectCache(nullptr);

	// Delete IR to lower memory consumption
	for (auto& fn : mod->functions())
	{
		fn.deleteBody();
	}

	return true;
}
|
||||
|
||||
void jit_compiler::add(std::unique_ptr<llvm::Module> _module)
|
||||
{
|
||||
const auto ptr = _module.get();
|
||||
@ -801,6 +877,28 @@ void jit_compiler::add(std::unique_ptr<llvm::Module> _module)
|
||||
}
|
||||
}
|
||||
|
||||
// Recoverable variant of add(module) (not cached): adds the module,
// generates its code on a helper thread, then strips the IR bodies.
//
// Returns false (with `error` filled in) when code generation did not
// complete; in that case the compiler must be discarded.
bool jit_compiler::try_add(std::unique_ptr<llvm::Module> _module, std::string& error)
{
	llvm::Module* const mod = _module.get();
	m_engine->addModule(std::move(_module));

	const bool generated = run_recoverable_llvm([&]()
	{
		m_engine->generateCodeForModule(mod);
	}, error);

	if (!generated)
	{
		return false;
	}

	// Delete IR to lower memory consumption
	for (auto& fn : mod->functions())
	{
		fn.deleteBody();
	}

	return true;
}
|
||||
|
||||
bool jit_compiler::add(const std::string& path)
|
||||
{
|
||||
auto cache = ObjectCache::load(path);
|
||||
@ -852,6 +950,14 @@ void jit_compiler::fin()
|
||||
m_engine->finalizeObject();
|
||||
}
|
||||
|
||||
bool jit_compiler::try_fin(std::string& error)
|
||||
{
|
||||
return run_recoverable_llvm([&]()
|
||||
{
|
||||
m_engine->finalizeObject();
|
||||
}, error);
|
||||
}
|
||||
|
||||
u64 jit_compiler::get(const std::string& name)
|
||||
{
|
||||
return m_engine->getGlobalValueAddress(name);
|
||||
|
||||
@ -2988,6 +2988,32 @@ void thread_ctrl::set_name(std::string name)
|
||||
report_fatal_error(reason);
|
||||
}
|
||||
|
||||
void thread_ctrl::silent_exit() noexcept
|
||||
{
|
||||
if (const auto _this = g_tls_this_thread)
|
||||
{
|
||||
g_tls_error_callback();
|
||||
|
||||
u64 _self = _this->finalize(thread_state::errored);
|
||||
|
||||
if (_self == umax)
|
||||
{
|
||||
// Unused, detached thread support remnant
|
||||
delete _this;
|
||||
}
|
||||
|
||||
thread_base::finalize(umax);
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
_endthreadex(0);
|
||||
#else
|
||||
pthread_exit(nullptr);
|
||||
#endif
|
||||
|
||||
std::abort();
|
||||
}
|
||||
|
||||
void thread_ctrl::detect_cpu_layout()
|
||||
{
|
||||
if (!g_native_core_layout.compare_and_swap_test(native_core_arrangement::undefined, native_core_arrangement::generic))
|
||||
|
||||
@ -315,6 +315,9 @@ public:
|
||||
// Exit.
|
||||
[[noreturn]] static void emergency_exit(std::string_view reason);
|
||||
|
||||
// Exit the current named thread as errored without reporting a fatal error.
|
||||
[[noreturn]] static void silent_exit() noexcept;
|
||||
|
||||
// Get current thread (may be nullptr)
|
||||
static thread_base* get_current()
|
||||
{
|
||||
|
||||
@ -3120,6 +3120,9 @@ protected:
|
||||
|
||||
// ARMv8 SDOT/UDOT
|
||||
bool m_use_dotprod = false;
|
||||
|
||||
// Allow direct TBL2/TBX2 emission.
|
||||
bool m_use_tbl2 = true;
|
||||
#else
|
||||
// Allow FMA
|
||||
bool m_use_fma = false;
|
||||
@ -4030,6 +4033,131 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
template <typename T1, typename T2>
|
||||
value_t<u8[16]> tbl(T1 a, T2 b)
|
||||
{
|
||||
value_t<u8[16]> result;
|
||||
const auto data0 = a.eval(m_ir);
|
||||
const auto index = b.eval(m_ir);
|
||||
const auto zeros = llvm::ConstantAggregateZero::get(get_type<u8[16]>());
|
||||
|
||||
if (auto c = llvm::dyn_cast<llvm::Constant>(index))
|
||||
{
|
||||
v128 mask{};
|
||||
const auto cv = llvm::dyn_cast<llvm::ConstantDataVector>(c);
|
||||
|
||||
if (cv)
|
||||
{
|
||||
for (u32 i = 0; i < 16; i++)
|
||||
{
|
||||
const u64 b_val = cv->getElementAsInteger(i);
|
||||
mask._u8[i] = (b_val < 16) ? static_cast<u8>(b_val) : static_cast<u8>(16);
|
||||
}
|
||||
}
|
||||
|
||||
if (cv || llvm::isa<llvm::ConstantAggregateZero>(c))
|
||||
{
|
||||
result.value = llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u8*>(&mask), 16));
|
||||
result.value = m_ir->CreateZExt(result.value, get_type<u32[16]>());
|
||||
result.value = m_ir->CreateShuffleVector(data0, zeros, result.value);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
result.value = m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::aarch64_neon_tbl1), { data0, index });
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
value_t<u8[16]> tbl2(T1 a, T2 b, T3 indices)
|
||||
{
|
||||
value_t<u8[16]> result;
|
||||
const auto data0 = a.eval(m_ir);
|
||||
const auto data1 = b.eval(m_ir);
|
||||
const auto index = indices.eval(m_ir);
|
||||
|
||||
if (m_use_tbl2)
|
||||
{
|
||||
if (auto c = llvm::dyn_cast<llvm::Constant>(index))
|
||||
{
|
||||
v128 mask{};
|
||||
v128 bitmask{};
|
||||
const auto cv = llvm::dyn_cast<llvm::ConstantDataVector>(c);
|
||||
|
||||
if (cv)
|
||||
{
|
||||
for (u32 i = 0; i < 16; i++)
|
||||
{
|
||||
const u64 b_val = cv->getElementAsInteger(i);
|
||||
mask._u8[i] = (b_val < 32) ? static_cast<u8>(b_val) : static_cast<u8>(0);
|
||||
bitmask._u8[i] = (b_val < 32) ? static_cast<u8>(0xFF) : static_cast<u8>(0x00);
|
||||
}
|
||||
}
|
||||
else if (llvm::isa<llvm::ConstantAggregateZero>(c))
|
||||
{
|
||||
bitmask = v128::from8p(0xFF);
|
||||
}
|
||||
|
||||
if (cv || llvm::isa<llvm::ConstantAggregateZero>(c))
|
||||
{
|
||||
auto m_val = llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u8*>(&mask), 16));
|
||||
auto m_ext = m_ir->CreateZExt(m_val, get_type<u32[16]>());
|
||||
auto lookup = m_ir->CreateShuffleVector(data0, data1, m_ext);
|
||||
|
||||
auto z_mask = llvm::ConstantDataVector::get(m_context, llvm::ArrayRef(reinterpret_cast<const u8*>(&bitmask), 16));
|
||||
result.value = m_ir->CreateAnd(lookup, z_mask);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
result.value = m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::aarch64_neon_tbl2), { data0, data1, index });
|
||||
return result;
|
||||
}
|
||||
|
||||
const auto data0_lookup = m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::aarch64_neon_tbl1), { data0, index });
|
||||
const auto data1_index = m_ir->CreateSub(index, llvm::ConstantInt::get(get_type<u8[16]>(), 16));
|
||||
const auto data1_lookup = m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::aarch64_neon_tbl1), { data1, data1_index });
|
||||
|
||||
result.value = m_ir->CreateOr(data0_lookup, data1_lookup);
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename T3>
|
||||
value_t<u8[16]> tbx(T1 fallback, T2 a, T3 indices)
|
||||
{
|
||||
value_t<u8[16]> result;
|
||||
const auto v_fallback = fallback.eval(m_ir);
|
||||
const auto data0 = a.eval(m_ir);
|
||||
const auto index = indices.eval(m_ir);
|
||||
|
||||
result.value = m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::aarch64_neon_tbx1), { v_fallback, data0, index });
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename T3, typename T4>
|
||||
value_t<u8[16]> tbx2(T1 fallback, T2 a, T3 b, T4 indices)
|
||||
{
|
||||
value_t<u8[16]> result;
|
||||
const auto v_fallback = fallback.eval(m_ir);
|
||||
const auto data0 = a.eval(m_ir);
|
||||
const auto data1 = b.eval(m_ir);
|
||||
const auto index = indices.eval(m_ir);
|
||||
|
||||
if (m_use_tbl2)
|
||||
{
|
||||
result.value = m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::aarch64_neon_tbx2), { v_fallback, data0, data1, index });
|
||||
return result;
|
||||
}
|
||||
|
||||
const auto first_lookup = m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::aarch64_neon_tbx1), { v_fallback, data0, index });
|
||||
const auto data1_index = m_ir->CreateSub(index, llvm::ConstantInt::get(get_type<u8[16]>(), 16));
|
||||
|
||||
result.value = m_ir->CreateCall(get_intrinsic<u8[16]>(llvm::Intrinsic::aarch64_neon_tbx1), { first_lookup, data1, data1_index });
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
// (m << 3) >= 0 ? a : b
|
||||
template <typename T, typename U, typename V>
|
||||
static auto select_by_bit4(T&& m, U&& a, V&& b)
|
||||
|
||||
@ -1642,6 +1642,16 @@ void PPUTranslator::VPERM(ppu_opcode_t op)
|
||||
{
|
||||
const auto [a, b, c] = get_vrs<u8[16]>(op.va, op.vb, op.vc);
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
if (op.ra == op.rb)
|
||||
{
|
||||
set_vr(op.vd, tbl(a, (~c & 0xf)));
|
||||
return;
|
||||
}
|
||||
|
||||
set_vr(op.vd, tbl2(b, a, (~c & 0x1f)));
|
||||
return;
|
||||
#else
|
||||
if (op.ra == op.rb)
|
||||
{
|
||||
set_vr(op.vd, pshufb(a, ~c & 0xf));
|
||||
@ -1657,6 +1667,7 @@ void PPUTranslator::VPERM(ppu_opcode_t op)
|
||||
|
||||
const auto i = eval(~c & 0x1f);
|
||||
set_vr(op.vd, select(noncast<s8[16]>(c << 3) >= 0, pshufb(a, i), pshufb(b, i)));
|
||||
#endif
|
||||
}
|
||||
|
||||
void PPUTranslator::VPKPX(ppu_opcode_t op)
|
||||
|
||||
@ -82,6 +82,94 @@ void fmt_class_string<spu_recompiler_base::compare_direction>::format(std::strin
|
||||
});
|
||||
}
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
constexpr const char s_spu_llvm_reg_scavenge_error[] = "Cannot scavenge register without an emergency spill slot";
|
||||
|
||||
class spu_llvm_compile_scope
|
||||
{
|
||||
public:
|
||||
spu_llvm_compile_scope(spu_llvm_compile_context& context, bool use_tbl2) noexcept
|
||||
{
|
||||
context = {};
|
||||
context.use_tbl2 = use_tbl2;
|
||||
spu_llvm_set_compile_context(&context);
|
||||
}
|
||||
|
||||
~spu_llvm_compile_scope() noexcept
|
||||
{
|
||||
spu_llvm_set_compile_context(nullptr);
|
||||
}
|
||||
};
|
||||
|
||||
// Re-runs the analyser of `compiler` on `program`.
//
// The program words are copied into a zero-initialised scratch local-storage
// image, starting at program.lower_bound, and the analyser is invoked at
// program.entry_point. Returns whatever compiler.analyse() produces.
//
// Words that would land outside the SPU_LS_SIZE image are not copied, so a
// malformed program cannot write past the buffer; the analysis result is
// then expected to differ from `program`, which the caller checks.
static spu_program analyse_spu_llvm_program(spu_recompiler_base& compiler, const spu_program& program)
{
	std::vector<be_t<u32>> ls(SPU_LS_SIZE / sizeof(be_t<u32>));

	u32 pos = program.lower_bound;

	for (const auto inst : program.data)
	{
		const u32 slot = pos / 4;

		if (slot >= ls.size())
		{
			// Out of the local storage range: stop instead of writing out of bounds
			break;
		}

		ls[slot] = std::bit_cast<be_t<u32>>(inst);
		pos += 4;
	}

	return compiler.analyse(ls.data(), program.entry_point);
}
|
||||
|
||||
// Compiles `program` with the LLVM recompiler, recovering from LLVM fatal
// errors.
//
// 1. Compile with direct TBL2/TBX2 emission enabled.
// 2. If LLVM failed with the register scavenging error, replace the compiler
//    with a fresh instance, re-analyse the program and compile once more with
//    TBL2/TBX2 disabled.
//
// compiler - recompiler to use; it is replaced by a new instance whenever an
//            LLVM fatal error was recorded, because fatal recovery does not
//            unwind MCJIT state and the old instance must not be reused.
// program  - analysed program to compile.
//
// Returns the compiled function, or nullptr on failure.
static spu_function_t compile_spu_llvm_with_retry(std::unique_ptr<spu_recompiler_base>& compiler, const spu_program& program)
{
	// LLVM fatal recovery does not unwind MCJIT state. Abandon the failed
	// compiler (intentionally not destroyed) and start from a fresh instance.
	const auto replace_compiler = [&compiler]()
	{
		static_cast<void>(compiler.release());
		compiler = spu_recompiler_base::make_llvm_recompiler();
		compiler->init();
	};

	spu_llvm_compile_context context;

	{
		spu_llvm_compile_scope scope(context, true);

		if (const auto result = compiler->compile(spu_program{program}))
		{
			return result;
		}
	}

	if (context.llvm_error.find(s_spu_llvm_reg_scavenge_error) == std::string::npos)
	{
		if (!context.llvm_error.empty())
		{
			spu_log.error("LLVM failed to compile SPU block 0x%x: %s", program.entry_point, context.llvm_error);

			// A recorded error means fatal recovery ran: do not hand the
			// broken compiler back to the caller
			replace_compiler();
		}

		return nullptr;
	}

	spu_log.warning("LLVM failed to compile SPU block 0x%x with TBL2/TBX2: %s. Retrying without TBL2/TBX2.", program.entry_point, context.llvm_error);

	replace_compiler();

	const auto retry_program = analyse_spu_llvm_program(*compiler, program);

	if (retry_program != program)
	{
		spu_log.error("[0x%05x] SPU analyser failed during TBL2/TBX2 retry, %u vs %u", retry_program.entry_point, retry_program.data.size(), program.data.size());
		return nullptr;
	}

	spu_llvm_compile_context retry_context;
	spu_function_t result = nullptr;

	{
		spu_llvm_compile_scope scope(retry_context, false);
		result = compiler->compile(spu_program{retry_program});
	}

	if (result)
	{
		spu_log.notice("SPU LLVM block 0x%x compiled successfully without TBL2/TBX2.", program.entry_point);
	}
	else if (!retry_context.llvm_error.empty())
	{
		spu_log.error("LLVM failed to compile SPU block 0x%x without TBL2/TBX2: %s", program.entry_point, retry_context.llvm_error);

		// The retry also went through fatal recovery: discard this instance too
		replace_compiler();
	}

	return result;
}
|
||||
#endif
|
||||
|
||||
// Move 4 args for calling native function from a GHC calling convention function
|
||||
#if defined(ARCH_X64)
|
||||
static u8* move_args_ghc_to_native(u8* raw)
|
||||
@ -906,6 +994,15 @@ void spu_cache::initialize(bool build_existing_cache)
|
||||
|
||||
compiler->init();
|
||||
|
||||
auto compile_program = [&](spu_program&& program) -> spu_function_t
|
||||
{
|
||||
#ifdef ARCH_ARM64
|
||||
return compile_spu_llvm_with_retry(compiler, program);
|
||||
#else
|
||||
return compiler->compile(std::move(program));
|
||||
#endif
|
||||
};
|
||||
|
||||
// Counter for error reporting
|
||||
u32 logged_error = 0;
|
||||
|
||||
@ -977,7 +1074,7 @@ void spu_cache::initialize(bool build_existing_cache)
|
||||
logged_error++;
|
||||
}
|
||||
}
|
||||
else if (!compiler->compile(std::move(func2)))
|
||||
else if (!compile_program(std::move(func2)))
|
||||
{
|
||||
// Likely, out of JIT memory. Signal to prevent further building.
|
||||
fail_flag |= 1;
|
||||
@ -1075,7 +1172,7 @@ void spu_cache::initialize(bool build_existing_cache)
|
||||
const u32 last_inst = std::bit_cast<be_t<u32>>(func2.data.back());
|
||||
const u32 prog_size = ::size32(func2.data);
|
||||
|
||||
if (!compiler->compile(std::move(func2)))
|
||||
if (!compile_program(std::move(func2)))
|
||||
{
|
||||
// Likely, out of JIT memory. Signal to prevent further building.
|
||||
fail_flag |= 1;
|
||||
@ -2096,7 +2193,12 @@ void spu_recompiler_base::dispatch(spu_thread& spu, void*, u8* rip)
|
||||
return;
|
||||
}
|
||||
|
||||
const auto func = spu.jit->compile(spu.jit->analyse(spu._ptr<u32>(0), spu.pc));
|
||||
auto program = spu.jit->analyse(spu._ptr<u32>(0), spu.pc);
|
||||
#ifdef ARCH_ARM64
|
||||
const auto func = compile_spu_llvm_with_retry(spu.jit, program);
|
||||
#else
|
||||
const auto func = spu.jit->compile(std::move(program));
|
||||
#endif
|
||||
|
||||
if (!func)
|
||||
{
|
||||
@ -8903,8 +9005,20 @@ struct spu_llvm_worker
|
||||
{
|
||||
spu_log.error("[0x%05x] SPU Analyser failed, %u vs %u", func2.entry_point, func2.data.size(), size0);
|
||||
}
|
||||
else if (const auto target = compiler->compile(std::move(func2)))
|
||||
else
|
||||
{
|
||||
#ifdef ARCH_ARM64
|
||||
const auto target = compile_spu_llvm_with_retry(compiler, func2);
|
||||
#else
|
||||
const auto target = compiler->compile(std::move(func2));
|
||||
#endif
|
||||
|
||||
if (!target)
|
||||
{
|
||||
spu_log.fatal("[0x%05x] Compilation failed.", func.entry_point);
|
||||
break;
|
||||
}
|
||||
|
||||
// Redirect old function (TODO: patch in multiple places)
|
||||
const s64 rel = reinterpret_cast<u64>(target) - prog->first - 5;
|
||||
|
||||
@ -8922,11 +9036,6 @@ struct spu_llvm_worker
|
||||
|
||||
atomic_storage<u64>::release(*reinterpret_cast<u64*>(prog->first), result);
|
||||
}
|
||||
else
|
||||
{
|
||||
spu_log.fatal("[0x%05x] Compilation failed.", func.entry_point);
|
||||
break;
|
||||
}
|
||||
|
||||
// Clear fake LS
|
||||
std::memset(ls.data() + start / 4, 0, 4 * (size0 - 1));
|
||||
|
||||
@ -62,6 +62,16 @@ const extern spu_decoder<spu_iflag> g_spu_iflag;
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
#include "Emu/CPU/Backends/AArch64/AArch64JIT.h"
|
||||
|
||||
namespace
|
||||
{
|
||||
thread_local spu_llvm_compile_context* g_spu_llvm_compile_context = nullptr;
|
||||
}
|
||||
|
||||
void spu_llvm_set_compile_context(spu_llvm_compile_context* context) noexcept
|
||||
{
|
||||
g_spu_llvm_compile_context = context;
|
||||
}
|
||||
#endif
|
||||
|
||||
class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||
@ -1669,6 +1679,15 @@ public:
|
||||
m_hash_start = hash_start;
|
||||
}
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
m_use_tbl2 = !g_spu_llvm_compile_context || g_spu_llvm_compile_context->use_tbl2;
|
||||
|
||||
if (g_spu_llvm_compile_context)
|
||||
{
|
||||
g_spu_llvm_compile_context->llvm_error.clear();
|
||||
}
|
||||
#endif
|
||||
|
||||
spu_log.notice("Building function 0x%x... (size %u, %s)", func.entry_point, func.data.size(), m_hash);
|
||||
|
||||
m_pos = func.lower_bound;
|
||||
@ -3478,17 +3497,63 @@ public:
|
||||
} _jit_guard;
|
||||
#endif
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
// Testing only
|
||||
m_jit.add(std::move(_module), m_spurt->get_cache_path() + "llvm/");
|
||||
}
|
||||
else
|
||||
{
|
||||
m_jit.add(std::move(_module));
|
||||
}
|
||||
#ifdef ARCH_ARM64
|
||||
const bool recoverable = !!g_spu_llvm_compile_context;
|
||||
|
||||
m_jit.fin();
|
||||
if (recoverable)
|
||||
{
|
||||
bool added = false;
|
||||
std::string& llvm_error = g_spu_llvm_compile_context->llvm_error;
|
||||
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
// Testing only
|
||||
added = m_jit.try_add(std::move(_module), m_spurt->get_cache_path() + "llvm/", llvm_error);
|
||||
}
|
||||
else
|
||||
{
|
||||
added = m_jit.try_add(std::move(_module), llvm_error);
|
||||
}
|
||||
|
||||
if (!added || !m_jit.try_fin(llvm_error))
|
||||
{
|
||||
if (add_to_file)
|
||||
{
|
||||
add_loc->cached = 0;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
// Testing only
|
||||
m_jit.add(std::move(_module), m_spurt->get_cache_path() + "llvm/");
|
||||
}
|
||||
else
|
||||
{
|
||||
m_jit.add(std::move(_module));
|
||||
}
|
||||
|
||||
m_jit.fin();
|
||||
}
|
||||
#else
|
||||
if (g_cfg.core.spu_debug)
|
||||
{
|
||||
// Testing only
|
||||
m_jit.add(std::move(_module), m_spurt->get_cache_path() + "llvm/");
|
||||
}
|
||||
else
|
||||
{
|
||||
m_jit.add(std::move(_module));
|
||||
}
|
||||
|
||||
m_jit.fin();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Register function pointer
|
||||
const spu_function_t fn = reinterpret_cast<spu_function_t>(m_jit.get_engine().getPointerToFunction(main_func));
|
||||
@ -6757,6 +6822,73 @@ public:
|
||||
const auto a = get_vr<u8[16]>(op.ra);
|
||||
const auto b = get_vr<u8[16]>(op.rb);
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
if (auto [ok, as] = match_expr(a, byteswap(match<u8[16]>())); ok)
|
||||
{
|
||||
if (auto [ok, bs] = match_expr(b, byteswap(match<u8[16]>())); ok)
|
||||
{
|
||||
if (op.ra == op.rb)
|
||||
{
|
||||
if (perm_only)
|
||||
{
|
||||
const auto cm = eval(c & 0x0f);
|
||||
set_vr(op.rt4, tbl(as, cm));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto x = tbl(build<u8[16]>(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80, 0x80), (c >> 4));
|
||||
const auto cm = eval(c & 0x8f);
|
||||
set_vr(op.rt4, tbx(x, as, cm));
|
||||
return;
|
||||
}
|
||||
|
||||
if (perm_only)
|
||||
{
|
||||
const auto cm = eval(c & 0x1f);
|
||||
set_vr(op.rt4, tbl2(as, bs, cm));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto x = tbl(build<u8[16]>(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80, 0x80), (c >> 4));
|
||||
const auto cm = eval(c & 0x9f);
|
||||
set_vr(op.rt4, tbx2(x, as, bs, cm));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (op.ra == op.rb && !m_interp_magn)
|
||||
{
|
||||
if (perm_only)
|
||||
{
|
||||
const auto cm = eval(c & 0x0f);
|
||||
const auto cr = eval(cm ^ 0x0f);
|
||||
set_vr(op.rt4, tbl(a, cr));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto x = tbl(build<u8[16]>(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80, 0x80), (c >> 4));
|
||||
const auto cm = eval(c & 0x8f);
|
||||
const auto cr = eval(cm ^ 0x0f);
|
||||
set_vr(op.rt4, tbx(x, a, cr));
|
||||
return;
|
||||
}
|
||||
|
||||
if (perm_only)
|
||||
{
|
||||
const auto cm = eval(c & 0x9f);
|
||||
const auto cr = eval(cm ^ 0x0f);
|
||||
set_vr(op.rt4, tbl2(a, b, cr));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto x = tbl(build<u8[16]>(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x80, 0x80), (c >> 4));
|
||||
// AND should be before XOR so that llvm can combine them into BCAX
|
||||
// Though for some reason it doesn't seem to be doing that.
|
||||
const auto cm = eval(c & ~0x60);
|
||||
const auto cr = eval(cm ^ 0x0f);
|
||||
set_vr(op.rt4, tbx2(x, a, b, cr));
|
||||
return;
|
||||
#else
|
||||
// Data with swapped endian from a load instruction
|
||||
if (auto [ok, as] = match_expr(a, byteswap(match<u8[16]>())); ok)
|
||||
{
|
||||
@ -6900,6 +7032,7 @@ public:
|
||||
set_vr(op.rt4, select_by_bit4(cr, ax, bx));
|
||||
else
|
||||
set_vr(op.rt4, select_by_bit4(cr, ax, bx) | x);
|
||||
#endif
|
||||
}
|
||||
|
||||
void MPYA(spu_opcode_t op)
|
||||
|
||||
@ -77,6 +77,16 @@ struct spu_program
|
||||
bool operator<(const spu_program& rhs) const noexcept;
|
||||
};
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
// Per-compilation settings and results shared with the SPU LLVM recompiler
struct spu_llvm_compile_context
{
	// Allow direct TBL2/TBX2 emission for this compilation
	bool use_tbl2{true};

	// Message of the LLVM fatal error hit during compilation (empty if none)
	std::string llvm_error{};
};
|
||||
|
||||
void spu_llvm_set_compile_context(spu_llvm_compile_context* context) noexcept;
|
||||
#endif
|
||||
|
||||
class spu_item
|
||||
{
|
||||
public:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user