From f5c420994d275153c851a0340d0dff8425c4d25d Mon Sep 17 00:00:00 2001 From: Megamouse Date: Thu, 14 May 2026 03:24:54 +0200 Subject: [PATCH] Add unsafe bit_set access for obvious cases --- rpcs3/Emu/Cell/SPUCommonRecompiler.cpp | 84 ++++++++++++-------------- rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 36 +++++------ rpcs3/Emu/RSX/RSXFIFO.cpp | 6 +- rpcs3/util/types.hpp | 17 ++++++ 4 files changed, 78 insertions(+), 65 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 44a3cfc314..d9702f287f 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -2868,10 +2868,6 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u32 lsa = entry_point; u32 limit = SPU_LS_SIZE; - if (g_cfg.core.spu_block_size == spu_block_size_type::giga) - { - } - // Weak constant propagation context (for guessing branch targets) std::array, 128> vflags{}; @@ -4005,7 +4001,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Expand MFC_Cmd reg use for (u8 reg : {s_reg_mfc_lsa, s_reg_mfc_tag, s_reg_mfc_size}) { - if (!block.reg_mod[reg]) + if (!block.reg_mod.test_unsafe(reg)) block.reg_use[reg]++; } } @@ -4013,11 +4009,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Register reg modification if (u8 reg = m_regmod[ia / 4]; reg < s_reg_max) { - block.reg_mod.set(reg); - block.reg_mod_xf.set(reg, type & spu_itype::xfloat); + block.reg_mod.set_unsafe(reg); + block.reg_mod_xf.set_unsafe(reg, type & spu_itype::xfloat); if (type == spu_itype::SELB && (block.reg_mod_xf[op.ra] || block.reg_mod_xf[op.rb])) - block.reg_mod_xf.set(reg); + block.reg_mod_xf.set_unsafe(reg); // Possible post-dominating register load if (type == spu_itype::LQD && op.ra == s_reg_sp) @@ -4059,13 +4055,13 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { if (i == s_reg_lr || (i >= 2 && i < s_reg_80) || i > 
s_reg_127) { - if (!block.reg_mod[i]) + if (!block.reg_mod.test_unsafe(i)) block.reg_use[i]++; if (!is_tail) { - block.reg_mod.set(i); - block.reg_mod_xf.set(i, false); + block.reg_mod.set_unsafe(i); + block.reg_mod_xf.set_unsafe(i, false); } } } @@ -4347,7 +4343,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { if (tb.chunk == block.chunk && tb.reg_origin[i] + 1) { - const u32 expected = block.reg_mod[i] ? addr : block.reg_origin[i]; + const u32 expected = block.reg_mod.test_unsafe(i) ? addr : block.reg_origin[i]; if (tb.reg_origin[i] == 0x80000000) { @@ -4364,7 +4360,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (g_cfg.core.spu_block_size == spu_block_size_type::giga && tb.func == block.func && tb.reg_origin_abs[i] + 2) { - const u32 expected = block.reg_mod[i] ? addr : block.reg_origin_abs[i]; + const u32 expected = block.reg_mod.test_unsafe(i) ? addr : block.reg_origin_abs[i]; if (tb.reg_origin_abs[i] == 0x80000000) { @@ -4435,11 +4431,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (orig < 0x40000) { auto& src = ::at32(m_bbs, orig); - bb.reg_const.set(i, src.reg_const[i]); + bb.reg_const.set_unsafe(i, src.reg_const.test_unsafe(i)); bb.reg_val32[i] = src.reg_val32[i]; } - if (!bb.reg_save_dom[i] && bb.reg_use[i] && (orig == SPU_LS_SIZE || orig + 2 == 0)) + if (!bb.reg_save_dom.test_unsafe(i) && bb.reg_use[i] && (orig == SPU_LS_SIZE || orig + 2 == 0)) { // Destroy offset if external reg value is used func.reg_save_off[i] = -1; @@ -4573,7 +4569,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { // Clear const if reg is modified here if (u8 reg = m_regmod[ia / 4]; reg < s_reg_max) - bb.reg_const.set(reg, false); + bb.reg_const.set_unsafe(reg, false); break; } } @@ -4581,7 +4577,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // $SP is modified if (m_regmod[ia / 4] == s_reg_sp) { - if 
(bb.reg_const[s_reg_sp]) + if (bb.reg_const.test_unsafe(s_reg_sp)) { // Making $SP a constant is a funny thing too. bb.stack_sub = 0x80000000; @@ -4799,7 +4795,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Check $80..$127 (should be restored or unmodified) for (u32 i = s_reg_80; is_ok && i <= s_reg_127; i++) { - if (u32 orig = bb.reg_mod[i] ? addr : bb.reg_origin_abs[i]; orig < SPU_LS_SIZE) + if (u32 orig = bb.reg_mod.test_unsafe(i) ? addr : bb.reg_origin_abs[i]; orig < SPU_LS_SIZE) { auto& src = ::at32(m_bbs, orig); @@ -6380,7 +6376,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { for (u32 i = 0; i < s_reg_max; i++) { - if (!reg_mod[i]) + if (!reg_mod.test_unsafe(i)) { reg_use[i] += it->second.reg_use[i]; } @@ -6400,26 +6396,26 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { if (!reduced_loop->loop_dicts.test(i)) { - if (reg_use[i] && reg_mod[i]) + if (reg_use[i] && reg_mod.test_unsafe(i)) { reduced_loop->is_constant_expression = false; - reduced_loop->loop_writes.set(i); - reduced_loop->loop_may_update.reset(i); + reduced_loop->loop_writes.set_unsafe(i); + reduced_loop->loop_may_update.reset_unsafe(i); } else if (reg_use[i]) { - reduced_loop->loop_args.set(i); + reduced_loop->loop_args.set_unsafe(i); - if (reg_use[i] >= 3 && reg_maybe_float[i]) + if (reg_use[i] >= 3 && reg_maybe_float.test_unsafe(i)) { - reduced_loop->gpr_not_nans.set(i); + reduced_loop->gpr_not_nans.set_unsafe(i); } } } else { // Cleanup - reduced_loop->loop_may_update.reset(i); + reduced_loop->loop_may_update.reset_unsafe(i); } } @@ -6427,11 +6423,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s for (u32 i = 0; i < s_reg_max; i++) { - if (::at32(reduced_loop->loop_dicts, i) || ::at32(reduced_loop->loop_writes, i)) + if (reduced_loop->loop_dicts.test_unsafe(i) || reduced_loop->loop_writes.test_unsafe(i)) { - if (auto reg_it = reduced_loop->find_reg(i)) + if 
(const auto reg_it = reduced_loop->find_reg(i)) { - if (reg_it->regs.test(s_reg_max)) + if (reg_it->regs.test_unsafe(s_reg_max)) { is_secret = false; } @@ -7111,7 +7107,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { for (u32 i = 0; i < s_reg_max; i++) { - if (!reg_mod[i]) + if (!reg_mod.test_unsafe(i)) { reg_use[i] += it->second.reg_use[i]; } @@ -7131,26 +7127,26 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { if (!reduced_loop->loop_dicts.test(i)) { - if (reg_use[i] && reg_mod[i]) + if (reg_use[i] && reg_mod.test_unsafe(i)) { reduced_loop->is_constant_expression = false; - reduced_loop->loop_writes.set(i); - reduced_loop->loop_may_update.reset(i); + reduced_loop->loop_writes.set_unsafe(i); + reduced_loop->loop_may_update.reset_unsafe(i); } else if (reg_use[i]) { - reduced_loop->loop_args.set(i); + reduced_loop->loop_args.set_unsafe(i); - if (reg_use[i] >= 3 && reg_maybe_float[i]) + if (reg_use[i] >= 3 && reg_maybe_float.test_unsafe(i)) { - reduced_loop->gpr_not_nans.set(i); + reduced_loop->gpr_not_nans.set_unsafe(i); } } } else { // Cleanup - reduced_loop->loop_may_update.reset(i); + reduced_loop->loop_may_update.reset_unsafe(i); } } @@ -7158,11 +7154,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s for (u32 i = 0; i < s_reg_max; i++) { - if (::at32(reduced_loop->loop_dicts, i) || ::at32(reduced_loop->loop_writes, i)) + if (reduced_loop->loop_dicts.test_unsafe(i) || reduced_loop->loop_writes.test_unsafe(i)) { - if (auto reg_it = reduced_loop->find_reg(i)) + if (const auto reg_it = reduced_loop->find_reg(i)) { - if (reg_it->regs.test(s_reg_max)) + if (reg_it->regs.test_unsafe(s_reg_max)) { is_secret = false; } @@ -8680,7 +8676,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s for (u32 i = 0; i < s_reg_max; i++) { - if (::at32(pattern.loop_dicts, i)) + if (pattern.loop_dicts.test_unsafe(i)) { if (regs.size() != 1) { @@ -8693,7 +8689,7 
@@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s for (u32 i = 0; i < s_reg_max; i++) { - if (pattern.loop_writes.test(i)) + if (pattern.loop_writes.test_unsafe(i)) { if (regs.size() != 1) { @@ -8703,7 +8699,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s fmt::append(regs, " r%u-w", i); } - if (pattern.loop_args.test(i)) + if (pattern.loop_args.test_unsafe(i)) { if (regs.size() != 1) { @@ -8713,7 +8709,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s fmt::append(regs, " r%u-r", i); } - if (pattern.loop_may_update.test(i)) + if (pattern.loop_may_update.test_unsafe(i)) { if (regs.size() != 1) { diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index bd0928f965..93f9ea8793 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -2175,7 +2175,7 @@ public: if (src > 0x40000) { // Use the xfloat hint to create 256-bit (4x double) PHI - llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type() : get_reg_type(i); + llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf.test_unsafe(i) ? get_type() : get_reg_type(i); const auto _phi = m_ir->CreatePHI(type, ::size32(bb.preds), fmt::format("phi0x%05x_r%u", baddr, i)); m_block->phi[i] = _phi; @@ -2581,7 +2581,7 @@ public: { for (u32 i = 0; i < s_reg_max; i++) { - llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type() : get_reg_type(i); + llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf.test_unsafe(i) ? 
get_type() : get_reg_type(i); if (i < m_reduced_loop_info->loop_dicts.size() && (m_reduced_loop_info->loop_dicts.test(i) || m_reduced_loop_info->loop_writes.test(i))) { @@ -7155,8 +7155,8 @@ public: { if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok) { - safe_int_compare.set(i); - safe_finite_compare.set(i); + safe_int_compare.set_unsafe(i); + safe_finite_compare.set_unsafe(i); for (u32 j = 0; j < 4; j++) { @@ -7170,8 +7170,8 @@ public: // Note: Technically this optimization is accurate for any positive value, but due to the fact that // we don't produce "extended range" values the same way as real hardware, it's not safe to apply // this optimization for values outside of the range of x86 floating point hardware. - safe_int_compare.reset(i); - if ((value & 0x7fffffffu) >= 0x7f7ffffeu) safe_finite_compare.reset(i); + safe_int_compare.reset_unsafe(i); + if ((value & 0x7fffffffu) >= 0x7f7ffffeu) safe_finite_compare.reset_unsafe(i); } } } @@ -7179,12 +7179,12 @@ public: if (m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra)) { - safe_finite_compare.set(0); + safe_finite_compare.set_unsafe(0); } if (m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb)) { - safe_finite_compare.set(1); + safe_finite_compare.set_unsafe(1); } if (safe_int_compare.any()) @@ -7253,7 +7253,7 @@ public: { if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok) { - safe_int_compare.set(i); + safe_int_compare.set_unsafe(i); for (u32 j = 0; j < 4; j++) { @@ -7263,7 +7263,7 @@ public: if ((value & 0x7fffffffu) >= 0x7f7fffffu || !exponent) { // See above - safe_int_compare.reset(i); + safe_int_compare.reset_unsafe(i); } } } @@ -7528,8 +7528,8 @@ public: { if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok) { - safe_float_compare.set(i); - safe_int_compare.set(i); + safe_float_compare.set_unsafe(i); + safe_int_compare.set_unsafe(i); for (u32 j = 0; j < 4; j++) { @@ -7539,13 +7539,13 @@ 
public: // unsafe if nan if (exponent == 255) { - safe_float_compare.reset(i); + safe_float_compare.reset_unsafe(i); } // unsafe if denormal or 0 if (!exponent) { - safe_int_compare.reset(i); + safe_int_compare.reset_unsafe(i); } } } @@ -7602,8 +7602,8 @@ public: { if (auto [ok, data] = get_const_vector(ab[i].value, m_pos, __LINE__ + i); ok) { - safe_float_compare.set(i); - safe_int_compare.set(i); + safe_float_compare.set_unsafe(i); + safe_int_compare.set_unsafe(i); for (u32 j = 0; j < 4; j++) { @@ -7613,13 +7613,13 @@ public: // unsafe if nan if (exponent == 255) { - safe_float_compare.reset(i); + safe_float_compare.reset_unsafe(i); } // unsafe if denormal or 0 if (!exponent) { - safe_int_compare.reset(i); + safe_int_compare.reset_unsafe(i); } } } diff --git a/rpcs3/Emu/RSX/RSXFIFO.cpp b/rpcs3/Emu/RSX/RSXFIFO.cpp index de0663363a..76241ac2de 100644 --- a/rpcs3/Emu/RSX/RSXFIFO.cpp +++ b/rpcs3/Emu/RSX/RSXFIFO.cpp @@ -689,11 +689,11 @@ namespace rsx // Check for flow control if (bit_set<2> jump_type; jump_type - .set(0, (cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD) - .set(1, (cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD) + .set_unsafe(0, (cmd & RSX_METHOD_OLD_JUMP_CMD_MASK) == RSX_METHOD_OLD_JUMP_CMD) + .set_unsafe(1, (cmd & RSX_METHOD_NEW_JUMP_CMD_MASK) == RSX_METHOD_NEW_JUMP_CMD) .any()) { - const u32 offs = cmd & (jump_type.test(0) ? RSX_METHOD_OLD_JUMP_OFFSET_MASK : RSX_METHOD_NEW_JUMP_OFFSET_MASK); + const u32 offs = cmd & (jump_type.test_unsafe(0) ? RSX_METHOD_OLD_JUMP_OFFSET_MASK : RSX_METHOD_NEW_JUMP_OFFSET_MASK); if (offs == fifo_ctrl->get_pos()) { //Jump to self. 
Often preceded by NOP diff --git a/rpcs3/util/types.hpp b/rpcs3/util/types.hpp index 46a819c78c..cd73f925d0 100644 --- a/rpcs3/util/types.hpp +++ b/rpcs3/util/types.hpp @@ -1081,6 +1081,11 @@ public: return m_bitset[pos]; } + [[nodiscard]] bool test_unsafe(usz pos) const + { + return m_bitset[pos]; + } + bit_set& set(usz pos, bool val = true, std::source_location src_loc = std::source_location::current()) { if (pos >= Bits) [[unlikely]] @@ -1090,6 +1095,12 @@ public: return *this; } + bit_set& set_unsafe(usz pos, bool val = true) + { + m_bitset[pos] = val; + return *this; + } + bit_set& reset(usz pos, std::source_location src_loc = std::source_location::current()) { if (pos >= Bits) [[unlikely]] @@ -1099,6 +1110,12 @@ public: return *this; } + bit_set& reset_unsafe(usz pos) + { + m_bitset[pos] = false; /* operator[] is unchecked; std::bitset::reset(pos) range-checks and throws */ + return *this; + } + constexpr bit_set& reset() noexcept { m_bitset.reset();