diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 6dfa9e7cd0..86fc72ed55 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -493,6 +493,10 @@ inline FT build_function_asm(std::string_view name, F&& builder, ::jit_runtime* return reinterpret_cast(uptr(result)); } +#if defined(__INTELLISENSE__) && !defined(LLVM_AVAILABLE) +#define LLVM_AVAILABLE +#endif + #ifdef LLVM_AVAILABLE namespace llvm diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp index 22413f62b8..6bd7924ea5 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.cpp +++ b/rpcs3/Emu/CPU/CPUTranslator.cpp @@ -210,7 +210,7 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin #endif } -llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type) const +llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type, std::source_location src_loc) const { uint s1 = type->getScalarSizeInBits(); uint s2 = val->getType()->getScalarSizeInBits(); @@ -222,7 +222,7 @@ llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type) const if (s1 != s2) { - fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)", s1, s2); + fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)\nCalled from: %s", s1, s2, src_loc); } if (val->getType() == type) diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 738932808d..ab2aed8156 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -43,6 +43,7 @@ #include #include +#include // Helper function llvm::Value* peek_through_bitcasts(llvm::Value*); @@ -3239,7 +3240,7 @@ public: } // Bitcast with immediate constant folding - llvm::Value* bitcast(llvm::Value* val, llvm::Type* type) const; + llvm::Value* bitcast(llvm::Value* val, llvm::Type* type, std::source_location src_loc = std::source_location::current()) const; template llvm::Value* bitcast(llvm::Value* val) diff --git a/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp b/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp index 97375c4e6d..83b001cc52 100644 --- a/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp +++ b/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp @@ -192,7 +192,46 @@ error_code cellNetCtlDelHandler(s32 hid) error_code cellNetCtlGetInfo(s32 code, vm::ptr info) { - cellNetCtl.warning("cellNetCtlGetInfo(code=0x%x (%s), info=*0x%x)", code, InfoCodeToName(code), info); + bool log_it_once = false; + + switch (code) + { + case CELL_NET_CTL_INFO_ETHER_ADDR: + case CELL_NET_CTL_INFO_DEVICE: + case CELL_NET_CTL_INFO_MTU: + case CELL_NET_CTL_INFO_LINK_TYPE: + case CELL_NET_CTL_INFO_IP_CONFIG: + case CELL_NET_CTL_INFO_IP_ADDRESS: + case CELL_NET_CTL_INFO_NETMASK: + case CELL_NET_CTL_INFO_DEFAULT_ROUTE: + case CELL_NET_CTL_INFO_HTTP_PROXY_CONFIG: + case CELL_NET_CTL_INFO_UPNP_CONFIG: + { + log_it_once = true; + break; + } + default: + { + break; + } + } + + bool log_it = true; + + if (log_it_once && vm::check_addr(info.addr())) + { + struct logged_t + { + std::array, 256> logged_code{}; + }; + + if (g_fxo->get().logged_code[::narrow(code)].exchange(true)) + { + log_it = false; + } + } + + (log_it ? cellNetCtl.warning : cellNetCtl.trace)("cellNetCtlGetInfo(code=0x%x (%s), info=*0x%x)", code, InfoCodeToName(code), info); auto& nph = g_fxo->get>(); diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 4e8c044113..53dc0df200 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -6194,7 +6194,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { const auto arg = reduced_loop->find_reg(reg); - if (arg && reg != op_rt) + if (arg && arg->regs.count() != 0) { if (reg_first == reg) { @@ -6217,6 +6217,12 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } + if (type & spu_itype::memory || type == spu_itype::RDCH || type == spu_itype::RCHCNT) + { + // Register external origin + org.add_register_origin(s_reg_max); + } + *ensure(reduced_loop->find_reg(op_rt)) = org; } @@ -6359,26 +6365,55 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } + std::array reg_use{}; + std::bitset reg_maybe_float{}; + std::bitset reg_mod{}; + + for (auto it = m_bbs.find(reduced_loop->loop_pc); it != m_bbs.end() && it->first <= bpc; it++) + { + for (u32 i = 0; i < s_reg_max; i++) + { + if (!reg_mod[i]) + { + reg_use[i] += it->second.reg_use[i]; + } + } + + reg_maybe_float |= it->second.reg_maybe_float; + reg_mod |= it->second.reg_mod; + + // Note: update when sup_conds are implemented + if (it->first == bpc && it->first != reduced_loop->loop_pc) + { + reduced_loop->loop_may_update |= it->second.reg_mod; + } + } + for (u32 i = 0; i < s_reg_max; i++) { - const auto& b = ::at32(m_bbs, reduced_loop->loop_pc); - const auto& b2 = ::at32(m_bbs, bpc); - if (!::at32(reduced_loop->loop_dicts, i)) { - if (b.reg_use[i] || (!::at32(b.reg_mod, i) && b2.reg_use[i])) + if (reg_use[i] && reg_mod[i]) { - if ((b.reg_use[i] && ::at32(b.reg_mod, i)) || ::at32(b2.reg_mod, i)) + reduced_loop->is_constant_expression = false; + reduced_loop->loop_writes.set(i); + reduced_loop->loop_may_update.reset(i); + } + else if (reg_use[i]) + { + reduced_loop->loop_args.set(i); + + if (reg_use[i] >= 3 && reg_maybe_float[i]) { - reduced_loop->is_constant_expression = false; - reduced_loop->loop_writes.set(i); - } - else - { - reduced_loop->loop_args.set(i); + reduced_loop->gpr_not_nans.set(i); } } } + else + { + // Cleanup + reduced_loop->loop_may_update.reset(i); + } } reduced_loop_all.emplace(reduced_loop->loop_pc, *reduced_loop); @@ -6731,6 +6766,13 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } + if (reg_index != i && ::at32(reg->regs, reg_index)) + { + // Unimplemented + break_reduced_loop_pattern(30, reduced_loop->discard()); + break; + } + u32 cond_val_incr = static_cast(reg_org->IMM); if (reg_org->mod1_type == spu_itype::AI || reg_org->mod1_type == spu_itype::AHI) @@ -7049,26 +7091,55 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } + std::array reg_use{}; + std::bitset reg_maybe_float{}; + std::bitset reg_mod{}; + + for (auto it = m_bbs.find(reduced_loop->loop_pc); it != m_bbs.end() && it->first <= bpc; it++) + { + for (u32 i = 0; i < s_reg_max; i++) + { + if (!reg_mod[i]) + { + reg_use[i] += it->second.reg_use[i]; + } + } + + reg_maybe_float |= it->second.reg_maybe_float; + reg_mod |= it->second.reg_mod; + + // Note: update when sup_conds are implemented + if (it->first == bpc && it->first != reduced_loop->loop_pc) + { + reduced_loop->loop_may_update |= it->second.reg_mod; + } + } + for (u32 i = 0; i < s_reg_max; i++) { - const auto& b = ::at32(m_bbs, reduced_loop->loop_pc); - const auto& b2 = ::at32(m_bbs, bpc); - if (!::at32(reduced_loop->loop_dicts, i)) { - if (b.reg_use[i] || (!::at32(b.reg_mod, i) && b2.reg_use[i])) + if (reg_use[i] && reg_mod[i]) { - if ((b.reg_use[i] && ::at32(b.reg_mod, i)) || ::at32(b2.reg_mod, i)) + reduced_loop->is_constant_expression = false; + reduced_loop->loop_writes.set(i); + reduced_loop->loop_may_update.reset(i); + } + else if (reg_use[i]) + { + reduced_loop->loop_args.set(i); + + if (reg_use[i] >= 3 && reg_maybe_float[i]) { - reduced_loop->is_constant_expression = false; - reduced_loop->loop_writes.set(i); - } - else - { - reduced_loop->loop_args.set(i); + reduced_loop->gpr_not_nans.set(i); } } } + else + { + // Cleanup + reduced_loop->loop_may_update.reset(i); + } } reduced_loop_all.emplace(reduced_loop->loop_pc, *reduced_loop); @@ -8608,6 +8679,16 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s fmt::append(regs, " r%u-r", i); } + + if (::at32(pattern.loop_may_update, i)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u-m", i); + } } regs += " }"; diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index b13c27e376..85d391aceb 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -60,6 +60,7 @@ const extern spu_decoder g_spu_iflag; #pragma GCC diagnostic pop #endif +#pragma optimize("", off) #ifdef ARCH_ARM64 #include "Emu/CPU/Backends/AArch64/AArch64JIT.h" #endif @@ -152,6 +153,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Current register values std::array reg{}; + // Opimization: restoring register state for registers that would be rewritten in other blocks + std::array reg_save_and_restore{}; + // PHI nodes created for this block (if any) std::array phi{}; @@ -177,11 +181,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const usz first_id = store_context_first_id[i]; return counter != 1 && first_id != umax && counter < first_id; } - - bool is_gpr_not_NaN_hint(u32 i) const noexcept - { - return block_wide_reg_store_elimination && ::at32(bb->reg_maybe_float, i) && ::at32(bb->reg_use, i) >= 3 && !::at32(bb->reg_mod, i); - } }; struct function_info @@ -197,10 +196,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator }; // Current block - block_info* m_block; + block_info* m_block = nullptr; // Current function or chunk - function_info* m_finfo; + function_info* m_finfo = nullptr; + + // Reduced Loop Pattern information (if available) + reduced_loop_t* m_reduced_loop_info = nullptr; // All blocks in the current function chunk std::unordered_map> m_blocks; @@ -2280,7 +2282,7 @@ public: } const bool is_reduced_loop = m_inst_attrs[(baddr - start) / 4] == inst_attr::reduced_loop; - const auto reduced_loop_info = is_reduced_loop ? std::static_pointer_cast(ensure(m_patterns.at(baddr - start).info_ptr)) : nullptr; + m_reduced_loop_info = is_reduced_loop ? std::static_pointer_cast(ensure(m_patterns.at(baddr - start).info_ptr)).get() : nullptr; BasicBlock* block_optimization_phi_parent = nullptr; const auto block_optimization_inner = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b-loop-it-0x%x", m_pos), m_function) : nullptr; @@ -2290,11 +2292,24 @@ public: std::array reduced_loop_phi_nodes{}; std::array reduced_loop_init_regs{}; - auto make_reduced_loop_condition = [&](llvm::BasicBlock* optimization_block, bool is_second_time, u32 reserve_iterations) + // Reserve additional iteration for rare case where GPR may not be rewritten after the iteration + // So that it would have to be rewritten by future code + // This avoids using additional PHI connectors + const u32 reserve_iterations = m_reduced_loop_info && m_reduced_loop_info->loop_may_update.count() != 0 ? 3 : 2; + + for (u32 i = 0; i < s_reg_max; i++) + { + if (m_reduced_loop_info && m_reduced_loop_info->loop_may_update.test(i)) + { + m_block->reg_save_and_restore[i] = m_block->reg[i]; + } + } + + auto make_reduced_loop_condition = [&](llvm::BasicBlock* optimization_block, bool is_second_time) { llvm::ICmpInst::Predicate compare{}; - switch (reduced_loop_info->cond_val_compare) + switch (m_reduced_loop_info->cond_val_compare) { case CMP_SLESS: compare = ICmpInst::ICMP_SLT; break; case CMP_SGREATER: compare = ICmpInst::ICMP_SGT; break; @@ -2323,11 +2338,11 @@ public: llvm::Value* loop_dictator_after_adjustment{}; spu_opcode_t reg_target{}; - reg_target.rt = static_cast(reduced_loop_info->cond_val_register_idx); + reg_target.rt = static_cast(m_reduced_loop_info->cond_val_register_idx); - if (reg_target.rt != reduced_loop_info->cond_val_register_idx) + if (reg_target.rt != m_reduced_loop_info->cond_val_register_idx) { - fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition register index: 0x%llx", reduced_loop_info->cond_val_register_idx); + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition register index: 0x%llx", m_reduced_loop_info->cond_val_register_idx); } if (!m_block->reg[reg_target.rt]) @@ -2335,7 +2350,7 @@ public: m_block->reg[reg_target.rt] = reduced_loop_init_regs[reg_target.rt]; } - switch (reduced_loop_info->cond_val_mask) + switch (m_reduced_loop_info->cond_val_mask) { case u8{umax}: { @@ -2360,28 +2375,28 @@ public: } default: { - fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition bit mask: 0x%llx", reduced_loop_info->cond_val_mask); + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition bit mask: 0x%llx", m_reduced_loop_info->cond_val_mask); } } - const u32 type_bits = std::popcount(reduced_loop_info->cond_val_mask); + const u32 type_bits = std::popcount(m_reduced_loop_info->cond_val_mask); llvm::Value* cond_val_incr = nullptr; - if (reduced_loop_info->cond_val_incr_is_immediate) + if (m_reduced_loop_info->cond_val_incr_is_immediate) { - cond_val_incr = m_ir->getIntN(type_bits, reduced_loop_info->cond_val_incr & reduced_loop_info->cond_val_mask); + cond_val_incr = m_ir->getIntN(type_bits, m_reduced_loop_info->cond_val_incr & m_reduced_loop_info->cond_val_mask); } else { spu_opcode_t reg_incr{}; - reg_incr.rt = static_cast(reduced_loop_info->cond_val_incr); + reg_incr.rt = static_cast(m_reduced_loop_info->cond_val_incr); - if (reg_incr.rt != reduced_loop_info->cond_val_incr) + if (reg_incr.rt != m_reduced_loop_info->cond_val_incr) { - fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal increment arguemnt register index: 0x%llx", reduced_loop_info->cond_val_incr); + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal increment arguemnt register index: 0x%llx", m_reduced_loop_info->cond_val_incr); } - switch (reduced_loop_info->cond_val_mask) + switch (m_reduced_loop_info->cond_val_mask) { case u8{umax}: { @@ -2407,7 +2422,7 @@ public: } } - if (reduced_loop_info->cond_val_incr_before_cond && !reduced_loop_info->cond_val_incr_before_cond_taken_in_account) + if (m_reduced_loop_info->cond_val_incr_before_cond && !m_reduced_loop_info->cond_val_incr_before_cond_taken_in_account) { loop_dictator_after_adjustment = m_ir->CreateAdd(loop_dictator_before_adjustment, cond_val_incr); } @@ -2418,21 +2433,21 @@ public: llvm::Value* loop_argument = nullptr; - if (reduced_loop_info->cond_val_is_immediate) + if (m_reduced_loop_info->cond_val_is_immediate) { - loop_argument = m_ir->CreateTrunc(m_ir->getInt64(reduced_loop_info->cond_val_min & reduced_loop_info->cond_val_mask), loop_dictator_before_adjustment->getType()); + loop_argument = m_ir->CreateTrunc(m_ir->getInt64(m_reduced_loop_info->cond_val_min & m_reduced_loop_info->cond_val_mask), loop_dictator_before_adjustment->getType()); } else { spu_opcode_t reg_target2{}; - reg_target2.rt = static_cast(reduced_loop_info->cond_val_register_argument_idx); + reg_target2.rt = static_cast(m_reduced_loop_info->cond_val_register_argument_idx); - if (reg_target2.rt != reduced_loop_info->cond_val_register_argument_idx) + if (reg_target2.rt != m_reduced_loop_info->cond_val_register_argument_idx) { - fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition arguemnt register index: 0x%llx", reduced_loop_info->cond_val_register_argument_idx); + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition arguemnt register index: 0x%llx", m_reduced_loop_info->cond_val_register_argument_idx); } - switch (reduced_loop_info->cond_val_mask) + switch (m_reduced_loop_info->cond_val_mask) { case u8{umax}: { @@ -2464,7 +2479,7 @@ public: { condition = m_ir->CreateICmp(compare, loop_dictator_after_adjustment, loop_argument); } - // else if ((reduced_loop_info->cond_val_compare == CMP_LGREATER || (reduced_loop_info->cond_val_compare == CMP_LGREATER_EQUAL && reduced_loop_info->cond_val_is_immediate && reduced_loop_info->cond_val_incr)) && cond_val_incr->getSExtValue() < 0) + // else if ((m_reduced_loop_info->cond_val_compare == CMP_LGREATER || (m_reduced_loop_info->cond_val_compare == CMP_LGREATER_EQUAL && m_reduced_loop_info->cond_val_is_immediate && m_reduced_loop_info->cond_val_incr)) && cond_val_incr->getSExtValue() < 0) // { // const auto cond_val_incr_multiplied = m_ir->CreateMul(cond_val_incr, reserve_iterations - 1); // condition = m_ir->CreateICmp(compare, select(m_ir->CreateICmpUGE(cond_val_incr_multiplied, loop_dictator_after_adjustment), m_ir->CreateAdd(loop_dictator_after_adjustment, cond_val_incr_multiplied), m_ir->getIntN(type_bits, 0)), loop_argument); @@ -2493,7 +2508,7 @@ public: { const bool is_last = !(count <= 20 && i < s_reg_max); - if (is_last || m_block->is_gpr_not_NaN_hint(i)) + if (is_last || m_reduced_loop_info->is_gpr_not_NaN_hint(i)) { count++; @@ -2542,9 +2557,24 @@ public: break; } } - } - //condition = m_ir->getInt1(0); + // TODO: Optimze so constant evalatuated cases will not be checked + const bool is_cond_need_runtime_verify = compare == ICmpInst::ICMP_NE && (!m_reduced_loop_info->cond_val_is_immediate || m_reduced_loop_info->cond_val_incr % 2 == 0); + + if (is_cond_need_runtime_verify) + { + // Verify that it is actually possible to finish the loop and it is not an infinite loop + + // First: create a mask of the bits that definitely do not change between iterations (0 results in umax which is accurate here) + const auto no_change_bits = m_ir->CreateAnd(m_ir->CreateNot(cond_val_incr), m_ir->CreateSub(cond_val_incr, m_ir->getIntN(type_bits, 1))); + + // Compare that when the mask applied to both the result and the original value is the same + const auto cond_verify = m_ir->CreateICmpEQ(m_ir->CreateAnd(loop_dictator_after_adjustment, no_change_bits), m_ir->CreateAnd(loop_argument, no_change_bits)); + + // Amend condition + condition = m_ir->CreateAnd(cond_verify, condition); + } + } m_ir->CreateCondBr(condition, optimization_block, block_optimization_next); }; @@ -2555,7 +2585,7 @@ public: { llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? get_type() : get_reg_type(i); - if (i < reduced_loop_info->loop_dicts.size() && (reduced_loop_info->loop_dicts.test(i) || reduced_loop_info->loop_writes.test(i))) + if (i < m_reduced_loop_info->loop_dicts.size() && (m_reduced_loop_info->loop_dicts.test(i) || m_reduced_loop_info->loop_writes.test(i))) { // Connect registers which are used and then modified by the block auto value = m_block->reg[i]; @@ -2567,7 +2597,7 @@ public: reduced_loop_init_regs[i] = value; } - else if (i < reduced_loop_info->loop_dicts.size() && reduced_loop_info->loop_args.test(i)) + else if (i < m_reduced_loop_info->loop_dicts.size() && m_reduced_loop_info->loop_args.test(i)) { // Load registers used as arguments of the loop if (!m_block->reg[i]) @@ -2580,8 +2610,8 @@ public: const auto prev_insert_block = m_ir->GetInsertBlock(); block_optimization_phi_parent = prev_insert_block; - - make_reduced_loop_condition(block_optimization_inner, false, 2); + + make_reduced_loop_condition(block_optimization_inner, false); m_ir->SetInsertPoint(block_optimization_inner); for (u32 i = 0; i < s_reg_max; i++) @@ -2611,7 +2641,7 @@ public: for (u32 iteration_emit = 0; is_reduced_loop; m_pos += 4) { - if (m_pos != baddr && m_block_info[m_pos / 4] && reduced_loop_info->loop_end < m_pos) + if (m_pos != baddr && m_block_info[m_pos / 4] && m_reduced_loop_info->loop_end < m_pos) { fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(1) too early at 0x%x", m_pos); } @@ -2667,8 +2697,8 @@ public: } } - ensure(!!m_block->reg[reduced_loop_info->cond_val_register_idx]); - make_reduced_loop_condition(block_optimization_inner, true, 2); + ensure(!!m_block->reg[m_reduced_loop_info->cond_val_register_idx]); + make_reduced_loop_condition(block_optimization_inner, true); m_ir->SetInsertPoint(block_optimization_next); m_block->block_wide_reg_store_elimination = false; @@ -2763,6 +2793,16 @@ public: } } + for (u32 i = 0; i < s_reg_max; i++) + { + if (m_reduced_loop_info && m_reduced_loop_info->loop_may_update.test(i)) + { + m_block->reg[i] = m_block->reg_save_and_restore[i]; + } + } + + m_reduced_loop_info = nullptr; + // Emit instructions for (m_pos = baddr; m_pos >= start && m_pos < end && !m_ir->GetInsertBlock()->getTerminator(); m_pos += 4) { @@ -6978,7 +7018,7 @@ public: value_t clamp_smax(value_t v, u32 gpr = s_reg_max) { - if (m_block && gpr < s_reg_max && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(gpr)) + if (m_reduced_loop_info && gpr < s_reg_max && m_reduced_loop_info->is_gpr_not_NaN_hint(gpr)) { return v; } @@ -7129,12 +7169,12 @@ public: } } - if (m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.ra)) + if (m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra)) { safe_finite_compare.set(0); } - if (m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.rb)) + if (m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb)) { safe_finite_compare.set(1); } @@ -7328,8 +7368,8 @@ public: } }); - const u32 a_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.ra) ? 1 : 0; - const u32 b_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.rb) ? 1 : 0; + const u32 a_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra) ? 1 : 0; + const u32 b_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb) ? 1 : 0; if (op.ra == op.rb && !m_interp_magn) { @@ -7765,8 +7805,8 @@ public: const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); static const auto MT = match(); - const u32 a_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.ra) ? 1 : 0; - const u32 b_notnan = m_block && m_block->block_wide_reg_store_elimination && m_block->is_gpr_not_NaN_hint(op.rb) ? 1 : 0; + const u32 a_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra) ? 1 : 0; + const u32 b_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb) ? 1 : 0; auto check_sqrt_pattern_for_float = [&](f32 float_value) -> bool { diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 54ddcb2f1e..6c629571d9 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -361,10 +361,12 @@ public: std::bitset loop_args; std::bitset loop_dicts; std::bitset loop_writes; + std::bitset loop_may_update; + std::bitset gpr_not_nans; struct origin_t { - std::bitset regs{}; + std::bitset regs{}; u32 modified = 0; spu_itype_t mod1_type = spu_itype::UNK; spu_itype_t mod2_type = spu_itype::UNK; @@ -680,6 +682,11 @@ public: return true; } + bool is_gpr_not_NaN_hint(u32 i) const noexcept + { + return ::at32(gpr_not_nans, i); + } + origin_t get_reg(u32 reg_val) noexcept { const auto org = find_reg(reg_val); diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 43c8a5d9f4..3cc40efed1 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -633,9 +633,17 @@ namespace rsx case 2: break; default: - rsx_log.error("Unknown render mode %d", mode); + { + struct logged_t + { + atomic_t logged_cause[256]{}; + }; + + const auto& is_error = ::at32(g_fxo->get().logged_cause, mode).try_inc(10); + (is_error ? rsx_log.error : rsx_log.trace)("Unknown render mode %d", mode); return; } + } const u32 offset = arg & 0xffffff; auto address_ptr = util::get_report_data_impl(ctx, offset);