From 4832267307382f597de711d4e79c0b355d118c09 Mon Sep 17 00:00:00 2001 From: RipleyTom Date: Fri, 4 Oct 2024 11:44:24 +0200 Subject: [PATCH 1/6] Fixes audio buffering on non-windows platforms --- rpcs3/Emu/Cell/Modules/cellAudio.cpp | 4 ++-- rpcs3/Emu/Cell/Modules/cellAudio.h | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/Cell/Modules/cellAudio.cpp b/rpcs3/Emu/Cell/Modules/cellAudio.cpp index db5dddeae4..8137e3b58f 100644 --- a/rpcs3/Emu/Cell/Modules/cellAudio.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAudio.cpp @@ -813,7 +813,7 @@ void cell_audio_thread::operator()() if (time_left > cfg.period_comparison_margin) { - thread_ctrl::wait_for(get_thread_wait_delay(time_left)); + thread_ctrl::wait_for(time_left - cfg.period_comparison_margin); continue; } } @@ -886,7 +886,7 @@ void cell_audio_thread::operator()() const s64 time_left = m_dynamic_period - time_since_last_period; if (time_left > cfg.period_comparison_margin) { - thread_ctrl::wait_for(get_thread_wait_delay(time_left)); + thread_ctrl::wait_for(time_left - cfg.period_comparison_margin); continue; } diff --git a/rpcs3/Emu/Cell/Modules/cellAudio.h b/rpcs3/Emu/Cell/Modules/cellAudio.h index 1e12a68c9e..cab56aeef7 100644 --- a/rpcs3/Emu/Cell/Modules/cellAudio.h +++ b/rpcs3/Emu/Cell/Modules/cellAudio.h @@ -251,7 +251,12 @@ struct cell_audio_config static constexpr f32 period_average_alpha = 0.02f; // alpha factor for the m_average_period rolling average - static constexpr s64 period_comparison_margin = 250; // when comparing the current period time with the desired period, if it is below this number of usecs we do not wait any longer + // when comparing the current period time with the desired period, if it is below this number of usecs we do not wait any longer(quantum dependent) +#ifdef _WIN32 + static constexpr s64 period_comparison_margin = 250; +#else + static constexpr s64 period_comparison_margin = 5; +#endif u64 fully_untouched_timeout = 0; // timeout if the game has not touched any audio buffer yet u64 partially_untouched_timeout = 0; // timeout if the game has not touched all audio buffers yet @@ -373,11 +378,6 @@ private: void mix(float* out_buffer, s32 offset = 0); void finish_port_volume_stepping(); - constexpr static u64 get_thread_wait_delay(u64 time_left) - { - return (time_left > 350) ? time_left - 250 : 100; - } - void update_config(bool backend_changed); void reset_counters(); From 8ca60df1ab108a3f27f10be671ce97d59058af99 Mon Sep 17 00:00:00 2001 From: Malcolm Jestadt Date: Thu, 3 Oct 2024 17:21:58 -0400 Subject: [PATCH 2/6] SPU LLVM: Add optimized path for spu_re_acc special cases - Uses vfixupimmps (AVX-512), 5 instructions down to 1 --- rpcs3/Emu/CPU/CPUTranslator.h | 16 ++++++++++++++++ rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 21 ++++++++++++++++++--- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index db2cfad032..0224675dc5 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -3729,6 +3729,22 @@ public: return result; } + template + value_t vfixupimmps(T1 a, T2 b, T3 c, u8 d, u8 e) + { + value_t result; + + const auto data0 = a.eval(m_ir); + const auto data1 = b.eval(m_ir); + const auto data2 = c.eval(m_ir); + const auto immediate = (llvm_const_int{d}); + const auto imm32 = immediate.eval(m_ir); + const auto immediate2 = (llvm_const_int{e}); + const auto imm8 = immediate2.eval(m_ir); + result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_mask_fixupimm_ps_128), {data0, data1, data2, imm32, imm8});\ + return result; + } + llvm::Value* load_const(llvm::GlobalVariable* g, llvm::Value* i, llvm::Type* type = nullptr) { return m_ir->CreateLoad(type ? type : g->getValueType(), m_ir->CreateGEP(g->getValueType(), g, {m_ir->getInt64(0), m_ir->CreateZExtOrTrunc(i, get_type())})); diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 28a4acea59..38816ed917 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -6704,8 +6704,22 @@ public: } }); - register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci) + if (m_use_avx512) { + register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci) + { + const auto div = value(ci->getOperand(0)); + const auto the_one = value(ci->getOperand(1)); + + const auto div_result = the_one / div; + + return vfixupimmps(div_result, div_result, splat(0x00330088u), 0, 0xff); + }); + } + else + { + register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci) + { const auto div = value(ci->getOperand(0)); const auto the_one = value(ci->getOperand(1)); @@ -6718,10 +6732,11 @@ public: const auto and_mask = bitcast(result_cmp_nan) & splat(0xFFFFFFFFu); const auto or_mask = bitcast(result_cmp_inf) & splat(0xFFFFFFFu); - return bitcast((bitcast(div_result) & and_mask) | or_mask); - }); + }); + } + const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); static const auto MT = match(); From 967adaf9dbe5572f742002393a0e46fd15e4e6e9 Mon Sep 17 00:00:00 2001 From: Malcolm Jestadt Date: Fri, 4 Oct 2024 19:26:02 -0400 Subject: [PATCH 3/6] SPU LLVM: Compute frest exponent at runtime rather than using the lookup table - Provides a small speedup and smaller codesize --- rpcs3/Emu/CPU/CPUTranslator.h | 2 +- rpcs3/Emu/Cell/SPULLVMRecompiler.cpp | 48 ++++++++++++++-------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index 0224675dc5..56612b8279 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -3741,7 +3741,7 @@ public: const auto imm32 = immediate.eval(m_ir); const auto immediate2 = (llvm_const_int{e}); const auto imm8 = immediate2.eval(m_ir); - result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_mask_fixupimm_ps_128), {data0, data1, data2, imm32, imm8});\ + result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_mask_fixupimm_ps_128), {data0, data1, data2, imm32, imm8}); return result; } diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 38816ed917..5c385c2238 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -124,7 +124,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Global LUTs llvm::GlobalVariable* m_spu_frest_fraction_lut{}; - llvm::GlobalVariable* m_spu_frest_exponent_lut{}; llvm::GlobalVariable* m_spu_frsqest_fraction_lut{}; llvm::GlobalVariable* m_spu_frsqest_exponent_lut{}; @@ -1507,7 +1506,6 @@ public: { // LUTs for some instructions m_spu_frest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 32), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_fraction_lut)); - m_spu_frest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_exponent_lut)); m_spu_frsqest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 64), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_fraction_lut)); m_spu_frsqest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_exponent_lut)); } @@ -6040,23 +6038,22 @@ public: const auto a = bitcast(value(ci->getOperand(0))); const auto a_fraction = (a >> splat(18)) & splat(0x1F); - const auto a_exponent = (a >> splat(23)) & splat(0xFF); + const auto a_exponent = (a & splat(0x7F800000u)); + const auto r_exponent = sub_sat(build(0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80), bitcast(a_exponent)); + const auto fix_exponent = select((a_exponent > 0), bitcast(r_exponent), splat(0x7F800000u)); const auto a_sign = (a & splat(0x80000000)); value_t final_result = eval(splat(0)); for (u32 i = 0; i < 4; i++) { const auto eval_fraction = eval(extract(a_fraction, i)); - const auto eval_exponent = eval(extract(a_exponent, i)); - const auto eval_sign = eval(extract(a_sign, i)); value_t r_fraction = load_const(m_spu_frest_fraction_lut, eval_fraction); - value_t r_exponent = load_const(m_spu_frest_exponent_lut, eval_exponent); - final_result = eval(insert(final_result, i, eval(r_fraction | eval_sign | r_exponent))); + final_result = eval(insert(final_result, i, r_fraction)); } - return bitcast(final_result); + return bitcast(bitcast(final_result | bitcast(fix_exponent) | a_sign)); }); set_vr(op.rt, frest(get_vr(op.ra))); @@ -6713,26 +6710,26 @@ public: const auto div_result = the_one / div; - return vfixupimmps(div_result, div_result, splat(0x00330088u), 0, 0xff); + return vfixupimmps(bitcast(splat(0xFFFFFFFFu)), div_result, splat(0x11001188u), 0, 0xff); }); } else { register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci) { - const auto div = value(ci->getOperand(0)); - const auto the_one = value(ci->getOperand(1)); + const auto div = value(ci->getOperand(0)); + const auto the_one = value(ci->getOperand(1)); - const auto div_result = the_one / div; + const auto div_result = the_one / div; - // from ps3 hardware testing: Inf => NaN and NaN => Zero - const auto result_and = bitcast(div_result) & 0x7fffffffu; - const auto result_cmp_inf = sext(result_and == splat(0x7F800000u)); - const auto result_cmp_nan = sext(result_and <= splat(0x7F800000u)); + // from ps3 hardware testing: Inf => NaN and NaN => Zero + const auto result_and = bitcast(div_result) & 0x7fffffffu; + const auto result_cmp_inf = sext(result_and == splat(0x7F800000u)); + const auto result_cmp_nan = sext(result_and <= splat(0x7F800000u)); - const auto and_mask = bitcast(result_cmp_nan) & splat(0xFFFFFFFFu); - const auto or_mask = bitcast(result_cmp_inf) & splat(0xFFFFFFFu); - return bitcast((bitcast(div_result) & and_mask) | or_mask); + const auto and_mask = bitcast(result_cmp_nan) & splat(0xFFFFFFFFu); + const auto or_mask = bitcast(result_cmp_inf) & splat(0xFFFFFFFu); + return bitcast((bitcast(div_result) & and_mask) | or_mask); }); } @@ -7019,20 +7016,23 @@ public: { const auto a = bitcast(value(ci->getOperand(0))); const auto a_fraction = (a >> splat(18)) & splat(0x1F); - const auto a_exponent = (a >> splat(23)) & splat(0xFF); + const auto a_exponent = (a & splat(0x7F800000u)); + const auto r_exponent = sub_sat(build(0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80), bitcast(a_exponent)); + const auto fix_exponent = select((a_exponent > 0), bitcast(r_exponent), splat(0x7F800000u)); const auto a_sign = (a & splat(0x80000000)); value_t b = eval(splat(0)); for (u32 i = 0; i < 4; i++) { const auto eval_fraction = eval(extract(a_fraction, i)); - const auto eval_exponent = eval(extract(a_exponent, i)); - const auto eval_sign = eval(extract(a_sign, i)); + value_t r_fraction = load_const(m_spu_frest_fraction_lut, eval_fraction); - value_t r_exponent = load_const(m_spu_frest_exponent_lut, eval_exponent); - b = eval(insert(b, i, eval(r_fraction | eval_sign | r_exponent))); + + b = eval(insert(b, i, r_fraction)); } + b = eval(b | fix_exponent | a_sign); + const auto base = (b & 0x007ffc00u) << 9; // Base fraction const auto ymul = (b & 0x3ff) * (a & 0x7ffff); // Step fraction * Y fraction (fixed point at 2^-32) const auto comparison = (ymul > base); // Should exponent be adjusted? From 2d1da95bc1165b4808ff6ea0ea174828fe05d14f Mon Sep 17 00:00:00 2001 From: Megamouse Date: Thu, 3 Oct 2024 16:53:16 +0200 Subject: [PATCH 4/6] VS: Fix zstd debug lib --- 3rdparty/zstd/zstd.vcxproj | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/3rdparty/zstd/zstd.vcxproj b/3rdparty/zstd/zstd.vcxproj index 0182727fb0..a5dea941b5 100644 --- a/3rdparty/zstd/zstd.vcxproj +++ b/3rdparty/zstd/zstd.vcxproj @@ -115,7 +115,7 @@ - DynamicLibrary + StaticLibrary true MultiByte @@ -153,10 +153,13 @@ Level4 Disabled + MultiThreadedDebug + /DZSTD_MULTITHREAD %(AdditionalOptions) ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=5;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true false $(InstructionSet) + ProgramDatabase Console From 511f8a91f15adc84deb283a93590621b2cb06807 Mon Sep 17 00:00:00 2001 From: Megamouse Date: Thu, 3 Oct 2024 17:14:09 +0200 Subject: [PATCH 5/6] Update SDL to 2.30.8 --- 3rdparty/libsdl-org/SDL | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/libsdl-org/SDL b/3rdparty/libsdl-org/SDL index 9519b9916c..79ec168f3c 160000 --- a/3rdparty/libsdl-org/SDL +++ b/3rdparty/libsdl-org/SDL @@ -1 +1 @@ -Subproject commit 9519b9916cd29a14587af0507292f2bd31dd5752 +Subproject commit 79ec168f3c1e2fe27335cb8886439f7ef676fb49 From a85cdb5947f533bc030e28447f07d72ba5502147 Mon Sep 17 00:00:00 2001 From: Megamouse Date: Thu, 3 Oct 2024 17:16:35 +0200 Subject: [PATCH 6/6] Update FAudio to 24.10 --- 3rdparty/CMakeLists.txt | 4 ++-- 3rdparty/FAudio | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt index 0a494ed1e5..b6b8620b34 100644 --- a/3rdparty/CMakeLists.txt +++ b/3rdparty/CMakeLists.txt @@ -249,8 +249,8 @@ if(USE_FAUDIO) message(STATUS "RPCS3: Using builtin FAudio") set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared library") add_subdirectory(FAudio EXCLUDE_FROM_ALL) - target_compile_definitions(FAudio INTERFACE -DHAVE_FAUDIO) - set(FAUDIO_TARGET FAudio) + target_compile_definitions(FAudio-static INTERFACE -DHAVE_FAUDIO) + set(FAUDIO_TARGET FAudio-static) endif() endif() endif() diff --git a/3rdparty/FAudio b/3rdparty/FAudio index b6e699d736..54bff04461 160000 --- a/3rdparty/FAudio +++ b/3rdparty/FAudio @@ -1 +1 @@ -Subproject commit b6e699d736e28ac208d2d40c23708adc2d11c2d9 +Subproject commit 54bff0446152a9646965e4892c0c00d45b94076f