diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt
index 0a494ed1e5..b6b8620b34 100644
--- a/3rdparty/CMakeLists.txt
+++ b/3rdparty/CMakeLists.txt
@@ -249,8 +249,8 @@ if(USE_FAUDIO)
message(STATUS "RPCS3: Using builtin FAudio")
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared library")
add_subdirectory(FAudio EXCLUDE_FROM_ALL)
- target_compile_definitions(FAudio INTERFACE -DHAVE_FAUDIO)
- set(FAUDIO_TARGET FAudio)
+ target_compile_definitions(FAudio-static INTERFACE -DHAVE_FAUDIO)
+ set(FAUDIO_TARGET FAudio-static)
endif()
endif()
endif()
diff --git a/3rdparty/FAudio b/3rdparty/FAudio
index b6e699d736..54bff04461 160000
--- a/3rdparty/FAudio
+++ b/3rdparty/FAudio
@@ -1 +1 @@
-Subproject commit b6e699d736e28ac208d2d40c23708adc2d11c2d9
+Subproject commit 54bff0446152a9646965e4892c0c00d45b94076f
diff --git a/3rdparty/libsdl-org/SDL b/3rdparty/libsdl-org/SDL
index 9519b9916c..79ec168f3c 160000
--- a/3rdparty/libsdl-org/SDL
+++ b/3rdparty/libsdl-org/SDL
@@ -1 +1 @@
-Subproject commit 9519b9916cd29a14587af0507292f2bd31dd5752
+Subproject commit 79ec168f3c1e2fe27335cb8886439f7ef676fb49
diff --git a/3rdparty/zstd/zstd.vcxproj b/3rdparty/zstd/zstd.vcxproj
index 0182727fb0..a5dea941b5 100644
--- a/3rdparty/zstd/zstd.vcxproj
+++ b/3rdparty/zstd/zstd.vcxproj
@@ -115,7 +115,7 @@
- DynamicLibrary
+ StaticLibrary
true
MultiByte
@@ -153,10 +153,13 @@
Level4
Disabled
+ MultiThreadedDebug
+ /DZSTD_MULTITHREAD %(AdditionalOptions)
ZSTD_MULTITHREAD=1;ZSTD_LEGACY_SUPPORT=5;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
true
false
$(InstructionSet)
+ ProgramDatabase
Console
diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h
index db2cfad032..56612b8279 100644
--- a/rpcs3/Emu/CPU/CPUTranslator.h
+++ b/rpcs3/Emu/CPU/CPUTranslator.h
@@ -3729,6 +3729,22 @@ public:
return result;
}
+ template // Builds a call to LLVM's x86 AVX-512 masked fixupimm (packed single, 128-bit) intrinsic. NOTE(review): the template parameter list is not visible in this patch text
+ value_t vfixupimmps(T1 a, T2 b, T3 c, u8 d, u8 e) // a, b, c: operand expressions evaluated below; d, e: compile-time constants passed as the intrinsic's 4th and 5th arguments
+ {
+ value_t result;
+
+ const auto data0 = a.eval(m_ir); // evaluate each operand expression against m_ir to obtain the values passed to the call
+ const auto data1 = b.eval(m_ir);
+ const auto data2 = c.eval(m_ir);
+ const auto immediate = (llvm_const_int{d}); // wrap d as a constant expression so it can be evaluated like the other operands
+ const auto imm32 = immediate.eval(m_ir); // NOTE(review): presumably the intrinsic's i32 immediate, judging by the name - confirm against LLVM's intrinsic signature
+ const auto immediate2 = (llvm_const_int{e}); // same treatment for e
+ const auto imm8 = immediate2.eval(m_ir); // NOTE(review): presumably the 8-bit write mask of the "mask" intrinsic variant - confirm
+ result.value = m_ir->CreateCall(get_intrinsic(llvm::Intrinsic::x86_avx512_mask_fixupimm_ps_128), {data0, data1, data2, imm32, imm8}); // argument order: a, b, c, d, e
+ return result; // result.value holds the emitted call instruction
+ }
+
llvm::Value* load_const(llvm::GlobalVariable* g, llvm::Value* i, llvm::Type* type = nullptr)
{
return m_ir->CreateLoad(type ? type : g->getValueType(), m_ir->CreateGEP(g->getValueType(), g, {m_ir->getInt64(0), m_ir->CreateZExtOrTrunc(i, get_type())}));
diff --git a/rpcs3/Emu/Cell/Modules/cellAudio.cpp b/rpcs3/Emu/Cell/Modules/cellAudio.cpp
index db5dddeae4..8137e3b58f 100644
--- a/rpcs3/Emu/Cell/Modules/cellAudio.cpp
+++ b/rpcs3/Emu/Cell/Modules/cellAudio.cpp
@@ -813,7 +813,7 @@ void cell_audio_thread::operator()()
if (time_left > cfg.period_comparison_margin)
{
- thread_ctrl::wait_for(get_thread_wait_delay(time_left));
+ thread_ctrl::wait_for(time_left - cfg.period_comparison_margin);
continue;
}
}
@@ -886,7 +886,7 @@ void cell_audio_thread::operator()()
const s64 time_left = m_dynamic_period - time_since_last_period;
if (time_left > cfg.period_comparison_margin)
{
- thread_ctrl::wait_for(get_thread_wait_delay(time_left));
+ thread_ctrl::wait_for(time_left - cfg.period_comparison_margin);
continue;
}
diff --git a/rpcs3/Emu/Cell/Modules/cellAudio.h b/rpcs3/Emu/Cell/Modules/cellAudio.h
index 1e12a68c9e..cab56aeef7 100644
--- a/rpcs3/Emu/Cell/Modules/cellAudio.h
+++ b/rpcs3/Emu/Cell/Modules/cellAudio.h
@@ -251,7 +251,12 @@ struct cell_audio_config
static constexpr f32 period_average_alpha = 0.02f; // alpha factor for the m_average_period rolling average
- static constexpr s64 period_comparison_margin = 250; // when comparing the current period time with the desired period, if it is below this number of usecs we do not wait any longer
+ // When comparing the current period time with the desired period, if it is below this number of usecs we do not wait any longer (quantum-dependent)
+#ifdef _WIN32
+ static constexpr s64 period_comparison_margin = 250;
+#else
+ static constexpr s64 period_comparison_margin = 5;
+#endif
u64 fully_untouched_timeout = 0; // timeout if the game has not touched any audio buffer yet
u64 partially_untouched_timeout = 0; // timeout if the game has not touched all audio buffers yet
@@ -373,11 +378,6 @@ private:
void mix(float* out_buffer, s32 offset = 0);
void finish_port_volume_stepping();
- constexpr static u64 get_thread_wait_delay(u64 time_left)
- {
- return (time_left > 350) ? time_left - 250 : 100;
- }
-
void update_config(bool backend_changed);
void reset_counters();
diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
index 28a4acea59..5c385c2238 100644
--- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp
@@ -124,7 +124,6 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
// Global LUTs
llvm::GlobalVariable* m_spu_frest_fraction_lut{};
- llvm::GlobalVariable* m_spu_frest_exponent_lut{};
llvm::GlobalVariable* m_spu_frsqest_fraction_lut{};
llvm::GlobalVariable* m_spu_frsqest_exponent_lut{};
@@ -1507,7 +1506,6 @@ public:
{
// LUTs for some instructions
m_spu_frest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 32), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_fraction_lut));
- m_spu_frest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frest_exponent_lut));
m_spu_frsqest_fraction_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 64), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_fraction_lut));
m_spu_frsqest_exponent_lut = new llvm::GlobalVariable(*m_module, llvm::ArrayType::get(GetType(), 256), true, llvm::GlobalValue::PrivateLinkage, llvm::ConstantDataArray::get(m_context, spu_frsqest_exponent_lut));
}
@@ -6040,23 +6038,22 @@ public:
const auto a = bitcast(value(ci->getOperand(0)));
const auto a_fraction = (a >> splat(18)) & splat(0x1F);
- const auto a_exponent = (a >> splat(23)) & splat(0xFF);
+ const auto a_exponent = (a & splat(0x7F800000u));
+ const auto r_exponent = sub_sat(build(0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80), bitcast(a_exponent));
+ const auto fix_exponent = select((a_exponent > 0), bitcast(r_exponent), splat(0x7F800000u));
const auto a_sign = (a & splat(0x80000000));
value_t final_result = eval(splat(0));
for (u32 i = 0; i < 4; i++)
{
const auto eval_fraction = eval(extract(a_fraction, i));
- const auto eval_exponent = eval(extract(a_exponent, i));
- const auto eval_sign = eval(extract(a_sign, i));
value_t r_fraction = load_const(m_spu_frest_fraction_lut, eval_fraction);
- value_t r_exponent = load_const(m_spu_frest_exponent_lut, eval_exponent);
- final_result = eval(insert(final_result, i, eval(r_fraction | eval_sign | r_exponent)));
+ final_result = eval(insert(final_result, i, r_fraction));
}
- return bitcast(final_result);
+ return bitcast(bitcast(final_result | bitcast(fix_exponent) | a_sign));
});
set_vr(op.rt, frest(get_vr(op.ra)));
@@ -6704,24 +6701,39 @@ public:
}
});
- register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci)
+ if (m_use_avx512) // AVX-512 path: the special-value handling is delegated to vfixupimmps
{
- const auto div = value(ci->getOperand(0));
- const auto the_one = value(ci->getOperand(1));
+ register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci)
+ {
+ const auto div = value(ci->getOperand(0)); // first call operand: the divisor
+ const auto the_one = value(ci->getOperand(1)); // second call operand: the dividend
- const auto div_result = the_one / div;
+ const auto div_result = the_one / div;
- // from ps3 hardware testing: Inf => NaN and NaN => Zero
- const auto result_and = bitcast(div_result) & 0x7fffffffu;
- const auto result_cmp_inf = sext(result_and == splat(0x7F800000u));
- const auto result_cmp_nan = sext(result_and <= splat(0x7F800000u));
+ return vfixupimmps(bitcast(splat(0xFFFFFFFFu)), div_result, splat(0x11001188u), 0, 0xff); // NOTE(review): presumably the 0x11001188 table reproduces the Inf => NaN / NaN => Zero mapping of the fallback path below, with the all-ones first operand supplying the NaN pattern - verify against the VFIXUPIMMPS token/response encoding
+ });
+ }
+ else // fallback path: the fix-up is built from integer compares and bit masks
+ {
+ register_intrinsic("spu_re_acc", [&](llvm::CallInst* ci)
+ {
+ const auto div = value(ci->getOperand(0)); // first call operand: the divisor
+ const auto the_one = value(ci->getOperand(1)); // second call operand: the dividend
- const auto and_mask = bitcast(result_cmp_nan) & splat(0xFFFFFFFFu);
- const auto or_mask = bitcast(result_cmp_inf) & splat(0xFFFFFFFu);
+ const auto div_result = the_one / div;
- return bitcast((bitcast(div_result) & and_mask) | or_mask);
- });
+ // from ps3 hardware testing: Inf => NaN and NaN => Zero
+ const auto result_and = bitcast(div_result) & 0x7fffffffu; // quotient bits with the sign bit cleared
+ const auto result_cmp_inf = sext(result_and == splat(0x7F800000u)); // sign-extended compare: lanes whose magnitude bits equal 0x7F800000
+ const auto result_cmp_nan = sext(result_and <= splat(0x7F800000u)); // sign-extended compare: lanes whose magnitude bits are <= 0x7F800000
+ const auto and_mask = bitcast(result_cmp_nan) & splat(0xFFFFFFFFu); // lanes that failed the <= test are ANDed with zero in the return expression
+ const auto or_mask = bitcast(result_cmp_inf) & splat(0xFFFFFFFu); // NOTE(review): this literal has seven F digits (0x0FFFFFFF) - confirm the width is intentional
+ return bitcast((bitcast(div_result) & and_mask) | or_mask); // apply the AND mask first, then OR in the extra bits for lanes that compared equal
+ });
+ }
+
+
const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc);
static const auto MT = match();
@@ -7004,20 +7016,23 @@ public:
{
const auto a = bitcast(value(ci->getOperand(0)));
const auto a_fraction = (a >> splat(18)) & splat(0x1F);
- const auto a_exponent = (a >> splat(23)) & splat(0xFF);
+ const auto a_exponent = (a & splat(0x7F800000u));
+ const auto r_exponent = sub_sat(build(0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80, 0000, 0x7E80), bitcast(a_exponent));
+ const auto fix_exponent = select((a_exponent > 0), bitcast(r_exponent), splat(0x7F800000u));
const auto a_sign = (a & splat(0x80000000));
value_t b = eval(splat(0));
for (u32 i = 0; i < 4; i++)
{
const auto eval_fraction = eval(extract(a_fraction, i));
- const auto eval_exponent = eval(extract(a_exponent, i));
- const auto eval_sign = eval(extract(a_sign, i));
+
value_t r_fraction = load_const(m_spu_frest_fraction_lut, eval_fraction);
- value_t r_exponent = load_const(m_spu_frest_exponent_lut, eval_exponent);
- b = eval(insert(b, i, eval(r_fraction | eval_sign | r_exponent)));
+
+ b = eval(insert(b, i, r_fraction));
}
+ b = eval(b | fix_exponent | a_sign);
+
const auto base = (b & 0x007ffc00u) << 9; // Base fraction
const auto ymul = (b & 0x3ff) * (a & 0x7ffff); // Step fraction * Y fraction (fixed point at 2^-32)
const auto comparison = (ymul > base); // Should exponent be adjusted?