From 42f2697b500f251b2268c7e46707276e586a033f Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:40:47 +0100 Subject: [PATCH 1/3] Fix deadlock from missed unlock call after #3946 (#4013) * Fix deadlock from missed unlock call after #3946 * copyright 2026 * Add the same fix to PoolCommit --- src/core/memory.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 561e72617..9d26142ce 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-FileCopyrightText: Copyright 2025-2026 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include "common/alignment.h" @@ -73,7 +73,7 @@ void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1 } u64 MemoryManager::ClampRangeSize(VAddr virtual_addr, u64 size) { - static constexpr u64 MinSizeToClamp = 3_GB; + static constexpr u64 MinSizeToClamp = 1_GB; // Dont bother with clamping if the size is small so we dont pay a map lookup on every buffer. if (size < MinSizeToClamp) { return size; @@ -349,7 +349,8 @@ s32 MemoryManager::Free(PAddr phys_addr, u64 size, bool is_checked) { } s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32 mtype) { - std::scoped_lock lk{mutex, unmap_mutex}; + std::scoped_lock lk{unmap_mutex}; + std::unique_lock lk2{mutex}; ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}", virtual_addr); @@ -434,6 +435,7 @@ s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32 // Merge this VMA with similar nearby areas MergeAdjacent(vma_map, new_vma_handle); + lk2.unlock(); if (IsValidGpuMapping(mapped_addr, size)) { rasterizer->MapMemory(mapped_addr, size); } @@ -554,7 +556,7 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo } // Acquire writer lock. - std::scoped_lock lk2{mutex}; + std::unique_lock lk2{mutex}; // Create VMA representing this mapping. auto new_vma_handle = CreateArea(virtual_addr, size, prot, flags, type, name, alignment); @@ -650,6 +652,8 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo // TRACK_ALLOC(mapped_addr, size, "VMEM"); } + lk2.unlock(); + // If this is not a reservation, then map to GPU and address space if (IsValidGpuMapping(mapped_addr, size)) { rasterizer->MapMemory(mapped_addr, size); From ffae535a5ca879f7f543335adbd9035340380702 Mon Sep 17 00:00:00 2001 From: Niram7777 Date: Mon, 9 Feb 2026 18:19:39 +0000 Subject: [PATCH 2/3] [LOG] group same lines with counter (#4010) * [LOG] group same lines with counter * Log in single line counter * Protect log singleton from ps4 threads * Log always compact --- src/common/logging/backend.cpp | 50 ++++++++++++++++++++++++++++------ src/common/logging/log_entry.h | 1 + 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index d7c816da3..168350b96 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -208,26 +209,41 @@ public: } } + std::unique_lock entry_loc(_mutex); + + if (_last_entry.message == message) { + ++_last_entry.counter; + return; + } + + if (_last_entry.counter >= 2) { + _last_entry.message += " x" + std::to_string(_last_entry.counter); + } + + if (_last_entry.counter >= 1) { + if (Config::getLogType() == "async") { + message_queue.EmplaceWait(_last_entry); + } else { + ForEachBackend([this](auto& backend) { backend.Write(this->_last_entry); }); + std::fflush(stdout); + } + } + using std::chrono::duration_cast; using std::chrono::microseconds; using std::chrono::steady_clock; - const Entry entry = { + this->_last_entry = { .timestamp = duration_cast(steady_clock::now() - time_origin), .log_class = log_class, .log_level = log_level, .filename = filename, .line_num = line_num, .function = function, - .message = std::move(message), + .message = message, .thread = Common::GetCurrentThreadName(), + .counter = 1, }; - if (Config::getLogType() == "async") { - message_queue.EmplaceWait(entry); - } else { - ForEachBackend([&entry](auto& backend) { backend.Write(entry); }); - std::fflush(stdout); - } } private: @@ -259,6 +275,22 @@ private: } void StopBackendThread() { + // log last message + if (_last_entry.counter >= 2) { + _last_entry.message += " x" + std::to_string(_last_entry.counter); + } + + if (_last_entry.counter >= 1) { + if (Config::getLogType() == "async") { + message_queue.EmplaceWait(_last_entry); + } else { + ForEachBackend([this](auto& backend) { backend.Write(this->_last_entry); }); + std::fflush(stdout); + } + } + + this->_last_entry = {}; + backend_thread.request_stop(); if (backend_thread.joinable()) { backend_thread.join(); @@ -292,6 +324,8 @@ private: MPSCQueue message_queue{}; std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()}; std::jthread backend_thread; + Entry _last_entry; + std::mutex _mutex; }; } // namespace diff --git a/src/common/logging/log_entry.h b/src/common/logging/log_entry.h index 6c529f878..7b52ad7e1 100644 --- a/src/common/logging/log_entry.h +++ b/src/common/logging/log_entry.h @@ -22,6 +22,7 @@ struct Entry { std::string function; std::string message; std::string thread; + u32 counter = 0; }; } // namespace Common::Log From a706b325f42e362eb1f41a527d9967a3b38ef323 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Mon, 9 Feb 2026 22:59:39 +0200 Subject: [PATCH 3/3] optimize sdl3 audio out (#4015) --- src/core/libraries/audio/sdl_audio_out.cpp | 585 +++++++++++++-------- 1 file changed, 365 insertions(+), 220 deletions(-) diff --git a/src/core/libraries/audio/sdl_audio_out.cpp b/src/core/libraries/audio/sdl_audio_out.cpp index 572525c85..ce2598759 100644 --- a/src/core/libraries/audio/sdl_audio_out.cpp +++ b/src/core/libraries/audio/sdl_audio_out.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -14,15 +15,32 @@ #include "core/libraries/audio/audioout_backend.h" #include "core/libraries/kernel/threads.h" +// SIMD support detection +#if defined(__x86_64__) || defined(_M_X64) +#include +#define HAS_SSE2 +#endif + #define SDL_INVALID_AUDIODEVICEID 0 namespace Libraries::AudioOut { // Volume constants constexpr float VOLUME_0DB = 32768.0f; // 1 << 15 +constexpr float INV_VOLUME_0DB = 1.0f / VOLUME_0DB; +constexpr float VOLUME_EPSILON = 0.001f; +// Timing constants +constexpr u64 VOLUME_CHECK_INTERVAL_US = 50000; // Check every 50ms +constexpr u64 MIN_SLEEP_THRESHOLD_US = 10; +constexpr u64 TIMING_RESYNC_THRESHOLD_US = 100000; // Resync if >100ms behind + +// Queue management +constexpr u32 QUEUE_MULTIPLIER = 4; +// Memory alignment for SIMD +constexpr size_t AUDIO_BUFFER_ALIGNMENT = 32; // Channel positions -enum ChannelPos { +enum ChannelPos : u8 { FL = 0, FR = 1, FC = 2, @@ -45,187 +63,210 @@ public: num_channels(port.format_info.num_channels), is_float(port.format_info.is_float), is_std(port.format_info.is_std), channel_layout(port.format_info.channel_layout) { - // Calculate timing - period_us = (1000000ULL * buffer_frames + sample_rate / 2) / sample_rate; - last_output_time = 0; - next_output_time = 0; - - // Allocate internal buffer - internal_buffer_size = buffer_frames * sizeof(float) * num_channels; - internal_buffer = std::malloc(internal_buffer_size); - if (!internal_buffer) { - LOG_ERROR(Lib_AudioOut, "Failed to allocate internal audio buffer"); - return; + if (!Initialize(port.type)) { + LOG_ERROR(Lib_AudioOut, "Failed to initialize SDL audio backend"); } - - // Initialize current gain - current_gain.store(Config::getVolumeSlider() / 100.0f); - - // Select converter function - SelectConverter(); - - // Open SDL device - if (!OpenDevice(port.type)) { - std::free(internal_buffer); - internal_buffer = nullptr; - return; - } - - CalculateQueueThreshold(); } ~SDLPortBackend() override { - if (stream) { - SDL_DestroyAudioStream(stream); - } - if (internal_buffer) { - std::free(internal_buffer); - } + Cleanup(); } void Output(void* ptr) override { - if (!stream || !internal_buffer) { + if (!stream || !internal_buffer || !convert) [[unlikely]] { return; } - // Check for volume changes and update if needed - UpdateVolumeIfChanged(); - - // Get current time in microseconds - u64 current_time = Kernel::sceKernelGetProcessTime(); - - if (ptr != nullptr) { - // Simple format conversion (no volume application) - convert(ptr, internal_buffer, buffer_frames, nullptr); - - if (next_output_time == 0) { - next_output_time = current_time + period_us; - } else if (current_time > next_output_time) { - next_output_time = current_time + period_us; - } else { - u64 wait_until = next_output_time; - next_output_time += period_us; - - if (current_time < wait_until) { - u64 sleep_us = wait_until - current_time; - if (sleep_us > 10) { - sleep_us -= 10; - std::this_thread::sleep_for(std::chrono::microseconds(sleep_us)); - } - } - } - - last_output_time = current_time; - - // Check queue and clear if backed up - if (const auto queued = SDL_GetAudioStreamQueued(stream); queued >= queue_threshold) { - LOG_DEBUG(Lib_AudioOut, "Clearing backed up audio queue ({} >= {})", queued, - queue_threshold); - SDL_ClearAudioStream(stream); - CalculateQueueThreshold(); - } - - if (!SDL_PutAudioStreamData(stream, internal_buffer, internal_buffer_size)) { - LOG_ERROR(Lib_AudioOut, "Failed to output to SDL audio stream: {}", SDL_GetError()); - } + if (ptr == nullptr) [[unlikely]] { + return; } + + UpdateVolumeIfChanged(); + const u64 current_time = Kernel::sceKernelGetProcessTime(); + convert(ptr, internal_buffer, buffer_frames, nullptr); + HandleTiming(current_time); + + if ((output_count++ & 0xF) == 0) { // Check every 16 outputs + ManageAudioQueue(); + } + + if (!SDL_PutAudioStreamData(stream, internal_buffer, internal_buffer_size)) [[unlikely]] { + LOG_ERROR(Lib_AudioOut, "Failed to output to SDL audio stream: {}", SDL_GetError()); + } + + last_output_time.store(current_time, std::memory_order_release); } void SetVolume(const std::array& ch_volumes) override { - if (!stream) { + if (!stream) [[unlikely]] { return; } + float max_channel_gain = 0.0f; - for (int i = 0; i < num_channels && i < 8; i++) { - float channel_gain = static_cast(ch_volumes[i]) / VOLUME_0DB; + const u32 channels_to_check = std::min(num_channels, 8u); + + for (u32 i = 0; i < channels_to_check; i++) { + const float channel_gain = static_cast(ch_volumes[i]) * INV_VOLUME_0DB; max_channel_gain = std::max(max_channel_gain, channel_gain); } - // Combine with global volume slider - float total_gain = max_channel_gain * (Config::getVolumeSlider() / 100.0f); + const float slider_gain = Config::getVolumeSlider() * 0.01f; // Faster than /100.0f + const float total_gain = max_channel_gain * slider_gain; - std::lock_guard lock(volume_mutex); + const float current = current_gain.load(std::memory_order_acquire); + if (std::abs(total_gain - current) < VOLUME_EPSILON) { + return; + } + + // Apply volume change if (SDL_SetAudioStreamGain(stream, total_gain)) { - current_gain.store(total_gain); + current_gain.store(total_gain, std::memory_order_release); LOG_DEBUG(Lib_AudioOut, "Set combined audio gain to {:.3f} (channel: {:.3f}, slider: {:.3f})", - total_gain, max_channel_gain, Config::getVolumeSlider() / 100.0f); + total_gain, max_channel_gain, slider_gain); } else { LOG_ERROR(Lib_AudioOut, "Failed to set audio stream gain: {}", SDL_GetError()); } } u64 GetLastOutputTime() const { - return last_output_time; + return last_output_time.load(std::memory_order_acquire); } private: - std::atomic volume_update_needed{false}; - u64 last_volume_check_time{0}; - static constexpr u64 VOLUME_CHECK_INTERVAL_US = 50000; // Check every 50ms + bool Initialize(OrbisAudioOutPort type) { + // Calculate timing parameters + period_us = (1000000ULL * buffer_frames + sample_rate / 2) / sample_rate; + + // Allocate aligned internal buffer for SIMD operations + internal_buffer_size = buffer_frames * sizeof(float) * num_channels; + +#ifdef _WIN32 + internal_buffer = _aligned_malloc(internal_buffer_size, AUDIO_BUFFER_ALIGNMENT); +#else + if (posix_memalign(&internal_buffer, AUDIO_BUFFER_ALIGNMENT, internal_buffer_size) != 0) { + internal_buffer = nullptr; + } +#endif + + if (!internal_buffer) { + LOG_ERROR(Lib_AudioOut, "Failed to allocate aligned audio buffer of size {}", + internal_buffer_size); + return false; + } + + // Initialize current gain + current_gain.store(Config::getVolumeSlider() * 0.01f, std::memory_order_relaxed); + + if (!SelectConverter()) { + FreeAlignedBuffer(); + return false; + } + + // Open SDL device + if (!OpenDevice(type)) { + FreeAlignedBuffer(); + return false; + } + + CalculateQueueThreshold(); + return true; + } + + void Cleanup() { + if (stream) { + SDL_DestroyAudioStream(stream); + stream = nullptr; + } + FreeAlignedBuffer(); + } + + void FreeAlignedBuffer() { + if (internal_buffer) { +#ifdef _WIN32 + _aligned_free(internal_buffer); +#else + free(internal_buffer); +#endif + internal_buffer = nullptr; + } + } void UpdateVolumeIfChanged() { - u64 current_time = Kernel::sceKernelGetProcessTime(); + const u64 current_time = Kernel::sceKernelGetProcessTime(); - // Only check volume every 50ms to reduce overhead - if (current_time - last_volume_check_time >= VOLUME_CHECK_INTERVAL_US) { - last_volume_check_time = current_time; + if (current_time - last_volume_check_time < VOLUME_CHECK_INTERVAL_US) { + return; + } - float config_volume = Config::getVolumeSlider() / 100.0f; - float stored_gain = current_gain.load(); + last_volume_check_time = current_time; - if (std::abs(config_volume - stored_gain) > 0.001f) { - if (SDL_SetAudioStreamGain(stream, config_volume)) { - current_gain.store(config_volume); - LOG_DEBUG(Lib_AudioOut, "Updated audio gain to {:.3f}", config_volume); - } else { - LOG_ERROR(Lib_AudioOut, "Failed to set audio stream gain: {}", SDL_GetError()); - } + const float config_volume = Config::getVolumeSlider() * 0.01f; + const float stored_gain = current_gain.load(std::memory_order_acquire); + + // Only update if the difference is significant + if (std::abs(config_volume - stored_gain) > VOLUME_EPSILON) { + if (SDL_SetAudioStreamGain(stream, config_volume)) { + current_gain.store(config_volume, std::memory_order_release); + LOG_DEBUG(Lib_AudioOut, "Updated audio gain to {:.3f}", config_volume); + } else { + LOG_ERROR(Lib_AudioOut, "Failed to set audio stream gain: {}", SDL_GetError()); } } } + + void HandleTiming(u64 current_time) { + if (next_output_time == 0) [[unlikely]] { + // First output - set initial timing + next_output_time = current_time + period_us; + return; + } + + const s64 time_diff = static_cast(current_time - next_output_time); + + if (time_diff > static_cast(TIMING_RESYNC_THRESHOLD_US)) [[unlikely]] { + // We're far behind - resync + next_output_time = current_time + period_us; + } else if (time_diff < 0) { + // We're ahead of schedule - wait + const u64 time_to_wait = static_cast(-time_diff); + next_output_time += period_us; + + if (time_to_wait > MIN_SLEEP_THRESHOLD_US) { + // Sleep for most of the wait period + const u64 sleep_duration = time_to_wait - MIN_SLEEP_THRESHOLD_US; + std::this_thread::sleep_for(std::chrono::microseconds(sleep_duration)); + } + } else { + // Slightly behind or on time - just advance + next_output_time += period_us; + } + } + + void ManageAudioQueue() { + const auto queued = SDL_GetAudioStreamQueued(stream); + + if (queued >= queue_threshold) [[unlikely]] { + LOG_DEBUG(Lib_AudioOut, "Clearing backed up audio queue ({} >= {})", queued, + queue_threshold); + SDL_ClearAudioStream(stream); + CalculateQueueThreshold(); + } + } + bool OpenDevice(OrbisAudioOutPort type) { const SDL_AudioSpec fmt = { - .format = SDL_AUDIO_F32LE, // Always use float for internal processing + .format = SDL_AUDIO_F32LE, .channels = static_cast(num_channels), .freq = static_cast(sample_rate), }; - // Determine device name - std::string device_name = GetDeviceName(type); - SDL_AudioDeviceID dev_id = SDL_INVALID_AUDIODEVICEID; + // Determine device + const std::string device_name = GetDeviceName(type); + const SDL_AudioDeviceID dev_id = SelectAudioDevice(device_name, type); - if (device_name == "None") { - LOG_INFO(Lib_AudioOut, "Audio device disabled for port type {}", - static_cast(type)); + if (dev_id == SDL_INVALID_AUDIODEVICEID) { return false; - } else if (device_name.empty() || device_name == "Default Device") { - dev_id = SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK; - } else { - int num_devices = 0; - SDL_AudioDeviceID* dev_array = SDL_GetAudioPlaybackDevices(&num_devices); - - if (dev_array) { - bool found = false; - for (int i = 0; i < num_devices; i++) { - const char* dev_name = SDL_GetAudioDeviceName(dev_array[i]); - if (dev_name && std::string(dev_name) == device_name) { - dev_id = dev_array[i]; - found = true; - break; - } - } - SDL_free(dev_array); - - if (!found) { - LOG_WARNING(Lib_AudioOut, "Audio device '{}' not found, using default", - device_name); - dev_id = SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK; - } - } else { - LOG_WARNING(Lib_AudioOut, "No audio devices found, using default"); - dev_id = SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK; - } } // Create audio stream @@ -235,30 +276,15 @@ private: return false; } - // Set channel map - if (num_channels > 0) { - std::vector channel_map(num_channels); - - if (is_std && num_channels == 8) { - // Standard 8CH layout - channel_map = {FL, FR, FC, LF, STD_SL, STD_SR, STD_BL, STD_BR}; - } else { - // Use provided channel layout - for (int i = 0; i < num_channels; i++) { - channel_map[i] = channel_layout[i]; - } - } - - if (!SDL_SetAudioStreamInputChannelMap(stream, channel_map.data(), num_channels)) { - LOG_ERROR(Lib_AudioOut, "Failed to set channel map: {}", SDL_GetError()); - SDL_DestroyAudioStream(stream); - stream = nullptr; - return false; - } + // Configure channel mapping + if (!ConfigureChannelMap()) { + SDL_DestroyAudioStream(stream); + stream = nullptr; + return false; } // Set initial volume - float initial_gain = current_gain.load(); + const float initial_gain = current_gain.load(std::memory_order_relaxed); if (!SDL_SetAudioStreamGain(stream, initial_gain)) { LOG_WARNING(Lib_AudioOut, "Failed to set initial audio gain: {}", SDL_GetError()); } @@ -276,24 +302,81 @@ private: return true; } - std::string GetDeviceName(OrbisAudioOutPort type) { + SDL_AudioDeviceID SelectAudioDevice(const std::string& device_name, OrbisAudioOutPort type) { + if (device_name == "None") { + LOG_INFO(Lib_AudioOut, "Audio device disabled for port type {}", + static_cast(type)); + return SDL_INVALID_AUDIODEVICEID; + } + + if (device_name.empty() || device_name == "Default Device") { + return SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK; + } + + // Search for specific device + int num_devices = 0; + SDL_AudioDeviceID* dev_array = SDL_GetAudioPlaybackDevices(&num_devices); + + if (!dev_array) { + LOG_WARNING(Lib_AudioOut, "No audio devices found, using default"); + return SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK; + } + + SDL_AudioDeviceID selected_device = SDL_INVALID_AUDIODEVICEID; + + for (int i = 0; i < num_devices; i++) { + const char* dev_name = SDL_GetAudioDeviceName(dev_array[i]); + if (dev_name && device_name == dev_name) { + selected_device = dev_array[i]; + break; + } + } + + SDL_free(dev_array); + + if (selected_device == SDL_INVALID_AUDIODEVICEID) { + LOG_WARNING(Lib_AudioOut, "Audio device '{}' not found, using default", device_name); + return SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK; + } + + return selected_device; + } + + bool ConfigureChannelMap() { + if (num_channels == 0) { + return true; + } + + std::vector channel_map(num_channels); + + if (is_std && num_channels == 8) { + // Standard 8CH layout requires remapping + channel_map = {FL, FR, FC, LF, STD_SL, STD_SR, STD_BL, STD_BR}; + } else { + std::copy_n(channel_layout.begin(), num_channels, channel_map.begin()); + } + + if (!SDL_SetAudioStreamInputChannelMap(stream, channel_map.data(), num_channels)) { + LOG_ERROR(Lib_AudioOut, "Failed to set channel map: {}", SDL_GetError()); + return false; + } + + return true; + } + + std::string GetDeviceName(OrbisAudioOutPort type) const { switch (type) { case OrbisAudioOutPort::Main: case OrbisAudioOutPort::Bgm: return Config::getMainOutputDevice(); - // case OrbisAudioOutPort::Voice: - // case OrbisAudioOutPort::Personal: - // return Config::getHeadphoneOutputDevice(); case OrbisAudioOutPort::PadSpk: return Config::getPadSpkOutputDevice(); - // case OrbisAudioOutPort::Aux: - // return Config::getSpecialOutputDevice(); default: return Config::getMainOutputDevice(); } } - void SelectConverter() { + bool SelectConverter() { if (is_float) { switch (num_channels) { case 1: @@ -303,15 +386,11 @@ private: convert = &ConvertF32Stereo; break; case 8: - if (is_std) { - convert = &ConvertF32Std8CH; - } else { - convert = &ConvertF32_8CH; - } + convert = is_std ? &ConvertF32Std8CH : &ConvertF32_8CH; break; default: LOG_ERROR(Lib_AudioOut, "Unsupported float channel count: {}", num_channels); - convert = nullptr; + return false; } } else { switch (num_channels) { @@ -319,32 +398,43 @@ private: convert = &ConvertS16Mono; break; case 2: +#if defined(HAS_SSE2) + convert = &ConvertS16StereoSIMD; +#else convert = &ConvertS16Stereo; +#endif break; case 8: +#if defined(HAS_SSE2) + convert = &ConvertS16_8CH_SIMD; +#else convert = &ConvertS16_8CH; +#endif break; default: LOG_ERROR(Lib_AudioOut, "Unsupported S16 channel count: {}", num_channels); - convert = nullptr; + return false; } } + + return true; } void CalculateQueueThreshold() { - if (!stream) + if (!stream) { return; + } SDL_AudioSpec discard; - int sdl_buffer_frames; + int sdl_buffer_frames = 0; + if (!SDL_GetAudioDeviceFormat(SDL_GetAudioStreamDevice(stream), &discard, &sdl_buffer_frames)) { LOG_WARNING(Lib_AudioOut, "Failed to get SDL buffer size: {}", SDL_GetError()); - sdl_buffer_frames = 0; } - u32 sdl_buffer_size = sdl_buffer_frames * sizeof(float) * num_channels; - queue_threshold = std::max(guest_buffer_size, sdl_buffer_size) * 4; + const u32 sdl_buffer_size = sdl_buffer_frames * sizeof(float) * num_channels; + queue_threshold = std::max(guest_buffer_size, sdl_buffer_size) * QUEUE_MULTIPLIER; LOG_DEBUG(Lib_AudioOut, "Audio queue threshold: {} bytes (SDL buffer: {} frames)", queue_threshold, sdl_buffer_frames); @@ -352,15 +442,12 @@ private: using ConverterFunc = void (*)(const void* src, void* dst, u32 frames, const float* volumes); - // Remove volume parameter and application from all converters static void ConvertS16Mono(const void* src, void* dst, u32 frames, const float*) { const s16* s = static_cast(src); float* d = static_cast(dst); - constexpr float inv_scale = 1.0f / VOLUME_0DB; - for (u32 i = 0; i < frames; i++) { - d[i] = s[i] * inv_scale; + d[i] = s[i] * INV_VOLUME_0DB; } } @@ -368,28 +455,82 @@ private: const s16* s = static_cast(src); float* d = static_cast(dst); - constexpr float inv_scale = 1.0f / VOLUME_0DB; - - for (u32 i = 0; i < frames; i++) { - d[i * 2] = s[i * 2] * inv_scale; - d[i * 2 + 1] = s[i * 2 + 1] * inv_scale; + const u32 num_samples = frames << 1; // * 2 + for (u32 i = 0; i < num_samples; i++) { + d[i] = s[i] * INV_VOLUME_0DB; } } +#ifdef HAS_SSE2 + static void ConvertS16StereoSIMD(const void* src, void* dst, u32 frames, const float*) { + const s16* s = static_cast(src); + float* d = static_cast(dst); + + const __m128 scale = _mm_set1_ps(INV_VOLUME_0DB); + const u32 num_samples = frames << 1; + u32 i = 0; + + // Process 8 samples at a time (4 stereo frames) + for (; i + 8 <= num_samples; i += 8) { + // Load 8 s16 values + __m128i s16_vals = _mm_loadu_si128(reinterpret_cast(&s[i])); + + // Convert to 32-bit integers + __m128i s32_lo = _mm_cvtepi16_epi32(s16_vals); + __m128i s32_hi = _mm_cvtepi16_epi32(_mm_srli_si128(s16_vals, 8)); + + // Convert to float and scale + __m128 f_lo = _mm_mul_ps(_mm_cvtepi32_ps(s32_lo), scale); + __m128 f_hi = _mm_mul_ps(_mm_cvtepi32_ps(s32_hi), scale); + + // Store results + _mm_storeu_ps(&d[i], f_lo); + _mm_storeu_ps(&d[i + 4], f_hi); + } + + // Handle remaining samples + for (; i < num_samples; i++) { + d[i] = s[i] * INV_VOLUME_0DB; + } + } +#endif + static void ConvertS16_8CH(const void* src, void* dst, u32 frames, const float*) { const s16* s = static_cast(src); float* d = static_cast(dst); - constexpr float inv_scale = 1.0f / VOLUME_0DB; - - for (u32 i = 0; i < frames; i++) { - for (int ch = 0; ch < 8; ch++) { - d[i * 8 + ch] = s[i * 8 + ch] * inv_scale; - } + const u32 num_samples = frames << 3; // * 8 + for (u32 i = 0; i < num_samples; i++) { + d[i] = s[i] * INV_VOLUME_0DB; } } - // Float converters become simple memcpy or passthrough +#ifdef HAS_SSE2 + static void ConvertS16_8CH_SIMD(const void* src, void* dst, u32 frames, const float*) { + const s16* s = static_cast(src); + float* d = static_cast(dst); + + const __m128 scale = _mm_set1_ps(INV_VOLUME_0DB); + const u32 num_samples = frames << 3; + u32 i = 0; + + // Process 8 samples at a time + for (; i + 8 <= num_samples; i += 8) { + __m128i s16_vals = _mm_loadu_si128(reinterpret_cast(&s[i])); + __m128i s32_lo = _mm_cvtepi16_epi32(s16_vals); + __m128i s32_hi = _mm_cvtepi16_epi32(_mm_srli_si128(s16_vals, 8)); + __m128 f_lo = _mm_mul_ps(_mm_cvtepi32_ps(s32_lo), scale); + __m128 f_hi = _mm_mul_ps(_mm_cvtepi32_ps(s32_hi), scale); + _mm_storeu_ps(&d[i], f_lo); + _mm_storeu_ps(&d[i + 4], f_hi); + } + + for (; i < num_samples; i++) { + d[i] = s[i] * INV_VOLUME_0DB; + } + } +#endif + static void ConvertF32Mono(const void* src, void* dst, u32 frames, const float*) { std::memcpy(dst, src, frames * sizeof(float)); } @@ -406,50 +547,54 @@ private: const float* s = static_cast(src); float* d = static_cast(dst); + // Channel remapping for standard 8CH layout for (u32 i = 0; i < frames; i++) { - d[i * 8 + FL] = s[i * 8 + FL]; - d[i * 8 + FR] = s[i * 8 + FR]; - d[i * 8 + FC] = s[i * 8 + FC]; - d[i * 8 + LF] = s[i * 8 + LF]; - d[i * 8 + SL] = s[i * 8 + STD_SL]; // Channel remapping still needed - d[i * 8 + SR] = s[i * 8 + STD_SR]; - d[i * 8 + BL] = s[i * 8 + STD_BL]; - d[i * 8 + BR] = s[i * 8 + STD_BR]; + const u32 offset = i << 3; // * 8 + + d[offset + FL] = s[offset + FL]; + d[offset + FR] = s[offset + FR]; + d[offset + FC] = s[offset + FC]; + d[offset + LF] = s[offset + LF]; + d[offset + SL] = s[offset + STD_SL]; + d[offset + SR] = s[offset + STD_SR]; + d[offset + BL] = s[offset + STD_BL]; + d[offset + BR] = s[offset + STD_BR]; } } - // Member variables - u32 frame_size; - u32 guest_buffer_size; - u32 buffer_frames; - u32 sample_rate; - u32 num_channels; - bool is_float; - bool is_std; - std::array channel_layout; + // Audio format parameters + const u32 frame_size; + const u32 guest_buffer_size; + const u32 buffer_frames; + const u32 sample_rate; + const u32 num_channels; + const bool is_float; + const bool is_std; + const std::array channel_layout; - u64 period_us; - u64 last_output_time; - u64 next_output_time; + alignas(64) u64 period_us{0}; + alignas(64) std::atomic last_output_time{0}; + u64 next_output_time{0}; + u64 last_volume_check_time{0}; + u32 output_count{0}; // Buffers - u32 internal_buffer_size; - void* internal_buffer; + u32 internal_buffer_size{0}; + void* internal_buffer{nullptr}; - // Converter function - ConverterFunc convert; + // Converter function pointer + ConverterFunc convert{nullptr}; - // Volume tracking - std::atomic current_gain{1.0f}; - mutable std::mutex volume_mutex; + // Volume management + alignas(64) std::atomic current_gain{1.0f}; - // SDL - SDL_AudioStream* stream{}; - u32 queue_threshold{}; + // SDL audio stream + SDL_AudioStream* stream{nullptr}; + u32 queue_threshold{0}; }; std::unique_ptr SDLAudioOut::Open(PortOut& port) { return std::make_unique(port); } -} // namespace Libraries::AudioOut \ No newline at end of file +} // namespace Libraries::AudioOut