From 94d0f2e7edade6e9e96d38aed1f38f1d43247e6f Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Fri, 14 Nov 2025 21:50:14 -0600 Subject: [PATCH 1/7] Avoid initializing Shader::PsColorBuffer in RefreshGraphicsKey (#3799) The bitfield in the struct is padded, which produces uninitialized memory on initialization. To avoid modifying the struct while making our GraphicsPipelineKey struct properly hashable, set values directly instead of re-initializing. This fixes pipeline compile spam, and the subsequent poor performance, on certain setups. --- .../renderer_vulkan/vk_pipeline_cache.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index be9543737..4706bff24 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -355,13 +355,12 @@ bool PipelineCache::RefreshGraphicsKey() { } // Fill color target information - key.color_buffers[cb] = Shader::PsColorBuffer{ - .data_format = col_buf.GetDataFmt(), - .num_format = col_buf.GetNumberFmt(), - .num_conversion = col_buf.GetNumberConversion(), - .export_format = regs.color_export_format.GetFormat(cb), - .swizzle = col_buf.Swizzle(), - }; + auto& color_buffer = key.color_buffers[cb]; + color_buffer.data_format = col_buf.GetDataFmt(); + color_buffer.num_format = col_buf.GetNumberFmt(); + color_buffer.num_conversion = col_buf.GetNumberConversion(); + color_buffer.export_format = regs.color_export_format.GetFormat(cb); + color_buffer.swizzle = col_buf.Swizzle(); } // Compile and bind shader stages @@ -379,7 +378,7 @@ bool PipelineCache::RefreshGraphicsKey() { continue; } if ((key.mrt_mask & (1u << cb)) == 0) { - key.color_buffers[cb] = {}; + std::memset(&key.color_buffers[cb], 0, sizeof(Shader::PsColorBuffer)); continue; } From 2f556366268ec9273364521afd53afa68f07cb99 
Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Sat, 15 Nov 2025 07:44:25 +0200 Subject: [PATCH 2/7] vk_rasterizer: Attempt to optimize compute clears (#3795) --- src/video_core/buffer_cache/buffer_cache.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 63 ++++++++++++++++++- .../renderer_vulkan/vk_rasterizer.h | 1 + 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 04c473f1b..cb18bc190 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -889,7 +889,7 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, }); TouchBuffer(buffer); } - if (is_texel_buffer) { + if (is_texel_buffer && !is_written) { return SynchronizeBufferFromImage(buffer, device_addr, size); } return false; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a47c523e1..37b8051e8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -380,7 +380,8 @@ void Rasterizer::OnSubmit() { } bool Rasterizer::BindResources(const Pipeline* pipeline) { - if (IsComputeImageCopy(pipeline) || IsComputeMetaClear(pipeline)) { + if (IsComputeImageCopy(pipeline) || IsComputeMetaClear(pipeline) || + IsComputeImageClear(pipeline)) { return false; } @@ -520,6 +521,66 @@ bool Rasterizer::IsComputeImageCopy(const Pipeline* pipeline) { return true; } +bool Rasterizer::IsComputeImageClear(const Pipeline* pipeline) { + if (!pipeline->IsCompute()) { + return false; + } + + // Ensure shader only has 2 bound buffers + const auto& cs_pgm = liverpool->GetCsRegs(); + const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute); + if (cs_pgm.num_thread_x.full != 64 || info.buffers.size() != 2 || !info.images.empty()) { + return false; + } + + // From those 2 buffers, first must hold the clear vector 
and second the image being cleared + const auto& desc0 = info.buffers[0]; + const auto& desc1 = info.buffers[1]; + if (desc0.is_formatted || !desc1.is_formatted || desc0.is_written || !desc1.is_written) { + return false; + } + + // First buffer must have size of vec4 and second the size of a single layer + const AmdGpu::Buffer buf0 = desc0.GetSharp(info); + const AmdGpu::Buffer buf1 = desc1.GetSharp(info); + const u32 buf1_bpp = AmdGpu::NumBitsPerBlock(buf1.GetDataFmt()); + if (buf0.GetSize() != 16 || (cs_pgm.dim_x * 128ULL * (buf1_bpp / 8)) != buf1.GetSize()) { + return false; + } + + // Find image the buffer alias + const auto image1_id = + texture_cache.FindImageFromRange(buf1.base_address, buf1.GetSize(), false); + if (!image1_id) { + return false; + } + + // Image clear must be valid + VideoCore::Image& image1 = texture_cache.GetImage(image1_id); + if (image1.info.guest_size != buf1.GetSize() || image1.info.num_bits != buf1_bpp || + image1.info.props.is_depth) { + return false; + } + + // Perform image clear + const float* values = reinterpret_cast(buf0.base_address); + const vk::ClearValue clear = { + .color = {.float32 = std::array{values[0], values[1], values[2], values[3]}}, + }; + const VideoCore::SubresourceRange range = { + .base = + { + .level = 0, + .layer = 0, + }, + .extent = image1.info.resources, + }; + image1.Clear(clear, range); + image1.flags |= VideoCore::ImageFlagBits::GpuModified; + image1.flags &= ~VideoCore::ImageFlagBits::Dirty; + return true; +} + void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding, Shader::PushData& push_data) { buffer_bindings.clear(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 524a8f06d..96a3c95e8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -112,6 +112,7 @@ private: bool IsComputeMetaClear(const Pipeline* pipeline); bool 
IsComputeImageCopy(const Pipeline* pipeline); + bool IsComputeImageClear(const Pipeline* pipeline); private: friend class VideoCore::BufferCache; From 6a9f9abda09029edd8c2ae37a7a550b1033fce9d Mon Sep 17 00:00:00 2001 From: Missake Date: Sat, 15 Nov 2025 06:48:04 +0100 Subject: [PATCH 3/7] Delete lines about Qt in building doc for Windows (#3800) --- documents/building-windows.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/documents/building-windows.md b/documents/building-windows.md index d1f8a6895..88c5b6830 100644 --- a/documents/building-windows.md +++ b/documents/building-windows.md @@ -41,10 +41,6 @@ Go through the Git for Windows installation as normal Your shadps4.exe will be in `C:\path\to\source\Build\x64-Clang-Release\` -To automatically populate the necessary files to run shadPS4.exe, run in a command prompt or terminal: -`C:\Qt\\msvc2022_64\bin\windeployqt6.exe "C:\path\to\shadps4.exe"` -(Change Qt path if you've installed it to non-default path) - ## Option 2: MSYS2/MinGW > [!IMPORTANT] @@ -72,7 +68,6 @@ ARM64-based computers, follow: 1. Open "MSYS2 CLANGARM64" from your new applications 2. Run `pacman -Syu`, let it complete; 3. Run `pacman -S --needed git mingw-w64-clang-aarch64-binutils mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-rapidjson mingw-w64-clang-aarch64-cmake mingw-w64-clang-aarch64-ninja mingw-w64-clang-aarch64-ffmpeg` - 1. Optional (Qt only): run `pacman -S --needed mingw-w64-clang-aarch64-qt6-base mingw-w64-clang-aarch64-qt6-tools mingw-w64-clang-aarch64-qt6-multimedia` 4. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` 5. Run `cd shadPS4` 6. Run `cmake -S . 
-B build -DCMAKE_C_COMPILER="clang.exe" -DCMAKE_CXX_COMPILER="clang++.exe" -DCMAKE_CXX_FLAGS="-O2 -march=native"` From ed14359c87bc6ed9781d151cbb1f34b9fe045f4c Mon Sep 17 00:00:00 2001 From: rainmakerv2 <30595646+rainmakerv3@users.noreply.github.com> Date: Sun, 16 Nov 2025 16:36:54 +0800 Subject: [PATCH 4/7] Re-implement custom trophy sounds using sdl3 mixer (#3805) * re-implement custom trophy sounds using sdl3 mixer * fix build vars * Don't change SDL version --- .gitmodules | 4 ++ CMakeLists.txt | 3 +- externals/CMakeLists.txt | 12 +++++ externals/sdl3_mixer | 1 + src/core/libraries/np/trophy_ui.cpp | 73 ++++++++++++----------------- src/core/libraries/np/trophy_ui.h | 4 ++ 6 files changed, 52 insertions(+), 45 deletions(-) create mode 160000 externals/sdl3_mixer diff --git a/.gitmodules b/.gitmodules index 43ba2a7c1..b8d1544e4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -113,3 +113,7 @@ [submodule "externals/json"] path = externals/json url = https://github.com/nlohmann/json.git +[submodule "externals/sdl3_mixer"] + path = externals/sdl3_mixer + url = https://github.com/libsdl-org/SDL_mixer + shallow = true diff --git a/CMakeLists.txt b/CMakeLists.txt index d26581790..1491a3e1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -229,6 +229,7 @@ find_package(magic_enum 0.9.7 CONFIG) find_package(PNG 1.6 MODULE) find_package(RenderDoc 1.6.0 MODULE) find_package(SDL3 3.1.2 CONFIG) +find_package(SDL3_mixer 2.8.1 CONFIG) find_package(stb MODULE) find_package(toml11 4.2.0 CONFIG) find_package(tsl-robin-map 1.3.0 CONFIG) @@ -1070,7 +1071,7 @@ add_executable(shadps4 create_target_directory_groups(shadps4) target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG) -target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis 
glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json) +target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json) target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h") target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h") diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index a26f58d69..a45a03bf6 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -63,6 +63,18 @@ if (NOT TARGET SDL3::SDL3) add_subdirectory(sdl3) endif() +# SDL3_mixer +if (NOT TARGET SDL3_Mixer::SDL3_Mixer) + set(SDLMIXER_FLAC OFF) + set(SDLMIXER_OGG OFF) + set(SDLMIXER_MOD OFF) + set(SDLMIXER_MIDI OFF) + set(SDLMIXER_OPUS OFF) + set(SDLMIXER_WAVPACK OFF) + set(BUILD_SHARED_LIBS OFF) + add_subdirectory(sdl3_mixer) +endif() + # vulkan-headers if (NOT TARGET Vulkan::Headers) set(VULKAN_HEADERS_ENABLE_MODULE OFF) diff --git a/externals/sdl3_mixer b/externals/sdl3_mixer new file mode 160000 index 000000000..4182794ea --- /dev/null +++ b/externals/sdl3_mixer @@ -0,0 +1 @@ +Subproject commit 4182794ea45fe28568728670c6f1583855d0e85c diff --git a/src/core/libraries/np/trophy_ui.cpp b/src/core/libraries/np/trophy_ui.cpp index 94e7eb5f2..b803403c4 100644 --- a/src/core/libraries/np/trophy_ui.cpp +++ b/src/core/libraries/np/trophy_ui.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -92,59 +91,45 @@ TrophyUI::TrophyUI(const std::filesystem::path& trophyIconPath, const std::strin AddLayer(this); - bool customsoundplayed = false; -#ifdef ENABLE_QT_GUI - QString musicPathWav = QString::fromStdString(CustomTrophy_Dir.string() + "/trophy.wav"); - QString musicPathMp3 = 
QString::fromStdString(CustomTrophy_Dir.string() + "/trophy.mp3"); - if (fs::exists(musicPathWav.toStdString())) { - BackgroundMusicPlayer::getInstance().setVolume(100); - BackgroundMusicPlayer::getInstance().playMusic(musicPathWav, false); - customsoundplayed = true; - } else if (fs::exists(musicPathMp3.toStdString())) { - BackgroundMusicPlayer::getInstance().setVolume(100); - BackgroundMusicPlayer::getInstance().playMusic(musicPathMp3, false); - customsoundplayed = true; + MIX_Init(); + mixer = MIX_CreateMixerDevice(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, NULL); + if (!mixer) { + LOG_ERROR(Lib_NpTrophy, "Could not initialize SDL Mixer, {}", SDL_GetError()); + return; } -#endif - if (!customsoundplayed) { + MIX_SetMasterGain(mixer, static_cast(Config::getVolumeSlider() / 100.f)); + auto musicPathMp3 = CustomTrophy_Dir / "trophy.mp3"; + auto musicPathWav = CustomTrophy_Dir / "trophy.wav"; + + if (std::filesystem::exists(musicPathMp3)) { + audio = MIX_LoadAudio(mixer, musicPathMp3.string().c_str(), false); + } else if (std::filesystem::exists(musicPathWav)) { + audio = MIX_LoadAudio(mixer, musicPathWav.string().c_str(), false); + } else { auto soundFile = resource.open("src/images/trophy.wav"); std::vector soundData = std::vector(soundFile.begin(), soundFile.end()); + audio = + MIX_LoadAudio_IO(mixer, SDL_IOFromMem(soundData.data(), soundData.size()), false, true); + // due to low volume of default sound file + MIX_SetMasterGain(mixer, MIX_GetMasterGain(mixer) * 1.3f); + } - SDL_AudioSpec spec; - Uint8* audioBuf; - Uint32 audioLen; + if (!audio) { + LOG_ERROR(Lib_NpTrophy, "Could not loud audio file, {}", SDL_GetError()); + return; + } - if (!SDL_LoadWAV_IO(SDL_IOFromMem(soundData.data(), soundData.size()), true, &spec, - &audioBuf, &audioLen)) { - LOG_ERROR(Lib_NpTrophy, "Cannot load trophy sound: {}", SDL_GetError()); - SDL_free(audioBuf); - return; - } - - SDL_AudioStream* stream = - SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &spec, nullptr, 
nullptr); - if (!stream) { - LOG_ERROR(Lib_NpTrophy, "Cannot create audio stream for trophy sound: {}", - SDL_GetError()); - SDL_free(audioBuf); - return; - } - - if (!SDL_PutAudioStreamData(stream, audioBuf, audioLen)) { - LOG_ERROR(Lib_NpTrophy, "Cannot add trophy sound data to stream: {}", SDL_GetError()); - SDL_free(audioBuf); - return; - } - - // Set audio gain 20% higher since audio file itself is soft - SDL_SetAudioStreamGain(stream, Config::getVolumeSlider() / 100.0f * 1.2f); - SDL_ResumeAudioStreamDevice(stream); - SDL_free(audioBuf); + if (!MIX_PlayAudio(mixer, audio)) { + LOG_ERROR(Lib_NpTrophy, "Could not play audio file, {}", SDL_GetError()); } } TrophyUI::~TrophyUI() { + MIX_DestroyAudio(audio); + MIX_DestroyMixer(mixer); + MIX_Quit(); + Finish(); } diff --git a/src/core/libraries/np/trophy_ui.h b/src/core/libraries/np/trophy_ui.h index fbadac8f4..2734471b3 100644 --- a/src/core/libraries/np/trophy_ui.h +++ b/src/core/libraries/np/trophy_ui.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "common/fixed_value.h" @@ -30,6 +31,9 @@ private: std::string_view trophy_type; ImGui::RefCountedTexture trophy_icon; ImGui::RefCountedTexture trophy_type_icon; + + MIX_Mixer* mixer; + MIX_Audio* audio; }; struct TrophyInfo { From aa5c045555ebd211855121064fdf53e08430cb35 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Sun, 16 Nov 2025 17:34:23 +0200 Subject: [PATCH 5/7] logging: Format message after filter check (#3808) --- src/common/logging/backend.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 6b68651de..4a85c4cde 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -182,7 +182,13 @@ public: } void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num, - const char* function, std::string message) { + const char* function, const char* format, const fmt::format_args& args) { + if 
(!filter.CheckMessage(log_class, log_level) || !Config::getLoggingEnabled()) { + return; + } + + const auto message = fmt::vformat(format, args); + // Propagate important log messages to the profiler if (IsProfilerConnected()) { const auto& msg_str = fmt::format("[{}] {}", GetLogClassName(log_class), message); @@ -201,10 +207,6 @@ public: } } - if (!filter.CheckMessage(log_class, log_level) || !Config::getLoggingEnabled()) { - return; - } - using std::chrono::duration_cast; using std::chrono::microseconds; using std::chrono::steady_clock; @@ -324,8 +326,8 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename, unsigned int line_num, const char* function, const char* format, const fmt::format_args& args) { if (!initialization_in_progress_suppress_logging) [[likely]] { - Impl::Instance().PushEntry(log_class, log_level, filename, line_num, function, - fmt::vformat(format, args)); + Impl::Instance().PushEntry(log_class, log_level, filename, line_num, function, format, + args); } } } // namespace Common::Log From 5b699090e6bb5936f9d2d809ee6309e3de12868a Mon Sep 17 00:00:00 2001 From: Alexandre Bouvier Date: Mon, 17 Nov 2025 16:56:01 +0000 Subject: [PATCH 6/7] cmake: fix sdl3_mixer target name (#3811) --- externals/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index a45a03bf6..5f7ae94c4 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -64,7 +64,7 @@ if (NOT TARGET SDL3::SDL3) endif() # SDL3_mixer -if (NOT TARGET SDL3_Mixer::SDL3_Mixer) +if (NOT TARGET SDL3_mixer::SDL3_mixer) set(SDLMIXER_FLAC OFF) set(SDLMIXER_OGG OFF) set(SDLMIXER_MOD OFF) From 3f86c2e94ad645d8a26b676b026b39fb1f2b8071 Mon Sep 17 00:00:00 2001 From: TheTurtle Date: Tue, 18 Nov 2025 08:46:51 +0200 Subject: [PATCH 7/7] buffer_cache: Split DMA fault handling code from buffer cache (#3809) It's better not to have that raw code there --- CMakeLists.txt | 2 +
.../ir/passes/shader_info_collection_pass.cpp | 2 +- src/video_core/buffer_cache/buffer_cache.cpp | 238 +----------------- src/video_core/buffer_cache/buffer_cache.h | 21 +- src/video_core/buffer_cache/fault_manager.cpp | 177 +++++++++++++ src/video_core/buffer_cache/fault_manager.h | 42 ++++ .../host_shaders/fault_buffer_process.comp | 27 +- .../renderer_vulkan/vk_rasterizer.cpp | 13 +- .../renderer_vulkan/vk_scheduler.cpp | 17 +- 9 files changed, 249 insertions(+), 290 deletions(-) create mode 100644 src/video_core/buffer_cache/fault_manager.cpp create mode 100644 src/video_core/buffer_cache/fault_manager.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1491a3e1e..ddaf2422c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -960,6 +960,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h src/video_core/buffer_cache/buffer.h src/video_core/buffer_cache/buffer_cache.cpp src/video_core/buffer_cache/buffer_cache.h + src/video_core/buffer_cache/fault_manager.cpp + src/video_core/buffer_cache/fault_manager.h src/video_core/buffer_cache/memory_tracker.h src/video_core/buffer_cache/range_set.h src/video_core/buffer_cache/region_definitions.h diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp index 3df4f8b86..c298a1092 100644 --- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp +++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp @@ -190,7 +190,7 @@ void CollectShaderInfoPass(IR::Program& program, const Profile& profile) { }); info.buffers.push_back({ .used_types = IR::Type::U32, - .inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::FAULT_BUFFER_SIZE), + .inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits::max()), .buffer_type = BufferType::FaultBuffer, .is_written = true, }); diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index cb18bc190..ac3fac5b1 100644 
--- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -2,49 +2,42 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include #include "common/alignment.h" #include "common/debug.h" #include "common/scope_exit.h" -#include "common/types.h" #include "core/memory.h" #include "video_core/amdgpu/liverpool.h" #include "video_core/buffer_cache/buffer_cache.h" #include "video_core/buffer_cache/memory_tracker.h" -#include "video_core/host_shaders/fault_buffer_process_comp.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/texture_cache/texture_cache.h" namespace VideoCore { static constexpr size_t DataShareBufferSize = 64_KB; static constexpr size_t StagingBufferSize = 512_MB; +static constexpr size_t DownloadBufferSize = 32_MB; static constexpr size_t UboStreamBufferSize = 64_MB; -static constexpr size_t DownloadBufferSize = 128_MB; static constexpr size_t DeviceBufferSize = 128_MB; -static constexpr size_t MaxPageFaults = 1024; BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_, PageManager& tracker) : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, memory{Core::Memory::Instance()}, texture_cache{texture_cache_}, + fault_manager{instance, scheduler, *this, CACHING_PAGEBITS, CACHING_NUMPAGES}, staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize}, stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize}, download_buffer{instance, scheduler, MemoryUsage::Download, DownloadBufferSize}, device_buffer{instance, scheduler, MemoryUsage::DeviceLocal, DeviceBufferSize}, gds_buffer{instance, scheduler, 
MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize}, bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal, - 0, AllFlags, BDA_PAGETABLE_SIZE}, - fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE) { + 0, AllFlags, BDA_PAGETABLE_SIZE} { Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer"); Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(), "BDA Page Table Buffer"); - Vulkan::SetObjectName(instance.GetDevice(), fault_buffer.Handle(), "Fault Buffer"); memory_tracker = std::make_unique(tracker); @@ -57,80 +50,6 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s const vk::Buffer& null_buffer = slot_buffers[null_id].buffer; Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer"); - // Prepare the fault buffer parsing pipeline - boost::container::static_vector bindings{ - { - .binding = 0, - .descriptorType = vk::DescriptorType::eStorageBuffer, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute, - }, - { - .binding = 1, - .descriptorType = vk::DescriptorType::eStorageBuffer, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute, - }, - }; - - const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = { - .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR, - .bindingCount = static_cast(bindings.size()), - .pBindings = bindings.data(), - }; - auto [desc_layout_result, desc_layout] = - instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci); - ASSERT_MSG(desc_layout_result == vk::Result::eSuccess, - "Failed to create descriptor set layout: {}", vk::to_string(desc_layout_result)); - fault_process_desc_layout = std::move(desc_layout); - - const auto& module = Vulkan::Compile(HostShaders::FAULT_BUFFER_PROCESS_COMP, - vk::ShaderStageFlagBits::eCompute, instance.GetDevice()); - Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser"); - - 
const vk::SpecializationMapEntry specialization_map_entry = { - .constantID = 0, - .offset = 0, - .size = sizeof(u32), - }; - - const vk::SpecializationInfo specialization_info = { - .mapEntryCount = 1, - .pMapEntries = &specialization_map_entry, - .dataSize = sizeof(u32), - .pData = &CACHING_PAGEBITS, - }; - - const vk::PipelineShaderStageCreateInfo shader_ci = { - .stage = vk::ShaderStageFlagBits::eCompute, - .module = module, - .pName = "main", - .pSpecializationInfo = &specialization_info, - }; - - const vk::PipelineLayoutCreateInfo layout_info = { - .setLayoutCount = 1U, - .pSetLayouts = &(*fault_process_desc_layout), - }; - auto [layout_result, layout] = instance.GetDevice().createPipelineLayoutUnique(layout_info); - ASSERT_MSG(layout_result == vk::Result::eSuccess, "Failed to create pipeline layout: {}", - vk::to_string(layout_result)); - fault_process_pipeline_layout = std::move(layout); - - const vk::ComputePipelineCreateInfo pipeline_info = { - .stage = shader_ci, - .layout = *fault_process_pipeline_layout, - }; - auto [pipeline_result, pipeline] = - instance.GetDevice().createComputePipelineUnique({}, pipeline_info); - ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}", - vk::to_string(pipeline_result)); - fault_process_pipeline = std::move(pipeline); - Vulkan::SetObjectName(instance.GetDevice(), *fault_process_pipeline, - "Fault Buffer Parser Pipeline"); - - instance.GetDevice().destroyShaderModule(module); - // Set up garbage collection parameters if (!instance.CanReportMemoryUsage()) { trigger_gc_memory = DEFAULT_TRIGGER_GC_MEMORY; @@ -656,14 +575,10 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { wanted_size = static_cast(device_addr_end - device_addr); const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size); const u32 size = static_cast(overlap.end - overlap.begin); - const BufferId new_buffer_id = [&] { - std::scoped_lock lk{slot_buffers_mutex}; - return 
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, - AllFlags | vk::BufferUsageFlagBits::eShaderDeviceAddress, size); - }(); + const BufferId new_buffer_id = + slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, + AllFlags | vk::BufferUsageFlagBits::eShaderDeviceAddress, size); auto& new_buffer = slot_buffers[new_buffer_id]; - const size_t size_bytes = new_buffer.SizeBytes(); - const auto cmdbuf = scheduler.CommandBuffer(); for (const BufferId overlap_id : overlap.ids) { JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); } @@ -672,126 +587,7 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) { } void BufferCache::ProcessFaultBuffer() { - // Run fault processing shader - const auto [mapped, offset] = download_buffer.Map(MaxPageFaults * sizeof(u64)); - vk::BufferMemoryBarrier2 fault_buffer_barrier{ - .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .srcAccessMask = vk::AccessFlagBits2::eShaderWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, - .dstAccessMask = vk::AccessFlagBits2::eShaderRead, - .buffer = fault_buffer.Handle(), - .offset = 0, - .size = FAULT_BUFFER_SIZE, - }; - vk::BufferMemoryBarrier2 download_barrier{ - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, - .dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite, - .buffer = download_buffer.Handle(), - .offset = offset, - .size = MaxPageFaults * sizeof(u64), - }; - std::array barriers{fault_buffer_barrier, download_barrier}; - vk::DescriptorBufferInfo fault_buffer_info{ - .buffer = fault_buffer.Handle(), - .offset = 0, - .range = FAULT_BUFFER_SIZE, - }; - vk::DescriptorBufferInfo download_info{ - .buffer = download_buffer.Handle(), - .offset = offset, - .range = MaxPageFaults * sizeof(u64), - }; - 
boost::container::small_vector writes{ - { - .dstSet = VK_NULL_HANDLE, - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eStorageBuffer, - .pBufferInfo = &fault_buffer_info, - }, - { - .dstSet = VK_NULL_HANDLE, - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eStorageBuffer, - .pBufferInfo = &download_info, - }, - }; - download_buffer.Commit(); - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.fillBuffer(download_buffer.Handle(), offset, MaxPageFaults * sizeof(u64), 0); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 2, - .pBufferMemoryBarriers = barriers.data(), - }); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline); - cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0, - writes); - constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup - constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u); - cmdbuf.dispatch(num_workgroups, 1, 1); - - // Reset fault buffer - const vk::BufferMemoryBarrier2 reset_pre_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader, - .srcAccessMask = vk::AccessFlagBits2::eShaderRead, - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .buffer = fault_buffer.Handle(), - .offset = 0, - .size = FAULT_BUFFER_SIZE, - }; - const vk::BufferMemoryBarrier2 reset_post_barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite, - .buffer = fault_buffer.Handle(), - .offset = 0, - .size = FAULT_BUFFER_SIZE, - }; 
- cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &reset_pre_barrier, - }); - cmdbuf.fillBuffer(fault_buffer.buffer, 0, FAULT_BUFFER_SIZE, 0); - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .dependencyFlags = vk::DependencyFlagBits::eByRegion, - .bufferMemoryBarrierCount = 1, - .pBufferMemoryBarriers = &reset_post_barrier, - }); - - // Defer creating buffers - scheduler.DeferOperation([this, mapped]() { - // Create the fault buffers batched - boost::icl::interval_set fault_ranges; - const u64* fault_ptr = std::bit_cast(mapped); - const u32 fault_count = static_cast(*(fault_ptr++)); - for (u32 i = 0; i < fault_count; ++i) { - const VAddr fault = *(fault_ptr++); - const VAddr fault_end = fault + CACHING_PAGESIZE; // This can be adjusted - fault_ranges += - boost::icl::interval_set::interval_type::right_open(fault, fault_end); - LOG_INFO(Render_Vulkan, "Accessed non-GPU cached memory at {:#x}", fault); - } - for (const auto& range : fault_ranges) { - const VAddr start = range.lower(); - const VAddr end = range.upper(); - const u64 page_start = start >> CACHING_PAGEBITS; - const u64 page_end = Common::DivCeil(end, CACHING_PAGESIZE); - // Buffer size is in 32 bits - ASSERT_MSG((range.upper() - range.lower()) <= std::numeric_limits::max(), - "Buffer size is too large"); - CreateBuffer(start, static_cast(end - start)); - } - }); + fault_manager.ProcessFaultBuffer(); } void BufferCache::Register(BufferId buffer_id) { @@ -972,10 +768,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, } void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) { - if (device_addr == 0) { - return; - } - VAddr device_addr_end = device_addr + size; + const VAddr device_addr_end = device_addr + size; ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) { RENDERER_TRACE; VAddr start = 
std::max(buffer.CpuAddr(), device_addr); @@ -985,21 +778,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) { }); } -void BufferCache::MemoryBarrier() { - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - vk::MemoryBarrier2 barrier = { - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite, - .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - }; - cmdbuf.pipelineBarrier2(vk::DependencyInfo{ - .memoryBarrierCount = 1, - .pMemoryBarriers = &barrier, - }); -} - void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value, u32 num_bytes) { scheduler.EndRendering(); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index ccf77b4f5..6954f979e 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -3,12 +3,12 @@ #pragma once -#include #include #include "common/lru_cache.h" #include "common/slot_vector.h" #include "common/types.h" #include "video_core/buffer_cache/buffer.h" +#include "video_core/buffer_cache/fault_manager.h" #include "video_core/buffer_cache/range_set.h" #include "video_core/multi_level_page_table.h" @@ -40,9 +40,7 @@ public: static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; static constexpr u64 DEVICE_PAGESIZE = 16_KB; static constexpr u64 CACHING_NUMPAGES = u64{1} << (40 - CACHING_PAGEBITS); - static constexpr u64 BDA_PAGETABLE_SIZE = CACHING_NUMPAGES * sizeof(vk::DeviceAddress); - static constexpr u64 FAULT_BUFFER_SIZE = CACHING_NUMPAGES / 8; // Bit per page // Default values for garbage collection static constexpr s64 DEFAULT_TRIGGER_GC_MEMORY = 1_GB; @@ -68,12 +66,6 @@ public: bool has_stream_leap = false; }; - using IntervalSet = - boost::icl::interval_set; - using IntervalType = typename IntervalSet::interval_type; - public: explicit 
BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, AmdGpu::Liverpool* liverpool, TextureCache& texture_cache, @@ -92,7 +84,7 @@ public: /// Retrieves the fault buffer. [[nodiscard]] Buffer* GetFaultBuffer() noexcept { - return &fault_buffer; + return fault_manager.GetFaultBuffer(); } /// Retrieves the buffer with the specified id. @@ -160,9 +152,6 @@ public: /// Synchronizes all buffers neede for DMA. void SynchronizeDmaBuffers(); - /// Record memory barrier. Used for buffers when accessed via BDA. - void MemoryBarrier(); - /// Runs the garbage collector. void RunGarbageCollector(); @@ -217,6 +206,7 @@ private: AmdGpu::Liverpool* liverpool; Core::MemoryManager* memory; TextureCache& texture_cache; + FaultManager fault_manager; std::unique_ptr memory_tracker; StreamBuffer staging_buffer; StreamBuffer stream_buffer; @@ -224,8 +214,6 @@ private: StreamBuffer device_buffer; Buffer gds_buffer; Buffer bda_pagetable_buffer; - Buffer fault_buffer; - std::shared_mutex slot_buffers_mutex; Common::SlotVector slot_buffers; u64 total_used_memory = 0; u64 trigger_gc_memory = 0; @@ -235,9 +223,6 @@ private: RangeSet gpu_modified_ranges; SplitRangeMap buffer_ranges; PageTable page_table; - vk::UniqueDescriptorSetLayout fault_process_desc_layout; - vk::UniquePipeline fault_process_pipeline; - vk::UniquePipelineLayout fault_process_pipeline_layout; }; } // namespace VideoCore diff --git a/src/video_core/buffer_cache/fault_manager.cpp b/src/video_core/buffer_cache/fault_manager.cpp new file mode 100644 index 000000000..e967ffd0e --- /dev/null +++ b/src/video_core/buffer_cache/fault_manager.cpp @@ -0,0 +1,177 @@ +// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/div_ceil.h" +#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/buffer_cache/fault_manager.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_platform.h" 
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_shader_util.h"
+
+#include "video_core/host_shaders/fault_buffer_process_comp.h"
+
+namespace VideoCore {
+
+static constexpr size_t MaxPageFaults = 1024; // u64 slots per area; slot 0 holds the count
+static constexpr size_t PageFaultAreaSize = MaxPageFaults * sizeof(u64);
+
+FaultManager::FaultManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler_,
+                           BufferCache& buffer_cache_, u32 caching_pagebits, u64 caching_num_pages_)
+    : scheduler{scheduler_}, buffer_cache{buffer_cache_},
+      caching_pagesize{1ULL << caching_pagebits}, caching_num_pages{caching_num_pages_},
+      fault_buffer_size{caching_num_pages_ / 8}, // one bit per page
+      fault_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, fault_buffer_size},
+      download_buffer{instance, scheduler, MemoryUsage::Download,
+                      0, AllFlags, MaxPendingFaults * PageFaultAreaSize} {
+    const auto device = instance.GetDevice();
+    Vulkan::SetObjectName(device, fault_buffer.Handle(), "Fault Buffer");
+
+    const std::array<vk::DescriptorSetLayoutBinding, 2> bindings = {{
+        {
+            .binding = 0,
+            .descriptorType = vk::DescriptorType::eStorageBuffer,
+            .descriptorCount = 1,
+            .stageFlags = vk::ShaderStageFlagBits::eCompute,
+        },
+        {
+            .binding = 1,
+            .descriptorType = vk::DescriptorType::eStorageBuffer,
+            .descriptorCount = 1,
+            .stageFlags = vk::ShaderStageFlagBits::eCompute,
+        },
+    }};
+    const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
+        .flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
+        .bindingCount = 2,
+        .pBindings = bindings.data(),
+    };
+    fault_process_desc_layout =
+        Vulkan::Check(device.createDescriptorSetLayoutUnique(desc_layout_ci));
+
+    std::vector<std::string> defines{{fmt::format("CACHING_PAGEBITS={}", caching_pagebits),
+                                      fmt::format("MAX_PAGE_FAULTS={}", MaxPageFaults)}};
+    const auto module = Vulkan::Compile(HostShaders::FAULT_BUFFER_PROCESS_COMP,
+                                        vk::ShaderStageFlagBits::eCompute, device, defines);
+    Vulkan::SetObjectName(device, module, "Fault Buffer Parser");
+
+    const vk::PipelineShaderStageCreateInfo shader_ci = {
+        .stage = vk::ShaderStageFlagBits::eCompute,
+        .module = module,
+        .pName = "main",
+    };
+
+    const vk::PipelineLayoutCreateInfo layout_info = {
+        .setLayoutCount = 1U,
+        .pSetLayouts = &(*fault_process_desc_layout),
+    };
+    fault_process_pipeline_layout = Vulkan::Check(device.createPipelineLayoutUnique(layout_info));
+
+    const vk::ComputePipelineCreateInfo pipeline_info = {
+        .stage = shader_ci,
+        .layout = *fault_process_pipeline_layout,
+    };
+    fault_process_pipeline = Vulkan::Check(device.createComputePipelineUnique({}, pipeline_info));
+    Vulkan::SetObjectName(device, *fault_process_pipeline, "Fault Buffer Parser Pipeline");
+
+    device.destroyShaderModule(module);
+}
+
+void FaultManager::ProcessFaultBuffer() {
+    if (u64 wait_tick = fault_areas[current_area]) { // area not consumed by its callback yet
+        scheduler.Wait(wait_tick);
+        scheduler.PopPendingOperations();
+    }
+
+    const u32 offset = current_area * PageFaultAreaSize;
+    u8* mapped = download_buffer.mapped_data.data() + offset;
+    std::memset(mapped, 0, PageFaultAreaSize); // also zeroes the fault counter in slot 0
+
+    const vk::BufferMemoryBarrier2 pre_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
+        .dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
+        .buffer = fault_buffer.Handle(),
+        .offset = 0,
+        .size = fault_buffer_size,
+    };
+    const vk::BufferMemoryBarrier2 post_barrier = {
+        .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
+        .srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
+        .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+        .dstAccessMask = vk::AccessFlagBits2::eShaderWrite,
+        .buffer = fault_buffer.Handle(),
+        .offset = 0,
+        .size = fault_buffer_size,
+    };
+    const vk::DescriptorBufferInfo fault_buffer_info = {
+        .buffer = fault_buffer.Handle(),
+        .offset = 0,
+        .range = fault_buffer_size,
+    };
+    const vk::DescriptorBufferInfo download_info = {
+        .buffer = download_buffer.Handle(),
+        .offset = offset,
+        .range = PageFaultAreaSize,
+    };
+    const std::array<vk::WriteDescriptorSet, 2> writes = {{
+        {
+            .dstSet = VK_NULL_HANDLE,
+            .dstBinding = 0,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = vk::DescriptorType::eStorageBuffer,
+            .pBufferInfo = &fault_buffer_info,
+        },
+        {
+            .dstSet = VK_NULL_HANDLE,
+            .dstBinding = 1,
+            .dstArrayElement = 0,
+            .descriptorCount = 1,
+            .descriptorType = vk::DescriptorType::eStorageBuffer,
+            .pBufferInfo = &download_info,
+        },
+    }};
+    scheduler.EndRendering();
+    const auto cmdbuf = scheduler.CommandBuffer();
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &pre_barrier,
+    });
+    cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
+    cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0,
+                                writes);
+    // 1 bit per page, 32 pages (one u32 bitmap word) per thread
+    const u32 num_threads = caching_num_pages / 32;
+    const u32 num_workgroups = Common::DivCeil(num_threads, 64u);
+    cmdbuf.dispatch(num_workgroups, 1, 1);
+
+    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
+        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &post_barrier,
+    });
+
+    scheduler.DeferOperation([this, mapped, area = current_area] {
+        fault_ranges.Clear();
+        const u64* fault_buf = std::bit_cast<const u64*>(mapped);
+        const u32 fault_count = fault_buf[0]; // raw GPU counter, can exceed what was stored
+        for (u32 i = 1; i <= fault_count && i < MaxPageFaults; ++i) { // stay inside the area
+            fault_ranges.Add(fault_buf[i], caching_pagesize);
+            LOG_INFO(Render_Vulkan, "Accessed non-GPU cached memory at {:#x}", fault_buf[i]);
+        }
+        fault_ranges.ForEach([&](VAddr start, VAddr end) {
+            ASSERT_MSG((end - start) <= std::numeric_limits<u32>::max(),
+                       "Buffer size is too large");
+            buffer_cache.FindBuffer(start, static_cast<u32>(end - start));
+        });
+        fault_areas[area] = 0;
+    });
+
+    fault_areas[current_area++] = scheduler.CurrentTick();
+    current_area %= MaxPendingFaults;
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/buffer_cache/fault_manager.h b/src/video_core/buffer_cache/fault_manager.h
new file mode 100644
index 000000000..4fd545433
--- /dev/null
+++ b/src/video_core/buffer_cache/fault_manager.h
@@ -0,0 +1,62 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "video_core/buffer_cache/buffer.h"
+#include "video_core/buffer_cache/range_set.h"
+
+namespace VideoCore {
+
+class BufferCache;
+
+/// Owns the page-fault bitmap buffer and the compute pass that turns the bits set in it into
+/// BufferCache::FindBuffer calls.
+class FaultManager {
+    /// Number of download areas; ProcessFaultBuffer cycles through them in order.
+    static constexpr size_t MaxPendingFaults = 8;
+
+public:
+    /// Creates the fault and download buffers and builds the fault-parsing compute pipeline.
+    /// @param instance Supplies the Vulkan device used for the buffers and pipeline objects.
+    /// @param scheduler Used to record the pass and to defer the host-side processing.
+    /// @param buffer_cache Receives a FindBuffer call for every faulting range.
+    /// @param caching_pagebits log2 of the caching page size in bytes.
+    /// @param caching_num_pages Number of tracked pages; the bitmap holds one bit per page.
+    explicit FaultManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
+                          BufferCache& buffer_cache, u32 caching_pagebits, u64 caching_num_pages);
+
+    /// Retrieves the fault bitmap buffer.
+    [[nodiscard]] Buffer* GetFaultBuffer() noexcept {
+        return &fault_buffer;
+    }
+
+    /// Records the compute pass that scans and clears the fault bitmap and writes the faulting
+    /// page addresses into the next download area, then defers a callback that groups them into
+    /// ranges and calls BufferCache::FindBuffer on each. If that area is still marked pending
+    /// from an earlier call, its tick is waited on and pending operations are popped first.
+    void ProcessFaultBuffer();
+
+private:
+    Vulkan::Scheduler& scheduler;
+    BufferCache& buffer_cache;
+    /// Scratch set the deferred callback refills on every pass.
+    RangeSet fault_ranges;
+    u64 caching_pagesize;
+    u64 caching_num_pages;
+    /// Fault bitmap size in bytes. Must stay declared before fault_buffer, whose member
+    /// initializer reads it.
+    u64 fault_buffer_size;
+    Buffer fault_buffer;
+    /// Host-mapped storage holding MaxPendingFaults download areas.
+    Buffer download_buffer;
+    /// Scheduler tick stored per area by ProcessFaultBuffer; reset to 0 by its deferred callback.
+    std::array<u64, MaxPendingFaults> fault_areas{};
+    /// Index of the area the next ProcessFaultBuffer call will use.
+    u32 current_area{};
+    vk::UniqueDescriptorSetLayout fault_process_desc_layout;
+    vk::UniquePipeline fault_process_pipeline;
+    vk::UniquePipelineLayout fault_process_pipeline_layout;
+};
+
+} // namespace VideoCore
diff --git a/src/video_core/host_shaders/fault_buffer_process.comp b/src/video_core/host_shaders/fault_buffer_process.comp
index a712cf441..04a86bad3 100644
--- a/src/video_core/host_shaders/fault_buffer_process.comp
+++ b/src/video_core/host_shaders/fault_buffer_process.comp
@@ -13,30 +13,23 @@ layout(std430, binding = 0) buffer input_buf {
 layout(std430, binding = 1) buffer output_buf {
     uint64_t download_buffer[];
 };
-
-// Overlap for 32 bit atomics
 layout(std430, binding = 1) buffer output_buf32 {
     uint download_buffer32[];
 };
 
-layout(constant_id = 0) const uint
CACHING_PAGEBITS = 0; - void main() { - uint id = gl_GlobalInvocationID.x; + const uint id = gl_GlobalInvocationID.x; uint word = fault_buffer[id]; - if (word == 0u) { - return; - } - // 1 page per bit - uint base_bit = id * 32u; + fault_buffer[id] = 0u; + const uint base_bit = id * 32u; while (word != 0u) { - uint bit = findLSB(word); - word &= word - 1; - uint page = base_bit + bit; - uint store_index = atomicAdd(download_buffer32[0], 1u) + 1u; - // It is very unlikely, but should we check for overflow? - if (store_index < 1024u) { // only support 1024 page faults - download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS; + const uint store_index = atomicAdd(download_buffer32[0], 1u) + 1u; + if (store_index >= MAX_PAGE_FAULTS) { + return; } + const uint bit = findLSB(word); + word &= word - 1; + const uint page = base_bit + bit; + download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS; } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 37b8051e8..8d00ff2d0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -407,18 +407,13 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { if (uses_dma) { // We only use fault buffer for DMA right now. 
- { - Common::RecursiveSharedLock lock{mapped_ranges_mutex}; - for (auto& range : mapped_ranges) { - buffer_cache.SynchronizeBuffersInRange(range.lower(), - range.upper() - range.lower()); - } + Common::RecursiveSharedLock lock{mapped_ranges_mutex}; + for (auto& range : mapped_ranges) { + buffer_cache.SynchronizeBuffersInRange(range.lower(), range.upper() - range.lower()); } - buffer_cache.MemoryBarrier(); + fault_process_pending = true; } - fault_process_pending |= uses_dma; - return true; } diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 78286957f..da7467dfb 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -84,15 +84,6 @@ void Scheduler::Wait(u64 tick) { Flush(info); } master_semaphore.Wait(tick); - - // CAUTION: This can introduce unexpected variation in the wait time. - // We don't currently sync the GPU, and some games are very sensitive to this. - // If this becomes a problem, it can be commented out. - // Idealy we would implement proper gpu sync. - while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) { - pending_ops.front().callback(); - pending_ops.pop(); - } } void Scheduler::PopPendingOperations() { @@ -109,9 +100,7 @@ void Scheduler::AllocateWorkerCommandBuffers() { }; current_cmdbuf = command_pool.Commit(); - auto begin_result = current_cmdbuf.begin(begin_info); - ASSERT_MSG(begin_result == vk::Result::eSuccess, "Failed to begin command buffer: {}", - vk::to_string(begin_result)); + Check(current_cmdbuf.begin(begin_info)); // Invalidate dynamic state so it gets applied to the new command buffer. 
dynamic_state.Invalidate(); @@ -139,9 +128,7 @@ void Scheduler::SubmitExecution(SubmitInfo& info) { #endif EndRendering(); - auto end_result = current_cmdbuf.end(); - ASSERT_MSG(end_result == vk::Result::eSuccess, "Failed to end command buffer: {}", - vk::to_string(end_result)); + Check(current_cmdbuf.end()); const vk::Semaphore timeline = master_semaphore.Handle(); info.AddSignal(timeline, signal_value);