Merge branch 'shadps4-emu:main' into gc2

This commit is contained in:
Lander Gallastegi 2025-11-19 16:58:58 +01:00 committed by GitHub
commit d74e3d6d54
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 310 additions and 342 deletions

4
.gitmodules vendored
View File

@ -113,3 +113,7 @@
[submodule "externals/json"]
path = externals/json
url = https://github.com/nlohmann/json.git
[submodule "externals/sdl3_mixer"]
path = externals/sdl3_mixer
url = https://github.com/libsdl-org/SDL_mixer
shallow = true

View File

@ -229,6 +229,7 @@ find_package(magic_enum 0.9.7 CONFIG)
find_package(PNG 1.6 MODULE)
find_package(RenderDoc 1.6.0 MODULE)
find_package(SDL3 3.1.2 CONFIG)
find_package(SDL3_mixer 2.8.1 CONFIG)
find_package(stb MODULE)
find_package(toml11 4.2.0 CONFIG)
find_package(tsl-robin-map 1.3.0 CONFIG)
@ -959,6 +960,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h
src/video_core/buffer_cache/buffer.h
src/video_core/buffer_cache/buffer_cache.cpp
src/video_core/buffer_cache/buffer_cache.h
src/video_core/buffer_cache/fault_manager.cpp
src/video_core/buffer_cache/fault_manager.h
src/video_core/buffer_cache/memory_tracker.h
src/video_core/buffer_cache/range_set.h
src/video_core/buffer_cache/region_definitions.h
@ -1070,7 +1073,7 @@ add_executable(shadps4
create_target_directory_groups(shadps4)
target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 pugixml::pugixml stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json)
target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json)
target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h")
target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h")

View File

@ -63,6 +63,18 @@ if (NOT TARGET SDL3::SDL3)
add_subdirectory(sdl3)
endif()
# SDL3_mixer
if (NOT TARGET SDL3_mixer::SDL3_mixer)
set(SDLMIXER_FLAC OFF)
set(SDLMIXER_OGG OFF)
set(SDLMIXER_MOD OFF)
set(SDLMIXER_MIDI OFF)
set(SDLMIXER_OPUS OFF)
set(SDLMIXER_WAVPACK OFF)
set(BUILD_SHARED_LIBS OFF)
add_subdirectory(sdl3_mixer)
endif()
# vulkan-headers
if (NOT TARGET Vulkan::Headers)
set(VULKAN_HEADERS_ENABLE_MODULE OFF)

1
externals/sdl3_mixer vendored Submodule

@ -0,0 +1 @@
Subproject commit 4182794ea45fe28568728670c6f1583855d0e85c

View File

@ -182,7 +182,13 @@ public:
}
void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
const char* function, std::string message) {
const char* function, const char* format, const fmt::format_args& args) {
if (!filter.CheckMessage(log_class, log_level) || !Config::getLoggingEnabled()) {
return;
}
const auto message = fmt::vformat(format, args);
// Propagate important log messages to the profiler
if (IsProfilerConnected()) {
const auto& msg_str = fmt::format("[{}] {}", GetLogClassName(log_class), message);
@ -201,10 +207,6 @@ public:
}
}
if (!filter.CheckMessage(log_class, log_level) || !Config::getLoggingEnabled()) {
return;
}
using std::chrono::duration_cast;
using std::chrono::microseconds;
using std::chrono::steady_clock;
@ -324,8 +326,8 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
unsigned int line_num, const char* function, const char* format,
const fmt::format_args& args) {
if (!initialization_in_progress_suppress_logging) [[likely]] {
Impl::Instance().PushEntry(log_class, log_level, filename, line_num, function,
fmt::vformat(format, args));
Impl::Instance().PushEntry(log_class, log_level, filename, line_num, function, format,
args);
}
}
} // namespace Common::Log

View File

@ -5,7 +5,6 @@
#include <filesystem>
#include <fstream>
#include <mutex>
#include <SDL3/SDL_audio.h>
#include <cmrc/cmrc.hpp>
#include <imgui.h>
@ -92,59 +91,45 @@ TrophyUI::TrophyUI(const std::filesystem::path& trophyIconPath, const std::strin
AddLayer(this);
bool customsoundplayed = false;
#ifdef ENABLE_QT_GUI
QString musicPathWav = QString::fromStdString(CustomTrophy_Dir.string() + "/trophy.wav");
QString musicPathMp3 = QString::fromStdString(CustomTrophy_Dir.string() + "/trophy.mp3");
if (fs::exists(musicPathWav.toStdString())) {
BackgroundMusicPlayer::getInstance().setVolume(100);
BackgroundMusicPlayer::getInstance().playMusic(musicPathWav, false);
customsoundplayed = true;
} else if (fs::exists(musicPathMp3.toStdString())) {
BackgroundMusicPlayer::getInstance().setVolume(100);
BackgroundMusicPlayer::getInstance().playMusic(musicPathMp3, false);
customsoundplayed = true;
MIX_Init();
mixer = MIX_CreateMixerDevice(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, NULL);
if (!mixer) {
LOG_ERROR(Lib_NpTrophy, "Could not initialize SDL Mixer, {}", SDL_GetError());
return;
}
#endif
if (!customsoundplayed) {
MIX_SetMasterGain(mixer, static_cast<float>(Config::getVolumeSlider() / 100.f));
auto musicPathMp3 = CustomTrophy_Dir / "trophy.mp3";
auto musicPathWav = CustomTrophy_Dir / "trophy.wav";
if (std::filesystem::exists(musicPathMp3)) {
audio = MIX_LoadAudio(mixer, musicPathMp3.string().c_str(), false);
} else if (std::filesystem::exists(musicPathWav)) {
audio = MIX_LoadAudio(mixer, musicPathWav.string().c_str(), false);
} else {
auto soundFile = resource.open("src/images/trophy.wav");
std::vector<u8> soundData = std::vector<u8>(soundFile.begin(), soundFile.end());
audio =
MIX_LoadAudio_IO(mixer, SDL_IOFromMem(soundData.data(), soundData.size()), false, true);
// due to low volume of default sound file
MIX_SetMasterGain(mixer, MIX_GetMasterGain(mixer) * 1.3f);
}
SDL_AudioSpec spec;
Uint8* audioBuf;
Uint32 audioLen;
if (!audio) {
LOG_ERROR(Lib_NpTrophy, "Could not loud audio file, {}", SDL_GetError());
return;
}
if (!SDL_LoadWAV_IO(SDL_IOFromMem(soundData.data(), soundData.size()), true, &spec,
&audioBuf, &audioLen)) {
LOG_ERROR(Lib_NpTrophy, "Cannot load trophy sound: {}", SDL_GetError());
SDL_free(audioBuf);
return;
}
SDL_AudioStream* stream =
SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &spec, nullptr, nullptr);
if (!stream) {
LOG_ERROR(Lib_NpTrophy, "Cannot create audio stream for trophy sound: {}",
SDL_GetError());
SDL_free(audioBuf);
return;
}
if (!SDL_PutAudioStreamData(stream, audioBuf, audioLen)) {
LOG_ERROR(Lib_NpTrophy, "Cannot add trophy sound data to stream: {}", SDL_GetError());
SDL_free(audioBuf);
return;
}
// Set audio gain 20% higher since audio file itself is soft
SDL_SetAudioStreamGain(stream, Config::getVolumeSlider() / 100.0f * 1.2f);
SDL_ResumeAudioStreamDevice(stream);
SDL_free(audioBuf);
if (!MIX_PlayAudio(mixer, audio)) {
LOG_ERROR(Lib_NpTrophy, "Could not play audio file, {}", SDL_GetError());
}
}
TrophyUI::~TrophyUI() {
MIX_DestroyAudio(audio);
MIX_DestroyMixer(mixer);
MIX_Quit();
Finish();
}

View File

@ -5,6 +5,7 @@
#include <string>
#include <variant>
#include <SDL3_mixer/SDL_mixer.h>
#include <queue>
#include "common/fixed_value.h"
@ -30,6 +31,9 @@ private:
std::string_view trophy_type;
ImGui::RefCountedTexture trophy_icon;
ImGui::RefCountedTexture trophy_type_icon;
MIX_Mixer* mixer;
MIX_Audio* audio;
};
struct TrophyInfo {

View File

@ -190,7 +190,7 @@ void CollectShaderInfoPass(IR::Program& program, const Profile& profile) {
});
info.buffers.push_back({
.used_types = IR::Type::U32,
.inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::FAULT_BUFFER_SIZE),
.inline_cbuf = AmdGpu::Buffer::Placeholder(std::numeric_limits<u32>::max()),
.buffer_type = BufferType::FaultBuffer,
.is_written = true,
});

View File

@ -2,49 +2,42 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <mutex>
#include "common/alignment.h"
#include "common/debug.h"
#include "common/scope_exit.h"
#include "common/types.h"
#include "core/memory.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/buffer_cache/memory_tracker.h"
#include "video_core/host_shaders/fault_buffer_process_comp.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCore {
static constexpr size_t DataShareBufferSize = 64_KB;
static constexpr size_t StagingBufferSize = 512_MB;
static constexpr size_t DownloadBufferSize = 32_MB;
static constexpr size_t UboStreamBufferSize = 64_MB;
static constexpr size_t DownloadBufferSize = 128_MB;
static constexpr size_t DeviceBufferSize = 128_MB;
static constexpr size_t MaxPageFaults = 1024;
BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_, TextureCache& texture_cache_,
PageManager& tracker)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
memory{Core::Memory::Instance()}, texture_cache{texture_cache_},
fault_manager{instance, scheduler, *this, CACHING_PAGEBITS, CACHING_NUMPAGES},
staging_buffer{instance, scheduler, MemoryUsage::Upload, StagingBufferSize},
stream_buffer{instance, scheduler, MemoryUsage::Stream, UboStreamBufferSize},
download_buffer{instance, scheduler, MemoryUsage::Download, DownloadBufferSize},
device_buffer{instance, scheduler, MemoryUsage::DeviceLocal, DeviceBufferSize},
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
0, AllFlags, BDA_PAGETABLE_SIZE},
fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE) {
0, AllFlags, BDA_PAGETABLE_SIZE} {
Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(),
"BDA Page Table Buffer");
Vulkan::SetObjectName(instance.GetDevice(), fault_buffer.Handle(), "Fault Buffer");
memory_tracker = std::make_unique<MemoryTracker>(tracker);
@ -57,80 +50,6 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
const vk::Buffer& null_buffer = slot_buffers[null_id].buffer;
Vulkan::SetObjectName(instance.GetDevice(), null_buffer, "Null Buffer");
// Prepare the fault buffer parsing pipeline
boost::container::static_vector<vk::DescriptorSetLayoutBinding, 2> bindings{
{
.binding = 0,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
},
{
.binding = 1,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
},
};
const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
.flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
.bindingCount = static_cast<u32>(bindings.size()),
.pBindings = bindings.data(),
};
auto [desc_layout_result, desc_layout] =
instance.GetDevice().createDescriptorSetLayoutUnique(desc_layout_ci);
ASSERT_MSG(desc_layout_result == vk::Result::eSuccess,
"Failed to create descriptor set layout: {}", vk::to_string(desc_layout_result));
fault_process_desc_layout = std::move(desc_layout);
const auto& module = Vulkan::Compile(HostShaders::FAULT_BUFFER_PROCESS_COMP,
vk::ShaderStageFlagBits::eCompute, instance.GetDevice());
Vulkan::SetObjectName(instance.GetDevice(), module, "Fault Buffer Parser");
const vk::SpecializationMapEntry specialization_map_entry = {
.constantID = 0,
.offset = 0,
.size = sizeof(u32),
};
const vk::SpecializationInfo specialization_info = {
.mapEntryCount = 1,
.pMapEntries = &specialization_map_entry,
.dataSize = sizeof(u32),
.pData = &CACHING_PAGEBITS,
};
const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute,
.module = module,
.pName = "main",
.pSpecializationInfo = &specialization_info,
};
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1U,
.pSetLayouts = &(*fault_process_desc_layout),
};
auto [layout_result, layout] = instance.GetDevice().createPipelineLayoutUnique(layout_info);
ASSERT_MSG(layout_result == vk::Result::eSuccess, "Failed to create pipeline layout: {}",
vk::to_string(layout_result));
fault_process_pipeline_layout = std::move(layout);
const vk::ComputePipelineCreateInfo pipeline_info = {
.stage = shader_ci,
.layout = *fault_process_pipeline_layout,
};
auto [pipeline_result, pipeline] =
instance.GetDevice().createComputePipelineUnique({}, pipeline_info);
ASSERT_MSG(pipeline_result == vk::Result::eSuccess, "Failed to create compute pipeline: {}",
vk::to_string(pipeline_result));
fault_process_pipeline = std::move(pipeline);
Vulkan::SetObjectName(instance.GetDevice(), *fault_process_pipeline,
"Fault Buffer Parser Pipeline");
instance.GetDevice().destroyShaderModule(module);
// Set up garbage collection parameters
if (!instance.CanReportMemoryUsage()) {
trigger_gc_memory = DEFAULT_TRIGGER_GC_MEMORY;
@ -724,14 +643,10 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
wanted_size = static_cast<u32>(device_addr_end - device_addr);
const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = [&] {
std::scoped_lock lk{slot_buffers_mutex};
return slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin,
AllFlags | vk::BufferUsageFlagBits::eShaderDeviceAddress, size);
}();
const BufferId new_buffer_id =
slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin,
AllFlags | vk::BufferUsageFlagBits::eShaderDeviceAddress, size);
auto& new_buffer = slot_buffers[new_buffer_id];
const size_t size_bytes = new_buffer.SizeBytes();
const auto cmdbuf = scheduler.CommandBuffer();
for (const BufferId overlap_id : overlap.ids) {
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
}
@ -740,126 +655,7 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
}
void BufferCache::ProcessFaultBuffer() {
// Run fault processing shader
const auto [mapped, offset] = download_buffer.Map(MaxPageFaults * sizeof(u64));
vk::BufferMemoryBarrier2 fault_buffer_barrier{
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
.dstAccessMask = vk::AccessFlagBits2::eShaderRead,
.buffer = fault_buffer.Handle(),
.offset = 0,
.size = FAULT_BUFFER_SIZE,
};
vk::BufferMemoryBarrier2 download_barrier{
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
.dstAccessMask = vk::AccessFlagBits2::eShaderRead | vk::AccessFlagBits2::eShaderWrite,
.buffer = download_buffer.Handle(),
.offset = offset,
.size = MaxPageFaults * sizeof(u64),
};
std::array<vk::BufferMemoryBarrier2, 2> barriers{fault_buffer_barrier, download_barrier};
vk::DescriptorBufferInfo fault_buffer_info{
.buffer = fault_buffer.Handle(),
.offset = 0,
.range = FAULT_BUFFER_SIZE,
};
vk::DescriptorBufferInfo download_info{
.buffer = download_buffer.Handle(),
.offset = offset,
.range = MaxPageFaults * sizeof(u64),
};
boost::container::small_vector<vk::WriteDescriptorSet, 2> writes{
{
.dstSet = VK_NULL_HANDLE,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.pBufferInfo = &fault_buffer_info,
},
{
.dstSet = VK_NULL_HANDLE,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.pBufferInfo = &download_info,
},
};
download_buffer.Commit();
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.fillBuffer(download_buffer.Handle(), offset, MaxPageFaults * sizeof(u64), 0);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 2,
.pBufferMemoryBarriers = barriers.data(),
});
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0,
writes);
constexpr u32 num_threads = CACHING_NUMPAGES / 32; // 1 bit per page, 32 pages per workgroup
constexpr u32 num_workgroups = Common::DivCeil(num_threads, 64u);
cmdbuf.dispatch(num_workgroups, 1, 1);
// Reset fault buffer
const vk::BufferMemoryBarrier2 reset_pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
.srcAccessMask = vk::AccessFlagBits2::eShaderRead,
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
.buffer = fault_buffer.Handle(),
.offset = 0,
.size = FAULT_BUFFER_SIZE,
};
const vk::BufferMemoryBarrier2 reset_post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
.buffer = fault_buffer.Handle(),
.offset = 0,
.size = FAULT_BUFFER_SIZE,
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &reset_pre_barrier,
});
cmdbuf.fillBuffer(fault_buffer.buffer, 0, FAULT_BUFFER_SIZE, 0);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &reset_post_barrier,
});
// Defer creating buffers
scheduler.DeferOperation([this, mapped]() {
// Create the fault buffers batched
boost::icl::interval_set<VAddr> fault_ranges;
const u64* fault_ptr = std::bit_cast<const u64*>(mapped);
const u32 fault_count = static_cast<u32>(*(fault_ptr++));
for (u32 i = 0; i < fault_count; ++i) {
const VAddr fault = *(fault_ptr++);
const VAddr fault_end = fault + CACHING_PAGESIZE; // This can be adjusted
fault_ranges +=
boost::icl::interval_set<VAddr>::interval_type::right_open(fault, fault_end);
LOG_INFO(Render_Vulkan, "Accessed non-GPU cached memory at {:#x}", fault);
}
for (const auto& range : fault_ranges) {
const VAddr start = range.lower();
const VAddr end = range.upper();
const u64 page_start = start >> CACHING_PAGEBITS;
const u64 page_end = Common::DivCeil(end, CACHING_PAGESIZE);
// Buffer size is in 32 bits
ASSERT_MSG((range.upper() - range.lower()) <= std::numeric_limits<u32>::max(),
"Buffer size is too large");
CreateBuffer(start, static_cast<u32>(end - start));
}
});
fault_manager.ProcessFaultBuffer();
}
void BufferCache::Register(BufferId buffer_id) {
@ -1040,10 +836,7 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
}
void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
if (device_addr == 0) {
return;
}
VAddr device_addr_end = device_addr + size;
const VAddr device_addr_end = device_addr + size;
ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
RENDERER_TRACE;
VAddr start = std::max(buffer.CpuAddr(), device_addr);
@ -1053,21 +846,6 @@ void BufferCache::SynchronizeBuffersInRange(VAddr device_addr, u64 size) {
});
}
void BufferCache::MemoryBarrier() {
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
vk::MemoryBarrier2 barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
};
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.memoryBarrierCount = 1,
.pMemoryBarriers = &barrier,
});
}
void BufferCache::InlineDataBuffer(Buffer& buffer, VAddr address, const void* value,
u32 num_bytes) {
scheduler.EndRendering();

View File

@ -3,13 +3,13 @@
#pragma once
#include <shared_mutex>
#include <boost/container/small_vector.hpp>
#include "common/enum.h"
#include "common/lru_cache.h"
#include "common/slot_vector.h"
#include "common/types.h"
#include "video_core/buffer_cache/buffer.h"
#include "video_core/buffer_cache/fault_manager.h"
#include "video_core/buffer_cache/range_set.h"
#include "video_core/multi_level_page_table.h"
#include "video_core/texture_cache/image.h"
@ -51,9 +51,7 @@ public:
static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
static constexpr u64 DEVICE_PAGESIZE = 16_KB;
static constexpr u64 CACHING_NUMPAGES = u64{1} << (40 - CACHING_PAGEBITS);
static constexpr u64 BDA_PAGETABLE_SIZE = CACHING_NUMPAGES * sizeof(vk::DeviceAddress);
static constexpr u64 FAULT_BUFFER_SIZE = CACHING_NUMPAGES / 8; // Bit per page
// Default values for garbage collection
static constexpr s64 DEFAULT_TRIGGER_GC_MEMORY = 1_GB;
@ -79,12 +77,6 @@ public:
bool has_stream_leap = false;
};
using IntervalSet =
boost::icl::interval_set<VAddr, std::less,
ICL_INTERVAL_INSTANCE(ICL_INTERVAL_DEFAULT, VAddr, std::less),
RangeSetsAllocator>;
using IntervalType = typename IntervalSet::interval_type;
public:
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
AmdGpu::Liverpool* liverpool, TextureCache& texture_cache,
@ -103,7 +95,7 @@ public:
/// Retrieves the fault buffer.
[[nodiscard]] Buffer* GetFaultBuffer() noexcept {
return &fault_buffer;
return fault_manager.GetFaultBuffer();
}
/// Retrieves the buffer with the specified id.
@ -177,9 +169,6 @@ public:
/// Synchronizes all buffers neede for DMA.
void SynchronizeDmaBuffers();
/// Record memory barrier. Used for buffers when accessed via BDA.
void MemoryBarrier();
/// Runs the garbage collector.
void RunGarbageCollector();
@ -233,6 +222,7 @@ private:
AmdGpu::Liverpool* liverpool;
Core::MemoryManager* memory;
TextureCache& texture_cache;
FaultManager fault_manager;
std::unique_ptr<MemoryTracker> memory_tracker;
StreamBuffer staging_buffer;
StreamBuffer stream_buffer;
@ -240,8 +230,6 @@ private:
StreamBuffer device_buffer;
Buffer gds_buffer;
Buffer bda_pagetable_buffer;
Buffer fault_buffer;
std::shared_mutex slot_buffers_mutex;
Common::SlotVector<Buffer> slot_buffers;
u64 total_used_memory = 0;
u64 trigger_gc_memory = 0;
@ -251,9 +239,6 @@ private:
RangeSet gpu_modified_ranges;
SplitRangeMap<BufferId> buffer_ranges;
PageTable page_table;
vk::UniqueDescriptorSetLayout fault_process_desc_layout;
vk::UniquePipeline fault_process_pipeline;
vk::UniquePipelineLayout fault_process_pipeline_layout;
};
} // namespace VideoCore

View File

@ -0,0 +1,177 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/div_ceil.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/buffer_cache/fault_manager.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/host_shaders/fault_buffer_process_comp.h"
namespace VideoCore {
static constexpr size_t MaxPageFaults = 1024;
static constexpr size_t PageFaultAreaSize = MaxPageFaults * sizeof(u64);
FaultManager::FaultManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler_,
BufferCache& buffer_cache_, u32 caching_pagebits, u64 caching_num_pages_)
: scheduler{scheduler_}, buffer_cache{buffer_cache_},
caching_pagesize{1ULL << caching_pagebits}, caching_num_pages{caching_num_pages_},
fault_buffer_size{caching_num_pages_ / 8},
fault_buffer{instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, fault_buffer_size},
download_buffer{instance, scheduler, MemoryUsage::Download,
0, AllFlags, MaxPendingFaults * PageFaultAreaSize} {
const auto device = instance.GetDevice();
Vulkan::SetObjectName(device, fault_buffer.Handle(), "Fault Buffer");
const std::array<vk::DescriptorSetLayoutBinding, 2> bindings = {{
{
.binding = 0,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
},
{
.binding = 1,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
},
}};
const vk::DescriptorSetLayoutCreateInfo desc_layout_ci = {
.flags = vk::DescriptorSetLayoutCreateFlagBits::ePushDescriptorKHR,
.bindingCount = 2,
.pBindings = bindings.data(),
};
fault_process_desc_layout =
Vulkan::Check(device.createDescriptorSetLayoutUnique(desc_layout_ci));
std::vector<std::string> defines{{fmt::format("CACHING_PAGEBITS={}", caching_pagebits),
fmt::format("MAX_PAGE_FAULTS={}", MaxPageFaults)}};
const auto module = Vulkan::Compile(HostShaders::FAULT_BUFFER_PROCESS_COMP,
vk::ShaderStageFlagBits::eCompute, device, defines);
Vulkan::SetObjectName(device, module, "Fault Buffer Parser");
const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute,
.module = module,
.pName = "main",
};
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1U,
.pSetLayouts = &(*fault_process_desc_layout),
};
fault_process_pipeline_layout = Vulkan::Check(device.createPipelineLayoutUnique(layout_info));
const vk::ComputePipelineCreateInfo pipeline_info = {
.stage = shader_ci,
.layout = *fault_process_pipeline_layout,
};
fault_process_pipeline = Vulkan::Check(device.createComputePipelineUnique({}, pipeline_info));
Vulkan::SetObjectName(device, *fault_process_pipeline, "Fault Buffer Parser Pipeline");
device.destroyShaderModule(module);
}
void FaultManager::ProcessFaultBuffer() {
if (u64 wait_tick = fault_areas[current_area]) {
scheduler.Wait(wait_tick);
scheduler.PopPendingOperations();
}
const u32 offset = current_area * PageFaultAreaSize;
u8* mapped = download_buffer.mapped_data.data() + offset;
std::memset(mapped, 0, PageFaultAreaSize);
const vk::BufferMemoryBarrier2 pre_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
.dstAccessMask = vk::AccessFlagBits2::eShaderRead,
.buffer = fault_buffer.Handle(),
.offset = 0,
.size = fault_buffer_size,
};
const vk::BufferMemoryBarrier2 post_barrier = {
.srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
.srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
.dstAccessMask = vk::AccessFlagBits2::eShaderWrite,
.buffer = fault_buffer.Handle(),
.offset = 0,
.size = fault_buffer_size,
};
const vk::DescriptorBufferInfo fault_buffer_info = {
.buffer = fault_buffer.Handle(),
.offset = 0,
.range = fault_buffer_size,
};
const vk::DescriptorBufferInfo download_info = {
.buffer = download_buffer.Handle(),
.offset = offset,
.range = PageFaultAreaSize,
};
const std::array<vk::WriteDescriptorSet, 2> writes = {{
{
.dstSet = VK_NULL_HANDLE,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.pBufferInfo = &fault_buffer_info,
},
{
.dstSet = VK_NULL_HANDLE,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageBuffer,
.pBufferInfo = &download_info,
},
}};
scheduler.EndRendering();
const auto cmdbuf = scheduler.CommandBuffer();
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &pre_barrier,
});
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, *fault_process_pipeline);
cmdbuf.pushDescriptorSetKHR(vk::PipelineBindPoint::eCompute, *fault_process_pipeline_layout, 0,
writes);
// 1 bit per page, 32 pages per workgroup
const u32 num_threads = caching_num_pages / 32;
const u32 num_workgroups = Common::DivCeil(num_threads, 64u);
cmdbuf.dispatch(num_workgroups, 1, 1);
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
.bufferMemoryBarrierCount = 1,
.pBufferMemoryBarriers = &post_barrier,
});
scheduler.DeferOperation([this, mapped, area = current_area] {
fault_ranges.Clear();
const u64* fault_buf = std::bit_cast<const u64*>(mapped);
const u32 fault_count = fault_buf[0];
for (u32 i = 1; i <= fault_count; ++i) {
fault_ranges.Add(fault_buf[i], caching_pagesize);
LOG_INFO(Render_Vulkan, "Accessed non-GPU cached memory at {:#x}", fault_buf[i]);
}
fault_ranges.ForEach([&](VAddr start, VAddr end) {
ASSERT_MSG((end - start) <= std::numeric_limits<u32>::max(),
"Buffer size is too large");
buffer_cache.FindBuffer(start, static_cast<u32>(end - start));
});
fault_areas[area] = 0;
});
fault_areas[current_area++] = scheduler.CurrentTick();
current_area %= MaxPendingFaults;
}
} // namespace VideoCore

View File

@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "video_core/buffer_cache/buffer.h"
#include "video_core/buffer_cache/range_set.h"
namespace VideoCore {
class BufferCache;
class FaultManager {
static constexpr size_t MaxPendingFaults = 8;
public:
explicit FaultManager(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
BufferCache& buffer_cache, u32 caching_pagebits, u64 caching_num_pages);
[[nodiscard]] Buffer* GetFaultBuffer() noexcept {
return &fault_buffer;
}
void ProcessFaultBuffer();
private:
Vulkan::Scheduler& scheduler;
BufferCache& buffer_cache;
RangeSet fault_ranges;
u64 caching_pagesize;
u64 caching_num_pages;
u64 fault_buffer_size;
Buffer fault_buffer;
Buffer download_buffer;
std::array<u64, MaxPendingFaults> fault_areas{};
u32 current_area{};
vk::UniqueDescriptorSetLayout fault_process_desc_layout;
vk::UniquePipeline fault_process_pipeline;
vk::UniquePipelineLayout fault_process_pipeline_layout;
};
} // namespace VideoCore

View File

@ -13,30 +13,23 @@ layout(std430, binding = 0) buffer input_buf {
layout(std430, binding = 1) buffer output_buf {
uint64_t download_buffer[];
};
// Overlap for 32 bit atomics
layout(std430, binding = 1) buffer output_buf32 {
uint download_buffer32[];
};
layout(constant_id = 0) const uint CACHING_PAGEBITS = 0;
void main() {
uint id = gl_GlobalInvocationID.x;
const uint id = gl_GlobalInvocationID.x;
uint word = fault_buffer[id];
if (word == 0u) {
return;
}
// 1 page per bit
uint base_bit = id * 32u;
fault_buffer[id] = 0u;
const uint base_bit = id * 32u;
while (word != 0u) {
uint bit = findLSB(word);
word &= word - 1;
uint page = base_bit + bit;
uint store_index = atomicAdd(download_buffer32[0], 1u) + 1u;
// It is very unlikely, but should we check for overflow?
if (store_index < 1024u) { // only support 1024 page faults
download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
const uint store_index = atomicAdd(download_buffer32[0], 1u) + 1u;
if (store_index >= MAX_PAGE_FAULTS) {
return;
}
const uint bit = findLSB(word);
word &= word - 1;
const uint page = base_bit + bit;
download_buffer[store_index] = uint64_t(page) << CACHING_PAGEBITS;
}
}

View File

@ -407,18 +407,13 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
if (uses_dma) {
// We only use fault buffer for DMA right now.
{
Common::RecursiveSharedLock lock{mapped_ranges_mutex};
for (auto& range : mapped_ranges) {
buffer_cache.SynchronizeBuffersInRange(range.lower(),
range.upper() - range.lower());
}
Common::RecursiveSharedLock lock{mapped_ranges_mutex};
for (auto& range : mapped_ranges) {
buffer_cache.SynchronizeBuffersInRange(range.lower(), range.upper() - range.lower());
}
buffer_cache.MemoryBarrier();
fault_process_pending = true;
}
fault_process_pending |= uses_dma;
return true;
}

View File

@ -84,15 +84,6 @@ void Scheduler::Wait(u64 tick) {
Flush(info);
}
master_semaphore.Wait(tick);
// CAUTION: This can introduce unexpected variation in the wait time.
// We don't currently sync the GPU, and some games are very sensitive to this.
// If this becomes a problem, it can be commented out.
// Idealy we would implement proper gpu sync.
while (!pending_ops.empty() && pending_ops.front().gpu_tick <= tick) {
pending_ops.front().callback();
pending_ops.pop();
}
}
void Scheduler::PopPendingOperations() {
@ -109,9 +100,7 @@ void Scheduler::AllocateWorkerCommandBuffers() {
};
current_cmdbuf = command_pool.Commit();
auto begin_result = current_cmdbuf.begin(begin_info);
ASSERT_MSG(begin_result == vk::Result::eSuccess, "Failed to begin command buffer: {}",
vk::to_string(begin_result));
Check(current_cmdbuf.begin(begin_info));
// Invalidate dynamic state so it gets applied to the new command buffer.
dynamic_state.Invalidate();
@ -139,9 +128,7 @@ void Scheduler::SubmitExecution(SubmitInfo& info) {
#endif
EndRendering();
auto end_result = current_cmdbuf.end();
ASSERT_MSG(end_result == vk::Result::eSuccess, "Failed to end command buffer: {}",
vk::to_string(end_result));
Check(current_cmdbuf.end());
const vk::Semaphore timeline = master_semaphore.Handle();
info.AddSignal(timeline, signal_value);