From 22e103b033d7f87afb591ff18ec6693e6818306d Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 6 Aug 2025 18:32:30 +0200 Subject: [PATCH 01/46] Use general dirty flag --- src/video_core/texture_cache/image.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index c30edad79..69cbe3fc0 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -113,7 +113,7 @@ struct Image { bool SafeToDownload() const { return True(flags & ImageFlagBits::GpuModified) && - False(flags & (ImageFlagBits::GpuDirty | ImageFlagBits::CpuDirty)); + False(flags & ImageFlagBits::Dirty); } const Vulkan::Instance* instance; From 0f70970442f9b72c9356fced489d7066eff84890 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 6 Aug 2025 18:37:51 +0200 Subject: [PATCH 02/46] clang-format --- src/video_core/texture_cache/image.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 69cbe3fc0..9703315cb 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -112,8 +112,7 @@ struct Image { } bool SafeToDownload() const { - return True(flags & ImageFlagBits::GpuModified) && - False(flags & ImageFlagBits::Dirty); + return True(flags & ImageFlagBits::GpuModified) && False(flags & ImageFlagBits::Dirty); } const Vulkan::Instance* instance; From dff30484a57476ffa9dcb41522025697cacf7b04 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 9 Aug 2025 20:07:28 +0200 Subject: [PATCH 03/46] Handle non-linear image download --- .../texture_cache/texture_cache.cpp | 60 ++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 9f7894f1e..22e538fe3 100644 --- a/src/video_core/texture_cache/texture_cache.cpp 
+++ b/src/video_core/texture_cache/texture_cache.cpp @@ -96,34 +96,43 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { return; } auto& download_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::Download); - const u32 download_size = image.info.pitch * image.info.size.height * - image.info.resources.layers * (image.info.num_bits / 8); - ASSERT(download_size <= image.info.guest_size); - const auto [download, offset] = download_buffer.Map(download_size); - download_buffer.Commit(); - const vk::BufferImageCopy image_download = { - .bufferOffset = offset, - .bufferRowLength = image.info.pitch, - .bufferImageHeight = image.info.size.height, - .imageSubresource = - { - .aspectMask = image.info.props.is_depth ? vk::ImageAspectFlagBits::eDepth - : vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, + const auto image_addr = image.info.guest_address; + const auto image_size = image.info.guest_size; + const auto image_mips = image.info.resources.levels; + boost::container::small_vector buffer_copies; + for (u32 mip = 0; mip < image_mips; ++mip) { + const auto& width = std::max(image.info.size.width >> mip, 1u); + const auto& height = std::max(image.info.size.height >> mip, 1u); + const auto& depth = + image.info.props.is_volume ? std::max(image.info.size.depth >> mip, 1u) : 1u; + const auto [mip_size, mip_pitch, mip_height, mip_offset] = image.info.mips_layout[mip]; + const u32 extent_width = mip_pitch ? std::min(mip_pitch, width) : width; + const u32 extent_height = mip_height ? 
std::min(mip_height, height) : height; + buffer_copies.push_back(vk::BufferImageCopy{ + .bufferOffset = mip_offset, + .bufferRowLength = mip_pitch, + .bufferImageHeight = mip_height, + .imageSubresource{ + .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, + .mipLevel = mip, .baseArrayLayer = 0, .layerCount = image.info.resources.layers, }, - .imageOffset = {0, 0, 0}, - .imageExtent = {image.info.size.width, image.info.size.height, 1}, - }; + .imageOffset = {0, 0, 0}, + .imageExtent = {extent_width, extent_height, depth}, + }); + } + if (buffer_copies.empty()) { + return; + } + const auto [download, offset] = download_buffer.Map(image_size); + download_buffer.Commit(); scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, - download_buffer.Handle(), image_download); - scheduler.DeferOperation([device_addr = image.info.guest_address, download, download_size] { + tile_manager.TileImage(image.image, buffer_copies, download_buffer.Handle(), offset, + image.info); + scheduler.DeferOperation([image_addr, download, image_size] { auto* memory = Core::Memory::Instance(); - memory->TryWriteBacking(std::bit_cast(device_addr), download, download_size); + memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); }); } @@ -923,11 +932,6 @@ void TextureCache::RunGarbageCollector() { --num_deletions; auto& image = slot_images[image_id]; const bool download = image.SafeToDownload(); - const bool tiled = image.info.IsTiled(); - if (tiled && download) { - // This is a workaround for now. We can't handle non-linear image downloads. 
- return false; - } if (download && !pressured) { return false; } From d9958dedc4f98afa13b980614ceb8f36e9bd11b9 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 10 Aug 2025 01:04:38 +0200 Subject: [PATCH 04/46] Layout transition --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/texture_cache/texture_cache.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ec0c38bda..17082314b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -452,8 +452,8 @@ void Rasterizer::OnSubmit() { buffer_cache.ProcessFaultBuffer(); } texture_cache.ProcessDownloadImages(); - texture_cache.RunGarbageCollector(); buffer_cache.RunGarbageCollector(); + texture_cache.RunGarbageCollector(); } bool Rasterizer::BindResources(const Pipeline* pipeline) { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 22e538fe3..760811cb2 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -128,6 +128,7 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { const auto [download, offset] = download_buffer.Map(image_size); download_buffer.Commit(); scheduler.EndRendering(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); tile_manager.TileImage(image.image, buffer_copies, download_buffer.Handle(), offset, image.info); scheduler.DeferOperation([image_addr, download, image_size] { From 58b5618c558b1df613965656457d370b970800ff Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 10 Aug 2025 19:49:42 +0200 Subject: [PATCH 05/46] Invalidate buffer cache memory on image download --- src/video_core/buffer_cache/buffer_cache.cpp | 11 +++++++---- src/video_core/buffer_cache/buffer_cache.h | 2 +- 
src/video_core/buffer_cache/memory_tracker.h | 16 ++++++++++++++++ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 ++-- src/video_core/texture_cache/texture_cache.cpp | 8 +++++++- 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index c1e203b30..152883333 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -154,12 +154,16 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s BufferCache::~BufferCache() = default; -void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { +void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool flush) { if (!IsRegionRegistered(device_addr, size)) { return; } - memory_tracker->InvalidateRegion( - device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); + if (flush) { + memory_tracker->InvalidateRegion( + device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); + } else { + memory_tracker->InvalidateRegion(device_addr, size); + } } void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { @@ -1210,7 +1214,6 @@ void BufferCache::RunGarbageCollector() { } --max_deletions; Buffer& buffer = slot_buffers[buffer_id]; - // InvalidateMemory(buffer.CpuAddr(), buffer.SizeBytes()); DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes(), true); DeleteBuffer(buffer_id); }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index aecc97db0..9e35e7ef2 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -114,7 +114,7 @@ public: } /// Invalidates any buffer in the logical page range. 
- void InvalidateMemory(VAddr device_addr, u64 size); + void InvalidateMemory(VAddr device_addr, u64 size, bool flush); /// Flushes any GPU modified buffer in the logical page range back to CPU memory. void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index ec0878c3b..a59fff64a 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -75,6 +75,8 @@ public: manager->template IsRegionModified(offset, size)) { return true; } + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); manager->template ChangeRegionState( manager->GetCpuAddr() + offset, size); return false; @@ -85,6 +87,20 @@ public: }); } + /// Removes all protection from a page (lose any non downloaded GPU modifications) + void InvalidateRegion(VAddr cpu_addr, u64 size) noexcept { + IteratePages(cpu_addr, size, [](RegionManager* manager, u64 offset, size_t size) { + // Perform both the GPU modification check and CPU state change with the lock + // in case we are racing with GPU thread trying to mark the page as GPU + // modified. 
+ std::scoped_lock lk{manager->lock}; + manager->template ChangeRegionState(manager->GetCpuAddr() + offset, + size); + manager->template ChangeRegionState(manager->GetCpuAddr() + offset, + size); + }); + } + /// Call 'func' for each CPU modified range and unmark those pages as CPU modified void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func, auto&& on_upload) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 17082314b..c436d7f15 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1036,7 +1036,7 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) { // Not GPU mapped memory, can skip invalidation logic entirely. return false; } - buffer_cache.InvalidateMemory(addr, size); + buffer_cache.InvalidateMemory(addr, size, true); texture_cache.InvalidateMemory(addr, size); return true; } @@ -1070,7 +1070,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) { } void Rasterizer::UnmapMemory(VAddr addr, u64 size) { - buffer_cache.InvalidateMemory(addr, size); + buffer_cache.InvalidateMemory(addr, size, true); texture_cache.UnmapMemory(addr, size); page_manager.OnGpuUnmap(addr, size); { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 760811cb2..035abce1d 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -131,9 +131,15 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); tile_manager.TileImage(image.image, buffer_copies, download_buffer.Handle(), offset, image.info); - scheduler.DeferOperation([image_addr, download, image_size] { + scheduler.DeferOperation([this, image_addr, download, image_size] { auto* memory = Core::Memory::Instance(); + // Should we download 
directly to main memory or put contents into the buffer cache? memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); + // Can happen that the buffer that the image is read from still holds + // old invalid data. We need to invalidate memory in buffer cache so that + // contents are uploaded from main memory the next time buffers in this + // memory region are accessed. + buffer_cache.InvalidateMemory(image_addr, image_size, false); }); } From 5b8057da4ce0df3918e3b1776d57215af439ebb4 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 11 Aug 2025 19:29:49 +0200 Subject: [PATCH 06/46] Sync texture downloads --- src/video_core/buffer_cache/buffer_cache.cpp | 16 +++++----------- src/video_core/buffer_cache/buffer_cache.h | 1 - src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/texture_cache/texture_cache.cpp | 11 +++++++++++ 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 152883333..1cf88f1c8 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -169,11 +169,11 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool flush) { void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { liverpool->SendCommand([this, device_addr, size, is_write] { Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)]; - DownloadBufferMemory(buffer, device_addr, size, is_write); + DownloadBufferMemory(buffer, device_addr, size, is_write); + scheduler.Finish(); }); } -template void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) { boost::container::small_vector copies; u64 total_size_bytes = 0; @@ -208,7 +208,7 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); 
cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies); - const auto write_data = [&]() { + scheduler.DeferOperation([&]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; @@ -220,13 +220,7 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si if (is_write) { memory_tracker->MarkRegionAsCpuModified(device_addr, size); } - }; - if constexpr (async) { - scheduler.DeferOperation(write_data); - } else { - scheduler.Finish(); - write_data(); - } + }); } void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { @@ -1214,7 +1208,7 @@ void BufferCache::RunGarbageCollector() { } --max_deletions; Buffer& buffer = slot_buffers[buffer_id]; - DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes(), true); + DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes(), true); DeleteBuffer(buffer_id); }; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9e35e7ef2..3192b9018 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -183,7 +183,6 @@ private: return !buffer_id || slot_buffers[buffer_id].is_deleted; } - template void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write); [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c436d7f15..533803ccc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -452,8 +452,8 @@ void Rasterizer::OnSubmit() { buffer_cache.ProcessFaultBuffer(); } texture_cache.ProcessDownloadImages(); - buffer_cache.RunGarbageCollector(); texture_cache.RunGarbageCollector(); + buffer_cache.RunGarbageCollector(); } bool 
Rasterizer::BindResources(const Pipeline* pipeline) { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 035abce1d..9b78a6c9d 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -131,6 +131,7 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); tile_manager.TileImage(image.image, buffer_copies, download_buffer.Handle(), offset, image.info); + LOG_WARNING(Render_Vulkan, "Downloading image {:x} ", image_addr); scheduler.DeferOperation([this, image_addr, download, image_size] { auto* memory = Core::Memory::Instance(); // Should we download directly to main memory or put contents into the buffer cache? @@ -140,6 +141,7 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { // contents are uploaded from main memory the next time buffers in this // memory region are accessed. buffer_cache.InvalidateMemory(image_addr, image_size, false); + LOG_WARNING(Render_Vulkan, "Downloaded image {:x} ", image_addr); }); } @@ -922,6 +924,7 @@ void TextureCache::RunGarbageCollector() { std::scoped_lock lock{mutex}; bool pressured = false; bool aggresive = false; + bool downloaded = false; u64 ticks_to_destroy = 0; size_t num_deletions = 0; @@ -944,6 +947,7 @@ void TextureCache::RunGarbageCollector() { } if (download) { DownloadImageMemory(image_id); + downloaded = true; } FreeImage(image_id); if (total_used_memory < critical_gc_memory) { @@ -969,6 +973,13 @@ void TextureCache::RunGarbageCollector() { configure(true); lru_cache.ForEachItemBelow(gc_tick - ticks_to_destroy, clean_up); } + + if (downloaded) { + // We need to make downloads synchronous. It is possible that the contents + // of the image are requested before they are downloaded in which case + // outdated buffer cache contents are used instead. 
+ scheduler.Finish(); + } } void TextureCache::TouchImage(const Image& image) { From 9d267bec2cf94ac64e3af20b84d32dabd94b0c07 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 11 Aug 2025 20:23:35 +0200 Subject: [PATCH 07/46] Correctly capture arguments --- src/video_core/buffer_cache/buffer_cache.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 1cf88f1c8..82dd0b9f1 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -208,7 +208,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies); - scheduler.DeferOperation([&]() { + scheduler.DeferOperation([this, &buffer, copies = std::move(copies), download, offset, + device_addr, size, is_write]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; From a8cd9e359ce7555941432e84d36842cfb4de1e15 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 17 Aug 2025 00:05:20 +0200 Subject: [PATCH 08/46] Account for big downloads --- src/video_core/buffer_cache/buffer.cpp | 27 +++++++++++ src/video_core/buffer_cache/buffer.h | 48 +++++++++++++++++++ .../texture_cache/texture_cache.cpp | 8 ++-- 3 files changed, 78 insertions(+), 5 deletions(-) diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp index e85a6eb18..c7b1ab4f4 100644 --- a/src/video_core/buffer_cache/buffer.cpp +++ b/src/video_core/buffer_cache/buffer.cpp @@ -219,4 +219,31 @@ bool StreamBuffer::WaitPendingOperations(u64 requested_upper_bound, bool allow_w return true; } +StreamBufferMapping::StreamBufferMapping(StreamBuffer& stream_buffer, u64 size, u64 alignment, + bool allow_wait) 
{ + const auto [data, offset] = stream_buffer.Map(size, alignment, allow_wait); + if (!data) { + // This happens if the size is too big or no waiting is allowed when it is required + is_temp_buffer = true; + this->buffer = new VideoCore::Buffer(*stream_buffer.instance, *stream_buffer.scheduler, + stream_buffer.usage, 0, AllFlags, size); + this->data = this->buffer->mapped_data.data(); + this->offset = 0; + ASSERT_MSG(this->data, "Failed to map temporary buffer"); + } else { + is_temp_buffer = false; + buffer = &stream_buffer; + this->data = data; + this->offset = offset; + } +} + +StreamBufferMapping::~StreamBufferMapping() { + if (is_temp_buffer) { + ASSERT(buffer); + auto scheduler = buffer->scheduler; + scheduler->DeferOperation([buffer = this->buffer]() mutable { delete buffer; }); + } +} + } // namespace VideoCore diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index b02f8c181..0bc33e57f 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include #include #include @@ -213,4 +214,51 @@ private: u64 wait_bound{}; }; +class StreamBufferMapping { +public: + StreamBufferMapping(StreamBuffer& stream_buffer, u64 size, u64 alignment = 0, + bool allow_wait = true); + ~StreamBufferMapping(); + + StreamBufferMapping(const StreamBufferMapping&) = delete; + StreamBufferMapping& operator=(const StreamBufferMapping&) = delete; + + StreamBufferMapping(StreamBufferMapping&& other) + : buffer{std::exchange(other.buffer, nullptr)}, data{std::exchange(other.data, nullptr)}, + offset{std::exchange(other.offset, 0)}, + is_temp_buffer{std::exchange(other.is_temp_buffer, false)} {} + + StreamBufferMapping& operator=(StreamBufferMapping&& other) { + if (this != &other) { + buffer = std::exchange(other.buffer, nullptr); + data = std::exchange(other.data, nullptr); + offset = std::exchange(other.offset, 0); + is_temp_buffer = 
std::exchange(other.is_temp_buffer, false); + } + return *this; + } + + VideoCore::Buffer* Buffer() const { + return buffer; + } + + u8* Data() const { + return data; + } + + u64 Offset() const { + return offset; + } + + bool TemporaryBuffer() const { + return is_temp_buffer; + } + +private: + VideoCore::Buffer* buffer; + u8* data{}; + u64 offset{}; + bool is_temp_buffer{}; +}; + } // namespace VideoCore diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 9b78a6c9d..553d46aef 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -125,14 +125,13 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { if (buffer_copies.empty()) { return; } - const auto [download, offset] = download_buffer.Map(image_size); + StreamBufferMapping mapping(download_buffer, image_size); download_buffer.Commit(); scheduler.EndRendering(); image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); - tile_manager.TileImage(image.image, buffer_copies, download_buffer.Handle(), offset, + tile_manager.TileImage(image.image, buffer_copies, mapping.Buffer()->Handle(), mapping.Offset(), image.info); - LOG_WARNING(Render_Vulkan, "Downloading image {:x} ", image_addr); - scheduler.DeferOperation([this, image_addr, download, image_size] { + scheduler.DeferOperation([this, image_addr, download = mapping.Data(), image_size] { auto* memory = Core::Memory::Instance(); // Should we download directly to main memory or put contents into the buffer cache? memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); @@ -141,7 +140,6 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { // contents are uploaded from main memory the next time buffers in this // memory region are accessed. 
buffer_cache.InvalidateMemory(image_addr, image_size, false); - LOG_WARNING(Render_Vulkan, "Downloaded image {:x} ", image_addr); }); } From a63d1daa00be6b292bf292112f4e6a215d37ea97 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 21 Aug 2025 23:54:39 +0200 Subject: [PATCH 09/46] (test) image gpu dirty --- src/video_core/buffer_cache/buffer_cache.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 82dd0b9f1..c9ed406f4 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -395,9 +395,7 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, // Avoid using ObtainBuffer here as that might give us the stream buffer. const BufferId buffer_id = FindBuffer(src, num_bytes); auto& buffer = slot_buffers[buffer_id]; - if (SynchronizeBuffer(buffer, src, num_bytes, false, true)) { - texture_cache.InvalidateMemoryFromGPU(dst, num_bytes); - } + SynchronizeBuffer(buffer, src, num_bytes, false, true); return buffer; }(); auto& dst_buffer = [&] -> const Buffer& { @@ -905,8 +903,12 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, }); TouchBuffer(buffer); } - if (is_texel_buffer) { - return SynchronizeBufferFromImage(buffer, device_addr, size); + if (is_texel_buffer || is_written) { + const bool synced = SynchronizeBufferFromImage(buffer, device_addr, size); + if (is_written) { + texture_cache.InvalidateMemoryFromGPU(device_addr, size); + } + return synced; } return false; } @@ -953,6 +955,9 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, ASSERT_MSG(device_addr == image.info.guest_address, "Texel buffer aliases image subresources {:x} : {:x}", device_addr, image.info.guest_address); + if (!image.SafeToDownload()) { + return false; + } const u32 buf_offset = 
buffer.Offset(image.info.guest_address); boost::container::small_vector buffer_copies; u32 copy_size = 0; From 542b8feda99603a1648c37bba2d03d24d0654463 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 27 Aug 2025 00:19:59 +0200 Subject: [PATCH 10/46] Revert "(test) image gpu dirty" This reverts commit a63d1daa00be6b292bf292112f4e6a215d37ea97. --- src/video_core/buffer_cache/buffer_cache.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index c9ed406f4..82dd0b9f1 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -395,7 +395,9 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, // Avoid using ObtainBuffer here as that might give us the stream buffer. const BufferId buffer_id = FindBuffer(src, num_bytes); auto& buffer = slot_buffers[buffer_id]; - SynchronizeBuffer(buffer, src, num_bytes, false, true); + if (SynchronizeBuffer(buffer, src, num_bytes, false, true)) { + texture_cache.InvalidateMemoryFromGPU(dst, num_bytes); + } return buffer; }(); auto& dst_buffer = [&] -> const Buffer& { @@ -903,12 +905,8 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, }); TouchBuffer(buffer); } - if (is_texel_buffer || is_written) { - const bool synced = SynchronizeBufferFromImage(buffer, device_addr, size); - if (is_written) { - texture_cache.InvalidateMemoryFromGPU(device_addr, size); - } - return synced; + if (is_texel_buffer) { + return SynchronizeBufferFromImage(buffer, device_addr, size); } return false; } @@ -955,9 +953,6 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, ASSERT_MSG(device_addr == image.info.guest_address, "Texel buffer aliases image subresources {:x} : {:x}", device_addr, image.info.guest_address); - if (!image.SafeToDownload()) { - return false; - } const u32 
buf_offset = buffer.Offset(image.info.guest_address); boost::container::small_vector buffer_copies; u32 copy_size = 0; From 8ae5a5dfcce5aeb74c7dfc93dcaefc5775dc535b Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 27 Aug 2025 00:35:37 +0200 Subject: [PATCH 11/46] Test only download non-GPU modified images --- src/video_core/buffer_cache/buffer_cache.cpp | 21 +++++++++---------- src/video_core/buffer_cache/buffer_cache.h | 4 ++-- .../renderer_vulkan/vk_rasterizer.cpp | 4 ++-- .../texture_cache/texture_cache.cpp | 11 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 82dd0b9f1..80f6c7843 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -154,27 +154,24 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s BufferCache::~BufferCache() = default; -void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool flush) { +void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { if (!IsRegionRegistered(device_addr, size)) { return; } - if (flush) { - memory_tracker->InvalidateRegion( - device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); - } else { - memory_tracker->InvalidateRegion(device_addr, size); - } + memory_tracker->InvalidateRegion( + device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); } void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { liverpool->SendCommand([this, device_addr, size, is_write] { Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)]; - DownloadBufferMemory(buffer, device_addr, size, is_write); - scheduler.Finish(); + if (DownloadBufferMemory(buffer, device_addr, size, is_write)) { + scheduler.Finish(); + } }); } -void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) { 
+bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) { boost::container::small_vector copies; u64 total_size_bytes = 0; memory_tracker->ForEachDownloadRange( @@ -197,7 +194,7 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si gpu_modified_ranges.Subtract(device_addr_out, range_size); }); if (total_size_bytes == 0) { - return; + return false; } const auto [download, offset] = download_buffer.Map(total_size_bytes); for (auto& copy : copies) { @@ -222,6 +219,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si memory_tracker->MarkRegionAsCpuModified(device_addr, size); } }); + + return true; } void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3192b9018..3067cd4b2 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -114,7 +114,7 @@ public: } /// Invalidates any buffer in the logical page range. - void InvalidateMemory(VAddr device_addr, u64 size, bool flush); + void InvalidateMemory(VAddr device_addr, u64 size); /// Flushes any GPU modified buffer in the logical page range back to CPU memory. 
void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); @@ -183,7 +183,7 @@ private: return !buffer_id || slot_buffers[buffer_id].is_deleted; } - void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write); + bool DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write); [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 533803ccc..ec0c38bda 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1036,7 +1036,7 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) { // Not GPU mapped memory, can skip invalidation logic entirely. return false; } - buffer_cache.InvalidateMemory(addr, size, true); + buffer_cache.InvalidateMemory(addr, size); texture_cache.InvalidateMemory(addr, size); return true; } @@ -1070,7 +1070,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) { } void Rasterizer::UnmapMemory(VAddr addr, u64 size) { - buffer_cache.InvalidateMemory(addr, size, true); + buffer_cache.InvalidateMemory(addr, size); texture_cache.UnmapMemory(addr, size); page_manager.OnGpuUnmap(addr, size); { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 553d46aef..2af404672 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -132,14 +132,15 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { tile_manager.TileImage(image.image, buffer_copies, mapping.Buffer()->Handle(), mapping.Offset(), image.info); scheduler.DeferOperation([this, image_addr, download = mapping.Data(), image_size] { + if (buffer_cache.IsRegionGpuModified(image_addr, image_size)) { + return; + } + LOG_WARNING(Render_Vulkan, "Downloading image memory at {:#x} ({} bytes)", image_addr, + image_size); 
auto* memory = Core::Memory::Instance(); // Should we download directly to main memory or put contents into the buffer cache? memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); - // Can happen that the buffer that the image is read from still holds - // old invalid data. We need to invalidate memory in buffer cache so that - // contents are uploaded from main memory the next time buffers in this - // memory region are accessed. - buffer_cache.InvalidateMemory(image_addr, image_size, false); + buffer_cache.InvalidateMemory(image_addr, image_size); }); } From cda16ec742adcd4c38631dc9a30e39c5095d847a Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 28 Aug 2025 21:13:28 +0200 Subject: [PATCH 12/46] Revert tests and maintain gpumodified --- src/video_core/buffer_cache/buffer_cache.cpp | 12 ++++++++---- src/video_core/buffer_cache/buffer_cache.h | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 ++-- src/video_core/texture_cache/texture_cache.cpp | 8 ++++---- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 80f6c7843..ae20ddc15 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -154,12 +154,16 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s BufferCache::~BufferCache() = default; -void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { +void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool download) { if (!IsRegionRegistered(device_addr, size)) { return; } - memory_tracker->InvalidateRegion( - device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); + if (download) { + memory_tracker->InvalidateRegion( + device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); + } else { + memory_tracker->InvalidateRegion(device_addr, size); + } } void 
BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { @@ -860,6 +864,7 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, size_t total_size_bytes = 0; VAddr buffer_start = buffer.CpuAddr(); vk::Buffer src_buffer = VK_NULL_HANDLE; + TouchBuffer(buffer); memory_tracker->ForEachUploadRange( device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) { @@ -902,7 +907,6 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &post_barrier, }); - TouchBuffer(buffer); } if (is_texel_buffer) { return SynchronizeBufferFromImage(buffer, device_addr, size); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3067cd4b2..b54e59a9c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -114,7 +114,7 @@ public: } /// Invalidates any buffer in the logical page range. - void InvalidateMemory(VAddr device_addr, u64 size); + void InvalidateMemory(VAddr device_addr, u64 size, bool download); /// Flushes any GPU modified buffer in the logical page range back to CPU memory. void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ec0c38bda..533803ccc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1036,7 +1036,7 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) { // Not GPU mapped memory, can skip invalidation logic entirely. 
return false; } - buffer_cache.InvalidateMemory(addr, size); + buffer_cache.InvalidateMemory(addr, size, true); texture_cache.InvalidateMemory(addr, size); return true; } @@ -1070,7 +1070,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) { } void Rasterizer::UnmapMemory(VAddr addr, u64 size) { - buffer_cache.InvalidateMemory(addr, size); + buffer_cache.InvalidateMemory(addr, size, true); texture_cache.UnmapMemory(addr, size); page_manager.OnGpuUnmap(addr, size); { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 2af404672..bfe2cd153 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -133,14 +133,13 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { image.info); scheduler.DeferOperation([this, image_addr, download = mapping.Data(), image_size] { if (buffer_cache.IsRegionGpuModified(image_addr, image_size)) { - return; + LOG_WARNING(Render_Vulkan, + "Image {:x} was modified by GPU during download", image_addr); } - LOG_WARNING(Render_Vulkan, "Downloading image memory at {:#x} ({} bytes)", image_addr, - image_size); auto* memory = Core::Memory::Instance(); // Should we download directly to main memory or put contents into the buffer cache? 
memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); - buffer_cache.InvalidateMemory(image_addr, image_size); + buffer_cache.InvalidateMemory(image_addr, image_size, false); }); } @@ -417,6 +416,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { TrackImage(new_image_id); new_image.flags &= ~ImageFlagBits::Dirty; + new_image.flags |= src_image.flags & ImageFlagBits::GpuModified; return new_image_id; } From 0727390d2615774ef3aade2667699218b4791141 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 30 Aug 2025 02:15:03 +0200 Subject: [PATCH 13/46] Handle GPU modified ranges --- src/video_core/buffer_cache/buffer_cache.cpp | 59 ++++++++++++++++++- src/video_core/buffer_cache/buffer_cache.h | 4 ++ src/video_core/page_manager.h | 4 ++ .../texture_cache/texture_cache.cpp | 22 ++++--- 4 files changed, 77 insertions(+), 12 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index ae20ddc15..3ef89eb39 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -163,6 +163,7 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool download) { device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); } else { memory_tracker->InvalidateRegion(device_addr, size); + gpu_modified_ranges.Subtract(device_addr, size); } } @@ -208,12 +209,12 @@ bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si download_buffer.Commit(); scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies); - scheduler.DeferOperation([this, &buffer, copies = std::move(copies), download, offset, + cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), copies); + scheduler.DeferOperation([this, buf_addr = buffer.CpuAddr(), copies = std::move(copies), download, offset, 
device_addr, size, is_write]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { - const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; + const VAddr copy_device_addr = buf_addr + copy.srcOffset; const u64 dst_offset = copy.dstOffset - offset; memory->TryWriteBacking(std::bit_cast(copy_device_addr), download + dst_offset, copy.size); @@ -227,6 +228,58 @@ bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si return true; } +void BufferCache::ReadEdgeImagePages(const Image& image) { + // May happen that after downloading the image and invalidating region, + // that there were GPU modified ranges that are lost due to CPU reuploading. + // This doesn't change tracker state and it is spected to call DownloadImageMemory after this. + const VAddr image_addr = image.info.guest_address; + const u64 image_size = image.info.guest_size; + const VAddr image_end = image_addr + image_size; + const VAddr page_start = PageManager::GetPageAddr(image_addr); + const VAddr page_end = PageManager::GetNextPageAddr(image_end - 1); + boost::container::small_vector copies; + u64 total_size_bytes = 0; + const auto [buffer, offset] = ObtainBufferForImage(image_addr, image_size); + const auto add_download = [&](VAddr start, VAddr end) { + const u64 new_offset = start - buffer->CpuAddr(); + const u64 new_size = end - start; + copies.push_back(vk::BufferCopy{ + .srcOffset = new_offset, + .dstOffset = total_size_bytes, + .size = new_size, + }); + // Align up to avoid cache conflicts + constexpr u64 align = 64ULL; + constexpr u64 mask = ~(align - 1ULL); + total_size_bytes += (new_size + align - 1) & mask; + }; + gpu_modified_ranges.ForEachInRange(page_start, image_addr - page_start, add_download); + gpu_modified_ranges.ForEachInRange(image_end, page_end - image_end, add_download); + gpu_modified_ranges.Subtract(page_start, page_end - page_start); + if (total_size_bytes == 0) { + return; + } + const auto [download, download_offset] 
= download_buffer.Map(total_size_bytes); + for (auto& copy : copies) { + // Modify copies to have the staging offset in mind + copy.dstOffset += download_offset; + } + download_buffer.Commit(); + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyBuffer(buffer->Handle(), download_buffer.Handle(), copies); + scheduler.DeferOperation([this, buf_addr = buffer->CpuAddr(), copies = std::move(copies), download, download_offset, + image_addr, image_size]() { + auto* memory = Core::Memory::Instance(); + for (const auto& copy : copies) { + const VAddr copy_device_addr = buf_addr + copy.srcOffset; + const u64 dst_offset = copy.dstOffset - download_offset; + memory->TryWriteBacking(std::bit_cast(copy_device_addr), download + dst_offset, + copy.size); + } + }); +} + void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { const auto& regs = liverpool->regs; Vulkan::VertexInputs attributes; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b54e59a9c..695515c3f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -11,6 +11,7 @@ #include "video_core/buffer_cache/buffer.h" #include "video_core/buffer_cache/range_set.h" #include "video_core/multi_level_page_table.h" +#include "video_core/texture_cache/image.h" namespace AmdGpu { struct Liverpool; @@ -119,6 +120,9 @@ public: /// Flushes any GPU modified buffer in the logical page range back to CPU memory. void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); + /// Flushes GPU modified ranges of the uncovered part of the edge pages of an image. + void ReadEdgeImagePages(const Image& image); + /// Binds host vertex buffers for the current draw. 
void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline); diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index 4ca41cb43..03445d3ed 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -51,6 +51,10 @@ public: return Common::AlignUp(addr + 1, PAGE_SIZE); } + static constexpr size_t GetPageSize() { + return PAGE_SIZE; + } + private: struct Impl; std::unique_ptr impl; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index bfe2cd153..c3ccc19e3 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -132,10 +132,6 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { tile_manager.TileImage(image.image, buffer_copies, mapping.Buffer()->Handle(), mapping.Offset(), image.info); scheduler.DeferOperation([this, image_addr, download = mapping.Data(), image_size] { - if (buffer_cache.IsRegionGpuModified(image_addr, image_size)) { - LOG_WARNING(Render_Vulkan, - "Image {:x} was modified by GPU during download", image_addr); - } auto* memory = Core::Memory::Instance(); // Should we download directly to main memory or put contents into the buffer cache? 
memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); @@ -923,9 +919,9 @@ void TextureCache::RunGarbageCollector() { std::scoped_lock lock{mutex}; bool pressured = false; bool aggresive = false; - bool downloaded = false; u64 ticks_to_destroy = 0; size_t num_deletions = 0; + boost::container::small_vector download_pending; const auto configure = [&](bool allow_aggressive) { pressured = total_used_memory >= pressure_gc_memory; @@ -945,10 +941,13 @@ void TextureCache::RunGarbageCollector() { return false; } if (download) { - DownloadImageMemory(image_id); - downloaded = true; + download_pending.push_back(image_id); + buffer_cache.ReadEdgeImagePages(image); + UntrackImage(image_id); + UnregisterImage(image_id); + } else { + FreeImage(image_id); } - FreeImage(image_id); if (total_used_memory < critical_gc_memory) { if (aggresive) { num_deletions >>= 2; @@ -973,7 +972,12 @@ void TextureCache::RunGarbageCollector() { lru_cache.ForEachItemBelow(gc_tick - ticks_to_destroy, clean_up); } - if (downloaded) { + for (const auto& image_id : download_pending) { + DownloadImageMemory(image_id); + DeleteImage(image_id); + } + + if (!download_pending.empty()) { // We need to make downloads synchronous. It is possible that the contents // of the image are requested before they are downloaded in which case // outdated buffer cache contents are used instead. 
From a449ee9624f4f78546161ce9d58b063f52d7eb3c Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 30 Aug 2025 02:15:16 +0200 Subject: [PATCH 14/46] Demote logs to debug/info --- src/video_core/texture_cache/tile_manager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index d872f8b2e..1ce1b696f 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -135,9 +135,9 @@ vk::Pipeline TileManager::GetTilingPipeline(const ImageInfo& info, bool is_tiler vk::ShaderStageFlagBits::eCompute, device, defines); const auto module_name = fmt::format("{}_{} {}", magic_enum::enum_name(info.tile_mode), info.num_bits, is_tiler ? "tiler" : "detiler"); - LOG_WARNING(Render_Vulkan, "Compiling shader {}", module_name); + LOG_INFO(Render_Vulkan, "Compiling shader {}", module_name); for (const auto& def : defines) { - LOG_WARNING(Render_Vulkan, "#define {}", def); + LOG_DEBUG(Render_Vulkan, "#define {}", def); } Vulkan::SetObjectName(device, module, module_name); const vk::PipelineShaderStageCreateInfo shader_ci = { From 0feb5eb1f81ace323d7a8822fde90afb226e504b Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 30 Aug 2025 02:18:01 +0200 Subject: [PATCH 15/46] tracker renamed to page_manager --- .../texture_cache/texture_cache.cpp | 20 +++++++++---------- src/video_core/texture_cache/texture_cache.h | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index c3ccc19e3..38aa6f958 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -22,8 +22,8 @@ static constexpr u64 PageShift = 12; static constexpr u64 NumFramesBeforeRemoval = 32; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, - 
BufferCache& buffer_cache_, PageManager& tracker_) - : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, + BufferCache& buffer_cache_, PageManager& page_manager_) + : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, page_manager{page_manager_}, blit_helper{instance, scheduler}, tile_manager{instance, scheduler, buffer_cache.GetUtilityBuffer(MemoryUsage::Stream)} { // Create basic null image at fixed image ID. @@ -813,7 +813,7 @@ void TextureCache::TrackImage(ImageId image_id) { // Re-track the whole image image.track_addr = image_begin; image.track_addr_end = image_end; - tracker.UpdatePageWatchers<1>(image_begin, image.info.guest_size); + page_manager.UpdatePageWatchers<1>(image_begin, image.info.guest_size); } else { if (image_begin < image.track_addr) { TrackImageHead(image_id); @@ -836,7 +836,7 @@ void TextureCache::TrackImageHead(ImageId image_id) { ASSERT(image.track_addr != 0 && image_begin < image.track_addr); const auto size = image.track_addr - image_begin; image.track_addr = image_begin; - tracker.UpdatePageWatchers<1>(image_begin, size); + page_manager.UpdatePageWatchers<1>(image_begin, size); } void TextureCache::TrackImageTail(ImageId image_id) { @@ -852,7 +852,7 @@ void TextureCache::TrackImageTail(ImageId image_id) { const auto addr = image.track_addr_end; const auto size = image_end - image.track_addr_end; image.track_addr_end = image_end; - tracker.UpdatePageWatchers<1>(addr, size); + page_manager.UpdatePageWatchers<1>(addr, size); } void TextureCache::UntrackImage(ImageId image_id) { @@ -865,7 +865,7 @@ void TextureCache::UntrackImage(ImageId image_id) { image.track_addr = 0; image.track_addr_end = 0; if (size != 0) { - tracker.UpdatePageWatchers(addr, size); + page_manager.UpdatePageWatchers(addr, size); } } @@ -875,7 +875,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) { if (!image.IsTracked() || image_begin < image.track_addr) { return; } - const auto addr = 
tracker.GetNextPageAddr(image_begin); + const auto addr = page_manager.GetNextPageAddr(image_begin); const auto size = addr - image_begin; image.track_addr = addr; if (image.track_addr == image.track_addr_end) { @@ -884,7 +884,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) { // Cehck its hash later. MarkAsMaybeDirty(image_id, image); } - tracker.UpdatePageWatchers(image_begin, size); + page_manager.UpdatePageWatchers(image_begin, size); } void TextureCache::UntrackImageTail(ImageId image_id) { @@ -894,7 +894,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) { return; } ASSERT(image.track_addr_end != 0); - const auto addr = tracker.GetPageAddr(image_end); + const auto addr = page_manager.GetPageAddr(image_end); const auto size = image_end - addr; image.track_addr_end = addr; if (image.track_addr == image.track_addr_end) { @@ -903,7 +903,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) { // Cehck its hash later. MarkAsMaybeDirty(image_id, image); } - tracker.UpdatePageWatchers(addr, size); + page_manager.UpdatePageWatchers(addr, size); } void TextureCache::RunGarbageCollector() { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b63a7abf2..c8e321921 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -89,7 +89,7 @@ public: public: TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - BufferCache& buffer_cache, PageManager& tracker); + BufferCache& buffer_cache, PageManager& page_manager); ~TextureCache(); TileManager& GetTileManager() noexcept { @@ -313,7 +313,7 @@ private: const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; BufferCache& buffer_cache; - PageManager& tracker; + PageManager& page_manager; BlitHelper blit_helper; TileManager tile_manager; Common::SlotVector slot_images; From 3698b80e8ffb36d20eee9f55da8195c1366be428 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi 
Date: Sat, 30 Aug 2025 02:20:04 +0200 Subject: [PATCH 16/46] clang-format --- src/video_core/buffer_cache/buffer_cache.cpp | 8 ++++---- src/video_core/texture_cache/texture_cache.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 3ef89eb39..6d509b077 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -210,8 +210,8 @@ bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), copies); - scheduler.DeferOperation([this, buf_addr = buffer.CpuAddr(), copies = std::move(copies), download, offset, - device_addr, size, is_write]() { + scheduler.DeferOperation([this, buf_addr = buffer.CpuAddr(), copies = std::move(copies), + download, offset, device_addr, size, is_write]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { const VAddr copy_device_addr = buf_addr + copy.srcOffset; @@ -268,8 +268,8 @@ void BufferCache::ReadEdgeImagePages(const Image& image) { scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyBuffer(buffer->Handle(), download_buffer.Handle(), copies); - scheduler.DeferOperation([this, buf_addr = buffer->CpuAddr(), copies = std::move(copies), download, download_offset, - image_addr, image_size]() { + scheduler.DeferOperation([this, buf_addr = buffer->CpuAddr(), copies = std::move(copies), + download, download_offset, image_addr, image_size]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { const VAddr copy_device_addr = buf_addr + copy.srcOffset; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 38aa6f958..e02abbbff 100644 --- a/src/video_core/texture_cache/texture_cache.cpp 
+++ b/src/video_core/texture_cache/texture_cache.cpp @@ -23,8 +23,8 @@ static constexpr u64 NumFramesBeforeRemoval = 32; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, BufferCache& buffer_cache_, PageManager& page_manager_) - : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, page_manager{page_manager_}, - blit_helper{instance, scheduler}, + : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, + page_manager{page_manager_}, blit_helper{instance, scheduler}, tile_manager{instance, scheduler, buffer_cache.GetUtilityBuffer(MemoryUsage::Stream)} { // Create basic null image at fixed image ID. const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm); From 3e4b308f0f494d12eb92f6f051111e822850da6a Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 10 Sep 2025 23:55:02 +0200 Subject: [PATCH 17/46] Test not collecting gpu modified ranges --- src/video_core/texture_cache/texture_cache.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index e02abbbff..5af7889c1 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -940,6 +940,10 @@ void TextureCache::RunGarbageCollector() { if (download && !pressured) { return false; } + if (download && + buffer_cache.IsRegionGpuModified(image.info.guest_address, image.info.guest_size)) { + return false; + } if (download) { download_pending.push_back(image_id); buffer_cache.ReadEdgeImagePages(image); From ee61f48aa8446d581c0f6ab598b1c23046fd3afa Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 6 Aug 2025 18:32:30 +0200 Subject: [PATCH 18/46] Use general dirty flag --- src/video_core/texture_cache/image.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 
c30edad79..69cbe3fc0 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -113,7 +113,7 @@ struct Image { bool SafeToDownload() const { return True(flags & ImageFlagBits::GpuModified) && - False(flags & (ImageFlagBits::GpuDirty | ImageFlagBits::CpuDirty)); + False(flags & ImageFlagBits::Dirty); } const Vulkan::Instance* instance; From cdf05f863861e4fce4ba1faa75990598539d1433 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 6 Aug 2025 18:37:51 +0200 Subject: [PATCH 19/46] clang-format --- src/video_core/texture_cache/image.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 69cbe3fc0..9703315cb 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -112,8 +112,7 @@ struct Image { } bool SafeToDownload() const { - return True(flags & ImageFlagBits::GpuModified) && - False(flags & ImageFlagBits::Dirty); + return True(flags & ImageFlagBits::GpuModified) && False(flags & ImageFlagBits::Dirty); } const Vulkan::Instance* instance; From 760dcf36574bd177b063572682aeb8120bc3c99e Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 9 Aug 2025 20:07:28 +0200 Subject: [PATCH 20/46] Handle non-linear image download --- .../texture_cache/texture_cache.cpp | 60 ++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 2da037a6e..cf12f34ae 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -96,34 +96,43 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { return; } auto& download_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::Download); - const u32 download_size = image.info.pitch * image.info.size.height * - image.info.resources.layers * (image.info.num_bits / 8); - 
ASSERT(download_size <= image.info.guest_size); - const auto [download, offset] = download_buffer.Map(download_size); - download_buffer.Commit(); - const vk::BufferImageCopy image_download = { - .bufferOffset = offset, - .bufferRowLength = image.info.pitch, - .bufferImageHeight = image.info.size.height, - .imageSubresource = - { - .aspectMask = image.info.props.is_depth ? vk::ImageAspectFlagBits::eDepth - : vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, + const auto image_addr = image.info.guest_address; + const auto image_size = image.info.guest_size; + const auto image_mips = image.info.resources.levels; + boost::container::small_vector buffer_copies; + for (u32 mip = 0; mip < image_mips; ++mip) { + const auto& width = std::max(image.info.size.width >> mip, 1u); + const auto& height = std::max(image.info.size.height >> mip, 1u); + const auto& depth = + image.info.props.is_volume ? std::max(image.info.size.depth >> mip, 1u) : 1u; + const auto [mip_size, mip_pitch, mip_height, mip_offset] = image.info.mips_layout[mip]; + const u32 extent_width = mip_pitch ? std::min(mip_pitch, width) : width; + const u32 extent_height = mip_height ? 
std::min(mip_height, height) : height; + buffer_copies.push_back(vk::BufferImageCopy{ + .bufferOffset = mip_offset, + .bufferRowLength = mip_pitch, + .bufferImageHeight = mip_height, + .imageSubresource{ + .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, + .mipLevel = mip, .baseArrayLayer = 0, .layerCount = image.info.resources.layers, }, - .imageOffset = {0, 0, 0}, - .imageExtent = {image.info.size.width, image.info.size.height, 1}, - }; + .imageOffset = {0, 0, 0}, + .imageExtent = {extent_width, extent_height, depth}, + }); + } + if (buffer_copies.empty()) { + return; + } + const auto [download, offset] = download_buffer.Map(image_size); + download_buffer.Commit(); scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); - cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, - download_buffer.Handle(), image_download); - scheduler.DeferOperation([device_addr = image.info.guest_address, download, download_size] { + tile_manager.TileImage(image.image, buffer_copies, download_buffer.Handle(), offset, + image.info); + scheduler.DeferOperation([image_addr, download, image_size] { auto* memory = Core::Memory::Instance(); - memory->TryWriteBacking(std::bit_cast(device_addr), download, download_size); + memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); }); } @@ -924,11 +933,6 @@ void TextureCache::RunGarbageCollector() { --num_deletions; auto& image = slot_images[image_id]; const bool download = image.SafeToDownload(); - const bool tiled = image.info.IsTiled(); - if (tiled && download) { - // This is a workaround for now. We can't handle non-linear image downloads. 
- return false; - } if (download && !pressured) { return false; } From 07880b95f3fc888da2d6b06199699c0d52665a39 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 10 Aug 2025 01:04:38 +0200 Subject: [PATCH 21/46] Layout transition --- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/texture_cache/texture_cache.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f1a25e900..463bd04ca 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -462,8 +462,8 @@ void Rasterizer::OnSubmit() { buffer_cache.ProcessFaultBuffer(); } texture_cache.ProcessDownloadImages(); - texture_cache.RunGarbageCollector(); buffer_cache.RunGarbageCollector(); + texture_cache.RunGarbageCollector(); } bool Rasterizer::BindResources(const Pipeline* pipeline) { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index cf12f34ae..71fc9633a 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -128,6 +128,7 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { const auto [download, offset] = download_buffer.Map(image_size); download_buffer.Commit(); scheduler.EndRendering(); + image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); tile_manager.TileImage(image.image, buffer_copies, download_buffer.Handle(), offset, image.info); scheduler.DeferOperation([image_addr, download, image_size] { From 46e97233912f37fe011194cf2efc2f6ca31218c4 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 10 Aug 2025 19:49:42 +0200 Subject: [PATCH 22/46] Invalidate buffer cache memory on image download --- src/video_core/buffer_cache/buffer_cache.cpp | 11 +++++++---- src/video_core/buffer_cache/buffer_cache.h | 2 +- 
src/video_core/buffer_cache/memory_tracker.h | 16 ++++++++++++++++ src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 ++-- src/video_core/texture_cache/texture_cache.cpp | 8 +++++++- 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 41703dfe7..bef146fbc 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -154,12 +154,16 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s BufferCache::~BufferCache() = default; -void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { +void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool flush) { if (!IsRegionRegistered(device_addr, size)) { return; } - memory_tracker->InvalidateRegion( - device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); + if (flush) { + memory_tracker->InvalidateRegion( + device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); + } else { + memory_tracker->InvalidateRegion(device_addr, size); + } } void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { @@ -1211,7 +1215,6 @@ void BufferCache::RunGarbageCollector() { } --max_deletions; Buffer& buffer = slot_buffers[buffer_id]; - // InvalidateMemory(buffer.CpuAddr(), buffer.SizeBytes()); DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes(), true); DeleteBuffer(buffer_id); }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index aecc97db0..9e35e7ef2 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -114,7 +114,7 @@ public: } /// Invalidates any buffer in the logical page range. 
- void InvalidateMemory(VAddr device_addr, u64 size); + void InvalidateMemory(VAddr device_addr, u64 size, bool flush); /// Flushes any GPU modified buffer in the logical page range back to CPU memory. void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index ec0878c3b..a59fff64a 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -75,6 +75,8 @@ public: manager->template IsRegionModified(offset, size)) { return true; } + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); manager->template ChangeRegionState( manager->GetCpuAddr() + offset, size); return false; @@ -85,6 +87,20 @@ public: }); } + /// Removes all protection from a page (lose any non downloaded GPU modifications) + void InvalidateRegion(VAddr cpu_addr, u64 size) noexcept { + IteratePages(cpu_addr, size, [](RegionManager* manager, u64 offset, size_t size) { + // Perform both the GPU modification check and CPU state change with the lock + // in case we are racing with GPU thread trying to mark the page as GPU + // modified. 
+ std::scoped_lock lk{manager->lock}; + manager->template ChangeRegionState(manager->GetCpuAddr() + offset, + size); + manager->template ChangeRegionState(manager->GetCpuAddr() + offset, + size); + }); + } + /// Call 'func' for each CPU modified range and unmark those pages as CPU modified void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, bool is_written, auto&& func, auto&& on_upload) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 463bd04ca..868b6360d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1049,7 +1049,7 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) { // Not GPU mapped memory, can skip invalidation logic entirely. return false; } - buffer_cache.InvalidateMemory(addr, size); + buffer_cache.InvalidateMemory(addr, size, true); texture_cache.InvalidateMemory(addr, size); return true; } @@ -1083,7 +1083,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) { } void Rasterizer::UnmapMemory(VAddr addr, u64 size) { - buffer_cache.InvalidateMemory(addr, size); + buffer_cache.InvalidateMemory(addr, size, true); texture_cache.UnmapMemory(addr, size); page_manager.OnGpuUnmap(addr, size); { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 71fc9633a..3d64b26bb 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -131,9 +131,15 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); tile_manager.TileImage(image.image, buffer_copies, download_buffer.Handle(), offset, image.info); - scheduler.DeferOperation([image_addr, download, image_size] { + scheduler.DeferOperation([this, image_addr, download, image_size] { auto* memory = Core::Memory::Instance(); + // Should we download 
directly to main memory or put contents into the buffer cache? memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); + // Can happen that the buffer that the image is read from still holds + // old invalid data. We need to invalidate memory in buffer cache so that + // contents are uploaded from main memory the next time buffers in this + // memory region are accessed. + buffer_cache.InvalidateMemory(image_addr, image_size, false); }); } From 01b87d0ce82a7464751b30893586031a21872c1f Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 11 Aug 2025 19:29:49 +0200 Subject: [PATCH 23/46] Sync texture downloads --- src/video_core/buffer_cache/buffer_cache.cpp | 16 +++++----------- src/video_core/buffer_cache/buffer_cache.h | 1 - src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 +- src/video_core/texture_cache/texture_cache.cpp | 11 +++++++++++ 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index bef146fbc..44577f1b5 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -169,11 +169,11 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool flush) { void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { liverpool->SendCommand([this, device_addr, size, is_write] { Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)]; - DownloadBufferMemory(buffer, device_addr, size, is_write); + DownloadBufferMemory(buffer, device_addr, size, is_write); + scheduler.Finish(); }); } -template void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) { boost::container::small_vector copies; u64 total_size_bytes = 0; @@ -208,7 +208,7 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); 
cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies); - const auto write_data = [&]() { + scheduler.DeferOperation([&]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; @@ -220,13 +220,7 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si if (is_write) { memory_tracker->MarkRegionAsCpuModified(device_addr, size); } - }; - if constexpr (async) { - scheduler.DeferOperation(write_data); - } else { - scheduler.Finish(); - write_data(); - } + }); } void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { @@ -1215,7 +1209,7 @@ void BufferCache::RunGarbageCollector() { } --max_deletions; Buffer& buffer = slot_buffers[buffer_id]; - DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes(), true); + DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes(), true); DeleteBuffer(buffer_id); }; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 9e35e7ef2..3192b9018 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -183,7 +183,6 @@ private: return !buffer_id || slot_buffers[buffer_id].is_deleted; } - template void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write); [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 868b6360d..acd3a90d1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -462,8 +462,8 @@ void Rasterizer::OnSubmit() { buffer_cache.ProcessFaultBuffer(); } texture_cache.ProcessDownloadImages(); - buffer_cache.RunGarbageCollector(); texture_cache.RunGarbageCollector(); + buffer_cache.RunGarbageCollector(); } bool 
Rasterizer::BindResources(const Pipeline* pipeline) { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 3d64b26bb..0e38c7f39 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -131,6 +131,7 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); tile_manager.TileImage(image.image, buffer_copies, download_buffer.Handle(), offset, image.info); + LOG_WARNING(Render_Vulkan, "Downloading image {:x} ", image_addr); scheduler.DeferOperation([this, image_addr, download, image_size] { auto* memory = Core::Memory::Instance(); // Should we download directly to main memory or put contents into the buffer cache? @@ -140,6 +141,7 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { // contents are uploaded from main memory the next time buffers in this // memory region are accessed. buffer_cache.InvalidateMemory(image_addr, image_size, false); + LOG_WARNING(Render_Vulkan, "Downloaded image {:x} ", image_addr); }); } @@ -923,6 +925,7 @@ void TextureCache::RunGarbageCollector() { std::scoped_lock lock{mutex}; bool pressured = false; bool aggresive = false; + bool downloaded = false; u64 ticks_to_destroy = 0; size_t num_deletions = 0; @@ -945,6 +948,7 @@ void TextureCache::RunGarbageCollector() { } if (download) { DownloadImageMemory(image_id); + downloaded = true; } FreeImage(image_id); if (total_used_memory < critical_gc_memory) { @@ -970,6 +974,13 @@ void TextureCache::RunGarbageCollector() { configure(true); lru_cache.ForEachItemBelow(gc_tick - ticks_to_destroy, clean_up); } + + if (downloaded) { + // We need to make downloads synchronous. It is possible that the contents + // of the image are requested before they are downloaded in which case + // outdated buffer cache contents are used instead. 
+ scheduler.Finish(); + } } void TextureCache::TouchImage(const Image& image) { From 9fc0804f3dc34d1cf8c8263b6b471df5220d63ce Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 11 Aug 2025 20:23:35 +0200 Subject: [PATCH 24/46] Correctly capture arguments --- src/video_core/buffer_cache/buffer_cache.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 44577f1b5..23e20c370 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -208,7 +208,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies); - scheduler.DeferOperation([&]() { + scheduler.DeferOperation([this, &buffer, copies = std::move(copies), download, offset, + device_addr, size, is_write]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; From c0c9fd898d9c490839492f1bba0cad782c4335a6 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 17 Aug 2025 00:05:20 +0200 Subject: [PATCH 25/46] Account for big downloads --- src/video_core/buffer_cache/buffer.cpp | 27 +++++++++++ src/video_core/buffer_cache/buffer.h | 48 +++++++++++++++++++ .../texture_cache/texture_cache.cpp | 8 ++-- 3 files changed, 78 insertions(+), 5 deletions(-) diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp index e85a6eb18..c7b1ab4f4 100644 --- a/src/video_core/buffer_cache/buffer.cpp +++ b/src/video_core/buffer_cache/buffer.cpp @@ -219,4 +219,31 @@ bool StreamBuffer::WaitPendingOperations(u64 requested_upper_bound, bool allow_w return true; } +StreamBufferMapping::StreamBufferMapping(StreamBuffer& stream_buffer, u64 size, u64 alignment, + bool allow_wait) 
{ + const auto [data, offset] = stream_buffer.Map(size, alignment, allow_wait); + if (!data) { + // This happens if the size is too big or no waiting is allowed when it is required + is_temp_buffer = true; + this->buffer = new VideoCore::Buffer(*stream_buffer.instance, *stream_buffer.scheduler, + stream_buffer.usage, 0, AllFlags, size); + this->data = this->buffer->mapped_data.data(); + this->offset = 0; + ASSERT_MSG(this->data, "Failed to map temporary buffer"); + } else { + is_temp_buffer = false; + buffer = &stream_buffer; + this->data = data; + this->offset = offset; + } +} + +StreamBufferMapping::~StreamBufferMapping() { + if (is_temp_buffer) { + ASSERT(buffer); + auto scheduler = buffer->scheduler; + scheduler->DeferOperation([buffer = this->buffer]() mutable { delete buffer; }); + } +} + } // namespace VideoCore diff --git a/src/video_core/buffer_cache/buffer.h b/src/video_core/buffer_cache/buffer.h index b02f8c181..0bc33e57f 100644 --- a/src/video_core/buffer_cache/buffer.h +++ b/src/video_core/buffer_cache/buffer.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include #include #include @@ -213,4 +214,51 @@ private: u64 wait_bound{}; }; +class StreamBufferMapping { +public: + StreamBufferMapping(StreamBuffer& stream_buffer, u64 size, u64 alignment = 0, + bool allow_wait = true); + ~StreamBufferMapping(); + + StreamBufferMapping(const StreamBufferMapping&) = delete; + StreamBufferMapping& operator=(const StreamBufferMapping&) = delete; + + StreamBufferMapping(StreamBufferMapping&& other) + : buffer{std::exchange(other.buffer, nullptr)}, data{std::exchange(other.data, nullptr)}, + offset{std::exchange(other.offset, 0)}, + is_temp_buffer{std::exchange(other.is_temp_buffer, false)} {} + + StreamBufferMapping& operator=(StreamBufferMapping&& other) { + if (this != &other) { + buffer = std::exchange(other.buffer, nullptr); + data = std::exchange(other.data, nullptr); + offset = std::exchange(other.offset, 0); + is_temp_buffer = 
std::exchange(other.is_temp_buffer, false); + } + return *this; + } + + VideoCore::Buffer* Buffer() const { + return buffer; + } + + u8* Data() const { + return data; + } + + u64 Offset() const { + return offset; + } + + bool TemporaryBuffer() const { + return is_temp_buffer; + } + +private: + VideoCore::Buffer* buffer; + u8* data{}; + u64 offset{}; + bool is_temp_buffer{}; +}; + } // namespace VideoCore diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 0e38c7f39..34849f5ce 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -125,14 +125,13 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { if (buffer_copies.empty()) { return; } - const auto [download, offset] = download_buffer.Map(image_size); + StreamBufferMapping mapping(download_buffer, image_size); download_buffer.Commit(); scheduler.EndRendering(); image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); - tile_manager.TileImage(image.image, buffer_copies, download_buffer.Handle(), offset, + tile_manager.TileImage(image.image, buffer_copies, mapping.Buffer()->Handle(), mapping.Offset(), image.info); - LOG_WARNING(Render_Vulkan, "Downloading image {:x} ", image_addr); - scheduler.DeferOperation([this, image_addr, download, image_size] { + scheduler.DeferOperation([this, image_addr, download = mapping.Data(), image_size] { auto* memory = Core::Memory::Instance(); // Should we download directly to main memory or put contents into the buffer cache? memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); @@ -141,7 +140,6 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { // contents are uploaded from main memory the next time buffers in this // memory region are accessed. 
buffer_cache.InvalidateMemory(image_addr, image_size, false); - LOG_WARNING(Render_Vulkan, "Downloaded image {:x} ", image_addr); }); } From df527abedfb83c7c1c38fceb4be5f71c4628cbc1 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 21 Aug 2025 23:54:39 +0200 Subject: [PATCH 26/46] (test) image gpu dirty --- src/video_core/buffer_cache/buffer_cache.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 23e20c370..9bc3ec979 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -396,9 +396,7 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, // Avoid using ObtainBuffer here as that might give us the stream buffer. const BufferId buffer_id = FindBuffer(src, num_bytes); auto& buffer = slot_buffers[buffer_id]; - if (SynchronizeBuffer(buffer, src, num_bytes, false, true)) { - texture_cache.InvalidateMemoryFromGPU(dst, num_bytes); - } + SynchronizeBuffer(buffer, src, num_bytes, false, true); return buffer; }(); auto& dst_buffer = [&] -> const Buffer& { @@ -906,8 +904,12 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, }); TouchBuffer(buffer); } - if (is_texel_buffer) { - return SynchronizeBufferFromImage(buffer, device_addr, size); + if (is_texel_buffer || is_written) { + const bool synced = SynchronizeBufferFromImage(buffer, device_addr, size); + if (is_written) { + texture_cache.InvalidateMemoryFromGPU(device_addr, size); + } + return synced; } return false; } @@ -954,6 +956,9 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, ASSERT_MSG(device_addr == image.info.guest_address, "Texel buffer aliases image subresources {:x} : {:x}", device_addr, image.info.guest_address); + if (!image.SafeToDownload()) { + return false; + } const u32 buf_offset = 
buffer.Offset(image.info.guest_address); boost::container::small_vector buffer_copies; u32 copy_size = 0; From 19e84b074067a04f8a9b3370e398aab24248496c Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 27 Aug 2025 00:19:59 +0200 Subject: [PATCH 27/46] Revert "(test) image gpu dirty" This reverts commit a63d1daa00be6b292bf292112f4e6a215d37ea97. --- src/video_core/buffer_cache/buffer_cache.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 9bc3ec979..23e20c370 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -396,7 +396,9 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, // Avoid using ObtainBuffer here as that might give us the stream buffer. const BufferId buffer_id = FindBuffer(src, num_bytes); auto& buffer = slot_buffers[buffer_id]; - SynchronizeBuffer(buffer, src, num_bytes, false, true); + if (SynchronizeBuffer(buffer, src, num_bytes, false, true)) { + texture_cache.InvalidateMemoryFromGPU(dst, num_bytes); + } return buffer; }(); auto& dst_buffer = [&] -> const Buffer& { @@ -904,12 +906,8 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, }); TouchBuffer(buffer); } - if (is_texel_buffer || is_written) { - const bool synced = SynchronizeBufferFromImage(buffer, device_addr, size); - if (is_written) { - texture_cache.InvalidateMemoryFromGPU(device_addr, size); - } - return synced; + if (is_texel_buffer) { + return SynchronizeBufferFromImage(buffer, device_addr, size); } return false; } @@ -956,9 +954,6 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, ASSERT_MSG(device_addr == image.info.guest_address, "Texel buffer aliases image subresources {:x} : {:x}", device_addr, image.info.guest_address); - if (!image.SafeToDownload()) { - return false; - } const u32 
buf_offset = buffer.Offset(image.info.guest_address); boost::container::small_vector buffer_copies; u32 copy_size = 0; From a6b1da664d1a330b10eda2e6f3a60f9119ae54b3 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 27 Aug 2025 00:35:37 +0200 Subject: [PATCH 28/46] Test only download non-GPU modified images --- src/video_core/buffer_cache/buffer_cache.cpp | 21 +++++++++---------- src/video_core/buffer_cache/buffer_cache.h | 4 ++-- .../renderer_vulkan/vk_rasterizer.cpp | 4 ++-- .../texture_cache/texture_cache.cpp | 11 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 23e20c370..213c0a1a7 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -154,27 +154,24 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s BufferCache::~BufferCache() = default; -void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool flush) { +void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { if (!IsRegionRegistered(device_addr, size)) { return; } - if (flush) { - memory_tracker->InvalidateRegion( - device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); - } else { - memory_tracker->InvalidateRegion(device_addr, size); - } + memory_tracker->InvalidateRegion( + device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); } void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { liverpool->SendCommand([this, device_addr, size, is_write] { Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)]; - DownloadBufferMemory(buffer, device_addr, size, is_write); - scheduler.Finish(); + if (DownloadBufferMemory(buffer, device_addr, size, is_write)) { + scheduler.Finish(); + } }); } -void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) { 
+bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) { boost::container::small_vector copies; u64 total_size_bytes = 0; memory_tracker->ForEachDownloadRange( @@ -197,7 +194,7 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si gpu_modified_ranges.Subtract(device_addr_out, range_size); }); if (total_size_bytes == 0) { - return; + return false; } const auto [download, offset] = download_buffer.Map(total_size_bytes); for (auto& copy : copies) { @@ -222,6 +219,8 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si memory_tracker->MarkRegionAsCpuModified(device_addr, size); } }); + + return true; } void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3192b9018..3067cd4b2 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -114,7 +114,7 @@ public: } /// Invalidates any buffer in the logical page range. - void InvalidateMemory(VAddr device_addr, u64 size, bool flush); + void InvalidateMemory(VAddr device_addr, u64 size); /// Flushes any GPU modified buffer in the logical page range back to CPU memory. 
void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); @@ -183,7 +183,7 @@ private: return !buffer_id || slot_buffers[buffer_id].is_deleted; } - void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write); + bool DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write); [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index acd3a90d1..f1a25e900 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1049,7 +1049,7 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) { // Not GPU mapped memory, can skip invalidation logic entirely. return false; } - buffer_cache.InvalidateMemory(addr, size, true); + buffer_cache.InvalidateMemory(addr, size); texture_cache.InvalidateMemory(addr, size); return true; } @@ -1083,7 +1083,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) { } void Rasterizer::UnmapMemory(VAddr addr, u64 size) { - buffer_cache.InvalidateMemory(addr, size, true); + buffer_cache.InvalidateMemory(addr, size); texture_cache.UnmapMemory(addr, size); page_manager.OnGpuUnmap(addr, size); { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 34849f5ce..3df548543 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -132,14 +132,15 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { tile_manager.TileImage(image.image, buffer_copies, mapping.Buffer()->Handle(), mapping.Offset(), image.info); scheduler.DeferOperation([this, image_addr, download = mapping.Data(), image_size] { + if (buffer_cache.IsRegionGpuModified(image_addr, image_size)) { + return; + } + LOG_WARNING(Render_Vulkan, "Downloading image memory at {:#x} ({} bytes)", image_addr, + image_size); 
auto* memory = Core::Memory::Instance(); // Should we download directly to main memory or put contents into the buffer cache? memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); - // Can happen that the buffer that the image is read from still holds - // old invalid data. We need to invalidate memory in buffer cache so that - // contents are uploaded from main memory the next time buffers in this - // memory region are accessed. - buffer_cache.InvalidateMemory(image_addr, image_size, false); + buffer_cache.InvalidateMemory(image_addr, image_size); }); } From c9a5b0ec0660c7ff66032c0f7feea1d0bf739609 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 28 Aug 2025 21:13:28 +0200 Subject: [PATCH 29/46] Revert tests and maintain gpumodified --- src/video_core/buffer_cache/buffer_cache.cpp | 12 ++++++++---- src/video_core/buffer_cache/buffer_cache.h | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 ++-- src/video_core/texture_cache/texture_cache.cpp | 8 ++++---- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 213c0a1a7..46dbcc949 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -154,12 +154,16 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s BufferCache::~BufferCache() = default; -void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) { +void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool download) { if (!IsRegionRegistered(device_addr, size)) { return; } - memory_tracker->InvalidateRegion( - device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); + if (download) { + memory_tracker->InvalidateRegion( + device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); + } else { + memory_tracker->InvalidateRegion(device_addr, size); + } } void 
BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { @@ -861,6 +865,7 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, size_t total_size_bytes = 0; VAddr buffer_start = buffer.CpuAddr(); vk::Buffer src_buffer = VK_NULL_HANDLE; + TouchBuffer(buffer); memory_tracker->ForEachUploadRange( device_addr, size, is_written, [&](u64 device_addr_out, u64 range_size) { @@ -903,7 +908,6 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, .bufferMemoryBarrierCount = 1, .pBufferMemoryBarriers = &post_barrier, }); - TouchBuffer(buffer); } if (is_texel_buffer) { return SynchronizeBufferFromImage(buffer, device_addr, size); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3067cd4b2..b54e59a9c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -114,7 +114,7 @@ public: } /// Invalidates any buffer in the logical page range. - void InvalidateMemory(VAddr device_addr, u64 size); + void InvalidateMemory(VAddr device_addr, u64 size, bool download); /// Flushes any GPU modified buffer in the logical page range back to CPU memory. void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f1a25e900..acd3a90d1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -1049,7 +1049,7 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) { // Not GPU mapped memory, can skip invalidation logic entirely. 
return false; } - buffer_cache.InvalidateMemory(addr, size); + buffer_cache.InvalidateMemory(addr, size, true); texture_cache.InvalidateMemory(addr, size); return true; } @@ -1083,7 +1083,7 @@ void Rasterizer::MapMemory(VAddr addr, u64 size) { } void Rasterizer::UnmapMemory(VAddr addr, u64 size) { - buffer_cache.InvalidateMemory(addr, size); + buffer_cache.InvalidateMemory(addr, size, true); texture_cache.UnmapMemory(addr, size); page_manager.OnGpuUnmap(addr, size); { diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 3df548543..9edcc6c9f 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -133,14 +133,13 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { image.info); scheduler.DeferOperation([this, image_addr, download = mapping.Data(), image_size] { if (buffer_cache.IsRegionGpuModified(image_addr, image_size)) { - return; + LOG_WARNING(Render_Vulkan, + "Image {:x} was modified by GPU during download", image_addr); } - LOG_WARNING(Render_Vulkan, "Downloading image memory at {:#x} ({} bytes)", image_addr, - image_size); auto* memory = Core::Memory::Instance(); // Should we download directly to main memory or put contents into the buffer cache? 
memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); - buffer_cache.InvalidateMemory(image_addr, image_size); + buffer_cache.InvalidateMemory(image_addr, image_size, false); }); } @@ -417,6 +416,7 @@ ImageId TextureCache::ExpandImage(const ImageInfo& info, ImageId image_id) { TrackImage(new_image_id); new_image.flags &= ~ImageFlagBits::Dirty; + new_image.flags |= src_image.flags & ImageFlagBits::GpuModified; return new_image_id; } From 9fdff4f2df7d805be37f687caecc95aa37afaffc Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 30 Aug 2025 02:15:03 +0200 Subject: [PATCH 30/46] Handle GPU modified ranges --- src/video_core/buffer_cache/buffer_cache.cpp | 59 ++++++++++++++++++- src/video_core/buffer_cache/buffer_cache.h | 4 ++ src/video_core/page_manager.h | 4 ++ .../texture_cache/texture_cache.cpp | 22 ++++--- 4 files changed, 77 insertions(+), 12 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 46dbcc949..59dd01ad4 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -163,6 +163,7 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool download) { device_addr, size, [this, device_addr, size] { ReadMemory(device_addr, size, true); }); } else { memory_tracker->InvalidateRegion(device_addr, size); + gpu_modified_ranges.Subtract(device_addr, size); } } @@ -208,12 +209,12 @@ bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si download_buffer.Commit(); scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); - cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies); - scheduler.DeferOperation([this, &buffer, copies = std::move(copies), download, offset, + cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), copies); + scheduler.DeferOperation([this, buf_addr = buffer.CpuAddr(), copies = std::move(copies), download, offset, 
device_addr, size, is_write]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { - const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset; + const VAddr copy_device_addr = buf_addr + copy.srcOffset; const u64 dst_offset = copy.dstOffset - offset; memory->TryWriteBacking(std::bit_cast(copy_device_addr), download + dst_offset, copy.size); @@ -227,6 +228,58 @@ bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si return true; } +void BufferCache::ReadEdgeImagePages(const Image& image) { + // May happen that after downloading the image and invalidating region, + // that there were GPU modified ranges that are lost due to CPU reuploading. + // This doesn't change tracker state and it is expected to call DownloadImageMemory after this. + const VAddr image_addr = image.info.guest_address; + const u64 image_size = image.info.guest_size; + const VAddr image_end = image_addr + image_size; + const VAddr page_start = PageManager::GetPageAddr(image_addr); + const VAddr page_end = PageManager::GetNextPageAddr(image_end - 1); + boost::container::small_vector copies; + u64 total_size_bytes = 0; + const auto [buffer, offset] = ObtainBufferForImage(image_addr, image_size); + const auto add_download = [&](VAddr start, VAddr end) { + const u64 new_offset = start - buffer->CpuAddr(); + const u64 new_size = end - start; + copies.push_back(vk::BufferCopy{ + .srcOffset = new_offset, + .dstOffset = total_size_bytes, + .size = new_size, + }); + // Align up to avoid cache conflicts + constexpr u64 align = 64ULL; + constexpr u64 mask = ~(align - 1ULL); + total_size_bytes += (new_size + align - 1) & mask; + }; + gpu_modified_ranges.ForEachInRange(page_start, image_addr - page_start, add_download); + gpu_modified_ranges.ForEachInRange(image_end, page_end - image_end, add_download); + gpu_modified_ranges.Subtract(page_start, page_end - page_start); + if (total_size_bytes == 0) { + return; + } + const auto [download, download_offset]
= download_buffer.Map(total_size_bytes); + for (auto& copy : copies) { + // Modify copies to have the staging offset in mind + copy.dstOffset += download_offset; + } + download_buffer.Commit(); + scheduler.EndRendering(); + const auto cmdbuf = scheduler.CommandBuffer(); + cmdbuf.copyBuffer(buffer->Handle(), download_buffer.Handle(), copies); + scheduler.DeferOperation([this, buf_addr = buffer->CpuAddr(), copies = std::move(copies), download, download_offset, + image_addr, image_size]() { + auto* memory = Core::Memory::Instance(); + for (const auto& copy : copies) { + const VAddr copy_device_addr = buf_addr + copy.srcOffset; + const u64 dst_offset = copy.dstOffset - download_offset; + memory->TryWriteBacking(std::bit_cast(copy_device_addr), download + dst_offset, + copy.size); + } + }); +} + void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { const auto& regs = liverpool->regs; Vulkan::VertexInputs attributes; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b54e59a9c..695515c3f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -11,6 +11,7 @@ #include "video_core/buffer_cache/buffer.h" #include "video_core/buffer_cache/range_set.h" #include "video_core/multi_level_page_table.h" +#include "video_core/texture_cache/image.h" namespace AmdGpu { struct Liverpool; @@ -119,6 +120,9 @@ public: /// Flushes any GPU modified buffer in the logical page range back to CPU memory. void ReadMemory(VAddr device_addr, u64 size, bool is_write = false); + /// Flushes GPU modified ranges of the uncovered part of the edge pages of an image. + void ReadEdgeImagePages(const Image& image); + /// Binds host vertex buffers for the current draw. 
void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline); diff --git a/src/video_core/page_manager.h b/src/video_core/page_manager.h index 4ca41cb43..03445d3ed 100644 --- a/src/video_core/page_manager.h +++ b/src/video_core/page_manager.h @@ -51,6 +51,10 @@ public: return Common::AlignUp(addr + 1, PAGE_SIZE); } + static constexpr size_t GetPageSize() { + return PAGE_SIZE; + } + private: struct Impl; std::unique_ptr impl; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 9edcc6c9f..53f954e18 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -132,10 +132,6 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { tile_manager.TileImage(image.image, buffer_copies, mapping.Buffer()->Handle(), mapping.Offset(), image.info); scheduler.DeferOperation([this, image_addr, download = mapping.Data(), image_size] { - if (buffer_cache.IsRegionGpuModified(image_addr, image_size)) { - LOG_WARNING(Render_Vulkan, - "Image {:x} was modified by GPU during download", image_addr); - } auto* memory = Core::Memory::Instance(); // Should we download directly to main memory or put contents into the buffer cache? 
memory->TryWriteBacking(std::bit_cast(image_addr), download, image_size); @@ -924,9 +920,9 @@ void TextureCache::RunGarbageCollector() { std::scoped_lock lock{mutex}; bool pressured = false; bool aggresive = false; - bool downloaded = false; u64 ticks_to_destroy = 0; size_t num_deletions = 0; + boost::container::small_vector download_pending; const auto configure = [&](bool allow_aggressive) { pressured = total_used_memory >= pressure_gc_memory; @@ -946,10 +942,13 @@ void TextureCache::RunGarbageCollector() { return false; } if (download) { - DownloadImageMemory(image_id); - downloaded = true; + download_pending.push_back(image_id); + buffer_cache.ReadEdgeImagePages(image); + UntrackImage(image_id); + UnregisterImage(image_id); + } else { + FreeImage(image_id); } - FreeImage(image_id); if (total_used_memory < critical_gc_memory) { if (aggresive) { num_deletions >>= 2; @@ -974,7 +973,12 @@ void TextureCache::RunGarbageCollector() { lru_cache.ForEachItemBelow(gc_tick - ticks_to_destroy, clean_up); } - if (downloaded) { + for (const auto& image_id : download_pending) { + DownloadImageMemory(image_id); + DeleteImage(image_id); + } + + if (!download_pending.empty()) { // We need to make downloads synchronous. It is possible that the contents // of the image are requested before they are downloaded in which case // outdated buffer cache contents are used instead. 
From 0711f1e00b3b3e3efa24b515e76a1c891a6e3c2a Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 30 Aug 2025 02:15:16 +0200 Subject: [PATCH 31/46] Demote logs to debug/info --- src/video_core/texture_cache/tile_manager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 5154dad46..14b68d9f7 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -137,9 +137,9 @@ vk::Pipeline TileManager::GetTilingPipeline(const ImageInfo& info, bool is_tiler vk::ShaderStageFlagBits::eCompute, device, defines); const auto module_name = fmt::format("{}_{} {}", magic_enum::enum_name(info.tile_mode), info.num_bits, is_tiler ? "tiler" : "detiler"); - LOG_WARNING(Render_Vulkan, "Compiling shader {}", module_name); + LOG_INFO(Render_Vulkan, "Compiling shader {}", module_name); for (const auto& def : defines) { - LOG_WARNING(Render_Vulkan, "#define {}", def); + LOG_DEBUG(Render_Vulkan, "#define {}", def); } Vulkan::SetObjectName(device, module, module_name); const vk::PipelineShaderStageCreateInfo shader_ci = { From 1aded8d7821e19cdc4781e43f3267f2dd44280bb Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 30 Aug 2025 02:18:01 +0200 Subject: [PATCH 32/46] tracker renamed to page_manager --- .../texture_cache/texture_cache.cpp | 20 +++++++++---------- src/video_core/texture_cache/texture_cache.h | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 53f954e18..312cf3e3b 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -22,8 +22,8 @@ static constexpr u64 PageShift = 12; static constexpr u64 NumFramesBeforeRemoval = 32; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, - 
BufferCache& buffer_cache_, PageManager& tracker_) - : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_}, + BufferCache& buffer_cache_, PageManager& page_manager_) + : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, page_manager{page_manager_}, blit_helper{instance, scheduler}, tile_manager{instance, scheduler, buffer_cache.GetUtilityBuffer(MemoryUsage::Stream)} { // Create basic null image at fixed image ID. @@ -814,7 +814,7 @@ void TextureCache::TrackImage(ImageId image_id) { // Re-track the whole image image.track_addr = image_begin; image.track_addr_end = image_end; - tracker.UpdatePageWatchers<1>(image_begin, image.info.guest_size); + page_manager.UpdatePageWatchers<1>(image_begin, image.info.guest_size); } else { if (image_begin < image.track_addr) { TrackImageHead(image_id); @@ -837,7 +837,7 @@ void TextureCache::TrackImageHead(ImageId image_id) { ASSERT(image.track_addr != 0 && image_begin < image.track_addr); const auto size = image.track_addr - image_begin; image.track_addr = image_begin; - tracker.UpdatePageWatchers<1>(image_begin, size); + page_manager.UpdatePageWatchers<1>(image_begin, size); } void TextureCache::TrackImageTail(ImageId image_id) { @@ -853,7 +853,7 @@ void TextureCache::TrackImageTail(ImageId image_id) { const auto addr = image.track_addr_end; const auto size = image_end - image.track_addr_end; image.track_addr_end = image_end; - tracker.UpdatePageWatchers<1>(addr, size); + page_manager.UpdatePageWatchers<1>(addr, size); } void TextureCache::UntrackImage(ImageId image_id) { @@ -866,7 +866,7 @@ void TextureCache::UntrackImage(ImageId image_id) { image.track_addr = 0; image.track_addr_end = 0; if (size != 0) { - tracker.UpdatePageWatchers(addr, size); + page_manager.UpdatePageWatchers(addr, size); } } @@ -876,7 +876,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) { if (!image.IsTracked() || image_begin < image.track_addr) { return; } - const auto addr = 
tracker.GetNextPageAddr(image_begin); + const auto addr = page_manager.GetNextPageAddr(image_begin); const auto size = addr - image_begin; image.track_addr = addr; if (image.track_addr == image.track_addr_end) { @@ -885,7 +885,7 @@ void TextureCache::UntrackImageHead(ImageId image_id) { // Cehck its hash later. MarkAsMaybeDirty(image_id, image); } - tracker.UpdatePageWatchers(image_begin, size); + page_manager.UpdatePageWatchers(image_begin, size); } void TextureCache::UntrackImageTail(ImageId image_id) { @@ -895,7 +895,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) { return; } ASSERT(image.track_addr_end != 0); - const auto addr = tracker.GetPageAddr(image_end); + const auto addr = page_manager.GetPageAddr(image_end); const auto size = image_end - addr; image.track_addr_end = addr; if (image.track_addr == image.track_addr_end) { @@ -904,7 +904,7 @@ void TextureCache::UntrackImageTail(ImageId image_id) { // Cehck its hash later. MarkAsMaybeDirty(image_id, image); } - tracker.UpdatePageWatchers(addr, size); + page_manager.UpdatePageWatchers(addr, size); } void TextureCache::RunGarbageCollector() { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 99b5ea971..99d79d142 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -85,7 +85,7 @@ public: public: TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - BufferCache& buffer_cache, PageManager& tracker); + BufferCache& buffer_cache, PageManager& page_manager); ~TextureCache(); TileManager& GetTileManager() noexcept { @@ -309,7 +309,7 @@ private: const Vulkan::Instance& instance; Vulkan::Scheduler& scheduler; BufferCache& buffer_cache; - PageManager& tracker; + PageManager& page_manager; BlitHelper blit_helper; TileManager tile_manager; Common::SlotVector slot_images; From 4b7c4cc2a3285a18b6223af93eaea217987636c1 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi 
Date: Sat, 30 Aug 2025 02:20:04 +0200 Subject: [PATCH 33/46] clang-format --- src/video_core/buffer_cache/buffer_cache.cpp | 8 ++++---- src/video_core/texture_cache/texture_cache.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 59dd01ad4..b2753b534 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -210,8 +210,8 @@ bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), copies); - scheduler.DeferOperation([this, buf_addr = buffer.CpuAddr(), copies = std::move(copies), download, offset, - device_addr, size, is_write]() { + scheduler.DeferOperation([this, buf_addr = buffer.CpuAddr(), copies = std::move(copies), + download, offset, device_addr, size, is_write]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { const VAddr copy_device_addr = buf_addr + copy.srcOffset; @@ -268,8 +268,8 @@ void BufferCache::ReadEdgeImagePages(const Image& image) { scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyBuffer(buffer->Handle(), download_buffer.Handle(), copies); - scheduler.DeferOperation([this, buf_addr = buffer->CpuAddr(), copies = std::move(copies), download, download_offset, - image_addr, image_size]() { + scheduler.DeferOperation([this, buf_addr = buffer->CpuAddr(), copies = std::move(copies), + download, download_offset, image_addr, image_size]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { const VAddr copy_device_addr = buf_addr + copy.srcOffset; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 312cf3e3b..f5fc9de62 100644 --- a/src/video_core/texture_cache/texture_cache.cpp 
+++ b/src/video_core/texture_cache/texture_cache.cpp @@ -23,8 +23,8 @@ static constexpr u64 NumFramesBeforeRemoval = 32; TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, BufferCache& buffer_cache_, PageManager& page_manager_) - : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, page_manager{page_manager_}, - blit_helper{instance, scheduler}, + : instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, + page_manager{page_manager_}, blit_helper{instance, scheduler}, tile_manager{instance, scheduler, buffer_cache.GetUtilityBuffer(MemoryUsage::Stream)} { // Create basic null image at fixed image ID. const auto null_id = GetNullImage(vk::Format::eR8G8B8A8Unorm); From 121a43d1cf5cd2f39706dadf4a63a404fbb144fc Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 10 Sep 2025 23:55:02 +0200 Subject: [PATCH 34/46] Test not collecting gpu modified ranges --- src/video_core/texture_cache/texture_cache.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index f5fc9de62..2ee01f902 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -941,6 +941,10 @@ void TextureCache::RunGarbageCollector() { if (download && !pressured) { return false; } + if (download && + buffer_cache.IsRegionGpuModified(image.info.guest_address, image.info.guest_size)) { + return false; + } if (download) { download_pending.push_back(image_id); buffer_cache.ReadEdgeImagePages(image); From 13ef8a0a69a4d2bee95a3bdae5f496b0b095189e Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 13 Sep 2025 18:04:45 +0200 Subject: [PATCH 35/46] Better handling of GPU written in shader hle --- src/video_core/buffer_cache/buffer_cache.cpp | 45 ++++++++++++------- src/video_core/buffer_cache/buffer_cache.h | 19 ++++++-- src/video_core/buffer_cache/memory_tracker.h | 11 
+++++ .../renderer_vulkan/vk_rasterizer.cpp | 17 ++++--- .../renderer_vulkan/vk_shader_hle.cpp | 13 +++++- src/video_core/texture_cache/image.h | 4 +- .../texture_cache/texture_cache.cpp | 22 ++++++--- src/video_core/texture_cache/texture_cache.h | 5 ++- 8 files changed, 101 insertions(+), 35 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index b2753b534..6f65ab62a 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -339,7 +339,7 @@ void BufferCache::BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline) { // Map buffers for merged ranges for (auto& range : ranges_merged) { const u64 size = memory->ClampRangeSize(range.base_address, range.GetSize()); - const auto [buffer, offset] = ObtainBuffer(range.base_address, size, false); + const auto [buffer, offset] = ObtainBuffer(range.base_address, size); range.vk_buffer = buffer->buffer; range.offset = offset; } @@ -393,7 +393,7 @@ void BufferCache::BindIndexBuffer(u32 index_offset) { // Bind index buffer. const u32 index_buffer_size = regs.num_indices * index_size; - const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size, false); + const auto [vk_buffer, offset] = ObtainBuffer(index_address, index_buffer_size); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindIndexBuffer(vk_buffer->Handle(), offset, index_type); } @@ -449,20 +449,17 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, if (src_gds) { return gds_buffer; } - // Avoid using ObtainBuffer here as that might give us the stream buffer. 
- const BufferId buffer_id = FindBuffer(src, num_bytes); - auto& buffer = slot_buffers[buffer_id]; - if (SynchronizeBuffer(buffer, src, num_bytes, false, true)) { - texture_cache.InvalidateMemoryFromGPU(dst, num_bytes); - } - return buffer; + const auto [buffer, offset] = + ObtainBuffer(src, num_bytes, + ObtainBufferFlags::IgnoreStreamBuffer | ObtainBufferFlags::IsTexelBuffer | + ObtainBufferFlags::InvalidateTextureCache); + return *buffer; }(); auto& dst_buffer = [&] -> const Buffer& { if (dst_gds) { return gds_buffer; } - // Prefer using ObtainBuffer here as that will auto-mark the region as GPU modified. - const auto [buffer, offset] = ObtainBuffer(dst, num_bytes, true); + const auto [buffer, offset] = ObtainBuffer(dst, num_bytes, ObtainBufferFlags::IsWritten); return *buffer; }(); vk::BufferCopy region{ @@ -525,10 +522,14 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, }); } -std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, bool is_written, - bool is_texel_buffer, BufferId buffer_id) { +std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, + ObtainBufferFlags flags, BufferId buffer_id) { // For read-only buffers use device local stream buffer to reduce renderpass breaks. 
- if (!is_written && size <= CACHING_PAGESIZE && !IsRegionGpuModified(device_addr, size)) { + const bool is_written = True(flags & ObtainBufferFlags::IsWritten); + const bool is_texel_buffer = True(flags & ObtainBufferFlags::IsTexelBuffer); + const bool skip_stream_buffer = True(flags & ObtainBufferFlags::IgnoreStreamBuffer); + if (!is_written && !skip_stream_buffer && size <= CACHING_PAGESIZE && + !IsRegionGpuModified(device_addr, size)) { const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment()); return {&stream_buffer, offset}; } @@ -536,9 +537,13 @@ std::pair BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b buffer_id = FindBuffer(device_addr, size); } Buffer& buffer = slot_buffers[buffer_id]; - SynchronizeBuffer(buffer, device_addr, size, is_written, is_texel_buffer); + const bool img_synced = + SynchronizeBuffer(buffer, device_addr, size, is_written, is_texel_buffer); + if (img_synced && True(flags & ObtainBufferFlags::InvalidateTextureCache)) { + texture_cache.InvalidateMemoryFromGPU(device_addr, size); + } if (is_written) { - gpu_modified_ranges.Add(device_addr, size); + MarkRegionAsGpuModified(device_addr, size); } return {&buffer, buffer.Offset(device_addr)}; } @@ -554,7 +559,7 @@ std::pair BufferCache::ObtainBufferForImage(VAddr gpu_addr, u32 si } // If some buffer within was GPU modified create a full buffer to avoid losing GPU data. if (IsRegionGpuModified(gpu_addr, size)) { - return ObtainBuffer(gpu_addr, size, false, false); + return ObtainBuffer(gpu_addr, size); } // In all other cases, just do a CPU copy to the staging buffer. 
const auto [data, offset] = staging_buffer.Map(size, 16); @@ -576,6 +581,12 @@ bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) { return memory_tracker->IsRegionGpuModified(addr, size); } +void BufferCache::MarkRegionAsGpuModified(VAddr addr, size_t size) { + gpu_modified_ranges.Add(addr, size); + memory_tracker->MarkRegionAsGpuModified(addr, size); + texture_cache.MarkAsMaybeReused(addr, size); +} + BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) { if (device_addr == 0) { return NULL_BUFFER_ID; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 695515c3f..5b92b4a65 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -5,6 +5,7 @@ #include #include +#include "common/enum.h" #include "common/lru_cache.h" #include "common/slot_vector.h" #include "common/types.h" @@ -35,6 +36,15 @@ class TextureCache; class MemoryTracker; class PageManager; +enum class ObtainBufferFlags { + None = 0, + IsWritten = 1 << 0, + IsTexelBuffer = 1 << 1, + IgnoreStreamBuffer = 1 << 2, + InvalidateTextureCache = 1 << 3, +}; +DECLARE_ENUM_FLAG_OPERATORS(ObtainBufferFlags) + class BufferCache { public: static constexpr u32 CACHING_PAGEBITS = 14; @@ -139,9 +149,9 @@ public: void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds); /// Obtains a buffer for the specified region. - [[nodiscard]] std::pair ObtainBuffer(VAddr gpu_addr, u32 size, bool is_written, - bool is_texel_buffer = false, - BufferId buffer_id = {}); + [[nodiscard]] std::pair ObtainBuffer( + VAddr gpu_addr, u32 size, ObtainBufferFlags flags = ObtainBufferFlags::None, + BufferId buffer_id = {}); /// Attempts to obtain a buffer without modifying the cache contents. 
[[nodiscard]] std::pair ObtainBufferForImage(VAddr gpu_addr, u32 size); @@ -155,6 +165,9 @@ public: /// Return true when a CPU region is modified from the GPU [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + /// Mark region as modified from the GPU + void MarkRegionAsGpuModified(VAddr addr, size_t size); + /// Return buffer id for the specified region BufferId FindBuffer(VAddr device_addr, u32 size); diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index a59fff64a..f887b8962 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -51,6 +51,17 @@ public: }); } + void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) { + IteratePages( + dirty_cpu_addr, query_size, [this](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; + manager->template ChangeRegionState(manager->GetCpuAddr() + offset, + size); + manager->template ChangeRegionState(manager->GetCpuAddr() + offset, + size); + }); + } + /// Unmark region as modified from the host GPU void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept { IteratePages(dirty_cpu_addr, query_size, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index acd3a90d1..51dd8a42e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -350,12 +350,12 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3 } const auto& [buffer, base] = - buffer_cache.ObtainBuffer(arg_address + offset, stride * max_count, false); + buffer_cache.ObtainBuffer(arg_address + offset, stride * max_count); VideoCore::Buffer* count_buffer{}; u32 count_base{}; if (count_address != 0) { - std::tie(count_buffer, count_base) = buffer_cache.ObtainBuffer(count_address, 4, false); + std::tie(count_buffer, count_base) = 
buffer_cache.ObtainBuffer(count_address, 4); } BeginRendering(*pipeline, state); @@ -436,7 +436,7 @@ void Rasterizer::DispatchIndirect(VAddr address, u32 offset, u32 size) { scheduler.EndRendering(); - const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size, false); + const auto [buffer, base] = buffer_cache.ObtainBuffer(address + offset, size); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline->Handle()); @@ -661,8 +661,15 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding buffer_infos.emplace_back(null_buffer.Handle(), 0, VK_WHOLE_SIZE); } } else { - const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( - vsharp.base_address, size, desc.is_written, desc.is_formatted, buffer_id); + VideoCore::ObtainBufferFlags flags = {}; + if (desc.is_written) { + flags |= VideoCore::ObtainBufferFlags::IsWritten; + } + if (desc.is_formatted) { + flags |= VideoCore::ObtainBufferFlags::IsTexelBuffer; + } + const auto [vk_buffer, offset] = + buffer_cache.ObtainBuffer(vsharp.base_address, size, flags, buffer_id); const u32 alignment = is_storage ? instance.StorageMinAlignment() : instance.UniformMinAlignment(); const u32 offset_aligned = Common::AlignDown(offset, alignment); diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp index d73fdbeb1..d222d0a01 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -96,9 +96,10 @@ static bool ExecuteCopyShaderHLE(const Shader::Info& info, // Obtain buffers for the total source and destination ranges. 
const auto [src_buf, src_buf_offset] = buffer_cache.ObtainBuffer( - src_buf_sharp.base_address + src_offset_min, src_offset_max - src_offset_min, false); + src_buf_sharp.base_address + src_offset_min, src_offset_max - src_offset_min); const auto [dst_buf, dst_buf_offset] = buffer_cache.ObtainBuffer( - dst_buf_sharp.base_address + dst_offset_min, dst_offset_max - dst_offset_min, true); + dst_buf_sharp.base_address + dst_offset_min, dst_offset_max - dst_offset_min, + VideoCore::ObtainBufferFlags::IgnoreStreamBuffer); // Apply found buffer base. const auto vk_copies = std::span{copies}.subspan(batch_start, batch_end - batch_start); @@ -118,6 +119,14 @@ static bool ExecuteCopyShaderHLE(const Shader::Info& info, vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {}); + // Mark destination regions as GPU modified. + for (u32 i = 0; i < cs_program.dim_x; i++) { + const auto& [dst_idx, src_idx, end] = ctl_buf[i]; + const u32 dst_addr = dst_buf_sharp.base_address + (dst_idx * buf_stride); + const u32 size = (end + 1) * buf_stride; + buffer_cache.MarkRegionAsGpuModified(dst_addr, size); + } + return true; } diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 9703315cb..c572b0a5c 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -25,9 +25,11 @@ enum ImageFlagBits : u32 { Empty = 0, MaybeCpuDirty = 1 << 0, ///< The page this image is in was touched before the image address CpuDirty = 1 << 1, ///< Contents have been modified from the CPU - GpuDirty = 1 << 2, ///< Contents have been modified from the GPU (valid data in buffer cache) + GpuDirty = + 1 << 2, ///< Image contents have been modified from the GPU (valid data in buffer cache) Dirty = MaybeCpuDirty | CpuDirty | GpuDirty, GpuModified = 1 << 3, ///< Contents have been modified from the GPU + MaybeReused = 1 << 4, ///< Memory region containing this image 
was maybe reused by the GPU Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked }; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 2ee01f902..70d1be3b1 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -193,6 +193,13 @@ void TextureCache::InvalidateMemoryFromGPU(VAddr address, size_t max_size) { }); } +void TextureCache::MarkAsMaybeReused(VAddr addr, size_t size) { + std::scoped_lock lock{mutex}; + ForEachImageInRegion(addr, size, [&](ImageId image_id, Image& image) { + image.flags |= ImageFlagBits::MaybeReused; + }); +} + void TextureCache::UnmapMemory(VAddr cpu_addr, size_t size) { std::scoped_lock lk{mutex}; @@ -519,12 +526,15 @@ ImageId TextureCache::FindImageFromRange(VAddr address, size_t size, bool ensure if (image_ids.size() == 1) { // Sometimes image size might not exactly match with requested buffer size // If we only found 1 candidate image use it without too many questions. 
+ Image& image = slot_images[image_ids[0]]; + TouchImage(image); return image_ids.back(); } if (!image_ids.empty()) { for (s32 i = 0; i < image_ids.size(); ++i) { Image& image = slot_images[image_ids[i]]; if (image.info.guest_size == size) { + TouchImage(image); return image_ids[i]; } } @@ -937,14 +947,11 @@ void TextureCache::RunGarbageCollector() { } --num_deletions; auto& image = slot_images[image_id]; - const bool download = image.SafeToDownload(); + const bool download = + image.SafeToDownload() && False(image.flags & ImageFlagBits::MaybeReused); if (download && !pressured) { return false; } - if (download && - buffer_cache.IsRegionGpuModified(image.info.guest_address, image.info.guest_size)) { - return false; - } if (download) { download_pending.push_back(image_id); buffer_cache.ReadEdgeImagePages(image); @@ -990,8 +997,11 @@ void TextureCache::RunGarbageCollector() { } } -void TextureCache::TouchImage(const Image& image) { +void TextureCache::TouchImage(Image& image) { lru_cache.Touch(image.lru_id, gc_tick); + + // Image is still valid + image.flags &= ~ImageFlagBits::MaybeReused; } void TextureCache::DeleteImage(ImageId image_id) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 99d79d142..49fca8a3c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -98,6 +98,9 @@ public: /// Marks an image as dirty if it exists at the provided address. void InvalidateMemoryFromGPU(VAddr address, size_t max_size); + /// Marks an image as maybe reused if it exists within the provided range. + void MarkAsMaybeReused(VAddr addr, size_t size); + /// Evicts any images that overlap the unmapped range. void UnmapMemory(VAddr cpu_addr, size_t size); @@ -297,7 +300,7 @@ private: void DeleteImage(ImageId image_id); /// Touch the image in the LRU cache. 
- void TouchImage(const Image& image); + void TouchImage(Image& image); void FreeImage(ImageId image_id) { UntrackImage(image_id); From 488edb17b47599084fe0c0facec670290174a4ef Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 14 Sep 2025 03:15:12 +0200 Subject: [PATCH 36/46] Use correct type for address --- src/video_core/buffer_cache/memory_tracker.h | 2 -- src/video_core/renderer_vulkan/vk_shader_hle.cpp | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index f887b8962..a1bff4ed7 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -57,8 +57,6 @@ public: std::scoped_lock lk{manager->lock}; manager->template ChangeRegionState(manager->GetCpuAddr() + offset, size); - manager->template ChangeRegionState(manager->GetCpuAddr() + offset, - size); }); } diff --git a/src/video_core/renderer_vulkan/vk_shader_hle.cpp b/src/video_core/renderer_vulkan/vk_shader_hle.cpp index d222d0a01..b1badac4e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_hle.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_hle.cpp @@ -122,7 +122,7 @@ static bool ExecuteCopyShaderHLE(const Shader::Info& info, // Mark destination regions as GPU modified. 
for (u32 i = 0; i < cs_program.dim_x; i++) { const auto& [dst_idx, src_idx, end] = ctl_buf[i]; - const u32 dst_addr = dst_buf_sharp.base_address + (dst_idx * buf_stride); + const VAddr dst_addr = dst_buf_sharp.base_address + (dst_idx * buf_stride); const u32 size = (end + 1) * buf_stride; buffer_cache.MarkRegionAsGpuModified(dst_addr, size); } From 72be8a763251668bb0c97f1bbdbbb9f165760965 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sun, 14 Sep 2025 03:44:44 +0200 Subject: [PATCH 37/46] clang-format --- src/video_core/buffer_cache/memory_tracker.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/video_core/buffer_cache/memory_tracker.h b/src/video_core/buffer_cache/memory_tracker.h index a1bff4ed7..a606e00b7 100644 --- a/src/video_core/buffer_cache/memory_tracker.h +++ b/src/video_core/buffer_cache/memory_tracker.h @@ -52,12 +52,12 @@ public: } void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) { - IteratePages( - dirty_cpu_addr, query_size, [this](RegionManager* manager, u64 offset, size_t size) { - std::scoped_lock lk{manager->lock}; - manager->template ChangeRegionState(manager->GetCpuAddr() + offset, - size); - }); + IteratePages(dirty_cpu_addr, query_size, + [this](RegionManager* manager, u64 offset, size_t size) { + std::scoped_lock lk{manager->lock}; + manager->template ChangeRegionState( + manager->GetCpuAddr() + offset, size); + }); } /// Unmark region as modified from the host GPU From 81bfd5adce9ac064b323e49244c90aea938bbb8a Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Wed, 17 Sep 2025 23:42:42 +0200 Subject: [PATCH 38/46] clang-format --- src/video_core/texture_cache/texture_cache.cpp | 3 ++- src/video_core/texture_cache/texture_cache.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index b9db578ab..a23631fdf 100644 --- a/src/video_core/texture_cache/texture_cache.cpp 
+++ b/src/video_core/texture_cache/texture_cache.cpp @@ -138,7 +138,8 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { image.info); { std::unique_lock lock(downloaded_images_mutex); - downloaded_images_queue.emplace(scheduler.CurrentTick(), image_addr, mapping.Data(), image_size); + downloaded_images_queue.emplace(scheduler.CurrentTick(), image_addr, mapping.Data(), + image_size); downloaded_images_cv.notify_one(); } } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7aed6fefb..ab2e94919 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -89,7 +89,8 @@ public: public: TextureCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler, - AmdGpu::Liverpool* liverpool, BufferCache& buffer_cache, PageManager& page_manager); + AmdGpu::Liverpool* liverpool, BufferCache& buffer_cache, + PageManager& page_manager); ~TextureCache(); TileManager& GetTileManager() noexcept { From e04b09d29420d6e96242ccc8b9746c111828d6b8 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Thu, 18 Sep 2025 21:03:24 +0200 Subject: [PATCH 39/46] Temporarily add the 2GB budget --- src/video_core/renderer_vulkan/vk_instance.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 09f68d764..92acf23bf 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -411,7 +411,8 @@ public: /// Returns the total memory budget available to the device. [[nodiscard]] u64 GetTotalMemoryBudget() const { - return total_memory_budget; + return 2_GB; // TODO: this is for better garbage collection testing, temporary + // return total_memory_budget; } /// Determines if a format is supported for a set of feature flags. 
From dab54a081d016300a6cea90c50365ca2f8067547 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Mon, 3 Nov 2025 21:22:48 +0100 Subject: [PATCH 40/46] clang-format --- src/video_core/buffer_cache/buffer_cache.cpp | 3 ++- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 4 ++-- src/video_core/texture_cache/texture_cache.cpp | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 00ff3e99d..1cdb1db63 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -444,7 +444,8 @@ void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, if (dst_gds) { return gds_buffer; } - const auto [buffer, offset] = ObtainBuffer(dst, num_bytes, ObtainBufferFlags::IsWritten | ObtainBufferFlags::IsTexelBuffer); + const auto [buffer, offset] = ObtainBuffer( + dst, num_bytes, ObtainBufferFlags::IsWritten | ObtainBufferFlags::IsTexelBuffer); return *buffer; }(); const vk::BufferCopy region = { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 777eff555..2984581a5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -580,8 +580,8 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding if (desc.is_formatted) { flags |= VideoCore::ObtainBufferFlags::IsTexelBuffer; } - const auto [vk_buffer, offset] = buffer_cache.ObtainBuffer( - vsharp.base_address, size, flags, buffer_id); + const auto [vk_buffer, offset] = + buffer_cache.ObtainBuffer(vsharp.base_address, size, flags, buffer_id); const u32 offset_aligned = Common::AlignDown(offset, alignment); const u32 adjust = offset - offset_aligned; ASSERT(adjust % 4 == 0); diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 
5b5112ab9..c1a30be41 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -134,7 +134,8 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { download_buffer.Commit(); scheduler.EndRendering(); image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); - tile_manager.TileImage(image, buffer_copies, mapping.Buffer()->Handle(), mapping.Offset(), copy_size); + tile_manager.TileImage(image, buffer_copies, mapping.Buffer()->Handle(), mapping.Offset(), + copy_size); { std::unique_lock lock(downloaded_images_mutex); downloaded_images_queue.emplace(scheduler.CurrentTick(), image_addr, mapping.Data(), From 7b881ded254a1ef77d49e0c7dbdd26ffedcc4359 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Tue, 4 Nov 2025 20:20:21 +0100 Subject: [PATCH 41/46] Handle zero size --- src/video_core/buffer_cache/range_set.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/buffer_cache/range_set.h b/src/video_core/buffer_cache/range_set.h index 5c8e78c7c..b86801bd7 100644 --- a/src/video_core/buffer_cache/range_set.h +++ b/src/video_core/buffer_cache/range_set.h @@ -72,7 +72,7 @@ struct RangeSet { template void ForEachInRange(VAddr base_addr, size_t size, Func&& func) const { - if (m_ranges_set.empty()) { + if (m_ranges_set.empty() || size == 0) { return; } const VAddr start_address = base_addr; @@ -176,7 +176,7 @@ public: template void ForEachInRange(VAddr base_addr, size_t size, Func&& func) const { - if (m_ranges_map.empty()) { + if (m_ranges_map.empty() || size == 0) { return; } const VAddr start_address = base_addr; @@ -280,7 +280,7 @@ public: template void ForEachInRange(VAddr base_addr, size_t size, Func&& func) const { - if (m_ranges_map.empty()) { + if (m_ranges_map.empty() || size == 0) { return; } const VAddr start_address = base_addr; From a681ade4466fafbcdf9467575eeaf0f4b9a008b3 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: 
Thu, 20 Nov 2025 18:08:35 +0100 Subject: [PATCH 42/46] Fix for scheduler.wait hack removal --- src/video_core/buffer_cache/buffer_cache.cpp | 27 ++++++++++++-------- src/video_core/buffer_cache/buffer_cache.h | 3 ++- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index fc3aa50cd..6b052e03d 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -89,13 +89,12 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool download) { void BufferCache::ReadMemory(VAddr device_addr, u64 size, bool is_write) { liverpool->SendCommand([this, device_addr, size, is_write] { Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)]; - if (DownloadBufferMemory(buffer, device_addr, size, is_write)) { - scheduler.Finish(); - } + DownloadBufferMemory(buffer, device_addr, size, is_write); }); } -bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) { +template +void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write) { boost::container::small_vector copies; u64 total_size_bytes = 0; memory_tracker->ForEachDownloadRange( @@ -118,7 +117,7 @@ bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si gpu_modified_ranges.Subtract(device_addr_out, range_size); }); if (total_size_bytes == 0) { - return false; + return; } const auto [download, offset] = download_buffer.Map(total_size_bytes); for (auto& copy : copies) { @@ -129,8 +128,9 @@ bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); cmdbuf.copyBuffer(buffer.Handle(), download_buffer.Handle(), copies); - scheduler.DeferOperation([this, buf_addr = buffer.CpuAddr(), copies = std::move(copies), - download, offset, device_addr, size, is_write]() { + + 
const auto write_func = [this, buf_addr = buffer.CpuAddr(), copies = std::move(copies), + download, offset, device_addr, size, is_write]() { auto* memory = Core::Memory::Instance(); for (const auto& copy : copies) { const VAddr copy_device_addr = buf_addr + copy.srcOffset; @@ -142,9 +142,16 @@ bool BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si if (is_write) { memory_tracker->MarkRegionAsCpuModified(device_addr, size); } - }); + }; - return true; + if constexpr (async) { + scheduler.DeferOperation(write_func); + } else { + scheduler.Finish(); + write_func(); + } + + return; } void BufferCache::ReadEdgeImagePages(const Image& image) { @@ -964,7 +971,7 @@ void BufferCache::RunGarbageCollector() { } --max_deletions; Buffer& buffer = slot_buffers[buffer_id]; - DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes(), true); + DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes(), true); DeleteBuffer(buffer_id); }; } diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7dd6094d9..a9d9ed33d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -186,7 +186,8 @@ private: return !buffer_id || slot_buffers[buffer_id].is_deleted; } - bool DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write); + template + void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size, bool is_write); [[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size); From beab20fa0138eb89705225caeb4a68106a285fac Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Fri, 5 Dec 2025 23:26:13 +0100 Subject: [PATCH 43/46] Fix header --- src/video_core/texture_cache/texture_cache.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index beed3706b..713a6a7b8 100644 --- 
a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -3,12 +3,9 @@ #pragma once -#include #include -#include #include #include -#include #include #include "common/lru_cache.h" @@ -259,6 +256,7 @@ private: ImageId GetNullImage(vk::Format format); /// Copies image memory back to CPU. + template void DownloadImageMemory(ImageId image_id); /// Thread function for copying downloaded images out to CPU memory. From cabbfd4f7ea32fe4727a0a50e97e6a3ffe773fea Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Fri, 5 Dec 2025 23:42:48 +0100 Subject: [PATCH 44/46] Fix compillation --- src/video_core/buffer_cache/buffer_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 6e31a7775..aec2bc77b 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -337,7 +337,7 @@ void BufferCache::FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gd if (is_gds) { return &gds_buffer; } - const auto [buffer, offset] = ObtainBuffer(address, num_bytes, true); + const auto [buffer, offset] = ObtainBuffer(address, num_bytes, ObtainBufferFlags::IsWritten); return buffer; }(); buffer->Fill(buffer->Offset(address), num_bytes, value); From 7ee50635c0fef28184f8a0878ff3a0a3dc076b1f Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Fri, 5 Dec 2025 23:45:22 +0100 Subject: [PATCH 45/46] clang-format --- src/video_core/buffer_cache/buffer_cache.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index aec2bc77b..18ab2d7d0 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -337,7 +337,8 @@ void BufferCache::FillBuffer(VAddr address, u32 num_bytes, u32 value, bool is_gd if (is_gds) { return &gds_buffer; 
} - const auto [buffer, offset] = ObtainBuffer(address, num_bytes, ObtainBufferFlags::IsWritten); + const auto [buffer, offset] = + ObtainBuffer(address, num_bytes, ObtainBufferFlags::IsWritten); return buffer; }(); buffer->Fill(buffer->Offset(address), num_bytes, value); From 1ed8a5d111cd36e6c20d47351a68c182b4379082 Mon Sep 17 00:00:00 2001 From: Lander Gallastegi Date: Sat, 6 Dec 2025 15:07:19 +0100 Subject: [PATCH 46/46] Move insteed of copy --- src/video_core/texture_cache/texture_cache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index a3ba955fb..c5ff0d913 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -145,9 +145,9 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { }; if constexpr (priority) { - scheduler.DeferPriorityOperation(operation); + scheduler.DeferPriorityOperation(std::move(operation)); } else { - scheduler.DeferOperation(operation); + scheduler.DeferOperation(std::move(operation)); } }