From 98d9e9669bf1d39bf9e2d55f9847cdd4d3086a84 Mon Sep 17 00:00:00 2001 From: Arthur Cuesta <49216888+cuesta4@users.noreply.github.com> Date: Mon, 8 Jun 2026 11:06:50 -0300 Subject: [PATCH] Fix stale guest memory when reusing GPU-written linear images ## Fix stale guest memory when reusing GPU-written linear images through overlapping aliases ### Problem Some games reuse the same guest memory range through different image representations. A linear image may be written by the GPU and later reinterpreted as a tiled or compressed texture. With **Enable Readback Linear Images** enabled, shadPS4 already scheduled GPU-to-CPU readbacks for linear images. However, those readbacks were asynchronous. This left a timing window where an overlapping image could be recreated from guest memory before the pending linear-image writeback had completed. In that case, the new texture was uploaded from stale guest-memory data even though the most recent contents already existed on the GPU. This could result in progressive texture corruption when the aliased representation was used later. ### Fix Keep the existing linear-image readback path asynchronous in the common case, but synchronize it on demand when an overlapping alias actually depends on the updated guest-memory contents. The updated flow is: ` ext GPU writes a linear image - schedule the existing asynchronous readback later, an overlapping image needs to be reconstructed from guest memory - detect pending linear-image readbacks for the overlapping range - wait once for the relevant batch - commit the completed data to guest memory - reconstruct the alias from coherent data ` This preserves the existing readback infrastructure and only adds the missing dependency point before alias reconstruction. The existing path performs preventive asynchronous readbacks. This change adds the required demand-driven synchronization when another image representation needs those bytes immediately. This avoids the broader synchronization cost of forcing every linear-image readback to complete immediately while still guaranteeing correctness for dependent alias reuse. --- .../texture_cache/texture_cache.cpp | 91 ++++++++++++++++--- src/video_core/texture_cache/texture_cache.h | 21 ++++- 2 files changed, 98 insertions(+), 14 deletions(-) diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 7c94a0176..195c6767e 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -86,17 +86,20 @@ ImageId TextureCache::GetNullImage(const vk::Format format) { } void TextureCache::ProcessDownloadImages() { + std::scoped_lock lock{mutex}; for (const ImageId image_id : download_images) { DownloadImageMemory(image_id); } download_images.clear(); } -void TextureCache::DownloadImageMemory(ImageId image_id) { +std::optional TextureCache::ScheduleImageDownload( + ImageId image_id) { Image& image = slot_images[image_id]; - if (False(image.flags & ImageFlagBits::GpuModified)) { - return; + if (!image.SafeToDownload() || image.info.props.is_tiled || image.info.guest_address == 0) { + return {}; } + auto& download_buffer = buffer_cache.GetUtilityBuffer(MemoryUsage::Download); const u32 download_size = image.info.pitch * image.info.size.height * image.info.resources.layers * (image.info.num_bits / 8); @@ -118,17 +121,68 @@ void TextureCache::DownloadImageMemory(ImageId image_id) { .imageOffset = {0, 0, 0}, .imageExtent = {image.info.size.width, image.info.size.height, 1}, }; - scheduler.EndRendering(); - const auto cmdbuf = scheduler.CommandBuffer(); - image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {}); - cmdbuf.copyImageToBuffer(image.GetImage(), vk::ImageLayout::eTransferSrcOptimal, - download_buffer.Handle(), image_download); + image.Download(std::span{&image_download, 1}, download_buffer.Handle(), offset, + download_size); - scheduler.DeferPriorityOperation( - [this, device_addr = image.info.guest_address, download, download_size] { - Core::Memory::Instance()->TryWriteBacking(std::bit_cast(device_addr), download, - download_size); - }); + const u64 serial = ++image_download_serial; + latest_image_downloads[image.info.guest_address] = serial; + return PendingImageDownload{ + .serial = serial, + .guest_address = image.info.guest_address, + .data = download, + .size = download_size, + }; +} + +void TextureCache::WritebackImageMemory(const PendingImageDownload& download) { + const auto it = latest_image_downloads.find(download.guest_address); + if (it == latest_image_downloads.end() || it->second != download.serial) { + return; + } + if (!Core::Memory::Instance()->TryWriteBacking(std::bit_cast(download.guest_address), + download.data, download.size)) { + return; + } + latest_image_downloads.erase(it); +} + +void TextureCache::DownloadImageMemory(ImageId image_id) { + const auto download = ScheduleImageDownload(image_id); + if (!download) { + return; + } + scheduler.DeferPriorityOperation([this, download = *download] { + std::scoped_lock lock{mutex}; + WritebackImageMemory(download); + }); +} + +void TextureCache::SynchronizeGuestMemory(VAddr address, size_t size, + const Image* excluded_image) { + if (!readback_linear_images) { + return; + } + + boost::container::small_vector downloads; + ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { + if (&image == excluded_image) { + return; + } + if (auto download = ScheduleImageDownload(image_id)) { + downloads.push_back(*download); + download_images.erase(image_id); + } + }); + if (downloads.empty()) { + return; + } + + // The requested alias may be uploaded from guest memory immediately after this call. Wait for + // overlapping linear images to reach the download buffer before exposing their contents. + scheduler.Finish(); + for (const auto& download : downloads) { + WritebackImageMemory(download); + } } void TextureCache::MarkAsMaybeDirty(ImageId image_id, Image& image) { @@ -557,6 +611,13 @@ ImageId TextureCache::FindImage(ImageDesc& desc, bool exact_fmt) { image_id = cache_id; } + // A different representation may be uploaded from guest memory while an overlapping linear + // image still contains newer GPU-written data. Resolve pending writes before touching the + // overlap set. + if (!image_id) { + SynchronizeGuestMemory(info.guest_address, info.guest_size); + } + // Try to resolve overlaps (if any) int view_mip{-1}; int view_slice{-1}; @@ -721,6 +782,10 @@ void TextureCache::RefreshImage(Image& image) { return; } + // RefreshImage reuploads from guest memory. Make overlapping GPU-written linear aliases + // visible before obtaining the upload source. + SynchronizeGuestMemory(image.info.guest_address, image.info.guest_size, &image); + RENDERER_TRACE; TRACE_HINT(fmt::format("{:x}:{:x}", image.info.guest_address, image.info.guest_size)); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 34a8472c4..59366573e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -271,9 +272,25 @@ private: /// Gets or creates a null image for a particular format. ImageId GetNullImage(vk::Format format); - /// Copies image memory back to CPU. + struct PendingImageDownload { + u64 serial; + VAddr guest_address; + u8* data; + u32 size; + }; + + /// Schedules a copy of linear image memory into the download buffer. + [[nodiscard]] std::optional ScheduleImageDownload(ImageId image_id); + + /// Copies a completed image download into guest memory unless a newer download superseded it. + void WritebackImageMemory(const PendingImageDownload& download); + + /// Copies image memory back to CPU asynchronously. void DownloadImageMemory(ImageId image_id); + /// Makes GPU-written linear aliases visible before reuploading overlapping guest memory. + void SynchronizeGuestMemory(VAddr address, size_t size, const Image* excluded_image = nullptr); + /// Thread function for copying downloaded images out to CPU memory. void DownloadedImagesThread(const std::stop_token& token); @@ -323,6 +340,8 @@ private: tsl::robin_map samplers; tsl::robin_map null_images; std::unordered_set download_images; + tsl::robin_map latest_image_downloads; + u64 image_download_serial{}; u64 total_used_memory = 0; u64 trigger_gc_memory = 0; u64 pressure_gc_memory = 0;