diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index d80d2db18..7bf08a3c7 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "video_core/buffer_cache/buffer.h" +#include "video_core/host_shaders/tiling_comp.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" @@ -10,8 +11,6 @@ #include "video_core/texture_cache/image_view.h" #include "video_core/texture_cache/tile_manager.h" -#include "video_core/host_shaders/tiling_comp.h" - #include #include @@ -194,6 +193,20 @@ TileManager::Result TileManager::DetileImage(vk::Buffer in_buffer, u32 in_offset scheduler.EndRendering(); const auto cmdbuf = scheduler.CommandBuffer(); + + const vk::BufferMemoryBarrier pre_dispatch_barrier{ + .srcAccessMask = vk::AccessFlagBits::eHostWrite | vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = in_buffer, + .offset = in_offset, + .size = info.guest_size, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eHost | vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eComputeShader, {}, {}, pre_dispatch_barrier, + {}); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, GetTilingPipeline(info, false)); const vk::DescriptorBufferInfo tiled_buffer_info{ @@ -238,6 +251,19 @@ TileManager::Result TileManager::DetileImage(vk::Buffer in_buffer, u32 in_offset const auto dim_x = (info.guest_size / (info.num_bits / 8)) / 64; cmdbuf.dispatch(dim_x, 1, 1); + + const vk::BufferMemoryBarrier post_dispatch_barrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = out_buffer, + .offset = 0, + .size = info.guest_size, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer, {}, {}, post_dispatch_barrier, {}); + return {out_buffer, 0}; } @@ -249,6 +275,21 @@ void TileManager::TileImage(Image& in_image, std::span buff copy.bufferOffset += out_offset; } in_image.Download(buffer_copies, out_buffer, out_offset, copy_size); + + const auto cmdbuf = scheduler.CommandBuffer(); + const vk::BufferMemoryBarrier linear_post_barrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eHostRead, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = out_buffer, + .offset = out_offset, + .size = info.guest_size, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer | + vk::PipelineStageFlagBits::eHost, + {}, {}, linear_post_barrier, {}); return; } @@ -276,8 +317,23 @@ void TileManager::TileImage(Image& in_image, std::span buff vmaDestroyBuffer(instance.GetAllocator(), temp_buffer, temp_allocation); }); - const auto cmdbuf = scheduler.CommandBuffer(); + scheduler.EndRendering(); + in_image.Download(buffer_copies, temp_buffer, 0, copy_size); + const auto cmdbuf = scheduler.CommandBuffer(); + + const vk::BufferMemoryBarrier pre_dispatch_barrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = temp_buffer, + .offset = 0, + .size = info.guest_size, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eComputeShader, {}, {}, pre_dispatch_barrier, + {}); cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, GetTilingPipeline(info, true)); @@ -323,6 +379,19 @@ void TileManager::TileImage(Image& in_image, std::span buff const auto dim_x = (info.guest_size / (info.num_bits / 8)) / 64; cmdbuf.dispatch(dim_x, 1, 1); + + const vk::BufferMemoryBarrier post_dispatch_barrier{ + .srcAccessMask = vk::AccessFlagBits::eShaderWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eHostRead, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = out_buffer, + .offset = out_offset, + .size = info.guest_size, + }; + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eHost, + {}, {}, post_dispatch_barrier, {}); } } // namespace VideoCore