diff --git a/.gitmodules b/.gitmodules index 38aed89a0..43ba2a7c1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -108,7 +108,7 @@ branch = dist [submodule "externals/MoltenVK"] path = externals/MoltenVK - url = https://github.com/KhronosGroup/MoltenVK.git + url = https://github.com/shadPS4-emu/ext-MoltenVK.git shallow = true [submodule "externals/json"] path = externals/json diff --git a/documents/building-windows.md b/documents/building-windows.md index d1f8a6895..88c5b6830 100644 --- a/documents/building-windows.md +++ b/documents/building-windows.md @@ -41,10 +41,6 @@ Go through the Git for Windows installation as normal Your shadps4.exe will be in `C:\path\to\source\Build\x64-Clang-Release\` -To automatically populate the necessary files to run shadPS4.exe, run in a command prompt or terminal: -`C:\Qt\\msvc2022_64\bin\windeployqt6.exe "C:\path\to\shadps4.exe"` -(Change Qt path if you've installed it to non-default path) - ## Option 2: MSYS2/MinGW > [!IMPORTANT] @@ -72,7 +68,6 @@ ARM64-based computers, follow: 1. Open "MSYS2 CLANGARM64" from your new applications 2. Run `pacman -Syu`, let it complete; 3. Run `pacman -S --needed git mingw-w64-clang-aarch64-binutils mingw-w64-clang-aarch64-clang mingw-w64-clang-aarch64-rapidjson mingw-w64-clang-aarch64-cmake mingw-w64-clang-aarch64-ninja mingw-w64-clang-aarch64-ffmpeg` - 1. Optional (Qt only): run `pacman -S --needed mingw-w64-clang-aarch64-qt6-base mingw-w64-clang-aarch64-qt6-tools mingw-w64-clang-aarch64-qt6-multimedia` 4. Run `git clone --depth 1 --recursive https://github.com/shadps4-emu/shadPS4` 5. Run `cd shadPS4` 6. Run `cmake -S . -B build -DCMAKE_C_COMPILER="clang.exe" -DCMAKE_CXX_COMPILER="clang++.exe" -DCMAKE_CXX_FLAGS="-O2 -march=native"` diff --git a/externals/MoltenVK b/externals/MoltenVK index b23d42534..f168dec05 160000 --- a/externals/MoltenVK +++ b/externals/MoltenVK @@ -1 +1 @@ -Subproject commit b23d42534622cd9926fe526fec1b7f8795a2853c +Subproject commit f168dec05998ab0ca09a400bab6831a95c0bdb2e diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index 04c473f1b..cb18bc190 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -889,7 +889,7 @@ bool BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size, }); TouchBuffer(buffer); } - if (is_texel_buffer) { + if (is_texel_buffer && !is_written) { return SynchronizeBufferFromImage(buffer, device_addr, size); } return false; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 5cd37f335..002ec132f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -355,13 +355,12 @@ bool PipelineCache::RefreshGraphicsKey() { } // Fill color target information - key.color_buffers[cb] = Shader::PsColorBuffer{ - .data_format = col_buf.GetDataFmt(), - .num_format = col_buf.GetNumberFmt(), - .num_conversion = col_buf.GetNumberConversion(), - .export_format = regs.color_export_format.GetFormat(cb), - .swizzle = col_buf.Swizzle(), - }; + auto& color_buffer = key.color_buffers[cb]; + color_buffer.data_format = col_buf.GetDataFmt(); + color_buffer.num_format = col_buf.GetNumberFmt(); + color_buffer.num_conversion = col_buf.GetNumberConversion(); + color_buffer.export_format = regs.color_export_format.GetFormat(cb); + color_buffer.swizzle = col_buf.Swizzle(); } // Compile and bind shader stages @@ -379,7 +378,7 @@ bool PipelineCache::RefreshGraphicsKey() { continue; } if ((key.mrt_mask & (1u << cb)) == 0) { - key.color_buffers[cb] = {}; + std::memset(&key.color_buffers[cb], 0, sizeof(Shader::PsColorBuffer)); continue; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a47c523e1..37b8051e8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -380,7 +380,8 @@ void Rasterizer::OnSubmit() { } bool Rasterizer::BindResources(const Pipeline* pipeline) { - if (IsComputeImageCopy(pipeline) || IsComputeMetaClear(pipeline)) { + if (IsComputeImageCopy(pipeline) || IsComputeMetaClear(pipeline) || + IsComputeImageClear(pipeline)) { return false; } @@ -520,6 +521,66 @@ bool Rasterizer::IsComputeImageCopy(const Pipeline* pipeline) { return true; } +bool Rasterizer::IsComputeImageClear(const Pipeline* pipeline) { + if (!pipeline->IsCompute()) { + return false; + } + + // Ensure shader only has 2 bound buffers + const auto& cs_pgm = liverpool->GetCsRegs(); + const auto& info = pipeline->GetStage(Shader::LogicalStage::Compute); + if (cs_pgm.num_thread_x.full != 64 || info.buffers.size() != 2 || !info.images.empty()) { + return false; + } + + // From those 2 buffers, first must hold the clear vector and second the image being cleared + const auto& desc0 = info.buffers[0]; + const auto& desc1 = info.buffers[1]; + if (desc0.is_formatted || !desc1.is_formatted || desc0.is_written || !desc1.is_written) { + return false; + } + + // First buffer must have size of vec4 and second the size of a single layer + const AmdGpu::Buffer buf0 = desc0.GetSharp(info); + const AmdGpu::Buffer buf1 = desc1.GetSharp(info); + const u32 buf1_bpp = AmdGpu::NumBitsPerBlock(buf1.GetDataFmt()); + if (buf0.GetSize() != 16 || (cs_pgm.dim_x * 128ULL * (buf1_bpp / 8)) != buf1.GetSize()) { + return false; + } + + // Find image the buffer alias + const auto image1_id = + texture_cache.FindImageFromRange(buf1.base_address, buf1.GetSize(), false); + if (!image1_id) { + return false; + } + + // Image clear must be valid + VideoCore::Image& image1 = texture_cache.GetImage(image1_id); + if (image1.info.guest_size != buf1.GetSize() || image1.info.num_bits != buf1_bpp || + image1.info.props.is_depth) { + return false; + } + + // Perform image clear + const float* values = reinterpret_cast(buf0.base_address); + const vk::ClearValue clear = { + .color = {.float32 = std::array{values[0], values[1], values[2], values[3]}}, + }; + const VideoCore::SubresourceRange range = { + .base = + { + .level = 0, + .layer = 0, + }, + .extent = image1.info.resources, + }; + image1.Clear(clear, range); + image1.flags |= VideoCore::ImageFlagBits::GpuModified; + image1.flags &= ~VideoCore::ImageFlagBits::Dirty; + return true; +} + void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Bindings& binding, Shader::PushData& push_data) { buffer_bindings.clear(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 524a8f06d..96a3c95e8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -112,6 +112,7 @@ private: bool IsComputeMetaClear(const Pipeline* pipeline); bool IsComputeImageCopy(const Pipeline* pipeline); + bool IsComputeImageClear(const Pipeline* pipeline); private: friend class VideoCore::BufferCache;