From f336096b12cf853b8eb8137cb67646fee7cfde17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A1=D0=B0=D0=BD=D1=8C=D0=BA=D0=B0=20=D0=A7=D0=B5=D1=82?= =?UTF-8?q?=D0=B2=D1=91=D1=80=D1=82=D1=8B=D0=B9?= Date: Fri, 13 Mar 2026 17:38:22 +0700 Subject: [PATCH 01/11] PSF file format: close file after encode() (#4122) --- src/core/file_format/psf.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/file_format/psf.cpp b/src/core/file_format/psf.cpp index e647059f0..c5be7410a 100644 --- a/src/core/file_format/psf.cpp +++ b/src/core/file_format/psf.cpp @@ -113,6 +113,7 @@ bool PSF::Encode(const std::filesystem::path& filepath) const { LOG_ERROR(Core, "Failed to write PSF file. Written {} Expected {}", written, psf_buffer.size()); } + file.Close(); return written == psf_buffer.size(); } From 844cfe51850e97bd3b3aee5ad99a390394b356f3 Mon Sep 17 00:00:00 2001 From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com> Date: Sat, 14 Mar 2026 10:12:26 -0500 Subject: [PATCH 02/11] Lib.Ssl2: Stub data for sceSslGetCaCerts (#4127) * Test * More robust logic for storing and freeing dummy data Anything heap allocated is invalidated when the function returns. Use malloc to allocate the string instead, and make sure to free those allocations in sceSslFreeCaCerts. --- src/core/libraries/network/ssl2.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/core/libraries/network/ssl2.cpp b/src/core/libraries/network/ssl2.cpp index 0b408d094..3a7fd71e5 100644 --- a/src/core/libraries/network/ssl2.cpp +++ b/src/core/libraries/network/ssl2.cpp @@ -114,7 +114,13 @@ int PS4_SYSV_ABI sceSslFreeCaCerts(s32 ssl_ctx_id, OrbisSslCaCerts* certs) { if (certs == nullptr) { return ORBIS_SSL_ERROR_INVALID_ARGUMENT; } - delete (certs->certs); + if (certs->certs != nullptr) { + for (s32 data = 0; data < certs->num; data++) { + free(certs->certs[data].ptr); + } + delete (certs->certs); + } + // delete (certs->pool); return ORBIS_OK; } @@ -139,7 +145,12 @@ int PS4_SYSV_ABI sceSslGetCaCerts(s32 ssl_ctx_id, OrbisSslCaCerts* certs) { if (certs == nullptr) { return ORBIS_SSL_ERROR_INVALID_ARGUMENT; } - certs->certs = new OrbisSslData{nullptr, 0}; + // Allocate a buffer to store dummy data in. + const char* dummy_data = "dummy"; + u64 dummy_length = strlen(dummy_data) + 1; + char* data = static_cast(malloc(dummy_length)); + strncpy(data, dummy_data, dummy_length); + certs->certs = new OrbisSslData{data, dummy_length}; certs->num = 1; certs->pool = nullptr; return ORBIS_OK; From 30ff9cf05045124d47843512568fdd7cfbbbff3d Mon Sep 17 00:00:00 2001 From: shinra-electric <50119606+shinra-electric@users.noreply.github.com> Date: Sat, 14 Mar 2026 17:15:29 +0000 Subject: [PATCH 03/11] CI: Update actions/cache due to Node 20 deprecation (#4128) * Upload-artifact v4 --> v6 * Download-artifact v5 --> v8 * Checkout v5 --> v6 * cache v4 --> v5 --- .github/workflows/build.yml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ffe7c22fb..b54698e3a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -26,14 +26,14 @@ jobs: runs-on: ubuntu-24.04 continue-on-error: true steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: fsfe/reuse-action@v5 clang-format: runs-on: ubuntu-24.04 continue-on-error: true steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Install @@ -54,7 +54,7 @@ jobs: shorthash: ${{ steps.vars.outputs.shorthash }} fullhash: ${{ steps.vars.outputs.fullhash }} steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Get date and git hash id: vars run: | @@ -69,12 +69,12 @@ jobs: runs-on: windows-2025 needs: get-info steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 with: submodules: recursive - name: Cache CMake Configuration - uses: actions/cache@v4 + uses: actions/cache@v5 env: cache-name: ${{ runner.os }}-sdl-ninja-cache-cmake-configuration with: @@ -99,7 +99,7 @@ jobs: run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS - name: Upload Windows SDL artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: shadps4-win64-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }} path: ${{github.workspace}}/build/shadPS4.exe @@ -108,7 +108,7 @@ jobs: runs-on: macos-15 needs: get-info steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 with: submodules: recursive @@ -118,7 +118,7 @@ jobs: xcode-version: latest - name: Cache CMake Configuration - uses: actions/cache@v4 + uses: actions/cache@v5 env: cache-name: ${{ runner.os }}-sdl-cache-cmake-configuration with: @@ -150,7 +150,7 @@ jobs: mv ${{github.workspace}}/build/shadps4 upload mv ${{github.workspace}}/build/MoltenVK_icd.json upload mv ${{github.workspace}}/build/libMoltenVK.dylib upload - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v6 with: name: shadps4-macos-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }} path: upload/ @@ -159,7 +159,7 @@ jobs: runs-on: ubuntu-24.04 needs: get-info steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 with: submodules: recursive @@ -172,7 +172,7 @@ jobs: run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 mold build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev - name: Cache CMake Configuration - uses: actions/cache@v4 + uses: actions/cache@v5 env: cache-name: ${{ runner.os }}-sdl-cache-cmake-configuration with: @@ -200,7 +200,7 @@ jobs: run: | ls -la ${{ github.workspace }}/build/shadps4 - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v6 with: name: shadps4-ubuntu64-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }} path: ${{ github.workspace }}/build/shadps4 @@ -211,7 +211,7 @@ jobs: - name: Package and Upload Linux SDL artifact run: | tar cf shadps4-linux-sdl.tar.gz -C ${{github.workspace}}/build shadps4 - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v6 with: name: shadps4-linux-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }} path: Shadps4-sdl.AppImage @@ -220,7 +220,7 @@ jobs: runs-on: ubuntu-24.04 needs: get-info steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 with: submodules: recursive @@ -228,7 +228,7 @@ jobs: run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 mold build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev - name: Cache CMake Configuration - uses: actions/cache@v4 + uses: actions/cache@v5 env: cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-configuration with: @@ -258,7 +258,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download all artifacts - uses: actions/download-artifact@v5 + uses: actions/download-artifact@v8 with: path: ./artifacts From 4d62930075925f5ff7c1d0d9db015e2b0b588397 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Tue, 17 Mar 2026 09:22:22 +0200 Subject: [PATCH 04/11] tagged 0.15.0 release --- CMakeLists.txt | 8 ++++---- dist/net.shadps4.shadPS4.metainfo.xml | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dd02a6378..b75592a4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -202,13 +202,13 @@ execute_process( # Set Version set(EMULATOR_VERSION_MAJOR "0") -set(EMULATOR_VERSION_MINOR "14") -set(EMULATOR_VERSION_PATCH "1") +set(EMULATOR_VERSION_MINOR "15") +set(EMULATOR_VERSION_PATCH "0") set_source_files_properties(src/shadps4.rc PROPERTIES COMPILE_DEFINITIONS "EMULATOR_VERSION_MAJOR=${EMULATOR_VERSION_MAJOR};EMULATOR_VERSION_MINOR=${EMULATOR_VERSION_MINOR};EMULATOR_VERSION_PATCH=${EMULATOR_VERSION_PATCH}") -set(APP_VERSION "${EMULATOR_VERSION_MAJOR}.${EMULATOR_VERSION_MINOR}.${EMULATOR_VERSION_PATCH} WIP") -set(APP_IS_RELEASE false) +set(APP_VERSION "${EMULATOR_VERSION_MAJOR}.${EMULATOR_VERSION_MINOR}.${EMULATOR_VERSION_PATCH}") +set(APP_IS_RELEASE true) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/common/scm_rev.cpp.in" "${CMAKE_CURRENT_BINARY_DIR}/src/common/scm_rev.cpp" @ONLY) message("-- end git things, remote: ${GIT_REMOTE_NAME}, branch: ${GIT_BRANCH}, link: ${GIT_REMOTE_URL}") diff --git a/dist/net.shadps4.shadPS4.metainfo.xml b/dist/net.shadps4.shadPS4.metainfo.xml index 210ca1c5e..8a7fa852b 100644 --- a/dist/net.shadps4.shadPS4.metainfo.xml +++ b/dist/net.shadps4.shadPS4.metainfo.xml @@ -38,7 +38,10 @@ Game - + + https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.15.0 + + https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.14.0 From 3a3ef5b05f3dc5276857e3e0bcadb03f3f8c89f5 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Tue, 17 Mar 2026 10:26:34 +0200 Subject: [PATCH 05/11] started 0.15.1 WIP --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b75592a4c..ee6f37802 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -203,12 +203,12 @@ execute_process( # Set Version set(EMULATOR_VERSION_MAJOR "0") set(EMULATOR_VERSION_MINOR "15") -set(EMULATOR_VERSION_PATCH "0") +set(EMULATOR_VERSION_PATCH "1") set_source_files_properties(src/shadps4.rc PROPERTIES COMPILE_DEFINITIONS "EMULATOR_VERSION_MAJOR=${EMULATOR_VERSION_MAJOR};EMULATOR_VERSION_MINOR=${EMULATOR_VERSION_MINOR};EMULATOR_VERSION_PATCH=${EMULATOR_VERSION_PATCH}") -set(APP_VERSION "${EMULATOR_VERSION_MAJOR}.${EMULATOR_VERSION_MINOR}.${EMULATOR_VERSION_PATCH}") -set(APP_IS_RELEASE true) +set(APP_VERSION "${EMULATOR_VERSION_MAJOR}.${EMULATOR_VERSION_MINOR}.${EMULATOR_VERSION_PATCH} WIP") +set(APP_IS_RELEASE false) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/common/scm_rev.cpp.in" "${CMAKE_CURRENT_BINARY_DIR}/src/common/scm_rev.cpp" @ONLY) message("-- end git things, remote: ${GIT_REMOTE_NAME}, branch: ${GIT_BRANCH}, link: ${GIT_REMOTE_URL}") From e6b743032dc7f48ba3e839cc4a726b5c2cf3d705 Mon Sep 17 00:00:00 2001 From: kalaposfos13 <153381648+kalaposfos13@users.noreply.github.com> Date: Tue, 17 Mar 2026 14:58:31 +0100 Subject: [PATCH 06/11] Don't print unresolved libc and libSceFios2 stubs (#4137) --- src/core/linker.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/core/linker.cpp b/src/core/linker.cpp index 6640c7204..3f410e926 100644 --- a/src/core/linker.cpp +++ b/src/core/linker.cpp @@ -361,8 +361,10 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul return_info->virtual_address = AeroLib::GetStub(sr.name.c_str()); return_info->name = "Unknown !!!"; } - LOG_WARNING(Core_Linker, "Linker: Stub resolved {} as {} (lib: {}, mod: {})", sr.name, - return_info->name, library->name, module->name); + if (library->name != "libc" && library->name != "libSceFios2") { + LOG_WARNING(Core_Linker, "Linker: Stub resolved {} as {} (lib: {}, mod: {})", sr.name, + return_info->name, library->name, module->name); + } return false; } From 88c34372402ed32f5102c2504604800df8d0139f Mon Sep 17 00:00:00 2001 From: shinra-electric <50119606+shinra-electric@users.noreply.github.com> Date: Tue, 17 Mar 2026 16:36:59 +0000 Subject: [PATCH 07/11] Bump ccache-action (#4138) --- .github/workflows/build.yml | 92 ++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b54698e3a..3d77c5800 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -46,7 +46,7 @@ jobs: env: COMMIT_RANGE: ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }} run: ./.ci/clang-format.sh - + get-info: runs-on: ubuntu-24.04 outputs: @@ -78,14 +78,14 @@ jobs: env: cache-name: ${{ runner.os }}-sdl-ninja-cache-cmake-configuration with: - path: | + path: | ${{github.workspace}}/build key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} restore-keys: | ${{ env.cache-name }}- - name: Cache CMake Build - uses: hendrikmuhs/ccache-action@v1.2.19 + uses: hendrikmuhs/ccache-action@v1.2.21 env: cache-name: ${{ runner.os }}-sdl-cache-cmake-build with: @@ -119,17 +119,17 @@ jobs: - name: Cache CMake Configuration uses: actions/cache@v5 - env: + env: cache-name: ${{ runner.os }}-sdl-cache-cmake-configuration - with: - path: | - ${{github.workspace}}/build - key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - restore-keys: | - ${{ env.cache-name }}- + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- - name: Cache CMake Build - uses: hendrikmuhs/ccache-action@v1.2.19 + uses: hendrikmuhs/ccache-action@v1.2.21 env: cache-name: ${{runner.os}}-sdl-cache-cmake-build with: @@ -173,17 +173,17 @@ jobs: - name: Cache CMake Configuration uses: actions/cache@v5 - env: + env: cache-name: ${{ runner.os }}-sdl-cache-cmake-configuration - with: - path: | - ${{github.workspace}}/build - key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - restore-keys: | - ${{ env.cache-name }}- + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- - name: Cache CMake Build - uses: hendrikmuhs/ccache-action@v1.2.19 + uses: hendrikmuhs/ccache-action@v1.2.21 env: cache-name: ${{ runner.os }}-sdl-cache-cmake-build with: @@ -195,11 +195,11 @@ jobs: - name: Build run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc) - - - name: Package and Upload Linux(ubuntu64) SDL artifact + + - name: Package and Upload Linux(ubuntu64) SDL artifact run: | ls -la ${{ github.workspace }}/build/shadps4 - + - uses: actions/upload-artifact@v6 with: name: shadps4-ubuntu64-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }} @@ -207,7 +207,7 @@ jobs: - name: Run AppImage packaging script run: ./.github/linux-appimage-sdl.sh - + - name: Package and Upload Linux SDL artifact run: | tar cf shadps4-linux-sdl.tar.gz -C ${{github.workspace}}/build shadps4 @@ -229,17 +229,17 @@ jobs: - name: Cache CMake Configuration uses: actions/cache@v5 - env: + env: cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-configuration - with: - path: | - ${{github.workspace}}/build - key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} - restore-keys: | - ${{ env.cache-name }}- + with: + path: | + ${{github.workspace}}/build + key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }} + restore-keys: | + ${{ env.cache-name }}- - name: Cache CMake Build - uses: hendrikmuhs/ccache-action@v1.2.19 + uses: hendrikmuhs/ccache-action@v1.2.21 env: cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-build with: @@ -266,7 +266,7 @@ jobs: run: | chmod -R a+x ./artifacts/shadps4-linux-sdl-* chmod -R a+x ./artifacts/shadps4-macos-sdl-* - + - name: Compress individual directories (without parent directory) run: | cd ./artifacts @@ -277,7 +277,7 @@ jobs: (cd "$dir_name" && zip -r "../${dir_name}.zip" .) fi done - + - name: Get latest release information id: get_latest_release env: @@ -351,52 +351,52 @@ jobs: upload_url="https://uploads.github.com/repos/$REPO/releases/$release_id/assets?name=$filename" curl -X POST -H "Authorization: token $GITHUB_TOKEN" -H "Content-Type: application/octet-stream" --data-binary @"$file" "$upload_url" done - + - name: Get current pre-release information env: GITHUB_TOKEN: ${{ secrets.SHADPS4_TOKEN_REPO }} run: | api_url="https://api.github.com/repos/${{ github.repository }}/releases" - + # Get all releases (sorted by date) releases=$(curl -H "Authorization: token $GITHUB_TOKEN" "$api_url") - + # Capture the most recent pre-release (assuming the first one is the latest) current_release=$(echo "$releases" | jq -c '.[] | select(.prerelease == true) | .published_at' | sort -r | head -n 1) - + # Remove extra quotes from captured date current_release=$(echo $current_release | tr -d '"') - + # Export the current published_at to be available for the next step echo "CURRENT_PUBLISHED_AT=$current_release" >> $GITHUB_ENV - + - name: Delete old pre-releases and tags env: GITHUB_TOKEN: ${{ secrets.SHADPS4_TOKEN_REPO }} run: | api_url="https://api.github.com/repos/${{ github.repository }}/releases" - + # Get current pre-releases releases=$(curl -H "Authorization: token $GITHUB_TOKEN" "$api_url") - + # Remove extra quotes from captured date CURRENT_PUBLISHED_AT=$(echo $CURRENT_PUBLISHED_AT | tr -d '"') - + # Convert CURRENT_PUBLISHED_AT para timestamp Unix current_published_ts=$(date -d "$CURRENT_PUBLISHED_AT" +%s) - + # Identify pre-releases echo "$releases" | jq -c '.[] | select(.prerelease == true)' | while read -r release; do release_date=$(echo "$release" | jq -r '.published_at') release_id=$(echo "$release" | jq -r '.id') release_tag=$(echo "$release" | jq -r '.tag_name') - + # Remove extra quotes from captured date release_date=$(echo $release_date | tr -d '"') - + # Convert release_date para timestamp Unix release_date_ts=$(date -d "$release_date" +%s) - + # Compare timestamps and delete old pre-releases if [[ "$release_date_ts" -lt "$current_published_ts" ]]; then echo "Deleting old pre-release: $release_id from $release_date with tag: $release_tag" From 1bb152d9769a454652e2658b2e35c429c96127ee Mon Sep 17 00:00:00 2001 From: baggins183 Date: Tue, 17 Mar 2026 12:47:19 -0700 Subject: [PATCH 08/11] IMAGE_STORE_MIP fallback (#4075) * fallback for IMAGE_STORE_MIP when not natively supported * Lod should be treated as absolute, independent of sharp's base_level (judging by other implemented instructions) * fix descriptor set layouts * dumb error * force fallback for testing * treat Lod as relative to base_level * optimization when lod index is constant --- .../backend/spirv/emit_spirv_image.cpp | 26 +++++-- .../backend/spirv/spirv_emit_context.cpp | 16 ++++- .../backend/spirv/spirv_emit_context.h | 2 +- src/shader_recompiler/ir/passes/ir_passes.h | 2 +- .../ir/passes/resource_tracking_pass.cpp | 59 +++++++++++++-- src/shader_recompiler/recompiler.cpp | 2 +- src/shader_recompiler/resource.h | 11 +++ src/shader_recompiler/specialization.h | 5 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 6 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 6 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 71 ++++++++++++++----- 12 files changed, 168 insertions(+), 40 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index e2a969b61..0b05dcef4 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp @@ -220,20 +220,33 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms) { const auto& texture = ctx.images[handle & 0xFFFF]; - const Id image = ctx.OpLoad(texture.image_type, texture.id); const Id color_type = texture.data_types->Get(4); ImageOperands operands; operands.Add(spv::ImageOperandsMask::Sample, ms); Id texel; if (!texture.is_storage) { + const Id image = ctx.OpLoad(texture.image_type, texture.id); operands.Add(spv::ImageOperandsMask::Lod, lod); texel = ctx.OpImageFetch(color_type, image, coords, operands.mask, operands.operands); } else { + Id image_ptr = texture.id; if (ctx.profile.supports_image_load_store_lod) { operands.Add(spv::ImageOperandsMask::Lod, lod); } else if (Sirit::ValidId(lod)) { - LOG_WARNING(Render, "Image read with LOD not supported by driver"); +#if 1 + // It's confusing what interactions will cause this code path so leave it as + // unreachable until a case is found. + // Normally IMAGE_LOAD_MIP should translate -> OpImageFetch + UNREACHABLE_MSG("Unsupported ImageRead with Lod"); +#else + LOG_WARNING(Render, "Fallback for ImageRead with LOD"); + ASSERT(texture.mip_fallback_mode == MipStorageFallbackMode::DynamicIndex); + const Id single_image_ptr_type = + ctx.TypePointer(spv::StorageClass::UniformConstant, texture.image_type); + image_ptr = ctx.OpAccessChain(single_image_ptr_type, image_ptr, std::array{lod}); +#endif } + const Id image = ctx.OpLoad(texture.image_type, image_ptr); texel = ctx.OpImageRead(color_type, image, coords, operands.mask, operands.operands); } return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texel) : texel; @@ -242,15 +255,20 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms, Id color) { const auto& texture = ctx.images[handle & 0xFFFF]; - const Id image = ctx.OpLoad(texture.image_type, texture.id); + Id image_ptr = texture.id; const Id color_type = texture.data_types->Get(4); ImageOperands operands; operands.Add(spv::ImageOperandsMask::Sample, ms); if (ctx.profile.supports_image_load_store_lod) { operands.Add(spv::ImageOperandsMask::Lod, lod); } else if (Sirit::ValidId(lod)) { - LOG_WARNING(Render, "Image write with LOD not supported by driver"); + LOG_WARNING(Render, "Fallback for ImageWrite with LOD"); + ASSERT(texture.mip_fallback_mode == MipStorageFallbackMode::DynamicIndex); + const Id single_image_ptr_type = + ctx.TypePointer(spv::StorageClass::UniformConstant, texture.image_type); + image_ptr = ctx.OpAccessChain(single_image_ptr_type, image_ptr, std::array{lod}); } + const Id image = ctx.OpLoad(texture.image_type, image_ptr); const Id texel = texture.is_integer ? ctx.OpBitcast(color_type, color) : color; ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 261155ab5..c0e469964 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -961,23 +961,33 @@ void EmitContext::DefineImagesAndSamplers() { const auto nfmt = sharp.GetNumberFmt(); const bool is_integer = AmdGpu::IsInteger(nfmt); const bool is_storage = image_desc.is_written; + const MipStorageFallbackMode mip_fallback_mode = image_desc.mip_fallback_mode; const VectorIds& data_types = GetAttributeType(*this, nfmt); const Id sampled_type = data_types[1]; const Id image_type{ImageType(*this, image_desc, sampled_type)}; - const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; + + const u32 num_bindings = image_desc.NumBindings(info); + Id pointee_type = image_type; + if (mip_fallback_mode == MipStorageFallbackMode::DynamicIndex) { + pointee_type = TypeArray(pointee_type, ConstU32(num_bindings)); + } + + const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, pointee_type)}; const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; - Decorate(id, spv::Decoration::Binding, binding.unified++); + Decorate(id, spv::Decoration::Binding, binding.unified); + binding.unified += num_bindings; Decorate(id, spv::Decoration::DescriptorSet, 0U); + // TODO better naming for resources (flattened sharp_idx is not informative) Name(id, fmt::format("{}_{}{}", stage, "img", image_desc.sharp_idx)); images.push_back({ .data_types = &data_types, .id = id, .sampled_type = is_storage ? sampled_type : TypeSampledImage(image_type), - .pointer_type = pointer_type, .image_type = image_type, .view_type = sharp.GetViewType(image_desc.is_array), .is_integer = is_integer, .is_storage = is_storage, + .mip_fallback_mode = mip_fallback_mode, }); interfaces.push_back(id); } diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 9bb2b7d7a..a9c6f0968 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -293,11 +293,11 @@ public: const VectorIds* data_types; Id id; Id sampled_type; - Id pointer_type; Id image_type; AmdGpu::ImageType view_type; bool is_integer = false; bool is_storage = false; + MipStorageFallbackMode mip_fallback_mode{}; }; enum class PointerType : u32 { diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h index f103b6736..1b14a1c6b 100644 --- a/src/shader_recompiler/ir/passes/ir_passes.h +++ b/src/shader_recompiler/ir/passes/ir_passes.h @@ -19,7 +19,7 @@ void DeadCodeEliminationPass(IR::Program& program); void ConstantPropagationPass(IR::BlockList& program); void FlattenExtendedUserdataPass(IR::Program& program); void ReadLaneEliminationPass(IR::Program& program); -void ResourceTrackingPass(IR::Program& program); +void ResourceTrackingPass(IR::Program& program, const Profile& profile); void CollectShaderInfoPass(IR::Program& program, const Profile& profile); void LowerBufferFormatToRaw(IR::Program& program); void LowerFp64ToFp32(IR::Program& program); diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp index 4c41e94e9..3b7888ab3 100644 --- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp +++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp @@ -9,6 +9,7 @@ #include "shader_recompiler/ir/operand_helper.h" #include "shader_recompiler/ir/program.h" #include "shader_recompiler/ir/reinterpret.h" +#include "shader_recompiler/profile.h" #include "video_core/amdgpu/resource.h" namespace Shader::Optimization { @@ -255,7 +256,9 @@ public: u32 Add(const ImageResource& desc) { const u32 index{Add(image_resources, desc, [&desc](const auto& existing) { - return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array; + return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array && + desc.mip_fallback_mode == existing.mip_fallback_mode && + desc.constant_mip_index == existing.constant_mip_index; })}; auto& image = image_resources[index]; image.is_atomic |= desc.is_atomic; @@ -529,14 +532,21 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& inst.SetArg(0, ir.Imm32(buffer_binding)); } -void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) { +void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors, + const Profile& profile) { // Read image sharp. const auto inst_info = inst.Flags(); const IR::Inst* image_handle = inst.Arg(0).InstRecursive(); const auto tsharp = TrackSharp(image_handle, block, inst_info.pc); const bool is_atomic = IsImageAtomicInstruction(inst); const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic; - const ImageResource image_res = { + const bool is_storage = + inst.GetOpcode() == IR::Opcode::ImageRead || inst.GetOpcode() == IR::Opcode::ImageWrite; + // ImageRead with !is_written gets emitted as OpImageFetch with LOD operand, doesn't + // need fallback (TODO is this 100% true?) + const bool needs_mip_storage_fallback = + inst_info.has_lod && is_written && !profile.supports_image_load_store_lod; + ImageResource image_res = { .sharp_idx = tsharp, .is_depth = bool(inst_info.is_depth), .is_atomic = is_atomic, @@ -544,9 +554,42 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& .is_written = is_written, .is_r128 = bool(inst_info.is_r128), }; + auto image = image_res.GetSharp(info); ASSERT(image.GetType() != AmdGpu::ImageType::Invalid); + if (needs_mip_storage_fallback) { + // If the mip level to IMAGE_(LOAD/STORE)_MIP is a constant, set up ImageResource + // so that we will only bind a single level. + // If index is dynamic, we will bind levels as an array + const auto view_type = image.GetViewType(image_res.is_array); + + IR::Inst* body = inst.Arg(1).InstRecursive(); + const auto lod_arg = [&] -> IR::Value { + switch (view_type) { + case AmdGpu::ImageType::Color1D: // x, [lod] + return body->Arg(1); + case AmdGpu::ImageType::Color1DArray: // x, slice, [lod] + case AmdGpu::ImageType::Color2D: // x, y, [lod] + return body->Arg(2); + case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod] + case AmdGpu::ImageType::Color3D: // x, y, z, [lod] + return body->Arg(3); + case AmdGpu::ImageType::Color2DMsaa: + case AmdGpu::ImageType::Color2DMsaaArray: + default: + UNREACHABLE_MSG("Invalid image type {}", view_type); + } + }(); + + if (lod_arg.IsImmediate()) { + image_res.mip_fallback_mode = MipStorageFallbackMode::ConstantIndex; + image_res.constant_mip_index = lod_arg.U32(); + } else { + image_res.mip_fallback_mode = MipStorageFallbackMode::DynamicIndex; + } + } + // Patch image instruction if image is FMask. if (AmdGpu::IsFmask(image.GetDataFmt())) { ASSERT_MSG(!is_written, "FMask storage instructions are not supported"); @@ -1080,7 +1123,11 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { const auto has_ms = view_type == AmdGpu::ImageType::Color2DMsaa || view_type == AmdGpu::ImageType::Color2DMsaaArray; ASSERT(!inst_info.has_lod || !has_ms); - const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{}; + // If we are binding a single mip level as fallback, drop the argument + const auto lod = + (inst_info.has_lod && image_res.mip_fallback_mode != MipStorageFallbackMode::ConstantIndex) + ? IR::U32{arg} + : IR::U32{}; const auto ms = has_ms ? IR::U32{arg} : IR::U32{}; const auto is_storage = image_res.is_written; @@ -1111,7 +1158,7 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) { } } -void ResourceTrackingPass(IR::Program& program) { +void ResourceTrackingPass(IR::Program& program, const Profile& profile) { // Iterate resource instructions and patch them after finding the sharp. auto& info = program.info; @@ -1122,7 +1169,7 @@ void ResourceTrackingPass(IR::Program& program) { if (IsBufferInstruction(inst)) { PatchBufferSharp(*block, inst, info, descriptors); } else if (IsImageInstruction(inst)) { - PatchImageSharp(*block, inst, info, descriptors); + PatchImageSharp(*block, inst, info, descriptors, profile); } } } diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index f4fa45afc..d6efa2890 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -80,7 +80,7 @@ IR::Program TranslateProgram(const std::span& code, Pools& pools, Inf Shader::Optimization::RingAccessElimination(program, runtime_info); Shader::Optimization::ReadLaneEliminationPass(program); Shader::Optimization::FlattenExtendedUserdataPass(program); - Shader::Optimization::ResourceTrackingPass(program); + Shader::Optimization::ResourceTrackingPass(program, profile); Shader::Optimization::LowerBufferFormatToRaw(program); Shader::Optimization::SharedMemorySimplifyPass(program, profile); Shader::Optimization::SharedMemoryToStoragePass(program, runtime_info, profile); diff --git a/src/shader_recompiler/resource.h b/src/shader_recompiler/resource.h index 5ae3179f6..82a861e2a 100644 --- a/src/shader_recompiler/resource.h +++ b/src/shader_recompiler/resource.h @@ -71,6 +71,8 @@ struct BufferResource { }; using BufferResourceList = boost::container::static_vector; +enum class MipStorageFallbackMode : u32 { None, DynamicIndex, ConstantIndex }; + struct ImageResource { u32 sharp_idx; bool is_depth{}; @@ -78,6 +80,8 @@ struct ImageResource { bool is_array{}; bool is_written{}; bool is_r128{}; + MipStorageFallbackMode mip_fallback_mode{}; + u32 constant_mip_index{}; constexpr AmdGpu::Image GetSharp(const auto& info) const noexcept { AmdGpu::Image image{}; @@ -102,6 +106,13 @@ struct ImageResource { } return image; } + + u32 NumBindings(const auto& info) const { + const AmdGpu::Image tsharp = GetSharp(info); + return (mip_fallback_mode == MipStorageFallbackMode::DynamicIndex) + ? (tsharp.last_level - tsharp.base_level + 1) + : 1; + } }; using ImageResourceList = boost::container::static_vector; diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index 4f6bb44bf..fa14583af 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -52,6 +52,8 @@ struct ImageSpecialization { bool is_srgb = false; AmdGpu::CompMapping dst_select{}; AmdGpu::NumberConversion num_conversion{}; + // FIXME any pipeline cache changes needed? + u32 num_bindings = 0; bool operator==(const ImageSpecialization&) const = default; }; @@ -133,7 +135,7 @@ struct StageSpecialization { } }); ForEachSharp(binding, images, info->images, - [](auto& spec, const auto& desc, AmdGpu::Image sharp) { + [&](auto& spec, const auto& desc, AmdGpu::Image sharp) { spec.type = sharp.GetViewType(desc.is_array); spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); spec.is_storage = desc.is_written; @@ -144,6 +146,7 @@ struct StageSpecialization { spec.is_srgb = sharp.GetNumberFmt() == AmdGpu::NumberFormat::Srgb; } spec.num_conversion = sharp.GetNumberConversion(); + spec.num_bindings = desc.NumBindings(*info); }); ForEachSharp(binding, fmasks, info->fmasks, [](auto& spec, const auto& desc, AmdGpu::Image sharp) { diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index eecd416d1..ba0a3afa2 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -48,13 +48,15 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler, }); } for (const auto& image : info->images) { + const u32 num_bindings = image.NumBindings(*info); bindings.push_back({ - .binding = binding++, + .binding = binding, .descriptorType = image.is_written ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage, - .descriptorCount = 1, + .descriptorCount = num_bindings, .stageFlags = vk::ShaderStageFlagBits::eCompute, }); + binding += num_bindings; } for (const auto& sampler : info->samplers) { bindings.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 242c9b6f2..bc9ef571b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -457,13 +457,15 @@ void GraphicsPipeline::BuildDescSetLayout(bool preloading) { }); } for (const auto& image : stage->images) { + const u32 num_bindings = image.NumBindings(*stage); bindings.push_back({ - .binding = binding++, + .binding = binding, .descriptorType = image.is_written ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage, - .descriptorCount = 1, + .descriptorCount = num_bindings, .stageFlags = stage_bit, }); + binding += num_bindings; } for (const auto& sampler : stage->samplers) { bindings.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1b0af1d17..fdf6b3f2d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -246,7 +246,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32), .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), - .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), + .supports_image_load_store_lod = /*instance_.IsImageLoadStoreLodSupported()*/ false, // TEST .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(), .supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(), // TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed. diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 737c9feed..80af19372 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -662,6 +662,13 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding) { image_bindings.clear(); + const u32 first_image_idx = image_infos.size(); + // For loading/storing to explicit mip levels, when no native instruction support, bind an array + // of descriptors consecutively, 1 for each mip level. The shader can index this with LOD + // operand. + // This array holds the size of each consecutive array with the number of bindings consumed. + // This is currently always 1 for anything other than mip fallback arrays. + boost::container::small_vector image_descriptor_array_sizes; for (const auto& image_desc : stage.images) { const auto tsharp = image_desc.GetSharp(stage); @@ -671,25 +678,43 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin if (tsharp.GetDataFmt() == AmdGpu::DataFormat::FormatInvalid) { image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{}); + image_descriptor_array_sizes.push_back(1); continue; } - auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, - std::tuple{tsharp, image_desc}); - image_id = texture_cache.FindImage(desc); - auto* image = &texture_cache.GetImage(image_id); - if (image->depth_id) { - // If this image has an associated depth image, it's a stencil attachment. - // Redirect the access to the actual depth-stencil buffer. - image_id = image->depth_id; - image = &texture_cache.GetImage(image_id); + const Shader::MipStorageFallbackMode mip_fallback_mode = image_desc.mip_fallback_mode; + const u32 num_bindings = image_desc.NumBindings(stage); + + for (auto i = 0; i < num_bindings; i++) { + auto& [image_id, desc] = image_bindings.emplace_back( + std::piecewise_construct, std::tuple{}, std::tuple{tsharp, image_desc}); + + if (mip_fallback_mode == Shader::MipStorageFallbackMode::ConstantIndex) { + ASSERT(num_bindings == 1); + desc.view_info.range.base.level += image_desc.constant_mip_index; + desc.view_info.range.extent.levels = 1; + } else if (mip_fallback_mode == Shader::MipStorageFallbackMode::DynamicIndex) { + desc.view_info.range.base.level += i; + desc.view_info.range.extent.levels = 1; + } + + image_id = texture_cache.FindImage(desc); + auto* image = &texture_cache.GetImage(image_id); + if (image->depth_id) { + // If this image has an associated depth image, it's a stencil attachment. + // Redirect the access to the actual depth-stencil buffer. + image_id = image->depth_id; + image = &texture_cache.GetImage(image_id); + } + if (image->binding.is_bound) { + // The image is already bound. In case if it is about to be used as storage we + // need to force general layout on it. + image->binding.force_general |= image_desc.is_written; + } + image->binding.is_bound = 1u; } - if (image->binding.is_bound) { - // The image is already bound. In case if it is about to be used as storage we need - // to force general layout on it. - image->binding.force_general |= image_desc.is_written; - } - image->binding.is_bound = 1u; + + image_descriptor_array_sizes.push_back(num_bindings); } // Second pass to re-bind images that were updated after binding @@ -749,16 +774,26 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.backing->state.layout); } + } + u32 image_info_idx = first_image_idx; + u32 image_binding_idx = 0; + for (u32 array_size : image_descriptor_array_sizes) { + const auto& [_, desc] = image_bindings[image_binding_idx]; + const bool is_storage = desc.type == VideoCore::TextureCache::BindingType::Storage; set_writes.push_back({ .dstSet = VK_NULL_HANDLE, - .dstBinding = binding.unified++, + .dstBinding = binding.unified, .dstArrayElement = 0, - .descriptorCount = 1, + .descriptorCount = array_size, .descriptorType = is_storage ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage, - .pImageInfo = &image_infos.back(), + .pImageInfo = &image_infos[image_info_idx], }); + + image_info_idx += array_size; + image_binding_idx += array_size; + binding.unified += array_size; } for (const auto& sampler : stage.samplers) { From 6e843d0c4ba977d584d04698d20cf2570131af00 Mon Sep 17 00:00:00 2001 From: georgemoralis Date: Tue, 17 Mar 2026 22:18:26 +0200 Subject: [PATCH 09/11] feeling dangerous , let's re-enable lod where supported --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index fdf6b3f2d..1b0af1d17 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -246,7 +246,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .support_fp32_denorm_flush = bool(vk12_props.shaderDenormFlushToZeroFloat32), .support_fp32_round_to_zero = bool(vk12_props.shaderRoundingModeRTZFloat32), .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), - .supports_image_load_store_lod = /*instance_.IsImageLoadStoreLodSupported()*/ false, // TEST + .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(), .supports_trinary_minmax = instance_.IsAmdShaderTrinaryMinMaxSupported(), // TODO: Emitted bounds checks cause problems with phi control flow; needs to be fixed. From 9a3e7b097c9d0153143a5f4ce3fbb245275a7f16 Mon Sep 17 00:00:00 2001 From: rosenkolev1 <50500415+rosenkolev1@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:40:37 +0200 Subject: [PATCH 10/11] Make thread TidCounter atomic (#4133) --- src/core/libraries/kernel/threads/pthread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp index f97451154..3742db5cf 100644 --- a/src/core/libraries/kernel/threads/pthread.cpp +++ b/src/core/libraries/kernel/threads/pthread.cpp @@ -242,7 +242,7 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt new_thread->attr.sched_policy = curthread->attr.sched_policy; } - static int TidCounter = 1; + static std::atomic TidCounter = 1; new_thread->tid = ++TidCounter; if (new_thread->attr.stackaddr_attr == nullptr) { From 2ca342970a8c5f0a03ae026cb9004d6a49794b75 Mon Sep 17 00:00:00 2001 From: Kravickas Date: Wed, 18 Mar 2026 09:05:20 +0100 Subject: [PATCH 11/11] MIP fixes (#4141) * int32-modifiers GCN VOP3 abs/neg modifier bits always operate on the sign bit (bit 31) regardless of instruction type. For integer operands this means: abs = clear bit 31 (x & 0x7FFFFFFF) neg = toggle bit 31 (x ^ 0x80000000) * int64-modifiers Previously GetSrc64 completely ignored input modifiers for integer operands. Now unpacks to two U32s, modifies the high dword's bit 31 (= bit 63 of the 64-bit value), and repacks. * V_MUL_LEGACY_F32 GCN V_MUL_LEGACY_F32: if either source is zero, result is +0.0 regardless of the other operand (even NaN or Inf). Standard IEEE multiply produces NaN for 0*Inf. The fix adds a zero-check select before the multiply. --- .../frontend/translate/translate.cpp | 21 +++++++++++++++++-- .../frontend/translate/translate.h | 1 + .../frontend/translate/vector_alu.cpp | 15 ++++++++++++- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 3aa70e2ec..611070a86 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -352,10 +352,10 @@ T Translator::GetSrc(const InstOperand& operand) { } } else { if (operand.input_modifier.abs) { - value = ir.IAbs(value); + value = ir.BitwiseAnd(value, ir.Imm32(0x7FFFFFFFu)); } if (operand.input_modifier.neg) { - value = ir.INeg(value); + value = ir.BitwiseXor(value, ir.Imm32(0x80000000u)); } } return value; @@ -453,6 +453,23 @@ T Translator::GetSrc64(const InstOperand& operand) { if (operand.input_modifier.neg) { value = ir.FPNeg(value); } + } else { + // GCN VOP3 abs/neg modifier bits operate on the sign bit (bit 63 for + // 64-bit values). Unpack, modify the high dword's bit 31, repack. + if (operand.input_modifier.abs) { + const auto unpacked = ir.UnpackUint2x32(value); + const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)}; + const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)}; + const auto hi_abs = ir.BitwiseAnd(hi, ir.Imm32(0x7FFFFFFFu)); + value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_abs)); + } + if (operand.input_modifier.neg) { + const auto unpacked = ir.UnpackUint2x32(value); + const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)}; + const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)}; + const auto hi_neg = ir.BitwiseXor(hi, ir.Imm32(0x80000000u)); + value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_neg)); + } } return value; } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 08b0192f5..5ee75e336 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -153,6 +153,7 @@ public: void V_SUB_F32(const GcnInst& inst); void V_SUBREV_F32(const GcnInst& inst); void V_MUL_F32(const GcnInst& inst); + void V_MUL_LEGACY_F32(const GcnInst& inst); void V_MUL_I32_I24(const GcnInst& inst, bool is_signed); void V_MIN_F32(const GcnInst& inst, bool is_legacy = false); void V_MAX_F32(const GcnInst& inst, bool is_legacy = false); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 08a0f6527..23236b702 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -25,7 +25,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { case Opcode::V_MAC_LEGACY_F32: return V_MAC_F32(inst); case Opcode::V_MUL_LEGACY_F32: - return V_MUL_F32(inst); + return V_MUL_LEGACY_F32(inst); case Opcode::V_MUL_F32: return V_MUL_F32(inst); case Opcode::V_MUL_I32_I24: @@ -493,6 +493,19 @@ void Translator::V_MUL_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); } +void Translator::V_MUL_LEGACY_F32(const GcnInst& inst) { + // GCN V_MUL_LEGACY_F32: if either source is zero, the result is +0.0 + // regardless of the other operand (even if NaN or Inf). + // Standard IEEE multiply would produce NaN for 0 * Inf. + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 zero{ir.Imm32(0.0f)}; + const IR::U1 src0_zero{ir.FPEqual(src0, zero)}; + const IR::U1 src1_zero{ir.FPEqual(src1, zero)}; + const IR::U1 either_zero{ir.LogicalOr(src0_zero, src1_zero)}; + SetDst(inst.dst[0], IR::F32{ir.Select(either_zero, zero, ir.FPMul(src0, src1))}); +} + void Translator::V_MUL_I32_I24(const GcnInst& inst, bool is_signed) { const IR::U32 src0{ ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), is_signed)};