diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index ffe7c22fb..3d77c5800 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -26,14 +26,14 @@ jobs:
runs-on: ubuntu-24.04
continue-on-error: true
steps:
- - uses: actions/checkout@v5
+ - uses: actions/checkout@v6
- uses: fsfe/reuse-action@v5
clang-format:
runs-on: ubuntu-24.04
continue-on-error: true
steps:
- - uses: actions/checkout@v5
+ - uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Install
@@ -46,7 +46,7 @@ jobs:
env:
COMMIT_RANGE: ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}
run: ./.ci/clang-format.sh
-
+
get-info:
runs-on: ubuntu-24.04
outputs:
@@ -54,7 +54,7 @@ jobs:
shorthash: ${{ steps.vars.outputs.shorthash }}
fullhash: ${{ steps.vars.outputs.fullhash }}
steps:
- - uses: actions/checkout@v5
+ - uses: actions/checkout@v6
- name: Get date and git hash
id: vars
run: |
@@ -69,23 +69,23 @@ jobs:
runs-on: windows-2025
needs: get-info
steps:
- - uses: actions/checkout@v5
+ - uses: actions/checkout@v6
with:
submodules: recursive
- name: Cache CMake Configuration
- uses: actions/cache@v4
+ uses: actions/cache@v5
env:
cache-name: ${{ runner.os }}-sdl-ninja-cache-cmake-configuration
with:
- path: |
+ path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.19
+ uses: hendrikmuhs/ccache-action@v1.2.21
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with:
@@ -99,7 +99,7 @@ jobs:
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
- name: Upload Windows SDL artifact
- uses: actions/upload-artifact@v4
+ uses: actions/upload-artifact@v6
with:
name: shadps4-win64-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: ${{github.workspace}}/build/shadPS4.exe
@@ -108,7 +108,7 @@ jobs:
runs-on: macos-15
needs: get-info
steps:
- - uses: actions/checkout@v5
+ - uses: actions/checkout@v6
with:
submodules: recursive
@@ -118,18 +118,18 @@ jobs:
xcode-version: latest
- name: Cache CMake Configuration
- uses: actions/cache@v4
- env:
+ uses: actions/cache@v5
+ env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-configuration
- with:
- path: |
- ${{github.workspace}}/build
- key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- restore-keys: |
- ${{ env.cache-name }}-
+ with:
+ path: |
+ ${{github.workspace}}/build
+ key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
+ restore-keys: |
+ ${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.19
+ uses: hendrikmuhs/ccache-action@v1.2.21
env:
cache-name: ${{runner.os}}-sdl-cache-cmake-build
with:
@@ -150,7 +150,7 @@ jobs:
mv ${{github.workspace}}/build/shadps4 upload
mv ${{github.workspace}}/build/MoltenVK_icd.json upload
mv ${{github.workspace}}/build/libMoltenVK.dylib upload
- - uses: actions/upload-artifact@v4
+ - uses: actions/upload-artifact@v6
with:
name: shadps4-macos-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: upload/
@@ -159,7 +159,7 @@ jobs:
runs-on: ubuntu-24.04
needs: get-info
steps:
- - uses: actions/checkout@v5
+ - uses: actions/checkout@v6
with:
submodules: recursive
@@ -172,18 +172,18 @@ jobs:
run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 mold build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev
- name: Cache CMake Configuration
- uses: actions/cache@v4
- env:
+ uses: actions/cache@v5
+ env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-configuration
- with:
- path: |
- ${{github.workspace}}/build
- key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- restore-keys: |
- ${{ env.cache-name }}-
+ with:
+ path: |
+ ${{github.workspace}}/build
+ key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
+ restore-keys: |
+ ${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.19
+ uses: hendrikmuhs/ccache-action@v1.2.21
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with:
@@ -195,23 +195,23 @@ jobs:
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
-
- - name: Package and Upload Linux(ubuntu64) SDL artifact
+
+ - name: Package and Upload Linux(ubuntu64) SDL artifact
run: |
ls -la ${{ github.workspace }}/build/shadps4
-
- - uses: actions/upload-artifact@v4
+
+ - uses: actions/upload-artifact@v6
with:
name: shadps4-ubuntu64-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: ${{ github.workspace }}/build/shadps4
- name: Run AppImage packaging script
run: ./.github/linux-appimage-sdl.sh
-
+
- name: Package and Upload Linux SDL artifact
run: |
tar cf shadps4-linux-sdl.tar.gz -C ${{github.workspace}}/build shadps4
- - uses: actions/upload-artifact@v4
+ - uses: actions/upload-artifact@v6
with:
name: shadps4-linux-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: Shadps4-sdl.AppImage
@@ -220,7 +220,7 @@ jobs:
runs-on: ubuntu-24.04
needs: get-info
steps:
- - uses: actions/checkout@v5
+ - uses: actions/checkout@v6
with:
submodules: recursive
@@ -228,18 +228,18 @@ jobs:
run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 mold build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev
- name: Cache CMake Configuration
- uses: actions/cache@v4
- env:
+ uses: actions/cache@v5
+ env:
cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-configuration
- with:
- path: |
- ${{github.workspace}}/build
- key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
- restore-keys: |
- ${{ env.cache-name }}-
+ with:
+ path: |
+ ${{github.workspace}}/build
+ key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
+ restore-keys: |
+ ${{ env.cache-name }}-
- name: Cache CMake Build
- uses: hendrikmuhs/ccache-action@v1.2.19
+ uses: hendrikmuhs/ccache-action@v1.2.21
env:
cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-build
with:
@@ -258,7 +258,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download all artifacts
- uses: actions/download-artifact@v5
+ uses: actions/download-artifact@v8
with:
path: ./artifacts
@@ -266,7 +266,7 @@ jobs:
run: |
chmod -R a+x ./artifacts/shadps4-linux-sdl-*
chmod -R a+x ./artifacts/shadps4-macos-sdl-*
-
+
- name: Compress individual directories (without parent directory)
run: |
cd ./artifacts
@@ -277,7 +277,7 @@ jobs:
(cd "$dir_name" && zip -r "../${dir_name}.zip" .)
fi
done
-
+
- name: Get latest release information
id: get_latest_release
env:
@@ -351,52 +351,52 @@ jobs:
upload_url="https://uploads.github.com/repos/$REPO/releases/$release_id/assets?name=$filename"
curl -X POST -H "Authorization: token $GITHUB_TOKEN" -H "Content-Type: application/octet-stream" --data-binary @"$file" "$upload_url"
done
-
+
- name: Get current pre-release information
env:
GITHUB_TOKEN: ${{ secrets.SHADPS4_TOKEN_REPO }}
run: |
api_url="https://api.github.com/repos/${{ github.repository }}/releases"
-
+
# Get all releases (sorted by date)
releases=$(curl -H "Authorization: token $GITHUB_TOKEN" "$api_url")
-
+
# Capture the most recent pre-release (assuming the first one is the latest)
current_release=$(echo "$releases" | jq -c '.[] | select(.prerelease == true) | .published_at' | sort -r | head -n 1)
-
+
# Remove extra quotes from captured date
current_release=$(echo $current_release | tr -d '"')
-
+
# Export the current published_at to be available for the next step
echo "CURRENT_PUBLISHED_AT=$current_release" >> $GITHUB_ENV
-
+
- name: Delete old pre-releases and tags
env:
GITHUB_TOKEN: ${{ secrets.SHADPS4_TOKEN_REPO }}
run: |
api_url="https://api.github.com/repos/${{ github.repository }}/releases"
-
+
# Get current pre-releases
releases=$(curl -H "Authorization: token $GITHUB_TOKEN" "$api_url")
-
+
# Remove extra quotes from captured date
CURRENT_PUBLISHED_AT=$(echo $CURRENT_PUBLISHED_AT | tr -d '"')
-
+
# Convert CURRENT_PUBLISHED_AT para timestamp Unix
current_published_ts=$(date -d "$CURRENT_PUBLISHED_AT" +%s)
-
+
# Identify pre-releases
echo "$releases" | jq -c '.[] | select(.prerelease == true)' | while read -r release; do
release_date=$(echo "$release" | jq -r '.published_at')
release_id=$(echo "$release" | jq -r '.id')
release_tag=$(echo "$release" | jq -r '.tag_name')
-
+
# Remove extra quotes from captured date
release_date=$(echo $release_date | tr -d '"')
-
+
# Convert release_date para timestamp Unix
release_date_ts=$(date -d "$release_date" +%s)
-
+
# Compare timestamps and delete old pre-releases
if [[ "$release_date_ts" -lt "$current_published_ts" ]]; then
echo "Deleting old pre-release: $release_id from $release_date with tag: $release_tag"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e1d05cdce..a61bc2370 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -202,7 +202,7 @@ execute_process(
# Set Version
set(EMULATOR_VERSION_MAJOR "0")
-set(EMULATOR_VERSION_MINOR "14")
+set(EMULATOR_VERSION_MINOR "15")
set(EMULATOR_VERSION_PATCH "1")
set_source_files_properties(src/shadps4.rc PROPERTIES COMPILE_DEFINITIONS "EMULATOR_VERSION_MAJOR=${EMULATOR_VERSION_MAJOR};EMULATOR_VERSION_MINOR=${EMULATOR_VERSION_MINOR};EMULATOR_VERSION_PATCH=${EMULATOR_VERSION_PATCH}")
diff --git a/dist/net.shadps4.shadPS4.metainfo.xml b/dist/net.shadps4.shadPS4.metainfo.xml
index 210ca1c5e..8a7fa852b 100644
--- a/dist/net.shadps4.shadPS4.metainfo.xml
+++ b/dist/net.shadps4.shadPS4.metainfo.xml
@@ -38,7 +38,10 @@
Game
-
+
+ https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.15.0
+
+
https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.14.0
diff --git a/src/core/file_format/psf.cpp b/src/core/file_format/psf.cpp
index e647059f0..c5be7410a 100644
--- a/src/core/file_format/psf.cpp
+++ b/src/core/file_format/psf.cpp
@@ -113,6 +113,7 @@ bool PSF::Encode(const std::filesystem::path& filepath) const {
LOG_ERROR(Core, "Failed to write PSF file. Written {} Expected {}", written,
psf_buffer.size());
}
+ file.Close();
return written == psf_buffer.size();
}
diff --git a/src/core/libraries/kernel/threads/pthread.cpp b/src/core/libraries/kernel/threads/pthread.cpp
index f97451154..3742db5cf 100644
--- a/src/core/libraries/kernel/threads/pthread.cpp
+++ b/src/core/libraries/kernel/threads/pthread.cpp
@@ -242,7 +242,7 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt
new_thread->attr.sched_policy = curthread->attr.sched_policy;
}
- static int TidCounter = 1;
+ static std::atomic TidCounter = 1;
new_thread->tid = ++TidCounter;
if (new_thread->attr.stackaddr_attr == nullptr) {
diff --git a/src/core/libraries/network/ssl2.cpp b/src/core/libraries/network/ssl2.cpp
index 0b408d094..3a7fd71e5 100644
--- a/src/core/libraries/network/ssl2.cpp
+++ b/src/core/libraries/network/ssl2.cpp
@@ -114,7 +114,13 @@ int PS4_SYSV_ABI sceSslFreeCaCerts(s32 ssl_ctx_id, OrbisSslCaCerts* certs) {
if (certs == nullptr) {
return ORBIS_SSL_ERROR_INVALID_ARGUMENT;
}
- delete (certs->certs);
+ if (certs->certs != nullptr) {
+ for (s32 data = 0; data < certs->num; data++) {
+ free(certs->certs[data].ptr);
+ }
+ delete (certs->certs);
+ }
+
// delete (certs->pool);
return ORBIS_OK;
}
@@ -139,7 +145,12 @@ int PS4_SYSV_ABI sceSslGetCaCerts(s32 ssl_ctx_id, OrbisSslCaCerts* certs) {
if (certs == nullptr) {
return ORBIS_SSL_ERROR_INVALID_ARGUMENT;
}
- certs->certs = new OrbisSslData{nullptr, 0};
+ // Allocate a buffer to store dummy data in.
+ const char* dummy_data = "dummy";
+ u64 dummy_length = strlen(dummy_data) + 1;
+ char* data = static_cast(malloc(dummy_length));
+ strncpy(data, dummy_data, dummy_length);
+ certs->certs = new OrbisSslData{data, dummy_length};
certs->num = 1;
certs->pool = nullptr;
return ORBIS_OK;
diff --git a/src/core/linker.cpp b/src/core/linker.cpp
index a8a97b2a6..889f3a298 100644
--- a/src/core/linker.cpp
+++ b/src/core/linker.cpp
@@ -361,8 +361,10 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
return_info->virtual_address = AeroLib::GetStub(sr.name.c_str());
return_info->name = "Unknown !!!";
}
- LOG_WARNING(Core_Linker, "Linker: Stub resolved {} as {} (lib: {}, mod: {})", sr.name,
- return_info->name, library->name, module->name);
+ if (library->name != "libc" && library->name != "libSceFios2") {
+ LOG_WARNING(Core_Linker, "Linker: Stub resolved {} as {} (lib: {}, mod: {})", sr.name,
+ return_info->name, library->name, module->name);
+ }
return false;
}
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index e2a969b61..0b05dcef4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -220,20 +220,33 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms) {
const auto& texture = ctx.images[handle & 0xFFFF];
- const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id color_type = texture.data_types->Get(4);
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Sample, ms);
Id texel;
if (!texture.is_storage) {
+ const Id image = ctx.OpLoad(texture.image_type, texture.id);
operands.Add(spv::ImageOperandsMask::Lod, lod);
texel = ctx.OpImageFetch(color_type, image, coords, operands.mask, operands.operands);
} else {
+ Id image_ptr = texture.id;
if (ctx.profile.supports_image_load_store_lod) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
} else if (Sirit::ValidId(lod)) {
- LOG_WARNING(Render, "Image read with LOD not supported by driver");
+#if 1
+ // It is unclear which interactions reach this code path, so leave it as
+ // unreachable until a real case is found.
+ // Normally IMAGE_LOAD_MIP should translate to OpImageFetch instead.
+ UNREACHABLE_MSG("Unsupported ImageRead with Lod");
+#else
+ LOG_WARNING(Render, "Fallback for ImageRead with LOD");
+ ASSERT(texture.mip_fallback_mode == MipStorageFallbackMode::DynamicIndex);
+ const Id single_image_ptr_type =
+ ctx.TypePointer(spv::StorageClass::UniformConstant, texture.image_type);
+ image_ptr = ctx.OpAccessChain(single_image_ptr_type, image_ptr, std::array{lod});
+#endif
}
+ const Id image = ctx.OpLoad(texture.image_type, image_ptr);
texel = ctx.OpImageRead(color_type, image, coords, operands.mask, operands.operands);
}
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texel) : texel;
@@ -242,15 +255,20 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms,
Id color) {
const auto& texture = ctx.images[handle & 0xFFFF];
- const Id image = ctx.OpLoad(texture.image_type, texture.id);
+ Id image_ptr = texture.id;
const Id color_type = texture.data_types->Get(4);
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Sample, ms);
if (ctx.profile.supports_image_load_store_lod) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
} else if (Sirit::ValidId(lod)) {
- LOG_WARNING(Render, "Image write with LOD not supported by driver");
+ LOG_WARNING(Render, "Fallback for ImageWrite with LOD");
+ ASSERT(texture.mip_fallback_mode == MipStorageFallbackMode::DynamicIndex);
+ const Id single_image_ptr_type =
+ ctx.TypePointer(spv::StorageClass::UniformConstant, texture.image_type);
+ image_ptr = ctx.OpAccessChain(single_image_ptr_type, image_ptr, std::array{lod});
}
+ const Id image = ctx.OpLoad(texture.image_type, image_ptr);
const Id texel = texture.is_integer ? ctx.OpBitcast(color_type, color) : color;
ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands);
}
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 261155ab5..c0e469964 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -961,23 +961,33 @@ void EmitContext::DefineImagesAndSamplers() {
const auto nfmt = sharp.GetNumberFmt();
const bool is_integer = AmdGpu::IsInteger(nfmt);
const bool is_storage = image_desc.is_written;
+ const MipStorageFallbackMode mip_fallback_mode = image_desc.mip_fallback_mode;
const VectorIds& data_types = GetAttributeType(*this, nfmt);
const Id sampled_type = data_types[1];
const Id image_type{ImageType(*this, image_desc, sampled_type)};
- const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
+
+ const u32 num_bindings = image_desc.NumBindings(info);
+ Id pointee_type = image_type;
+ if (mip_fallback_mode == MipStorageFallbackMode::DynamicIndex) {
+ pointee_type = TypeArray(pointee_type, ConstU32(num_bindings));
+ }
+
+ const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, pointee_type)};
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
- Decorate(id, spv::Decoration::Binding, binding.unified++);
+ Decorate(id, spv::Decoration::Binding, binding.unified);
+ binding.unified += num_bindings;
Decorate(id, spv::Decoration::DescriptorSet, 0U);
+ // TODO better naming for resources (flattened sharp_idx is not informative)
Name(id, fmt::format("{}_{}{}", stage, "img", image_desc.sharp_idx));
images.push_back({
.data_types = &data_types,
.id = id,
.sampled_type = is_storage ? sampled_type : TypeSampledImage(image_type),
- .pointer_type = pointer_type,
.image_type = image_type,
.view_type = sharp.GetViewType(image_desc.is_array),
.is_integer = is_integer,
.is_storage = is_storage,
+ .mip_fallback_mode = mip_fallback_mode,
});
interfaces.push_back(id);
}
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 9bb2b7d7a..a9c6f0968 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -293,11 +293,11 @@ public:
const VectorIds* data_types;
Id id;
Id sampled_type;
- Id pointer_type;
Id image_type;
AmdGpu::ImageType view_type;
bool is_integer = false;
bool is_storage = false;
+ MipStorageFallbackMode mip_fallback_mode{};
};
enum class PointerType : u32 {
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index 0489a5d88..de3822296 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -352,10 +352,10 @@ T Translator::GetSrc(const InstOperand& operand) {
}
} else {
if (operand.input_modifier.abs) {
- value = ir.IAbs(value);
+ value = ir.BitwiseAnd(value, ir.Imm32(0x7FFFFFFFu));
}
if (operand.input_modifier.neg) {
- value = ir.INeg(value);
+ value = ir.BitwiseXor(value, ir.Imm32(0x80000000u));
}
}
return value;
@@ -453,6 +453,23 @@ T Translator::GetSrc64(const InstOperand& operand) {
if (operand.input_modifier.neg) {
value = ir.FPNeg(value);
}
+ } else {
+ // GCN VOP3 abs/neg modifier bits operate on the sign bit (bit 63 for
+ // 64-bit values). Unpack, modify the high dword's bit 31, repack.
+ if (operand.input_modifier.abs) {
+ const auto unpacked = ir.UnpackUint2x32(value);
+ const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)};
+ const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)};
+ const auto hi_abs = ir.BitwiseAnd(hi, ir.Imm32(0x7FFFFFFFu));
+ value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_abs));
+ }
+ if (operand.input_modifier.neg) {
+ const auto unpacked = ir.UnpackUint2x32(value);
+ const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)};
+ const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)};
+ const auto hi_neg = ir.BitwiseXor(hi, ir.Imm32(0x80000000u));
+ value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_neg));
+ }
}
return value;
}
diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h
index 08b0192f5..5ee75e336 100644
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -153,6 +153,7 @@ public:
void V_SUB_F32(const GcnInst& inst);
void V_SUBREV_F32(const GcnInst& inst);
void V_MUL_F32(const GcnInst& inst);
+ void V_MUL_LEGACY_F32(const GcnInst& inst);
void V_MUL_I32_I24(const GcnInst& inst, bool is_signed);
void V_MIN_F32(const GcnInst& inst, bool is_legacy = false);
void V_MAX_F32(const GcnInst& inst, bool is_legacy = false);
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 08a0f6527..23236b702 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -25,7 +25,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
case Opcode::V_MAC_LEGACY_F32:
return V_MAC_F32(inst);
case Opcode::V_MUL_LEGACY_F32:
- return V_MUL_F32(inst);
+ return V_MUL_LEGACY_F32(inst);
case Opcode::V_MUL_F32:
return V_MUL_F32(inst);
case Opcode::V_MUL_I32_I24:
@@ -493,6 +493,19 @@ void Translator::V_MUL_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
}
+void Translator::V_MUL_LEGACY_F32(const GcnInst& inst) {
+ // Unlike V_MUL_F32, the legacy multiply forces the result to +0.0 whenever either source
+ // compares equal to zero, even if the other operand is NaN or Inf. A plain IEEE multiply
+ // would produce NaN for 0 * Inf; all other inputs go through the ordinary FPMul.
+ const IR::F32 src0{GetSrc(inst.src[0])};
+ const IR::F32 src1{GetSrc(inst.src[1])};
+ const IR::F32 zero{ir.Imm32(0.0f)};
+ const IR::U1 src0_zero{ir.FPEqual(src0, zero)};
+ const IR::U1 src1_zero{ir.FPEqual(src1, zero)};
+ const IR::U1 either_zero{ir.LogicalOr(src0_zero, src1_zero)};
+ SetDst(inst.dst[0], IR::F32{ir.Select(either_zero, zero, ir.FPMul(src0, src1))});
+}
+
void Translator::V_MUL_I32_I24(const GcnInst& inst, bool is_signed) {
const IR::U32 src0{
ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), is_signed)};
diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h
index f103b6736..1b14a1c6b 100644
--- a/src/shader_recompiler/ir/passes/ir_passes.h
+++ b/src/shader_recompiler/ir/passes/ir_passes.h
@@ -19,7 +19,7 @@ void DeadCodeEliminationPass(IR::Program& program);
void ConstantPropagationPass(IR::BlockList& program);
void FlattenExtendedUserdataPass(IR::Program& program);
void ReadLaneEliminationPass(IR::Program& program);
-void ResourceTrackingPass(IR::Program& program);
+void ResourceTrackingPass(IR::Program& program, const Profile& profile);
void CollectShaderInfoPass(IR::Program& program, const Profile& profile);
void LowerBufferFormatToRaw(IR::Program& program);
void LowerFp64ToFp32(IR::Program& program);
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 4c41e94e9..3b7888ab3 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -9,6 +9,7 @@
#include "shader_recompiler/ir/operand_helper.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reinterpret.h"
+#include "shader_recompiler/profile.h"
#include "video_core/amdgpu/resource.h"
namespace Shader::Optimization {
@@ -255,7 +256,9 @@ public:
u32 Add(const ImageResource& desc) {
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
- return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array;
+ return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array &&
+ desc.mip_fallback_mode == existing.mip_fallback_mode &&
+ desc.constant_mip_index == existing.constant_mip_index;
})};
auto& image = image_resources[index];
image.is_atomic |= desc.is_atomic;
@@ -529,14 +532,21 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
inst.SetArg(0, ir.Imm32(buffer_binding));
}
-void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
+void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors,
+ const Profile& profile) {
// Read image sharp.
const auto inst_info = inst.Flags();
const IR::Inst* image_handle = inst.Arg(0).InstRecursive();
const auto tsharp = TrackSharp(image_handle, block, inst_info.pc);
const bool is_atomic = IsImageAtomicInstruction(inst);
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic;
- const ImageResource image_res = {
+ const bool is_storage =
+ inst.GetOpcode() == IR::Opcode::ImageRead || inst.GetOpcode() == IR::Opcode::ImageWrite;
+ // An ImageRead of an image that is never written is emitted as OpImageFetch with a Lod
+ // operand, so it needs no fallback. (TODO: confirm this always holds.)
+ const bool needs_mip_storage_fallback =
+ inst_info.has_lod && is_written && !profile.supports_image_load_store_lod;
+ ImageResource image_res = {
.sharp_idx = tsharp,
.is_depth = bool(inst_info.is_depth),
.is_atomic = is_atomic,
@@ -544,9 +554,42 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
.is_written = is_written,
.is_r128 = bool(inst_info.is_r128),
};
+
auto image = image_res.GetSharp(info);
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
+ if (needs_mip_storage_fallback) {
+ // If the mip level given to IMAGE_(LOAD/STORE)_MIP is a constant, set up ImageResource
+ // so that only that single level is bound.
+ // If the mip index is dynamic, the levels are bound as an array instead.
+ const auto view_type = image.GetViewType(image_res.is_array);
+
+ IR::Inst* body = inst.Arg(1).InstRecursive();
+ const auto lod_arg = [&] -> IR::Value {
+ switch (view_type) {
+ case AmdGpu::ImageType::Color1D: // x, [lod]
+ return body->Arg(1);
+ case AmdGpu::ImageType::Color1DArray: // x, slice, [lod]
+ case AmdGpu::ImageType::Color2D: // x, y, [lod]
+ return body->Arg(2);
+ case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod]
+ case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
+ return body->Arg(3);
+ case AmdGpu::ImageType::Color2DMsaa:
+ case AmdGpu::ImageType::Color2DMsaaArray:
+ default:
+ UNREACHABLE_MSG("Invalid image type {}", view_type);
+ }
+ }();
+
+ if (lod_arg.IsImmediate()) {
+ image_res.mip_fallback_mode = MipStorageFallbackMode::ConstantIndex;
+ image_res.constant_mip_index = lod_arg.U32();
+ } else {
+ image_res.mip_fallback_mode = MipStorageFallbackMode::DynamicIndex;
+ }
+ }
+
// Patch image instruction if image is FMask.
if (AmdGpu::IsFmask(image.GetDataFmt())) {
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
@@ -1080,7 +1123,11 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
const auto has_ms = view_type == AmdGpu::ImageType::Color2DMsaa ||
view_type == AmdGpu::ImageType::Color2DMsaaArray;
ASSERT(!inst_info.has_lod || !has_ms);
- const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{};
+ // If we are binding a single mip level as fallback, drop the argument
+ const auto lod =
+ (inst_info.has_lod && image_res.mip_fallback_mode != MipStorageFallbackMode::ConstantIndex)
+ ? IR::U32{arg}
+ : IR::U32{};
const auto ms = has_ms ? IR::U32{arg} : IR::U32{};
const auto is_storage = image_res.is_written;
@@ -1111,7 +1158,7 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
}
}
-void ResourceTrackingPass(IR::Program& program) {
+void ResourceTrackingPass(IR::Program& program, const Profile& profile) {
// Iterate resource instructions and patch them after finding the sharp.
auto& info = program.info;
@@ -1122,7 +1169,7 @@ void ResourceTrackingPass(IR::Program& program) {
if (IsBufferInstruction(inst)) {
PatchBufferSharp(*block, inst, info, descriptors);
} else if (IsImageInstruction(inst)) {
- PatchImageSharp(*block, inst, info, descriptors);
+ PatchImageSharp(*block, inst, info, descriptors, profile);
}
}
}
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
index f4fa45afc..d6efa2890 100644
--- a/src/shader_recompiler/recompiler.cpp
+++ b/src/shader_recompiler/recompiler.cpp
@@ -80,7 +80,7 @@ IR::Program TranslateProgram(const std::span& code, Pools& pools, Inf
Shader::Optimization::RingAccessElimination(program, runtime_info);
Shader::Optimization::ReadLaneEliminationPass(program);
Shader::Optimization::FlattenExtendedUserdataPass(program);
- Shader::Optimization::ResourceTrackingPass(program);
+ Shader::Optimization::ResourceTrackingPass(program, profile);
Shader::Optimization::LowerBufferFormatToRaw(program);
Shader::Optimization::SharedMemorySimplifyPass(program, profile);
Shader::Optimization::SharedMemoryToStoragePass(program, runtime_info, profile);
diff --git a/src/shader_recompiler/resource.h b/src/shader_recompiler/resource.h
index 5ae3179f6..82a861e2a 100644
--- a/src/shader_recompiler/resource.h
+++ b/src/shader_recompiler/resource.h
@@ -71,6 +71,8 @@ struct BufferResource {
};
using BufferResourceList = boost::container::static_vector;
+enum class MipStorageFallbackMode : u32 { None, DynamicIndex, ConstantIndex };
+
struct ImageResource {
u32 sharp_idx;
bool is_depth{};
@@ -78,6 +80,8 @@ struct ImageResource {
bool is_array{};
bool is_written{};
bool is_r128{};
+ MipStorageFallbackMode mip_fallback_mode{};
+ u32 constant_mip_index{};
constexpr AmdGpu::Image GetSharp(const auto& info) const noexcept {
AmdGpu::Image image{};
@@ -102,6 +106,13 @@ struct ImageResource {
}
return image;
}
+
+ u32 NumBindings(const auto& info) const {
+ const AmdGpu::Image tsharp = GetSharp(info);
+ return (mip_fallback_mode == MipStorageFallbackMode::DynamicIndex)
+ ? (tsharp.last_level - tsharp.base_level + 1) // one binding per mip level
+ : 1; // None / ConstantIndex: a single binding
+ }
};
using ImageResourceList = boost::container::static_vector;
diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h
index 4f6bb44bf..fa14583af 100644
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@@ -52,6 +52,8 @@ struct ImageSpecialization {
bool is_srgb = false;
AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{};
+ // FIXME: check whether the pipeline cache needs changes to account for this field.
+ u32 num_bindings = 0;
bool operator==(const ImageSpecialization&) const = default;
};
@@ -133,7 +135,7 @@ struct StageSpecialization {
}
});
ForEachSharp(binding, images, info->images,
- [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
+ [&](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetViewType(desc.is_array);
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
spec.is_storage = desc.is_written;
@@ -144,6 +146,7 @@ struct StageSpecialization {
spec.is_srgb = sharp.GetNumberFmt() == AmdGpu::NumberFormat::Srgb;
}
spec.num_conversion = sharp.GetNumberConversion();
+ spec.num_bindings = desc.NumBindings(*info);
});
ForEachSharp(binding, fmasks, info->fmasks,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index eecd416d1..ba0a3afa2 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -48,13 +48,15 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
});
}
for (const auto& image : info->images) {
+ const u32 num_bindings = image.NumBindings(*info);
bindings.push_back({
- .binding = binding++,
+ .binding = binding,
.descriptorType = image.is_written ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
- .descriptorCount = 1,
+ .descriptorCount = num_bindings,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
+ binding += num_bindings;
}
for (const auto& sampler : info->samplers) {
bindings.push_back({
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 242c9b6f2..bc9ef571b 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -457,13 +457,15 @@ void GraphicsPipeline::BuildDescSetLayout(bool preloading) {
});
}
for (const auto& image : stage->images) {
+ const u32 num_bindings = image.NumBindings(*stage);
bindings.push_back({
- .binding = binding++,
+ .binding = binding,
.descriptorType = image.is_written ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
- .descriptorCount = 1,
+ .descriptorCount = num_bindings,
.stageFlags = stage_bit,
});
+ binding += num_bindings;
}
for (const auto& sampler : stage->samplers) {
bindings.push_back({
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 41dbcc7d4..7f0bd43e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -662,6 +662,13 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding) {
image_bindings.clear();
+ const u32 first_image_idx = image_infos.size();
+ // When there is no native instruction support for loading/storing to an explicit mip level,
+ // bind an array of consecutive descriptors, one per mip level, which the shader indexes with
+ // the LOD operand.
+ // The vector below records, for each image resource, how many consecutive bindings it uses.
+ // This is currently always 1 for anything other than mip fallback arrays.
+ boost::container::small_vector image_descriptor_array_sizes;
for (const auto& image_desc : stage.images) {
const auto tsharp = image_desc.GetSharp(stage);
@@ -671,25 +678,43 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
if (tsharp.GetDataFmt() == AmdGpu::DataFormat::FormatInvalid) {
image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{});
+ image_descriptor_array_sizes.push_back(1);
continue;
}
- auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{},
- std::tuple{tsharp, image_desc});
- image_id = texture_cache.FindImage(desc);
- auto* image = &texture_cache.GetImage(image_id);
- if (image->depth_id) {
- // If this image has an associated depth image, it's a stencil attachment.
- // Redirect the access to the actual depth-stencil buffer.
- image_id = image->depth_id;
- image = &texture_cache.GetImage(image_id);
+ const Shader::MipStorageFallbackMode mip_fallback_mode = image_desc.mip_fallback_mode;
+ const u32 num_bindings = image_desc.NumBindings(stage);
+
+ for (auto i = 0; i < num_bindings; i++) {
+ auto& [image_id, desc] = image_bindings.emplace_back(
+ std::piecewise_construct, std::tuple{}, std::tuple{tsharp, image_desc});
+
+ if (mip_fallback_mode == Shader::MipStorageFallbackMode::ConstantIndex) {
+ ASSERT(num_bindings == 1);
+ desc.view_info.range.base.level += image_desc.constant_mip_index;
+ desc.view_info.range.extent.levels = 1;
+ } else if (mip_fallback_mode == Shader::MipStorageFallbackMode::DynamicIndex) {
+ desc.view_info.range.base.level += i;
+ desc.view_info.range.extent.levels = 1;
+ }
+
+ image_id = texture_cache.FindImage(desc);
+ auto* image = &texture_cache.GetImage(image_id);
+ if (image->depth_id) {
+ // If this image has an associated depth image, it's a stencil attachment.
+ // Redirect the access to the actual depth-stencil buffer.
+ image_id = image->depth_id;
+ image = &texture_cache.GetImage(image_id);
+ }
+ if (image->binding.is_bound) {
+ // The image is already bound. If it is about to be used as storage, we need
+ // to force the general layout on it.
+ image->binding.force_general |= image_desc.is_written;
+ }
+ image->binding.is_bound = 1u;
}
- if (image->binding.is_bound) {
- // The image is already bound. In case if it is about to be used as storage we need
- // to force general layout on it.
- image->binding.force_general |= image_desc.is_written;
- }
- image->binding.is_bound = 1u;
+
+ image_descriptor_array_sizes.push_back(num_bindings);
}
// Second pass to re-bind images that were updated after binding
@@ -749,16 +774,26 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
image.backing->state.layout);
}
+ }
+ u32 image_info_idx = first_image_idx;
+ u32 image_binding_idx = 0;
+ for (u32 array_size : image_descriptor_array_sizes) {
+ const auto& [_, desc] = image_bindings[image_binding_idx];
+ const bool is_storage = desc.type == VideoCore::TextureCache::BindingType::Storage;
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
- .dstBinding = binding.unified++,
+ .dstBinding = binding.unified,
.dstArrayElement = 0,
- .descriptorCount = 1,
+ .descriptorCount = array_size,
.descriptorType =
is_storage ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage,
- .pImageInfo = &image_infos.back(),
+ .pImageInfo = &image_infos[image_info_idx],
});
+
+ image_info_idx += array_size;
+ image_binding_idx += array_size;
+ binding.unified += array_size;
}
for (const auto& sampler : stage.samplers) {