Merge branch 'main' into user_and_settings

This commit is contained in:
georgemoralis 2026-03-18 13:16:47 +02:00 committed by GitHub
commit 49c9786863
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 287 additions and 111 deletions

View File

@ -26,14 +26,14 @@ jobs:
runs-on: ubuntu-24.04
continue-on-error: true
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- uses: fsfe/reuse-action@v5
clang-format:
runs-on: ubuntu-24.04
continue-on-error: true
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Install
@ -46,7 +46,7 @@ jobs:
env:
COMMIT_RANGE: ${{ github.event.pull_request.base.sha }}..${{ github.event.pull_request.head.sha }}
run: ./.ci/clang-format.sh
get-info:
runs-on: ubuntu-24.04
outputs:
@ -54,7 +54,7 @@ jobs:
shorthash: ${{ steps.vars.outputs.shorthash }}
fullhash: ${{ steps.vars.outputs.fullhash }}
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
- name: Get date and git hash
id: vars
run: |
@ -69,23 +69,23 @@ jobs:
runs-on: windows-2025
needs: get-info
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
submodules: recursive
- name: Cache CMake Configuration
uses: actions/cache@v4
uses: actions/cache@v5
env:
cache-name: ${{ runner.os }}-sdl-ninja-cache-cmake-configuration
with:
path: |
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.19
uses: hendrikmuhs/ccache-action@v1.2.21
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with:
@ -99,7 +99,7 @@ jobs:
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $env:NUMBER_OF_PROCESSORS
- name: Upload Windows SDL artifact
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v6
with:
name: shadps4-win64-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: ${{github.workspace}}/build/shadPS4.exe
@ -108,7 +108,7 @@ jobs:
runs-on: macos-15
needs: get-info
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
submodules: recursive
@ -118,18 +118,18 @@ jobs:
xcode-version: latest
- name: Cache CMake Configuration
uses: actions/cache@v4
env:
uses: actions/cache@v5
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-configuration
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.19
uses: hendrikmuhs/ccache-action@v1.2.21
env:
cache-name: ${{runner.os}}-sdl-cache-cmake-build
with:
@ -150,7 +150,7 @@ jobs:
mv ${{github.workspace}}/build/shadps4 upload
mv ${{github.workspace}}/build/MoltenVK_icd.json upload
mv ${{github.workspace}}/build/libMoltenVK.dylib upload
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v6
with:
name: shadps4-macos-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: upload/
@ -159,7 +159,7 @@ jobs:
runs-on: ubuntu-24.04
needs: get-info
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
submodules: recursive
@ -172,18 +172,18 @@ jobs:
run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 clang-19 mold build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev
- name: Cache CMake Configuration
uses: actions/cache@v4
env:
uses: actions/cache@v5
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-configuration
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.19
uses: hendrikmuhs/ccache-action@v1.2.21
env:
cache-name: ${{ runner.os }}-sdl-cache-cmake-build
with:
@ -195,23 +195,23 @@ jobs:
- name: Build
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}} --parallel $(nproc)
- name: Package and Upload Linux(ubuntu64) SDL artifact
- name: Package and Upload Linux(ubuntu64) SDL artifact
run: |
ls -la ${{ github.workspace }}/build/shadps4
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v6
with:
name: shadps4-ubuntu64-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: ${{ github.workspace }}/build/shadps4
- name: Run AppImage packaging script
run: ./.github/linux-appimage-sdl.sh
- name: Package and Upload Linux SDL artifact
run: |
tar cf shadps4-linux-sdl.tar.gz -C ${{github.workspace}}/build shadps4
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v6
with:
name: shadps4-linux-sdl-${{ needs.get-info.outputs.date }}-${{ needs.get-info.outputs.shorthash }}
path: Shadps4-sdl.AppImage
@ -220,7 +220,7 @@ jobs:
runs-on: ubuntu-24.04
needs: get-info
steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v6
with:
submodules: recursive
@ -228,18 +228,18 @@ jobs:
run: sudo apt-get update && sudo apt install -y libx11-dev libxext-dev libwayland-dev libdecor-0-dev libxkbcommon-dev libglfw3-dev libgles2-mesa-dev libfuse2 gcc-14 mold build-essential libasound2-dev libpulse-dev libopenal-dev libudev-dev libxcursor-dev libxi-dev libxss-dev libxtst-dev
- name: Cache CMake Configuration
uses: actions/cache@v4
env:
uses: actions/cache@v5
env:
cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-configuration
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
with:
path: |
${{github.workspace}}/build
key: ${{ env.cache-name }}-${{ hashFiles('**/CMakeLists.txt', 'cmake/**') }}
restore-keys: |
${{ env.cache-name }}-
- name: Cache CMake Build
uses: hendrikmuhs/ccache-action@v1.2.19
uses: hendrikmuhs/ccache-action@v1.2.21
env:
cache-name: ${{ runner.os }}-sdl-gcc-cache-cmake-build
with:
@ -258,7 +258,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download all artifacts
uses: actions/download-artifact@v5
uses: actions/download-artifact@v8
with:
path: ./artifacts
@ -266,7 +266,7 @@ jobs:
run: |
chmod -R a+x ./artifacts/shadps4-linux-sdl-*
chmod -R a+x ./artifacts/shadps4-macos-sdl-*
- name: Compress individual directories (without parent directory)
run: |
cd ./artifacts
@ -277,7 +277,7 @@ jobs:
(cd "$dir_name" && zip -r "../${dir_name}.zip" .)
fi
done
- name: Get latest release information
id: get_latest_release
env:
@ -351,52 +351,52 @@ jobs:
upload_url="https://uploads.github.com/repos/$REPO/releases/$release_id/assets?name=$filename"
curl -X POST -H "Authorization: token $GITHUB_TOKEN" -H "Content-Type: application/octet-stream" --data-binary @"$file" "$upload_url"
done
- name: Get current pre-release information
env:
GITHUB_TOKEN: ${{ secrets.SHADPS4_TOKEN_REPO }}
run: |
api_url="https://api.github.com/repos/${{ github.repository }}/releases"
# Get all releases (sorted by date)
releases=$(curl -H "Authorization: token $GITHUB_TOKEN" "$api_url")
# Capture the most recent pre-release (assuming the first one is the latest)
current_release=$(echo "$releases" | jq -c '.[] | select(.prerelease == true) | .published_at' | sort -r | head -n 1)
# Remove extra quotes from captured date
current_release=$(echo $current_release | tr -d '"')
# Export the current published_at to be available for the next step
echo "CURRENT_PUBLISHED_AT=$current_release" >> $GITHUB_ENV
- name: Delete old pre-releases and tags
env:
GITHUB_TOKEN: ${{ secrets.SHADPS4_TOKEN_REPO }}
run: |
api_url="https://api.github.com/repos/${{ github.repository }}/releases"
# Get current pre-releases
releases=$(curl -H "Authorization: token $GITHUB_TOKEN" "$api_url")
# Remove extra quotes from captured date
CURRENT_PUBLISHED_AT=$(echo $CURRENT_PUBLISHED_AT | tr -d '"')
# Convert CURRENT_PUBLISHED_AT to a Unix timestamp
current_published_ts=$(date -d "$CURRENT_PUBLISHED_AT" +%s)
# Identify pre-releases
echo "$releases" | jq -c '.[] | select(.prerelease == true)' | while read -r release; do
release_date=$(echo "$release" | jq -r '.published_at')
release_id=$(echo "$release" | jq -r '.id')
release_tag=$(echo "$release" | jq -r '.tag_name')
# Remove extra quotes from captured date
release_date=$(echo $release_date | tr -d '"')
# Convert release_date to a Unix timestamp
release_date_ts=$(date -d "$release_date" +%s)
# Compare timestamps and delete old pre-releases
if [[ "$release_date_ts" -lt "$current_published_ts" ]]; then
echo "Deleting old pre-release: $release_id from $release_date with tag: $release_tag"

View File

@ -202,7 +202,7 @@ execute_process(
# Set Version
set(EMULATOR_VERSION_MAJOR "0")
set(EMULATOR_VERSION_MINOR "14")
set(EMULATOR_VERSION_MINOR "15")
set(EMULATOR_VERSION_PATCH "1")
set_source_files_properties(src/shadps4.rc PROPERTIES COMPILE_DEFINITIONS "EMULATOR_VERSION_MAJOR=${EMULATOR_VERSION_MAJOR};EMULATOR_VERSION_MINOR=${EMULATOR_VERSION_MINOR};EMULATOR_VERSION_PATCH=${EMULATOR_VERSION_PATCH}")

View File

@ -38,7 +38,10 @@
<category translate="no">Game</category>
</categories>
<releases>
<release version="0.14.0" date="2026-02-07">
<release version="0.15.0" date="2026-03-17">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.15.0</url>
</release>
<release version="0.14.0" date="2026-02-07">
<url>https://github.com/shadps4-emu/shadPS4/releases/tag/v.0.14.0</url>
</release>
<release version="0.13.0" date="2025-12-24">

View File

@ -113,6 +113,7 @@ bool PSF::Encode(const std::filesystem::path& filepath) const {
LOG_ERROR(Core, "Failed to write PSF file. Written {} Expected {}", written,
psf_buffer.size());
}
file.Close();
return written == psf_buffer.size();
}

View File

@ -242,7 +242,7 @@ int PS4_SYSV_ABI posix_pthread_create_name_np(PthreadT* thread, const PthreadAtt
new_thread->attr.sched_policy = curthread->attr.sched_policy;
}
static int TidCounter = 1;
static std::atomic<int> TidCounter = 1;
new_thread->tid = ++TidCounter;
if (new_thread->attr.stackaddr_attr == nullptr) {

View File

@ -114,7 +114,13 @@ int PS4_SYSV_ABI sceSslFreeCaCerts(s32 ssl_ctx_id, OrbisSslCaCerts* certs) {
if (certs == nullptr) {
return ORBIS_SSL_ERROR_INVALID_ARGUMENT;
}
delete (certs->certs);
if (certs->certs != nullptr) {
for (s32 data = 0; data < certs->num; data++) {
free(certs->certs[data].ptr);
}
delete (certs->certs);
}
// delete (certs->pool);
return ORBIS_OK;
}
@ -139,7 +145,12 @@ int PS4_SYSV_ABI sceSslGetCaCerts(s32 ssl_ctx_id, OrbisSslCaCerts* certs) {
if (certs == nullptr) {
return ORBIS_SSL_ERROR_INVALID_ARGUMENT;
}
certs->certs = new OrbisSslData{nullptr, 0};
// Allocate a buffer to store dummy data in.
const char* dummy_data = "dummy";
u64 dummy_length = strlen(dummy_data) + 1;
char* data = static_cast<char*>(malloc(dummy_length));
strncpy(data, dummy_data, dummy_length);
certs->certs = new OrbisSslData{data, dummy_length};
certs->num = 1;
certs->pool = nullptr;
return ORBIS_OK;

View File

@ -361,8 +361,10 @@ bool Linker::Resolve(const std::string& name, Loader::SymbolType sym_type, Modul
return_info->virtual_address = AeroLib::GetStub(sr.name.c_str());
return_info->name = "Unknown !!!";
}
LOG_WARNING(Core_Linker, "Linker: Stub resolved {} as {} (lib: {}, mod: {})", sr.name,
return_info->name, library->name, module->name);
if (library->name != "libc" && library->name != "libSceFios2") {
LOG_WARNING(Core_Linker, "Linker: Stub resolved {} as {} (lib: {}, mod: {})", sr.name,
return_info->name, library->name, module->name);
}
return false;
}

View File

@ -220,20 +220,33 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
const Id color_type = texture.data_types->Get(4);
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Sample, ms);
Id texel;
if (!texture.is_storage) {
const Id image = ctx.OpLoad(texture.image_type, texture.id);
operands.Add(spv::ImageOperandsMask::Lod, lod);
texel = ctx.OpImageFetch(color_type, image, coords, operands.mask, operands.operands);
} else {
Id image_ptr = texture.id;
if (ctx.profile.supports_image_load_store_lod) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
} else if (Sirit::ValidId(lod)) {
LOG_WARNING(Render, "Image read with LOD not supported by driver");
#if 1
// It's confusing what interactions will cause this code path so leave it as
// unreachable until a case is found.
// Normally IMAGE_LOAD_MIP should translate -> OpImageFetch
UNREACHABLE_MSG("Unsupported ImageRead with Lod");
#else
LOG_WARNING(Render, "Fallback for ImageRead with LOD");
ASSERT(texture.mip_fallback_mode == MipStorageFallbackMode::DynamicIndex);
const Id single_image_ptr_type =
ctx.TypePointer(spv::StorageClass::UniformConstant, texture.image_type);
image_ptr = ctx.OpAccessChain(single_image_ptr_type, image_ptr, std::array{lod});
#endif
}
const Id image = ctx.OpLoad(texture.image_type, image_ptr);
texel = ctx.OpImageRead(color_type, image, coords, operands.mask, operands.operands);
}
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texel) : texel;
@ -242,15 +255,20 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms,
Id color) {
const auto& texture = ctx.images[handle & 0xFFFF];
const Id image = ctx.OpLoad(texture.image_type, texture.id);
Id image_ptr = texture.id;
const Id color_type = texture.data_types->Get(4);
ImageOperands operands;
operands.Add(spv::ImageOperandsMask::Sample, ms);
if (ctx.profile.supports_image_load_store_lod) {
operands.Add(spv::ImageOperandsMask::Lod, lod);
} else if (Sirit::ValidId(lod)) {
LOG_WARNING(Render, "Image write with LOD not supported by driver");
LOG_WARNING(Render, "Fallback for ImageWrite with LOD");
ASSERT(texture.mip_fallback_mode == MipStorageFallbackMode::DynamicIndex);
const Id single_image_ptr_type =
ctx.TypePointer(spv::StorageClass::UniformConstant, texture.image_type);
image_ptr = ctx.OpAccessChain(single_image_ptr_type, image_ptr, std::array{lod});
}
const Id image = ctx.OpLoad(texture.image_type, image_ptr);
const Id texel = texture.is_integer ? ctx.OpBitcast(color_type, color) : color;
ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands);
}

View File

@ -961,23 +961,33 @@ void EmitContext::DefineImagesAndSamplers() {
const auto nfmt = sharp.GetNumberFmt();
const bool is_integer = AmdGpu::IsInteger(nfmt);
const bool is_storage = image_desc.is_written;
const MipStorageFallbackMode mip_fallback_mode = image_desc.mip_fallback_mode;
const VectorIds& data_types = GetAttributeType(*this, nfmt);
const Id sampled_type = data_types[1];
const Id image_type{ImageType(*this, image_desc, sampled_type)};
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
const u32 num_bindings = image_desc.NumBindings(info);
Id pointee_type = image_type;
if (mip_fallback_mode == MipStorageFallbackMode::DynamicIndex) {
pointee_type = TypeArray(pointee_type, ConstU32(num_bindings));
}
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, pointee_type)};
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
Decorate(id, spv::Decoration::Binding, binding.unified++);
Decorate(id, spv::Decoration::Binding, binding.unified);
binding.unified += num_bindings;
Decorate(id, spv::Decoration::DescriptorSet, 0U);
// TODO better naming for resources (flattened sharp_idx is not informative)
Name(id, fmt::format("{}_{}{}", stage, "img", image_desc.sharp_idx));
images.push_back({
.data_types = &data_types,
.id = id,
.sampled_type = is_storage ? sampled_type : TypeSampledImage(image_type),
.pointer_type = pointer_type,
.image_type = image_type,
.view_type = sharp.GetViewType(image_desc.is_array),
.is_integer = is_integer,
.is_storage = is_storage,
.mip_fallback_mode = mip_fallback_mode,
});
interfaces.push_back(id);
}

View File

@ -293,11 +293,11 @@ public:
const VectorIds* data_types;
Id id;
Id sampled_type;
Id pointer_type;
Id image_type;
AmdGpu::ImageType view_type;
bool is_integer = false;
bool is_storage = false;
MipStorageFallbackMode mip_fallback_mode{};
};
enum class PointerType : u32 {

View File

@ -352,10 +352,10 @@ T Translator::GetSrc(const InstOperand& operand) {
}
} else {
if (operand.input_modifier.abs) {
value = ir.IAbs(value);
value = ir.BitwiseAnd(value, ir.Imm32(0x7FFFFFFFu));
}
if (operand.input_modifier.neg) {
value = ir.INeg(value);
value = ir.BitwiseXor(value, ir.Imm32(0x80000000u));
}
}
return value;
@ -453,6 +453,23 @@ T Translator::GetSrc64(const InstOperand& operand) {
if (operand.input_modifier.neg) {
value = ir.FPNeg(value);
}
} else {
// GCN VOP3 abs/neg modifier bits operate on the sign bit (bit 63 for
// 64-bit values). Unpack, modify the high dword's bit 31, repack.
if (operand.input_modifier.abs) {
const auto unpacked = ir.UnpackUint2x32(value);
const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)};
const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)};
const auto hi_abs = ir.BitwiseAnd(hi, ir.Imm32(0x7FFFFFFFu));
value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_abs));
}
if (operand.input_modifier.neg) {
const auto unpacked = ir.UnpackUint2x32(value);
const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)};
const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)};
const auto hi_neg = ir.BitwiseXor(hi, ir.Imm32(0x80000000u));
value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_neg));
}
}
return value;
}

View File

@ -153,6 +153,7 @@ public:
void V_SUB_F32(const GcnInst& inst);
void V_SUBREV_F32(const GcnInst& inst);
void V_MUL_F32(const GcnInst& inst);
void V_MUL_LEGACY_F32(const GcnInst& inst);
void V_MUL_I32_I24(const GcnInst& inst, bool is_signed);
void V_MIN_F32(const GcnInst& inst, bool is_legacy = false);
void V_MAX_F32(const GcnInst& inst, bool is_legacy = false);

View File

@ -25,7 +25,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
case Opcode::V_MAC_LEGACY_F32:
return V_MAC_F32(inst);
case Opcode::V_MUL_LEGACY_F32:
return V_MUL_F32(inst);
return V_MUL_LEGACY_F32(inst);
case Opcode::V_MUL_F32:
return V_MUL_F32(inst);
case Opcode::V_MUL_I32_I24:
@ -493,6 +493,19 @@ void Translator::V_MUL_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMul(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1])));
}
void Translator::V_MUL_LEGACY_F32(const GcnInst& inst) {
// GCN V_MUL_LEGACY_F32: if either source is zero, the result is +0.0
// regardless of the other operand (even if NaN or Inf).
// Standard IEEE multiply would produce NaN for 0 * Inf.
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
const IR::F32 zero{ir.Imm32(0.0f)};
const IR::U1 src0_zero{ir.FPEqual(src0, zero)};
const IR::U1 src1_zero{ir.FPEqual(src1, zero)};
const IR::U1 either_zero{ir.LogicalOr(src0_zero, src1_zero)};
SetDst(inst.dst[0], IR::F32{ir.Select(either_zero, zero, ir.FPMul(src0, src1))});
}
void Translator::V_MUL_I32_I24(const GcnInst& inst, bool is_signed) {
const IR::U32 src0{
ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), is_signed)};

View File

@ -19,7 +19,7 @@ void DeadCodeEliminationPass(IR::Program& program);
void ConstantPropagationPass(IR::BlockList& program);
void FlattenExtendedUserdataPass(IR::Program& program);
void ReadLaneEliminationPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program);
void ResourceTrackingPass(IR::Program& program, const Profile& profile);
void CollectShaderInfoPass(IR::Program& program, const Profile& profile);
void LowerBufferFormatToRaw(IR::Program& program);
void LowerFp64ToFp32(IR::Program& program);

View File

@ -9,6 +9,7 @@
#include "shader_recompiler/ir/operand_helper.h"
#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/profile.h"
#include "video_core/amdgpu/resource.h"
namespace Shader::Optimization {
@ -255,7 +256,9 @@ public:
u32 Add(const ImageResource& desc) {
const u32 index{Add(image_resources, desc, [&desc](const auto& existing) {
return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array;
return desc.sharp_idx == existing.sharp_idx && desc.is_array == existing.is_array &&
desc.mip_fallback_mode == existing.mip_fallback_mode &&
desc.constant_mip_index == existing.constant_mip_index;
})};
auto& image = image_resources[index];
image.is_atomic |= desc.is_atomic;
@ -529,14 +532,21 @@ void PatchBufferSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
inst.SetArg(0, ir.Imm32(buffer_binding));
}
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors,
const Profile& profile) {
// Read image sharp.
const auto inst_info = inst.Flags<IR::TextureInstInfo>();
const IR::Inst* image_handle = inst.Arg(0).InstRecursive();
const auto tsharp = TrackSharp(image_handle, block, inst_info.pc);
const bool is_atomic = IsImageAtomicInstruction(inst);
const bool is_written = inst.GetOpcode() == IR::Opcode::ImageWrite || is_atomic;
const ImageResource image_res = {
const bool is_storage =
inst.GetOpcode() == IR::Opcode::ImageRead || inst.GetOpcode() == IR::Opcode::ImageWrite;
// ImageRead with !is_written gets emitted as OpImageFetch with LOD operand, doesn't
// need fallback (TODO is this 100% true?)
const bool needs_mip_storage_fallback =
inst_info.has_lod && is_written && !profile.supports_image_load_store_lod;
ImageResource image_res = {
.sharp_idx = tsharp,
.is_depth = bool(inst_info.is_depth),
.is_atomic = is_atomic,
@ -544,9 +554,42 @@ void PatchImageSharp(IR::Block& block, IR::Inst& inst, Info& info, Descriptors&
.is_written = is_written,
.is_r128 = bool(inst_info.is_r128),
};
auto image = image_res.GetSharp(info);
ASSERT(image.GetType() != AmdGpu::ImageType::Invalid);
if (needs_mip_storage_fallback) {
// If the mip level to IMAGE_(LOAD/STORE)_MIP is a constant, set up ImageResource
// so that we will only bind a single level.
// If index is dynamic, we will bind levels as an array
const auto view_type = image.GetViewType(image_res.is_array);
IR::Inst* body = inst.Arg(1).InstRecursive();
const auto lod_arg = [&] -> IR::Value {
switch (view_type) {
case AmdGpu::ImageType::Color1D: // x, [lod]
return body->Arg(1);
case AmdGpu::ImageType::Color1DArray: // x, slice, [lod]
case AmdGpu::ImageType::Color2D: // x, y, [lod]
return body->Arg(2);
case AmdGpu::ImageType::Color2DArray: // x, y, slice, [lod]
case AmdGpu::ImageType::Color3D: // x, y, z, [lod]
return body->Arg(3);
case AmdGpu::ImageType::Color2DMsaa:
case AmdGpu::ImageType::Color2DMsaaArray:
default:
UNREACHABLE_MSG("Invalid image type {}", view_type);
}
}();
if (lod_arg.IsImmediate()) {
image_res.mip_fallback_mode = MipStorageFallbackMode::ConstantIndex;
image_res.constant_mip_index = lod_arg.U32();
} else {
image_res.mip_fallback_mode = MipStorageFallbackMode::DynamicIndex;
}
}
// Patch image instruction if image is FMask.
if (AmdGpu::IsFmask(image.GetDataFmt())) {
ASSERT_MSG(!is_written, "FMask storage instructions are not supported");
@ -1080,7 +1123,11 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
const auto has_ms = view_type == AmdGpu::ImageType::Color2DMsaa ||
view_type == AmdGpu::ImageType::Color2DMsaaArray;
ASSERT(!inst_info.has_lod || !has_ms);
const auto lod = inst_info.has_lod ? IR::U32{arg} : IR::U32{};
// If we are binding a single mip level as fallback, drop the argument
const auto lod =
(inst_info.has_lod && image_res.mip_fallback_mode != MipStorageFallbackMode::ConstantIndex)
? IR::U32{arg}
: IR::U32{};
const auto ms = has_ms ? IR::U32{arg} : IR::U32{};
const auto is_storage = image_res.is_written;
@ -1111,7 +1158,7 @@ void PatchImageArgs(IR::Block& block, IR::Inst& inst, Info& info) {
}
}
void ResourceTrackingPass(IR::Program& program) {
void ResourceTrackingPass(IR::Program& program, const Profile& profile) {
// Iterate resource instructions and patch them after finding the sharp.
auto& info = program.info;
@ -1122,7 +1169,7 @@ void ResourceTrackingPass(IR::Program& program) {
if (IsBufferInstruction(inst)) {
PatchBufferSharp(*block, inst, info, descriptors);
} else if (IsImageInstruction(inst)) {
PatchImageSharp(*block, inst, info, descriptors);
PatchImageSharp(*block, inst, info, descriptors, profile);
}
}
}

View File

@ -80,7 +80,7 @@ IR::Program TranslateProgram(const std::span<const u32>& code, Pools& pools, Inf
Shader::Optimization::RingAccessElimination(program, runtime_info);
Shader::Optimization::ReadLaneEliminationPass(program);
Shader::Optimization::FlattenExtendedUserdataPass(program);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::ResourceTrackingPass(program, profile);
Shader::Optimization::LowerBufferFormatToRaw(program);
Shader::Optimization::SharedMemorySimplifyPass(program, profile);
Shader::Optimization::SharedMemoryToStoragePass(program, runtime_info, profile);

View File

@ -71,6 +71,8 @@ struct BufferResource {
};
using BufferResourceList = boost::container::static_vector<BufferResource, NUM_BUFFERS>;
enum class MipStorageFallbackMode : u32 { None, DynamicIndex, ConstantIndex };
struct ImageResource {
u32 sharp_idx;
bool is_depth{};
@ -78,6 +80,8 @@ struct ImageResource {
bool is_array{};
bool is_written{};
bool is_r128{};
MipStorageFallbackMode mip_fallback_mode{};
u32 constant_mip_index{};
constexpr AmdGpu::Image GetSharp(const auto& info) const noexcept {
AmdGpu::Image image{};
@ -102,6 +106,13 @@ struct ImageResource {
}
return image;
}
/// Returns how many consecutive descriptor bindings this image resource uses.
///
/// In the DynamicIndex mip fallback mode the count is the number of mip levels
/// spanned by the sharp (last_level - base_level + 1); in every other mode a
/// single binding is used.
u32 NumBindings(const auto& info) const {
    const AmdGpu::Image tsharp = GetSharp(info);
    if (mip_fallback_mode != MipStorageFallbackMode::DynamicIndex) {
        return 1;
    }
    return tsharp.last_level - tsharp.base_level + 1;
}
};
using ImageResourceList = boost::container::static_vector<ImageResource, NUM_IMAGES>;

View File

@ -52,6 +52,8 @@ struct ImageSpecialization {
bool is_srgb = false;
AmdGpu::CompMapping dst_select{};
AmdGpu::NumberConversion num_conversion{};
// FIXME any pipeline cache changes needed?
u32 num_bindings = 0;
bool operator==(const ImageSpecialization&) const = default;
};
@ -133,7 +135,7 @@ struct StageSpecialization {
}
});
ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
[&](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetViewType(desc.is_array);
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
spec.is_storage = desc.is_written;
@ -144,6 +146,7 @@ struct StageSpecialization {
spec.is_srgb = sharp.GetNumberFmt() == AmdGpu::NumberFormat::Srgb;
}
spec.num_conversion = sharp.GetNumberConversion();
spec.num_bindings = desc.NumBindings(*info);
});
ForEachSharp(binding, fmasks, info->fmasks,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {

View File

@ -48,13 +48,15 @@ ComputePipeline::ComputePipeline(const Instance& instance, Scheduler& scheduler,
});
}
for (const auto& image : info->images) {
const u32 num_bindings = image.NumBindings(*info);
bindings.push_back({
.binding = binding++,
.binding = binding,
.descriptorType = image.is_written ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.descriptorCount = num_bindings,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
binding += num_bindings;
}
for (const auto& sampler : info->samplers) {
bindings.push_back({

View File

@ -457,13 +457,15 @@ void GraphicsPipeline::BuildDescSetLayout(bool preloading) {
});
}
for (const auto& image : stage->images) {
const u32 num_bindings = image.NumBindings(*stage);
bindings.push_back({
.binding = binding++,
.binding = binding,
.descriptorType = image.is_written ? vk::DescriptorType::eStorageImage
: vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.descriptorCount = num_bindings,
.stageFlags = stage_bit,
});
binding += num_bindings;
}
for (const auto& sampler : stage->samplers) {
bindings.push_back({

View File

@ -662,6 +662,13 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindings& binding) {
image_bindings.clear();
const u32 first_image_idx = image_infos.size();
// For loading/storing to explicit mip levels, when no native instruction support, bind an array
// of descriptors consecutively, 1 for each mip level. The shader can index this with LOD
// operand.
// This array holds the number of bindings consumed by each consecutive descriptor array.
// This is currently always 1 for anything other than mip fallback arrays.
boost::container::small_vector<u32, 8> image_descriptor_array_sizes;
for (const auto& image_desc : stage.images) {
const auto tsharp = image_desc.GetSharp(stage);
@ -671,25 +678,43 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
if (tsharp.GetDataFmt() == AmdGpu::DataFormat::FormatInvalid) {
image_bindings.emplace_back(std::piecewise_construct, std::tuple{}, std::tuple{});
image_descriptor_array_sizes.push_back(1);
continue;
}
auto& [image_id, desc] = image_bindings.emplace_back(std::piecewise_construct, std::tuple{},
std::tuple{tsharp, image_desc});
image_id = texture_cache.FindImage(desc);
auto* image = &texture_cache.GetImage(image_id);
if (image->depth_id) {
// If this image has an associated depth image, it's a stencil attachment.
// Redirect the access to the actual depth-stencil buffer.
image_id = image->depth_id;
image = &texture_cache.GetImage(image_id);
const Shader::MipStorageFallbackMode mip_fallback_mode = image_desc.mip_fallback_mode;
const u32 num_bindings = image_desc.NumBindings(stage);
for (auto i = 0; i < num_bindings; i++) {
auto& [image_id, desc] = image_bindings.emplace_back(
std::piecewise_construct, std::tuple{}, std::tuple{tsharp, image_desc});
if (mip_fallback_mode == Shader::MipStorageFallbackMode::ConstantIndex) {
ASSERT(num_bindings == 1);
desc.view_info.range.base.level += image_desc.constant_mip_index;
desc.view_info.range.extent.levels = 1;
} else if (mip_fallback_mode == Shader::MipStorageFallbackMode::DynamicIndex) {
desc.view_info.range.base.level += i;
desc.view_info.range.extent.levels = 1;
}
image_id = texture_cache.FindImage(desc);
auto* image = &texture_cache.GetImage(image_id);
if (image->depth_id) {
// If this image has an associated depth image, it's a stencil attachment.
// Redirect the access to the actual depth-stencil buffer.
image_id = image->depth_id;
image = &texture_cache.GetImage(image_id);
}
if (image->binding.is_bound) {
// The image is already bound. If it is about to be used as storage, we
// need to force the general layout on it.
image->binding.force_general |= image_desc.is_written;
}
image->binding.is_bound = 1u;
}
if (image->binding.is_bound) {
// The image is already bound. In case if it is about to be used as storage we need
// to force general layout on it.
image->binding.force_general |= image_desc.is_written;
}
image->binding.is_bound = 1u;
image_descriptor_array_sizes.push_back(num_bindings);
}
// Second pass to re-bind images that were updated after binding
@ -749,16 +774,26 @@ void Rasterizer::BindTextures(const Shader::Info& stage, Shader::Backend::Bindin
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view,
image.backing->state.layout);
}
}
u32 image_info_idx = first_image_idx;
u32 image_binding_idx = 0;
for (u32 array_size : image_descriptor_array_sizes) {
const auto& [_, desc] = image_bindings[image_binding_idx];
const bool is_storage = desc.type == VideoCore::TextureCache::BindingType::Storage;
set_writes.push_back({
.dstSet = VK_NULL_HANDLE,
.dstBinding = binding.unified++,
.dstBinding = binding.unified,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorCount = array_size,
.descriptorType =
is_storage ? vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage,
.pImageInfo = &image_infos.back(),
.pImageInfo = &image_infos[image_info_idx],
});
image_info_idx += array_size;
image_binding_idx += array_size;
binding.unified += array_size;
}
for (const auto& sampler : stage.samplers) {