diff --git a/.ci/build-mac.sh b/.ci/build-mac.sh index 8e33a09d72..9e16fce4ec 100755 --- a/.ci/build-mac.sh +++ b/.ci/build-mac.sh @@ -13,6 +13,7 @@ export HOMEBREW_NO_AUTO_UPDATE=1 export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 export HOMEBREW_NO_ENV_HINTS=1 export HOMEBREW_NO_INSTALL_CLEANUP=1 +brew update brew install -f --overwrite --quiet ccache "llvm@$LLVM_COMPILER_VER" brew link -f --overwrite --quiet "llvm@$LLVM_COMPILER_VER" if [ "$AARCH64" -eq 1 ]; then diff --git a/.ci/setup-windows.sh b/.ci/setup-windows.sh index cee6d624de..d8016d8c13 100755 --- a/.ci/setup-windows.sh +++ b/.ci/setup-windows.sh @@ -6,7 +6,7 @@ QT_HOST="http://qt.mirror.constant.com/" QT_URL_VER=$(echo "$QT_VER" | sed "s/\.//g") QT_VER_MSVC_UP=$(echo "${QT_VER_MSVC}" | tr '[:lower:]' '[:upper:]') -QT_PREFIX="online/qtsdkrepository/windows_x86/desktop/qt${QT_VER_MAIN}_${QT_URL_VER}/qt${QT_VER_MAIN}_${QT_URL_VER}/qt.qt${QT_VER_MAIN}.${QT_URL_VER}." +QT_PREFIX="online/qtsdkrepository/windows_x86/desktop/qt${QT_VER_MAIN}_${QT_URL_VER}/qt${QT_VER_MAIN}_${QT_URL_VER}_${QT_VER_MSVC}_64/qt.qt${QT_VER_MAIN}.${QT_URL_VER}." 
QT_PREFIX_2="win64_${QT_VER_MSVC}_64/${QT_VER}-0-${QT_DATE}" QT_SUFFIX="-Windows-Windows_11_24H2-${QT_VER_MSVC_UP}-Windows-Windows_11_24H2-X86_64.7z" QT_BASE_URL="${QT_HOST}${QT_PREFIX}${QT_PREFIX_2}qtbase${QT_SUFFIX}" diff --git a/.github/workflows/rpcs3.yml b/.github/workflows/rpcs3.yml index 740e844d04..f474643e09 100644 --- a/.github/workflows/rpcs3.yml +++ b/.github/workflows/rpcs3.yml @@ -30,23 +30,23 @@ jobs: matrix: include: - os: ubuntu-24.04 - docker_img: "rpcs3/rpcs3-ci-jammy:1.9" + docker_img: "rpcs3/rpcs3-ci-jammy:1.10" build_sh: "/rpcs3/.ci/build-linux.sh" compiler: clang UPLOAD_COMMIT_HASH: d812f1254a1157c80fd402f94446310560f54e5f UPLOAD_REPO_FULL_NAME: "rpcs3/rpcs3-binaries-linux" - os: ubuntu-24.04 - docker_img: "rpcs3/rpcs3-ci-jammy:1.9" + docker_img: "rpcs3/rpcs3-ci-jammy:1.10" build_sh: "/rpcs3/.ci/build-linux.sh" compiler: gcc - os: ubuntu-24.04-arm - docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.9" + docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.10" build_sh: "/rpcs3/.ci/build-linux-aarch64.sh" compiler: clang UPLOAD_COMMIT_HASH: a1d35836e8d45bfc6f63c26f0a3e5d46ef622fe1 UPLOAD_REPO_FULL_NAME: "rpcs3/rpcs3-binaries-linux-arm64" - os: ubuntu-24.04-arm - docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.9" + docker_img: "rpcs3/rpcs3-ci-jammy-aarch64:1.10" build_sh: "/rpcs3/.ci/build-linux-aarch64.sh" compiler: gcc name: RPCS3 Linux ${{ matrix.os }} ${{ matrix.compiler }} @@ -134,7 +134,7 @@ jobs: runs-on: macos-14 env: CCACHE_DIR: /tmp/ccache_dir - QT_VER: '6.10.2' + QT_VER: '6.11.0' QT_VER_MAIN: '6' LLVM_COMPILER_VER: '21' RELEASE_MESSAGE: ../GitHubReleaseMessage.txt @@ -213,9 +213,9 @@ jobs: env: COMPILER: msvc QT_VER_MAIN: '6' - QT_VER: '6.10.2' + QT_VER: '6.11.0' QT_VER_MSVC: 'msvc2022' - QT_DATE: '202601261212' + QT_DATE: '202603180535' LLVM_VER: '19.1.7' VULKAN_VER: '1.3.268.0' VULKAN_SDK_SHA: '8459ef49bd06b697115ddd3d97c9aec729e849cd775f5be70897718a9b3b9db5' diff --git a/.gitignore b/.gitignore index 4688d5fa52..a3911be3a0 100644 --- a/.gitignore 
+++ b/.gitignore @@ -69,6 +69,9 @@ CMakeSettings.json *PVS-Studio* PVS/* +# Zed Editor files +.zed/* + # Ignore other system generated files x64/* rpcs3/x64/* diff --git a/3rdparty/FAudio b/3rdparty/FAudio index dc034fc671..0372329dbb 160000 --- a/3rdparty/FAudio +++ b/3rdparty/FAudio @@ -1 +1 @@ -Subproject commit dc034fc671b07bbd14e8410d5dd6be6da38fdf6d +Subproject commit 0372329dbb56e7814d0dea7b6eafa7a613bd8042 diff --git a/3rdparty/SoundTouch/soundtouch b/3rdparty/SoundTouch/soundtouch index 3982730833..a0fba77b6f 160000 --- a/3rdparty/SoundTouch/soundtouch +++ b/3rdparty/SoundTouch/soundtouch @@ -1 +1 @@ -Subproject commit 3982730833b6daefe77dcfb32b5c282851640c17 +Subproject commit a0fba77b6f9cfbdb71f8bbec58b6ac4e5e3b1097 diff --git a/3rdparty/curl/CMakeLists.txt b/3rdparty/curl/CMakeLists.txt index b20763af65..2b725169f9 100644 --- a/3rdparty/curl/CMakeLists.txt +++ b/3rdparty/curl/CMakeLists.txt @@ -18,9 +18,6 @@ else() set(USE_LIBIDN2 OFF CACHE BOOL "Use libidn2 for IDN support") # Disabled because MacOS CI doesn't work otherwise set(CURL_CA_PATH "none" CACHE STRING "Location of default CA path. Set 'none' to disable or 'auto' for auto-detection. 
Defaults to 'auto'.") option(CURL_DISABLE_INSTALL "Disable installation targets" ON) - if(USE_MSVC_STATIC_CRT) - set(CURL_STATIC_CRT ON CACHE BOOL "Use static crt to build curl") - endif() if(WIN32) set(ENABLE_UNICODE ON CACHE BOOL "enable Unicode") endif() diff --git a/3rdparty/libpng/libpng b/3rdparty/libpng/libpng index c3e304954a..d5515b5b8b 160000 --- a/3rdparty/libpng/libpng +++ b/3rdparty/libpng/libpng @@ -1 +1 @@ -Subproject commit c3e304954a9cfd154bc0dfbfea2b01cd61d6546d +Subproject commit d5515b5b8be3901aac04e5bd8bd5c89f287bcd33 diff --git a/3rdparty/libsdl-org/SDL b/3rdparty/libsdl-org/SDL index 683181b47c..5848e584a1 160000 --- a/3rdparty/libsdl-org/SDL +++ b/3rdparty/libsdl-org/SDL @@ -1 +1 @@ -Subproject commit 683181b47cfabd293e3ea409f838915b8297a4fd +Subproject commit 5848e584a1b606de26e3dbd1c7e4ecbc34f807a6 diff --git a/3rdparty/protobuf/CMakeLists.txt b/3rdparty/protobuf/CMakeLists.txt index f8e6bed59b..e1f82f146f 100644 --- a/3rdparty/protobuf/CMakeLists.txt +++ b/3rdparty/protobuf/CMakeLists.txt @@ -20,6 +20,8 @@ else() option(protobuf_DISABLE_RTTI "Remove runtime type information in the binaries" OFF) option(protobuf_FORCE_FETCH_DEPENDENCIES "Force all dependencies to be downloaded from GitHub. Local installations will be ignored." OFF) option(protobuf_LOCAL_DEPENDENCIES_ONLY "Prevent downloading any dependencies from GitHub. If this option is set, the dependency must be available locally as an installed package." 
OFF) + option(protobuf_BUILD_SHARED_LIBS "Build Shared Libraries" OFF) + option(protobuf_MSVC_STATIC_RUNTIME "Link static runtime libraries" OFF) add_subdirectory(protobuf EXCLUDE_FROM_ALL) target_include_directories(3rdparty_protobuf SYSTEM INTERFACE protobuf/src) diff --git a/3rdparty/wolfssl/wolfssl b/3rdparty/wolfssl/wolfssl index b077c81eb6..922d04b356 160000 --- a/3rdparty/wolfssl/wolfssl +++ b/3rdparty/wolfssl/wolfssl @@ -1 +1 @@ -Subproject commit b077c81eb635392e694ccedbab8b644297ec0285 +Subproject commit 922d04b3568c6428a9fb905ddee3ef5a68db3108 diff --git a/3rdparty/zlib/CMakeLists.txt b/3rdparty/zlib/CMakeLists.txt index 55d7353acf..47645d290c 100644 --- a/3rdparty/zlib/CMakeLists.txt +++ b/3rdparty/zlib/CMakeLists.txt @@ -6,7 +6,8 @@ if (USE_SYSTEM_ZLIB) target_link_libraries(3rdparty_zlib INTERFACE ZLIB::ZLIB) target_compile_definitions(3rdparty_zlib INTERFACE -DZLIB_CONST=1) else() - option(ZLIB_BUILD_EXAMPLES "Enable Zlib Examples" OFF) + option(ZLIB_BUILD_TESTING "Enable Zlib Examples as tests" OFF) + option(ZLIB_BUILD_SHARED "Enable building zlib shared library" OFF) message(STATUS "RPCS3: Using builtin ZLIB") set(SKIP_INSTALL_ALL ON) add_subdirectory(zlib EXCLUDE_FROM_ALL) diff --git a/BUILDING.md b/BUILDING.md index 597621e810..c1774908fd 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -20,26 +20,26 @@ The following tools are required to build RPCS3 on Windows 10 or later: with standalone **CMake** tool. 
- [Python 3.6+](https://www.python.org/downloads/) (add to PATH) -- [Qt 6.10.2](https://www.qt.io/download-qt-installer) In case you can't download from the official installer, you can use [Another Qt installer](https://github.com/miurahr/aqtinstall) (In that case you will need to manually add the "qtmultimedia" module when installing Qt) +- [Qt 6.11.0](https://www.qt.io/download-qt-installer) In case you can't download from the official installer, you can use [Another Qt installer](https://github.com/miurahr/aqtinstall) (In that case you will need to manually add the "qtmultimedia" module when installing Qt) - [Vulkan SDK 1.3.268.0](https://vulkan.lunarg.com/sdk/home) (see "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/windows/getting_started.html)) for now future SDKs don't work. You need precisely 1.3.268.0. The `sln` solution available only on **Visual Studio** is the preferred building solution. It easily allows to build the **RPCS3** application in `Release` and `Debug` mode. In order to build **RPCS3** with the `sln` solution (with **Visual Studio**), **Qt** libs need to be detected. To detect the libs: -- add and set the `QTDIR` environment variable, e.g. `\6.10.2\msvc2022_64\` +- add and set the `QTDIR` environment variable, e.g. `\6.11.0\msvc2022_64\` - or use the [Visual Studio Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.QtVisualStudioTools2022) **NOTE:** If you have issues with the **Visual Studio Qt Plugin**, you may want to uninstall it and install the [Legacy Qt Plugin](https://marketplace.visualstudio.com/items?itemName=TheQtCompany.LEGACYQtVisualStudioTools2022) instead. In order to build **RPCS3** with the `CMake` solution (with both **Visual Studio** and standalone **CMake** tool): -- add and set the `Qt6_ROOT` environment variable to the **Qt** libs path, e.g. `\6.10.2\msvc2022_64\` +- add and set the `Qt6_ROOT` environment variable to the **Qt** libs path, e.g. 
`\6.11.0\msvc2022_64\` ### Linux These are the essentials tools to build RPCS3 on Linux. Some of them can be installed through your favorite package manager: - Clang 17+ or GCC 13+ - [CMake 3.28.0+](https://www.cmake.org/download/) -- [Qt 6.10.2](https://www.qt.io/download-qt-installer) +- [Qt 6.11.0](https://www.qt.io/download-qt-installer) - [Vulkan SDK 1.3.268.0](https://vulkan.lunarg.com/sdk/home) (See "Install the SDK" [here](https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html)) for now future SDKs don't work. You need precisely 1.3.268.0. - [SDL3](https://github.com/libsdl-org/SDL/releases) (for the FAudio backend) @@ -123,7 +123,7 @@ Start **Visual Studio**, click on `Open a project or solution` and select the `r ##### Configuring the Qt Plugin (if used) 1) go to `Extensions->Qt VS Tools->Qt Versions` -2) add the path to your Qt installation with compiler e.g. `\6.10.2\msvc2022_64`, version will fill in automatically +2) add the path to your Qt installation with compiler e.g. `\6.11.0\msvc2022_64`, version will fill in automatically 3) go to `Extensions->Qt VS Tools->Options->Legacy Project Format`. (Only available in the **Legacy Qt Plugin**) 4) set `Build: Run pre-build setup` to `true`. (Only available in the **Legacy Qt Plugin**) diff --git a/CMakeLists.txt b/CMakeLists.txt index e5b77091cb..217c40f341 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,50 +86,7 @@ if(CMAKE_BUILD_TYPE MATCHES "Debug" AND NOT MSVC) endif() if(MSVC) - option(USE_MSVC_STATIC_CRT "Use static MSVC C runtime" OFF) - - # TODO(cjj19970505@live.cn) - # DiscordRPC binary in 3rdparty is compiled /MT - # So theoretically we should enable DiscordRPC in Release and static CRT build - # since we might encounter some rumtime issues when more than one CRT version are presented. 
- # https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=msvc-160#what-problems-exist-if-an-application-uses-more-than-one-crt-version - # Add other DiscordRPC binaries(compiled with /MTd, /MD, /MDd) or compile it from source may address this issue. - if(NOT IS_MULTI_CONFIG) - if(NOT(CMAKE_BUILD_TYPE MATCHES "Release" AND USE_MSVC_STATIC_CRT)) - set(USE_DISCORD_RPC OFF CACHE BOOL "Discord RPC is only available in Release and static CRT build." FORCE) - endif() - endif() - - if(USE_MSVC_STATIC_CRT) - set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") - else() - # though doc ( https://cmake.org/cmake/help/latest/variable/CMAKE_MSVC_RUNTIME_LIBRARY.html ) - # says if that property is not set then CMake uses the default value MultiThreaded$<$:Debug>DLL - # to select a MSVC runtime library. - # But yaml-cpp set /MT(d) if CMAKE_MSVC_RUNTIME_LIBRARY is undefined - # So we have to define it explicitly - set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>DLL") - endif() - - # TODO(cjj19970505@live.cn) - # offical QT uses dynamic CRT. - # When building our lib with static CRT and debug build type - # and linking with Qt with dynamic CRT and debug build, - # error is encountered in runtime (which is expected). - # But building our lib with static CRT and release build type, - # and linking with Qt with dynamic CRT and release build seems to be working, - # which is the same config with VS solution. - # (though technically it might still have some hidden errors). - # So we allow static CRT in both relase and debug build, but prompt warning in debug build. 
- # For more info: - # https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=msvc-160#what-problems-exist-if-an-application-uses-more-than-one-crt-version - # https://wiki.qt.io/Technical_FAQ#Why_does_a_statically_built_Qt_use_the_dynamic_Visual_Studio_runtime_libraries_.3F_Do_I_need_to_deploy_those_with_my_application_.3F - if(USE_MSVC_STATIC_CRT) - if(IS_MULTI_CONFIG OR CMAKE_BUILD_TYPE MATCHES "Debug") - message(AUTHOR_WARNING "Debug build currently can not work with static CRT.") - endif() - endif() - add_compile_options(/MP) + add_compile_options("$<$:/MP>") endif() if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8) @@ -142,7 +99,7 @@ if(APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64") endif() if(MSVC) - add_compile_options(/wd4530 /utf-8) # C++ exception handler used, but unwind semantics are not enabled + add_compile_options("$<$:/wd4530;/utf-8>") # C++ exception handler used, but unwind semantics are not enabled endif() add_subdirectory(3rdparty) @@ -161,10 +118,6 @@ if (NOT FOUND_LTO EQUAL -1) message(FATAL_ERROR "RPCS3 doesn't support building with LTO, use -DDISABLE_LTO=TRUE to force-disable it") endif() -if(NOT WIN32) - add_compile_options(-pthread) -endif() - ## Look for Gamemode if its installed on Linux if(LINUX) ## User chooses whether to Enable GameMode features or not diff --git a/Utilities/Config.h b/Utilities/Config.h index ca9af028a5..4c79cbf31c 100644 --- a/Utilities/Config.h +++ b/Utilities/Config.h @@ -393,7 +393,7 @@ namespace cfg void set(const s64& value) { - ensure(value >= Min && value <= Max); + if (value < Min || value > Max) fmt::throw_exception("'%s': value %d out of bounds (min=%d, max=%d)", m_name, value, Min, Max); m_value = static_cast(value); } @@ -484,7 +484,7 @@ namespace cfg void set(const f64& value) { - ensure(value >= Min && value <= Max); + if (value < Min || value > Max) fmt::throw_exception("'%s': value %d out of bounds (min=%d, max=%d)", m_name, value, Min, Max); m_value = static_cast(value); } @@ 
-571,7 +571,7 @@ namespace cfg void set(const u64& value) { - ensure(value >= Min && value <= Max); + if (value < Min || value > Max) fmt::throw_exception("'%s': value %d out of bounds (min=%d, max=%d)", m_name, value, Min, Max); m_value = static_cast(value); } diff --git a/Utilities/File.cpp b/Utilities/File.cpp index 490605c792..aff4537dea 100644 --- a/Utilities/File.cpp +++ b/Utilities/File.cpp @@ -166,6 +166,7 @@ static fs::error to_error(int e) case ENOTEMPTY: return fs::error::notempty; case EROFS: return fs::error::readonly; case EISDIR: return fs::error::isdir; + case ENOTDIR: return fs::error::notdir; case ENOSPC: return fs::error::nospace; case EXDEV: return fs::error::xdev; default: return fs::error::unknown; @@ -2833,6 +2834,7 @@ void fmt_class_string::format(std::string& out, u64 arg) case fs::error::notempty: return "Not empty"; case fs::error::readonly: return "Read only"; case fs::error::isdir: return "Is a directory"; + case fs::error::notdir: return "Not a directory"; case fs::error::toolong: return "Path too long"; case fs::error::nospace: return "Not enough space on the device"; case fs::error::xdev: return "Device mismatch"; diff --git a/Utilities/File.h b/Utilities/File.h index dd2db42a46..3d332dd0be 100644 --- a/Utilities/File.h +++ b/Utilities/File.h @@ -683,6 +683,7 @@ namespace fs notempty, readonly, isdir, + notdir, toolong, nospace, xdev, diff --git a/Utilities/JIT.h b/Utilities/JIT.h index 6dfa9e7cd0..86fc72ed55 100644 --- a/Utilities/JIT.h +++ b/Utilities/JIT.h @@ -493,6 +493,10 @@ inline FT build_function_asm(std::string_view name, F&& builder, ::jit_runtime* return reinterpret_cast(uptr(result)); } +#if defined(__INTELLISENSE__) && !defined(LLVM_AVAILABLE) +#define LLVM_AVAILABLE +#endif + #ifdef LLVM_AVAILABLE namespace llvm diff --git a/Utilities/Thread.cpp b/Utilities/Thread.cpp index 6395c32505..b22c1aeb52 100644 --- a/Utilities/Thread.cpp +++ b/Utilities/Thread.cpp @@ -107,7 +107,7 @@ thread_local u64 g_tls_fault_rsx = 0; 
thread_local u64 g_tls_fault_spu = 0; thread_local u64 g_tls_wait_time = 0; thread_local u64 g_tls_wait_fail = 0; -thread_local bool g_tls_access_violation_recovered = false; +thread_local u64 g_tls_access_violation_recovered = umax; extern thread_local std::string(*g_tls_log_prefix)(); namespace stx @@ -1269,7 +1269,7 @@ namespace rsx extern std::function g_access_violation_handler; } -bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noexcept +bool handle_access_violation(u32 addr, bool is_writing, bool is_exec, ucontext_t* context) noexcept { g_tls_fault_all++; @@ -1305,7 +1305,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe } } spu_protection{cpu}; - if (addr < RAW_SPU_BASE_ADDR && vm::check_addr(addr) && rsx::g_access_violation_handler) + if (!is_exec && addr < RAW_SPU_BASE_ADDR && vm::check_addr(addr) && rsx::g_access_violation_handler) { bool state_changed = false; @@ -1371,7 +1371,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe { auto thread = idm::get_unlocked>(spu_thread::find_raw_spu((addr - RAW_SPU_BASE_ADDR) / RAW_SPU_OFFSET)); - if (!thread) + if (!thread || is_exec) { break; } @@ -1503,7 +1503,9 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe static_cast(context); #endif /* ARCH_ */ - if (vm::check_addr(addr, is_writing ? vm::page_writable : vm::page_readable)) + const auto required_page_perms = (is_writing ? vm::page_writable : vm::page_readable) + (is_exec ? vm::page_executable : 0); + + if (vm::check_addr(addr, required_page_perms)) { return true; } @@ -1511,9 +1513,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe // Hack: allocate memory in case the emulator is stopping const auto hack_alloc = [&]() { - g_tls_access_violation_recovered = true; - - if (vm::check_addr(addr, is_writing ? 
vm::page_writable : vm::page_readable)) + if (vm::check_addr(addr, required_page_perms)) { return true; } @@ -1525,17 +1525,45 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe return false; } + extern void ppu_register_range(u32 addr, u32 size); + + bool reprotected = false; + if (vm::writer_lock mlock; area->flags & vm::preallocated || vm::check_addr(addr, 0)) { // For allocated memory with protection lower than required (such as protection::no or read-only while writing to it) utils::memory_protect(vm::base(addr & -0x1000), 0x1000, utils::protection::rw); + reprotected = true; + } + + if (reprotected) + { + if (is_exec && !vm::check_addr(addr, vm::page_executable)) + { + ppu_register_range(addr & -0x10000, 0x10000); + } + + g_tls_access_violation_recovered = addr; return true; } - return area->falloc(addr & -0x10000, 0x10000) || vm::check_addr(addr, is_writing ? vm::page_writable : vm::page_readable); + const bool allocated = area->falloc(addr & -0x10000, 0x10000); + + if (allocated) + { + if (is_exec && !vm::check_addr(addr, vm::page_executable)) + { + ppu_register_range(addr & -0x10000, 0x10000); + } + + g_tls_access_violation_recovered = addr; + return true; + } + + return false; }; - if (cpu && (cpu->get_class() == thread_class::ppu || cpu->get_class() == thread_class::spu)) + if (cpu && (cpu->get_class() == thread_class::ppu || cpu->get_class() == thread_class::spu) && !is_exec) { vm::temporary_unlock(*cpu); u32 pf_port_id = 0; @@ -1678,7 +1706,7 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe if (cpu->get_class() == thread_class::spu) { - if (!g_tls_access_violation_recovered) + if (g_tls_access_violation_recovered != addr) { vm_log.notice("\n%s", dump_useful_thread_info()); vm_log.always()("[%s] Access violation %s location 0x%x (%s)", cpu->get_name(), is_writing ? "writing" : "reading", addr, (is_writing && vm::check_addr(addr)) ? 
"read-only memory" : "unmapped memory"); @@ -1714,10 +1742,10 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe // Note: a thread may access violate more than once after hack_alloc recovery // Do not log any further access violations in this case. - if (!g_tls_access_violation_recovered) + if (g_tls_access_violation_recovered != addr) { vm_log.notice("\n%s", dump_useful_thread_info()); - vm_log.fatal("Access violation %s location 0x%x (%s)", is_writing ? "writing" : (cpu && cpu->get_class() == thread_class::ppu && cpu->get_pc() == addr ? "executing" : "reading"), addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory"); + vm_log.fatal("Access violation %s location 0x%x (%s)", is_writing ? "writing" : (is_exec ? "executing" : "reading"), addr, (is_writing && vm::check_addr(addr)) ? "read-only memory" : "unmapped memory"); } while (Emu.IsPausedOrReady()) @@ -1766,8 +1794,13 @@ bool handle_access_violation(u32 addr, bool is_writing, ucontext_t* context) noe } } - if (Emu.IsStopped() && !hack_alloc()) + if (Emu.IsStopped()) { + while (!hack_alloc()) + { + thread_ctrl::wait_for(1000); + } + return false; } @@ -1806,6 +1839,7 @@ static LONG exception_handler(PEXCEPTION_POINTERS pExp) noexcept if (pExp->ExceptionRecord->ExceptionCode == EXCEPTION_ACCESS_VIOLATION && !is_executing) { u32 addr = 0; + bool is_exec = false; if (auto [addr0, ok] = vm::try_get_addr(ptr); ok) { @@ -1813,14 +1847,21 @@ static LONG exception_handler(PEXCEPTION_POINTERS pExp) noexcept } else if (const usz exec64 = (ptr - vm::g_exec_addr) / 2; exec64 <= u32{umax}) { + is_exec = true; addr = static_cast(exec64); } - else + else if (const usz exec64 = (ptr - vm::g_exec_addr - vm::g_exec_addr_seg_offset); exec64 <= u32{umax}) { + is_exec = true; + addr = static_cast(exec64); + } + else + { + std::this_thread::sleep_for(1ms); return EXCEPTION_CONTINUE_SEARCH; } - if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, 
pExp->ContextRecord)) + if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, is_exec, pExp->ContextRecord)) { return EXCEPTION_CONTINUE_EXECUTION; } @@ -2027,12 +2068,13 @@ static void signal_handler(int /*sig*/, siginfo_t* info, void* uct) noexcept #endif const u64 exec64 = (reinterpret_cast(info->si_addr) - reinterpret_cast(vm::g_exec_addr)) / 2; + const u64 exec64_2 = (reinterpret_cast(info->si_addr) - reinterpret_cast(vm::g_exec_addr)) - vm::g_exec_addr_seg_offset; const auto cause = is_executing ? "executing" : is_writing ? "writing" : "reading"; if (auto [addr, ok] = vm::try_get_addr(info->si_addr); ok && !is_executing) { // Try to process access violation - if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, context)) + if (thread_ctrl::get_current() && handle_access_violation(addr, is_writing, false, context)) { return; } @@ -2040,7 +2082,14 @@ static void signal_handler(int /*sig*/, siginfo_t* info, void* uct) noexcept if (exec64 < 0x100000000ull && !is_executing) { - if (thread_ctrl::get_current() && handle_access_violation(static_cast(exec64), is_writing, context)) + if (thread_ctrl::get_current() && handle_access_violation(static_cast(exec64), is_writing, true, context)) + { + return; + } + } + else if (exec64_2 < 0x100000000ull && !is_executing) + { + if (thread_ctrl::get_current() && handle_access_violation(static_cast(exec64_2), is_writing, true, context)) { return; } @@ -2359,7 +2408,7 @@ thread_base::native_entry thread_base::finalize(u64 _self) noexcept g_tls_fault_spu = 0; g_tls_wait_time = 0; g_tls_wait_fail = 0; - g_tls_access_violation_recovered = false; + g_tls_access_violation_recovered = umax; g_tls_log_prefix = []() -> std::string { return {}; }; @@ -2830,6 +2879,16 @@ void thread_base::exec() } } + if (auto [total, current] = utils::get_memory_usage(); total - current <= 256 * 1024 * 1024) + { + if (reason_buf.empty()) + { + reason_buf = std::string{reason}; + } + + fmt::append(reason_buf, " 
(Possible RAM deficiency: free RAM: %dMB)", (total - current) / (1024 * 1024)); + } + if (!reason_buf.empty()) { reason = reason_buf; diff --git a/buildfiles/cmake/ConfigureCompiler.cmake b/buildfiles/cmake/ConfigureCompiler.cmake index be900d13c2..ea8cd3200b 100644 --- a/buildfiles/cmake/ConfigureCompiler.cmake +++ b/buildfiles/cmake/ConfigureCompiler.cmake @@ -96,11 +96,6 @@ else() # This hides our LLVM from mesa's LLVM, otherwise we get some unresolvable conflicts. add_link_options(-Wl,--exclude-libs,ALL) elseif(WIN32) - add_compile_definitions(__STDC_FORMAT_MACROS=1) - - # Workaround for mingw64 (MSYS2) - add_link_options(-Wl,--allow-multiple-definition) - # Increase stack limit to 8 MB add_link_options(-Wl,--stack -Wl,8388608) endif() diff --git a/buildfiles/cmake/FindWolfSSL.cmake b/buildfiles/cmake/FindWolfSSL.cmake index d2e30be60b..35f316837c 100644 --- a/buildfiles/cmake/FindWolfSSL.cmake +++ b/buildfiles/cmake/FindWolfSSL.cmake @@ -1,4 +1,3 @@ -set(WOLFSSL_LIBRARY ON) -set(WOLFSSL_INCLUDE_DIR ON) -set(WOLFSSL_LIBRARIES wolfssl) +set(WOLFSSL_LIBRARY wolfssl) +set(WOLFSSL_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/wolfssl) set(WOLFSSL_FOUND TRUE) diff --git a/buildfiles/cmake/FindZLIB.cmake b/buildfiles/cmake/FindZLIB.cmake index 0a29abafa9..ff5869a5f7 100644 --- a/buildfiles/cmake/FindZLIB.cmake +++ b/buildfiles/cmake/FindZLIB.cmake @@ -3,9 +3,9 @@ if(USE_SYSTEM_ZLIB) find_package(ZLIB) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}) else() - add_library(ZLIB::ZLIB INTERFACE IMPORTED) + add_library(ZLIB::ZLIB STATIC IMPORTED) set_target_properties(ZLIB::ZLIB PROPERTIES - INTERFACE_LINK_LIBRARIES zlibstatic + IMPORTED_LOCATION "${CMAKE_BINARY_DIR}/3rdparty/zlib/zlib/libzlibstatic.a" INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_SOURCE_DIR}/3rdparty/zlib/zlib;${CMAKE_BINARY_DIR}/3rdparty/zlib/zlib") set(ZLIB_FOUND TRUE) endif() diff --git a/rpcs3/CMakeLists.txt b/rpcs3/CMakeLists.txt index 2aa8120752..cfc2495f15 100644 --- a/rpcs3/CMakeLists.txt +++ 
b/rpcs3/CMakeLists.txt @@ -87,12 +87,6 @@ if (NOT ANDROID) message(FATAL_ERROR "RPCS3 requires either X11 or Wayland (or both) for Vulkan.") endif() - if(UNIX) - set(CMAKE_THREAD_PREFER_PTHREAD TRUE) - find_package(Threads REQUIRED) - target_link_libraries(rpcs3_lib PRIVATE Threads::Threads) - endif() - if(WIN32) target_link_libraries(rpcs3_lib PRIVATE ws2_32 Iphlpapi Winmm Psapi gdi32 setupapi) else() diff --git a/rpcs3/Emu/CMakeLists.txt b/rpcs3/Emu/CMakeLists.txt index e115585db9..d691952fa0 100644 --- a/rpcs3/Emu/CMakeLists.txt +++ b/rpcs3/Emu/CMakeLists.txt @@ -645,6 +645,9 @@ if(TARGET 3rdparty_vulkan) endif() endif() +if(NOT WIN32) + set(THREADS_PREFER_PTHREAD_FLAG ON) +endif() find_package(Threads REQUIRED) target_link_libraries(rpcs3_emu diff --git a/rpcs3/Emu/CPU/CPUThread.cpp b/rpcs3/Emu/CPU/CPUThread.cpp index 3ab011aa04..4bd5fc9157 100644 --- a/rpcs3/Emu/CPU/CPUThread.cpp +++ b/rpcs3/Emu/CPU/CPUThread.cpp @@ -888,6 +888,14 @@ bool cpu_thread::check_state() noexcept store = true; } + if (flags & cpu_flag::req_exit) + { + // A request for the thread to quit has been made + flags -= cpu_flag::req_exit; + flags += cpu_flag::exit; + store = true; + } + // Can't process dbg_step if we only paused temporarily if (cpu_can_stop && flags & cpu_flag::dbg_step) { @@ -1157,13 +1165,13 @@ void cpu_thread::notify() cpu_thread& cpu_thread::operator=(thread_state) { - if (state & cpu_flag::exit) + if (state & (cpu_flag::exit + cpu_flag::req_exit)) { // Must be notified elsewhere or self-raised return *this; } - const auto old = state.fetch_add(cpu_flag::exit); + const auto old = state.fetch_add(cpu_flag::req_exit); if (old & cpu_flag::wait && old.none_of(cpu_flag::again + cpu_flag::exit)) { @@ -1322,8 +1330,9 @@ extern std::shared_ptr make_disasm(const cpu_thread* cpu, shared_ptr< void cpu_thread::dump_all(std::string& ret) const { std::any func_data; + std::any misc_data; - ret += dump_misc(); + dump_misc(ret, misc_data); ret += '\n'; dump_regs(ret, func_data); ret += 
'\n'; @@ -1371,9 +1380,9 @@ std::vector> cpu_thread::dump_callstack_list() const return {}; } -std::string cpu_thread::dump_misc() const +void cpu_thread::dump_misc(std::string& ret, std::any& /*custom_data*/) const { - return fmt::format("%s[0x%x]; State: %s\n", get_class() == thread_class::ppu ? "PPU" : get_class() == thread_class::spu ? "SPU" : "RSX", id, state.load()); + fmt::append(ret, "%s[0x%x]; State: %s\n", get_class() == thread_class::ppu ? "PPU" : get_class() == thread_class::spu ? "SPU" : "RSX", id, state.load()); } bool cpu_thread::suspend_work::push(cpu_thread* _this) noexcept diff --git a/rpcs3/Emu/CPU/CPUThread.h b/rpcs3/Emu/CPU/CPUThread.h index 5e3484f7f5..e723fd2d4b 100644 --- a/rpcs3/Emu/CPU/CPUThread.h +++ b/rpcs3/Emu/CPU/CPUThread.h @@ -29,6 +29,7 @@ enum class cpu_flag : u32 yield, // Thread is being requested to yield its execution time if it's running preempt, // Thread is being requested to preempt the execution of all CPU threads + req_exit, // Request the thread to exit dbg_global_pause, // Emulation paused dbg_pause, // Thread paused dbg_step, // Thread forced to pause after one step (one instruction, etc) @@ -39,7 +40,7 @@ enum class cpu_flag : u32 // Test stopped state constexpr bool is_stopped(bs_t state) { - return !!(state & (cpu_flag::stop + cpu_flag::exit + cpu_flag::again)); + return !!(state & (cpu_flag::stop + cpu_flag::exit + cpu_flag::again + cpu_flag::req_exit)); } // Test paused state @@ -176,7 +177,7 @@ public: virtual std::vector> dump_callstack_list() const; // Get CPU dump of misc information - virtual std::string dump_misc() const; + virtual void dump_misc(std::string& ret, std::any& /*custom_data*/) const; // Thread entry point function virtual void cpu_task() = 0; diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp index 08e8e9ad30..6bd7924ea5 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.cpp +++ b/rpcs3/Emu/CPU/CPUTranslator.cpp @@ -210,7 +210,7 @@ void 
cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin #endif } -llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type) const +llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type, std::source_location src_loc) const { uint s1 = type->getScalarSizeInBits(); uint s2 = val->getType()->getScalarSizeInBits(); @@ -222,15 +222,69 @@ llvm::Value* cpu_translator::bitcast(llvm::Value* val, llvm::Type* type) const if (s1 != s2) { - fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)", s1, s2); + fmt::throw_exception("cpu_translator::bitcast(): incompatible type sizes (%u vs %u)\nCalled from: %s", s1, s2, src_loc); } - if (const auto c1 = llvm::dyn_cast(val)) + if (val->getType() == type) + { + return val; + } + + llvm::CastInst* i; + llvm::Value* source_val = val; + + // Try to reuse older bitcasts + while ((i = llvm::dyn_cast_or_null(source_val)) && i->getOpcode() == llvm::Instruction::BitCast) + { + source_val = i->getOperand(0); + + if (source_val->getType() == type) + { + return source_val; + } + } + + for (auto it = source_val->use_begin(); it != source_val->use_end(); ++it) + { + llvm::Value* it_val = *it; + + if (!it_val) + { + continue; + } + + llvm::CastInst* bci = llvm::dyn_cast_or_null(it_val); + + // Walk through bitcasts + while (bci && bci->getOpcode() == llvm::Instruction::BitCast) + { + if (bci->getParent() != m_ir->GetInsertBlock()) + { + break; + } + + if (bci->getType() == type) + { + return bci; + } + + if (bci->use_begin() == bci->use_end()) + { + break; + } + + bci = llvm::dyn_cast_or_null(*bci->use_begin()); + } + } + + // Do bitcast on the source + + if (const auto c1 = llvm::dyn_cast(source_val)) { return ensure(llvm::ConstantFoldCastOperand(llvm::Instruction::BitCast, c1, type, m_module->getDataLayout())); } - return m_ir->CreateBitCast(val, type); + return m_ir->CreateBitCast(source_val, type); } template <> diff --git a/rpcs3/Emu/CPU/CPUTranslator.h 
b/rpcs3/Emu/CPU/CPUTranslator.h index 9b9804fd39..ab2aed8156 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -43,6 +43,7 @@ #include #include +#include // Helper function llvm::Value* peek_through_bitcasts(llvm::Value*); @@ -567,6 +568,32 @@ struct llvm_placeholder_t } }; +template >> +struct llvm_place_stealer_t +{ + // TODO: placeholder extracting actual constant values (u64, f64, vector, etc) + + using type = T; + + static constexpr bool is_ok = true; + + llvm::Value* eval(llvm::IRBuilder<>*) const + { + return nullptr; + } + + std::tuple<> match(llvm::Value*& value, llvm::Module*) const + { + if (value && value->getType() == llvm_value_t::get_type(value->getContext())) + { + return {}; + } + + value = nullptr; + return {}; + } +}; + template struct llvm_const_int { @@ -3213,7 +3240,7 @@ public: } // Bitcast with immediate constant folding - llvm::Value* bitcast(llvm::Value* val, llvm::Type* type) const; + llvm::Value* bitcast(llvm::Value* val, llvm::Type* type, std::source_location src_loc = std::source_location::current()) const; template llvm::Value* bitcast(llvm::Value* val) @@ -3227,6 +3254,12 @@ public: return {}; } + template + static llvm_place_stealer_t match_stealer() + { + return {}; + } + template requires requires { typename llvm_common_t; } static auto match_expr(llvm::Value* v, llvm::Module* _m, T&& expr) @@ -3951,6 +3984,15 @@ public: erase_stores({args.value...}); } + // Debug breakpoint + void debugtrap() + { + const auto _rty = llvm::Type::getVoidTy(m_context); + const auto type = llvm::FunctionType::get(_rty, {}, false); + const auto func = llvm::cast(m_ir->GetInsertBlock()->getParent()->getParent()->getOrInsertFunction("llvm.debugtrap", type).getCallee()); + m_ir->CreateCall(func); + } + template static auto pshufb(T&& a, U&& b) { diff --git a/rpcs3/Emu/Cell/Modules/cellAdec.cpp b/rpcs3/Emu/Cell/Modules/cellAdec.cpp index dfc91c8d2f..0d0c4481e9 100644 --- a/rpcs3/Emu/Cell/Modules/cellAdec.cpp +++ 
b/rpcs3/Emu/Cell/Modules/cellAdec.cpp @@ -261,7 +261,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) savestate = lpcm_dec_state::waiting_for_output_mutex_lock; output_mutex_lock: - error_occurred |= static_cast(sys_mutex_lock(ppu, output_mutex, 0) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex, 0) != CELL_OK); if (ppu.state & cpu_flag::again) { @@ -273,7 +273,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) savestate = lpcm_dec_state::waiting_for_output_cond_wait; output_cond_wait: - ensure(sys_cond_wait(ppu, output_consumed, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, output_consumed, 0) == CELL_OK); // Error code isn't checked on LLE if (ppu.state & cpu_flag::again) { @@ -287,7 +287,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) savestate = lpcm_dec_state::queue_mutex_lock; queue_mutex_lock: - ensure(sys_mutex_lock(ppu, queue_mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (ppu.state & cpu_flag::again) { @@ -296,7 +296,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) cmd_queue.pop(cmd); - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); // Error code isn't checked on LLE cellAdec.trace("Command type: %d", static_cast(cmd.type.get())); @@ -307,7 +307,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) { case LpcmDecCmdType::start_seq: // LLE sends a command to the SPU thread. 
The SPU thread consumes the command without doing anything, however - error_occurred |= static_cast(sys_mutex_unlock(ppu, output_mutex) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex) != CELL_OK); break; case LpcmDecCmdType::end_seq: @@ -324,11 +324,11 @@ void LpcmDecContext::exec(ppu_thread& ppu) // Doesn't do anything else notify_seq_done.cbFunc(ppu, notify_seq_done.cbArg); - error_occurred |= static_cast(sys_mutex_unlock(ppu, output_mutex) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex) != CELL_OK); break; } case LpcmDecCmdType::close: - ensure(sys_mutex_unlock(ppu, output_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, output_mutex) == CELL_OK); // Error code isn't checked on LLE return; case LpcmDecCmdType::decode_au: @@ -685,7 +685,7 @@ void LpcmDecContext::exec(ppu_thread& ppu) notify_au_done.cbFunc(ppu, cmd.pcm_handle, notify_au_done.cbArg); output_locked = true; - error_occurred |= static_cast(sys_mutex_unlock(ppu, output_mutex) != CELL_OK); + error_occurred |= static_cast(lv2_syscall(ppu, output_mutex) != CELL_OK); const vm::var bsi_info{{ lpcm_param->channelNumber, lpcm_param->sampleRate, static_cast(output_size) }}; @@ -703,14 +703,14 @@ error_code LpcmDecContext::send_command(ppu_thread& ppu, auto&&... args) { ppu.state += cpu_flag::wait; - if (error_code ret = sys_mutex_lock(ppu, queue_size_mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, queue_size_mutex, 0); ret != CELL_OK) { return ret; } if (cmd_queue.full()) { - ensure(sys_mutex_unlock(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_BUSY; } @@ -720,39 +720,39 @@ error_code LpcmDecContext::send_command(ppu_thread& ppu, auto&&... args) *lpcm_param = { args... 
}; } - if (error_code ret = sys_mutex_lock(ppu, queue_mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, queue_mutex, 0); ret != CELL_OK) { - ensure(sys_mutex_unlock(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } cmd_queue.emplace(type, std::forward(args)...); - if (error_code ret = sys_mutex_unlock(ppu, queue_mutex); ret != CELL_OK + if (error_code ret = lv2_syscall(ppu, queue_mutex); ret != CELL_OK || (ret = cmd_available.release(ppu)) != CELL_OK) { - ensure(sys_mutex_unlock(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } - return sys_mutex_unlock(ppu, queue_size_mutex); + return lv2_syscall(ppu, queue_size_mutex); } inline error_code LpcmDecContext::release_output(ppu_thread& ppu) { - if (error_code ret = sys_mutex_lock(ppu, output_mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, output_mutex, 0); ret != CELL_OK) { return ret; } output_locked = false; - if (error_code ret = sys_cond_signal(ppu, output_consumed); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, output_consumed); ret != CELL_OK) { return ret; // LLE doesn't unlock the mutex } - return sys_mutex_unlock(ppu, output_mutex); + return lv2_syscall(ppu, output_mutex); } void lpcmDecEntry(ppu_thread& ppu, vm::ptr lpcm_dec) @@ -820,13 +820,13 @@ error_code _CellAdecCoreOpOpenExt_lpcm(ppu_thread& ppu, vm::ptr const vm::var queue_mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem06"_u64 } }}; const vm::var cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec03"_u64 } }}; - error_code ret = sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::queue_size_mutex), mutex_attr); - ret = ret ? 
ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::queue_size_cond), handle->queue_size_mutex, cond_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::unk_mutex), mutex_attr); - ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::unk_cond), handle->unk_mutex, cond_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::output_mutex), output_mutex_attr); - ret = ret ? ret : sys_cond_create(ppu, handle.ptr(&LpcmDecContext::output_consumed), handle->output_mutex, cond_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::queue_mutex), queue_mutex_attr); + error_code ret = lv2_syscall(ppu, handle.ptr(&LpcmDecContext::queue_size_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::queue_size_cond), handle->queue_size_mutex, cond_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::unk_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::unk_cond), handle->unk_mutex, cond_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::output_mutex), output_mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::output_consumed), handle->output_mutex, cond_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::queue_mutex), queue_mutex_attr); ret = ret ? ret : handle->release_output(ppu); ret = ret ? ret : handle->cmd_available.init(ppu, handle.ptr(&LpcmDecContext::cmd_available), 0); ret = ret ? ret : handle->reserved2.init(ppu, handle.ptr(&LpcmDecContext::reserved2), 0); @@ -844,8 +844,8 @@ error_code _CellAdecCoreOpOpenExt_lpcm(ppu_thread& ppu, vm::ptr const auto entry = g_fxo->get().func_addr(FIND_FUNC(lpcmDecEntry)); ret = ppu_execute<&sys_ppu_thread_create>(ppu, handle.ptr(&LpcmDecContext::thread_id), entry, handle.addr(), +res->ppuThreadPriority, +res->ppuThreadStackSize, SYS_PPU_THREAD_CREATE_JOINABLE, +_name); - ret = ret ? 
ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::spurs_queue_pop_mutex), mutex_attr); - ret = ret ? ret : sys_mutex_create(ppu, handle.ptr(&LpcmDecContext::spurs_queue_push_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::spurs_queue_pop_mutex), mutex_attr); + ret = ret ? ret : lv2_syscall(ppu, handle.ptr(&LpcmDecContext::spurs_queue_push_mutex), mutex_attr); return ret; } @@ -865,8 +865,8 @@ error_code _CellAdecCoreOpClose_lpcm(ppu_thread& ppu, vm::ptr ha cellAdec.notice("_CellAdecCoreOpClose_lpcm(handle=*0x%x)", handle); - if (error_code ret = sys_mutex_lock(ppu, handle->queue_size_mutex, 0); ret != CELL_OK - || (ret = sys_mutex_lock(ppu, handle->queue_mutex, 0)) != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->queue_size_mutex, 0); ret != CELL_OK + || (ret = lv2_syscall(ppu, handle->queue_mutex, 0)) != CELL_OK) { return ret; } @@ -875,14 +875,14 @@ error_code _CellAdecCoreOpClose_lpcm(ppu_thread& ppu, vm::ptr ha { handle->cmd_queue.emplace(LpcmDecCmdType::close); - if (error_code ret = sys_mutex_unlock(ppu, handle->queue_mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->queue_mutex); ret != CELL_OK) { return ret; // LLE doesn't unlock the queue size mutex } if (error_code ret = handle->cmd_available.release(ppu); ret != CELL_OK) { - ensure(sys_mutex_unlock(ppu, handle->queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, handle->queue_size_mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } } @@ -893,29 +893,29 @@ error_code _CellAdecCoreOpClose_lpcm(ppu_thread& ppu, vm::ptr ha cmd.type = LpcmDecCmdType::close; } - if (error_code ret = sys_mutex_unlock(ppu, handle->queue_mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->queue_mutex); ret != CELL_OK) { return ret; // LLE doesn't unlock the queue size mutex } } - error_code ret = sys_mutex_unlock(ppu, handle->queue_size_mutex); + error_code ret = 
lv2_syscall(ppu, handle->queue_size_mutex); ret = ret ? ret : handle->release_output(ppu); vm::var thread_ret; - ret = ret ? ret : sys_ppu_thread_join(ppu, static_cast(handle->thread_id), +thread_ret); + ret = ret ? ret : lv2_syscall(ppu, static_cast(handle->thread_id), +thread_ret); - ret = ret ? ret : sys_cond_destroy(ppu, handle->queue_size_cond); - ret = ret ? ret : sys_cond_destroy(ppu, handle->unk_cond); - ret = ret ? ret : sys_cond_destroy(ppu, handle->output_consumed); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_size_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->unk_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->output_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_size_cond); + ret = ret ? ret : lv2_syscall(ppu, handle->unk_cond); + ret = ret ? ret : lv2_syscall(ppu, handle->output_consumed); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_size_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->unk_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->output_mutex); ret = ret ? ret : handle->cmd_available.finalize(ppu); ret = ret ? ret : handle->reserved2.finalize(ppu); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->spurs_queue_pop_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->spurs_queue_push_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->spurs_queue_pop_mutex); + ret = ret ? 
ret : lv2_syscall(ppu, handle->spurs_queue_push_mutex); return ret; } @@ -1091,11 +1091,11 @@ error_code AdecContext::set_pcm_item(s32 pcm_handle, vm::ptr pcm_addr, u32 error_code AdecContext::link_frame(ppu_thread& ppu, s32 pcm_handle) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (verify_pcm_handle(pcm_handle) == static_cast(CELL_ADEC_ERROR_FATAL)) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } @@ -1115,27 +1115,27 @@ error_code AdecContext::link_frame(ppu_thread& ppu, s32 pcm_handle) } else { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_OK; } error_code AdecContext::unlink_frame(ppu_thread& ppu, s32 pcm_handle) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (verify_pcm_handle(pcm_handle) == static_cast(CELL_ADEC_ERROR_FATAL)) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } if (frames_head == -1 || frames_tail == -1) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } @@ -1146,7 +1146,7 @@ error_code 
AdecContext::unlink_frame(ppu_thread& ppu, s32 pcm_handle) { if (pcm_handle != frames_tail) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_ADEC_ERROR_FATAL; } @@ -1171,7 +1171,7 @@ error_code AdecContext::unlink_frame(ppu_thread& ppu, s32 pcm_handle) frames[prev].next = next; } - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_OK; } @@ -1557,7 +1557,7 @@ error_code adecOpen(ppu_thread& ppu, vm::ptr type, vm::cptr(ppu, _this.ptr(&AdecContext::mutex), _this.ptr(&AdecContext::mutex_attribute)) == CELL_OK); // Error code isn't checked on LLE *handle = _this; @@ -1626,7 +1626,7 @@ error_code cellAdecClose(ppu_thread& ppu, vm::ptr handle) return ret; } - if (error_code ret = sys_mutex_destroy(ppu, handle->mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, handle->mutex); ret != CELL_OK) { return ret; } diff --git a/rpcs3/Emu/Cell/Modules/cellAdec.h b/rpcs3/Emu/Cell/Modules/cellAdec.h index a43c3f0aef..2216561516 100644 --- a/rpcs3/Emu/Cell/Modules/cellAdec.h +++ b/rpcs3/Emu/Cell/Modules/cellAdec.h @@ -485,6 +485,19 @@ struct AdecFrame CHECK_SIZE(AdecFrame, 0x68); +template +static auto lv2_syscall(ppu_thread& ppu, auto&&... 
args) +{ + const auto ret = Syscall(ppu, std::forward(args)...); + + if (ppu.test_stopped()) + { + ppu.state += cpu_flag::again; + } + + return ret; +} + class AdecOutputQueue { struct entry @@ -511,10 +524,10 @@ public: this->size = 0; const vm::var mutex_attr = {{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem07"_u64 } }}; - ensure(sys_mutex_create(ppu, _this.ptr(&AdecOutputQueue::mutex), mutex_attr) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, _this.ptr(&AdecOutputQueue::mutex), mutex_attr) == CELL_OK); // Error code isn't checked on LLE const vm::var cond_attr = {{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec05"_u64 } }}; - ensure(sys_cond_create(ppu, _this.ptr(&AdecOutputQueue::cond), mutex, cond_attr) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, _this.ptr(&AdecOutputQueue::cond), mutex, cond_attr) == CELL_OK); // Error code isn't checked on LLE for (s32 i = 0; i < 4; i++) { @@ -524,12 +537,12 @@ public: error_code finalize(ppu_thread& ppu) const { - if (error_code ret = sys_cond_destroy(ppu, cond); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond); ret != CELL_OK) { return ret; } - if (error_code ret = sys_mutex_destroy(ppu, mutex); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, mutex); ret != CELL_OK) { return ret; } @@ -539,11 +552,11 @@ public: error_code push(ppu_thread& ppu, vm::ptr pcm_item, s32 pcm_handle) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (entries[back].state != 0xff) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return true; // LLE returns the result of the comparison above } @@ -554,13 +567,13 @@ public: back = (back + 1) & 3; size++; - 
ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return CELL_OK; } const entry* pop(ppu_thread& ppu) { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE if (ppu.state & cpu_flag::again) // Savestate was created while waiting on the mutex { @@ -569,7 +582,7 @@ public: if (entries[front].state == 0xff) { - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return nullptr; } @@ -581,15 +594,15 @@ public: front = (front + 1) & 3; size--; - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } const entry& peek(ppu_thread& ppu) const { - ensure(sys_mutex_lock(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex, 0) == CELL_OK); // Error code isn't checked on LLE const entry& ret = entries[front]; - ensure(sys_mutex_unlock(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE + ensure(lv2_syscall(ppu, mutex) == CELL_OK); // Error code isn't checked on LLE return ret; } }; @@ -708,39 +721,39 @@ public: const vm::var mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_adem01"_u64 } }}; const vm::var cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_adec01"_u64 } }}; - if (error_code ret = sys_mutex_create(ppu, _this.ptr(&LpcmDecSemaphore::mutex), mutex_attr); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, _this.ptr(&LpcmDecSemaphore::mutex), mutex_attr); ret != CELL_OK) { return ret; } - return sys_cond_create(ppu, _this.ptr(&LpcmDecSemaphore::cond), mutex, cond_attr); + return 
lv2_syscall(ppu, _this.ptr(&LpcmDecSemaphore::cond), mutex, cond_attr); } error_code finalize(ppu_thread& ppu) const { - if (error_code ret = sys_cond_destroy(ppu, cond); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond); ret != CELL_OK) { return ret; } - return sys_mutex_destroy(ppu, mutex); + return lv2_syscall(ppu, mutex); } error_code release(ppu_thread& ppu) { - if (error_code ret = sys_mutex_lock(ppu, mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, mutex, 0); ret != CELL_OK) { return ret; } value++; - if (error_code ret = sys_cond_signal(ppu, cond); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond); ret != CELL_OK) { return ret; // LLE doesn't unlock the mutex } - return sys_mutex_unlock(ppu, mutex); + return lv2_syscall(ppu, mutex); } error_code acquire(ppu_thread& ppu, lpcm_dec_state& savestate) @@ -752,7 +765,7 @@ public: savestate = lpcm_dec_state::waiting_for_cmd_mutex_lock; - if (error_code ret = sys_mutex_lock(ppu, mutex, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, mutex, 0); ret != CELL_OK) { return ret; } @@ -767,7 +780,7 @@ public: savestate = lpcm_dec_state::waiting_for_cmd_cond_wait; cond_wait: - if (error_code ret = sys_cond_wait(ppu, cond, 0); ret != CELL_OK) + if (error_code ret = lv2_syscall(ppu, cond, 0); ret != CELL_OK) { return ret; // LLE doesn't unlock the mutex } @@ -780,7 +793,7 @@ public: value--; - return sys_mutex_unlock(ppu, mutex); + return lv2_syscall(ppu, mutex); } }; diff --git a/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp b/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp index c55cf7b60f..fb3191bcb4 100644 --- a/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAtracXdec.cpp @@ -295,7 +295,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) { savestate = atracxdec_state::initial; - ensure(sys_mutex_lock(ppu, queue_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -310,24 +310,24 @@ void 
AtracXdecContext::exec(ppu_thread& ppu) savestate = atracxdec_state::waiting_for_cmd; label1_wait_for_cmd_state: - ensure(sys_cond_wait(ppu, queue_not_empty, 0) == CELL_OK); + ensure(lv2_syscall(ppu, queue_not_empty, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { return; } - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); } cmd_queue.pop(cmd); - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); savestate = atracxdec_state::checking_run_thread_1; label2_check_run_thread_1_state: - ensure(sys_mutex_lock(ppu, run_thread_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -336,11 +336,11 @@ void AtracXdecContext::exec(ppu_thread& ppu) if (!run_thread) { - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); return; } - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); savestate = atracxdec_state::executing_cmd; label3_execute_cmd_state: @@ -392,7 +392,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) cellAtracXdec.trace("Waiting for output to be consumed..."); - ensure(sys_mutex_lock(ppu, output_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, output_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -404,7 +404,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) savestate = atracxdec_state::waiting_for_output; label4_wait_for_output_state: - ensure(sys_cond_wait(ppu, output_consumed, 0) == CELL_OK); + ensure(lv2_syscall(ppu, output_consumed, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -417,7 +417,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) savestate = atracxdec_state::checking_run_thread_2; label5_check_run_thread_2_state: - ensure(sys_mutex_lock(ppu, run_thread_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex, 0) 
== CELL_OK); if (ppu.state & cpu_flag::again) { @@ -426,12 +426,12 @@ void AtracXdecContext::exec(ppu_thread& ppu) if (!run_thread) { - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); - ensure(sys_mutex_unlock(ppu, output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, output_mutex) == CELL_OK); return; } - ensure(sys_mutex_unlock(ppu, run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, run_thread_mutex) == CELL_OK); savestate = atracxdec_state::decoding; label6_decode_state: @@ -645,7 +645,7 @@ void AtracXdecContext::exec(ppu_thread& ppu) notify_au_done.cbFunc(ppu, cmd.pcm_handle, notify_au_done.cbArg); output_locked = true; - ensure(sys_mutex_unlock(ppu, output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, output_mutex) == CELL_OK); const u32 output_size = decoded_samples_num * (decoder.bw_pcm & 0x7fu) * decoder.nch_out; @@ -680,7 +680,7 @@ error_code AtracXdecContext::send_command(ppu_thread& ppu, auto&&... args) if (!signal) { - ensure(sys_mutex_lock(ppu, queue_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -692,23 +692,23 @@ error_code AtracXdecContext::send_command(ppu_thread& ppu, auto&&... 
args) // Close command is only sent if the queue is empty on LLE if (!cmd_queue.empty()) { - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); return {}; } } if (cmd_queue.full()) { - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); return CELL_ADEC_ERROR_ATX_BUSY; } cmd_queue.emplace(std::forward(type), std::forward(args)...); - ensure(sys_mutex_unlock(ppu, queue_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, queue_mutex) == CELL_OK); } - ensure(sys_cond_signal(ppu, queue_not_empty) == CELL_OK); + ensure(lv2_syscall(ppu, queue_not_empty) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -779,25 +779,25 @@ error_code _CellAdecCoreOpOpenExt_atracx(ppu_thread& ppu, vm::ptr mutex_attr{{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_atd001"_u64 } }}; const vm::var cond_attr{{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_atd002"_u64 } }}; - ensure(sys_mutex_create(ppu, handle.ptr(&AtracXdecContext::queue_mutex), mutex_attr) == CELL_OK); - ensure(sys_cond_create(ppu, handle.ptr(&AtracXdecContext::queue_not_empty), handle->queue_mutex, cond_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::queue_mutex), mutex_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::queue_not_empty), handle->queue_mutex, cond_attr) == CELL_OK); mutex_attr->name_u64 = "_atd003"_u64; cond_attr->name_u64 = "_atd004"_u64; - ensure(sys_mutex_create(ppu, handle.ptr(&AtracXdecContext::run_thread_mutex), mutex_attr) == CELL_OK); - ensure(sys_cond_create(ppu, handle.ptr(&AtracXdecContext::run_thread_cond), handle->run_thread_mutex, cond_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::run_thread_mutex), mutex_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::run_thread_cond), handle->run_thread_mutex, cond_attr) == CELL_OK); 
mutex_attr->name_u64 = "_atd005"_u64; cond_attr->name_u64 = "_atd006"_u64; - ensure(sys_mutex_create(ppu, handle.ptr(&AtracXdecContext::output_mutex), mutex_attr) == CELL_OK); - ensure(sys_cond_create(ppu, handle.ptr(&AtracXdecContext::output_consumed), handle->output_mutex, cond_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::output_mutex), mutex_attr) == CELL_OK); + ensure(lv2_syscall(ppu, handle.ptr(&AtracXdecContext::output_consumed), handle->output_mutex, cond_attr) == CELL_OK); - ensure(sys_mutex_lock(ppu, handle->output_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex, 0) == CELL_OK); handle->output_locked = false; - ensure(sys_cond_signal(ppu, handle->output_consumed) == CELL_OK); - ensure(sys_mutex_unlock(ppu, handle->output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_consumed) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex) == CELL_OK); const vm::var _name = vm::make_str("HLE ATRAC3plus decoder"); const auto entry = g_fxo->get().func_addr(FIND_FUNC(atracXdecEntry)); @@ -829,26 +829,26 @@ error_code _CellAdecCoreOpClose_atracx(ppu_thread& ppu, vm::ptrrun_thread_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->run_thread_mutex, 0) == CELL_OK); handle->run_thread = false; - ensure(sys_mutex_unlock(ppu, handle->run_thread_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->run_thread_mutex) == CELL_OK); handle->send_command(ppu); - ensure(sys_mutex_lock(ppu, handle->output_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex, 0) == CELL_OK); handle->output_locked = false; - ensure(sys_mutex_unlock(ppu, handle->output_mutex) == CELL_OK); - ensure(sys_cond_signal(ppu, handle->output_consumed) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_consumed) == CELL_OK); vm::var thread_ret; - ensure(sys_ppu_thread_join(ppu, static_cast(handle->thread_id), +thread_ret) == CELL_OK); + 
ensure(lv2_syscall(ppu, static_cast(handle->thread_id), +thread_ret) == CELL_OK); - error_code ret = sys_cond_destroy(ppu, handle->queue_not_empty); - ret = ret ? ret : sys_cond_destroy(ppu, handle->run_thread_cond); - ret = ret ? ret : sys_cond_destroy(ppu, handle->output_consumed); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->queue_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->run_thread_mutex); - ret = ret ? ret : sys_mutex_destroy(ppu, handle->output_mutex); + error_code ret = lv2_syscall(ppu, handle->queue_not_empty); + ret = ret ? ret : lv2_syscall(ppu, handle->run_thread_cond); + ret = ret ? ret : lv2_syscall(ppu, handle->output_consumed); + ret = ret ? ret : lv2_syscall(ppu, handle->queue_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->run_thread_mutex); + ret = ret ? ret : lv2_syscall(ppu, handle->output_mutex); return ret != CELL_OK ? static_cast(CELL_ADEC_ERROR_FATAL) : CELL_OK; } @@ -921,7 +921,7 @@ error_code _CellAdecCoreOpReleasePcm_atracx(ppu_thread& ppu, vm::ptroutput_mutex, 0) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex, 0) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -931,7 +931,7 @@ error_code _CellAdecCoreOpReleasePcm_atracx(ppu_thread& ppu, vm::ptroutput_locked = false; } - ensure(sys_cond_signal(ppu, handle->output_consumed) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_consumed) == CELL_OK); if (ppu.state & cpu_flag::again) { @@ -939,7 +939,7 @@ error_code _CellAdecCoreOpReleasePcm_atracx(ppu_thread& ppu, vm::ptroutput_mutex) == CELL_OK); + ensure(lv2_syscall(ppu, handle->output_mutex) == CELL_OK); return CELL_OK; } diff --git a/rpcs3/Emu/Cell/Modules/cellDmux.cpp b/rpcs3/Emu/Cell/Modules/cellDmux.cpp index fb1f32837d..7c2fe6f1a9 100644 --- a/rpcs3/Emu/Cell/Modules/cellDmux.cpp +++ b/rpcs3/Emu/Cell/Modules/cellDmux.cpp @@ -1,16 +1,14 @@ #include "stdafx.h" -#include "Emu/System.h" -#include "Emu/IdManager.h" -#include "Emu/Cell/PPUModule.h" +#include "Emu/Cell/lv2/sys_mutex.h" #include 
"Emu/Cell/lv2/sys_sync.h" +#include "Emu/Cell/lv2/sys_timer.h" +#include "Emu/Cell/PPUModule.h" +#include "Emu/savestate_utils.hpp" +#include "util/asm.hpp" #include "cellPamf.h" #include "cellDmux.h" -#include "util/asm.hpp" - -#include - LOG_CHANNEL(cellDmux); template <> @@ -31,1331 +29,1209 @@ void fmt_class_string::format(std::string& out, u64 arg) }); } -/* Demuxer Thread Classes */ - -enum +static error_code get_error(u32 internal_error) { - /* http://dvd.sourceforge.net/dvdinfo/mpeghdrs.html */ - - PACKET_START_CODE_MASK = 0xffffff00, - PACKET_START_CODE_PREFIX = 0x00000100, - - PACK_START_CODE = 0x000001ba, - SYSTEM_HEADER_START_CODE = 0x000001bb, - PRIVATE_STREAM_1 = 0x000001bd, - PADDING_STREAM = 0x000001be, - PRIVATE_STREAM_2 = 0x000001bf, -}; - -struct DemuxerStream -{ - u32 addr; - u32 size; - u64 userdata; - bool discontinuity; - - template - bool get(T& out) + switch (internal_error) { - if (sizeof(T) > size) return false; + case 0: return CELL_OK; + case 1: return CELL_DMUX_ERROR_FATAL; + case 2: // Error values two to five are all converted to CELL_DMUX_ERROR_ARG. + case 3: + case 4: + case 5: return CELL_DMUX_ERROR_ARG; + default: return CELL_DMUX_ERROR_FATAL; + } +} - std::memcpy(&out, vm::base(addr), sizeof(T)); - addr += sizeof(T); - size -= sizeof(T); +static inline std::span> get_es_handles(vm::ptr handle) +{ + return { vm::pptr::make(handle.addr() + sizeof(DmuxContext)).get_ptr(), static_cast(handle->enabled_es_num) }; +} - return true; +static inline vm::ptr get_au_queue_elements(vm::ptr es_handle) +{ + return vm::ptr::make(es_handle.addr() + sizeof(DmuxEsContext)); +} + +static inline vm::cptr get_core_ops() +{ + return vm::cptr::make(*ppu_module_manager::cellDmuxPamf.variables.find(0x28b2b7b2)->second.export_addr); +} + +template +static auto lv2_syscall(ppu_thread& ppu, auto&&... 
args) +{ + const auto ret = Syscall(ppu, std::forward(args)...); + + if (ppu.test_stopped()) + { + ppu.state += cpu_flag::again; } - template - bool peek(T& out, u32 shift = 0) - { - if (sizeof(T) + shift > size) return false; + return ret; +} - std::memcpy(&out, vm::base(addr + shift), sizeof(T)); - return true; - } +// Callbacks for cellDmuxPamf - void skip(u32 count) - { - addr += count; - size = size > count ? size - count : 0; - } - - bool check(u32 count) const - { - return count <= size; - } - - u64 get_ts(u8 c) - { - u8 v[4]; get(v); - return - ((u64{c} & 0x0e) << 29) | - ((u64{v[0]}) << 21) | - ((u64{v[1]} & 0x7e) << 15) | - ((u64{v[2]}) << 7) | (u64{v[3]} >> 1); - } -}; - -struct PesHeader +static error_code notify_demux_done(ppu_thread& ppu, vm::ptr core_handle, u32 error, vm::ptr handle) { - u64 pts; - u64 dts; - u8 size; - bool has_ts; - bool is_ok; + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - PesHeader(DemuxerStream& stream); -}; - -class ElementaryStream; -class Demuxer; - -enum DemuxerJobType -{ - dmuxSetStream, - dmuxResetStream, - dmuxResetStreamAndWaitDone, - dmuxEnableEs, - dmuxDisableEs, - dmuxResetEs, - dmuxFlushEs, - dmuxClose, -}; - -struct DemuxerTask -{ - DemuxerJobType type; - - union + if (!savestate_lock) { - DemuxerStream stream; + ppu.state += cpu_flag::again; + return {}; + } - struct + cellDmux.trace("notify_demux_done(core_handle=*0x%x, error=%d, handle=*0x%x)", core_handle, error, handle); + + ensure(!!handle); // Not checked on LLE + + ensure(lv2_syscall(ppu, handle->_dx_mhd, 0) == CELL_OK); // Failing this check on LLE would result in it dereferencing an invalid pointer. + handle->dmux_state = DMUX_STOPPED; + ensure(lv2_syscall(ppu, handle->_dx_mhd) == CELL_OK); // Failing this check on LLE would result in it dereferencing an invalid pointer. 
+ + if (handle->_this) + { + const vm::var msg{{ .msgType = CELL_DMUX_MSG_TYPE_DEMUX_DONE, .supplementalInfo = handle->user_data }}; + handle->dmux_cb.cbFunc(ppu, handle, msg, handle->dmux_cb.cbArg); + } + + return CELL_OK; +} + +static error_code notify_fatal_err(ppu_thread& ppu, vm::ptr core_handle, u32 error, vm::ptr handle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.error("notify_fatal_err(core_handle=*0x%x, error=%d, handle=*0x%x)", core_handle, error, handle); + + ensure(!!handle); // Not checked on LLE + + const vm::var msg{{ .msgType = CELL_DMUX_MSG_TYPE_FATAL_ERR, .supplementalInfo = static_cast(get_error(error)) }}; + return handle->dmux_cb.cbFunc(ppu, handle, msg, handle->dmux_cb.cbArg); +} + +static error_code notify_prog_end_code(ppu_thread& ppu, vm::ptr core_handle, vm::ptr handle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("notify_prog_end_code(core_handle=*0x%x, handle=*0x%x)", core_handle, handle); + + ensure(!!handle); // Not checked on LLE + + if (handle->_this) + { + const vm::var msg{{ .msgType = CELL_DMUX_MSG_TYPE_PROG_END_CODE, .supplementalInfo = handle->user_data }}; + handle->dmux_cb.cbFunc(ppu, handle, msg, handle->dmux_cb.cbArg); + } + + return CELL_OK; +} + +static error_code notify_es_au_found(ppu_thread& ppu, vm::ptr core_es_handle, vm::cptr au_info, vm::ptr es_handle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.trace("notify_es_au_found(core_es_handle=*0x%x, 
au_info=*0x%x, es_handle=*0x%x)", core_es_handle, au_info, es_handle); + + ensure(!!au_info && !!es_handle); // Not checked on LLE + + const auto fatal_err = [&](be_t es_is_enabled, error_code ret) + { + if (es_is_enabled) { - u32 es; - u32 auInfo_ptr_addr; - u32 auSpec_ptr_addr; - ElementaryStream* es_ptr; - } es; + const vm::var demuxerMsg{{ .msgType = CELL_DMUX_MSG_TYPE_FATAL_ERR, .supplementalInfo = static_cast(ret) }}; + es_handle->dmux_handle->dmux_cb.cbFunc(ppu, es_handle->dmux_handle, demuxerMsg, es_handle->dmux_handle->dmux_cb.cbArg); + } }; - DemuxerTask() - { - } - - DemuxerTask(DemuxerJobType type) - : type(type) - { - } -}; - -class ElementaryStream -{ - std::mutex m_mutex; - - squeue_t entries; // AU starting addresses - u32 put_count = 0; // number of AU written - u32 got_count = 0; // number of AU obtained by GetAu(Ex) - u32 released = 0; // number of AU released - - u32 put; // AU that is being written now - - bool is_full(u32 space); - -public: - static const u32 id_base = 1; - static const u32 id_step = 1; - static const u32 id_count = 1023; - SAVESTATE_INIT_POS(34); - - ElementaryStream(Demuxer* dmux, vm::ptr addr, u32 size, u32 fidMajor, u32 fidMinor, u32 sup1, u32 sup2, vm::ptr cbFunc, vm::ptr cbArg, u32 spec); - - Demuxer* dmux; - const u32 id = idm::last_id(); - const vm::ptr memAddr; - const u32 memSize; - const u32 fidMajor; - const u32 fidMinor; - const u32 sup1; - const u32 sup2; - const vm::ptr cbFunc; - const vm::ptr cbArg; - const u32 spec; //addr - - std::vector raw_data; // demultiplexed data stream (managed by demuxer thread) - usz raw_pos = 0; // should be <= raw_data.size() - u64 last_dts = CODEC_TS_INVALID; - u64 last_pts = CODEC_TS_INVALID; - - void push(DemuxerStream& stream, u32 size); // called by demuxer thread (not multithread-safe) - - bool isfull(u32 space); - - void push_au(u32 size, u64 dts, u64 pts, u64 userdata, bool rap, u32 specific); - - bool release(); - - bool peek(u32& out_data, bool no_ex, u32& out_spec, bool 
update_index); - - void reset(); -}; - -class Demuxer : public ppu_thread -{ -public: - squeue_t job; - const u32 memAddr; - const u32 memSize; - const vm::ptr cbFunc; - const vm::ptr cbArg; - volatile bool is_finished = false; - volatile bool is_closed = false; - atomic_t is_running = false; - atomic_t is_working = false; - - Demuxer(u32 addr, u32 size, vm::ptr func, vm::ptr arg) - : ppu_thread({}, "", 0) - , memAddr(addr) - , memSize(size) - , cbFunc(func) - , cbArg(arg) - { - } - - void non_task() - { - DemuxerTask task; - DemuxerStream stream = {}; - ElementaryStream* esALL[96]{}; - ElementaryStream** esAVC = &esALL[0]; // AVC (max 16 minus M2V count) - //ElementaryStream** esM2V = &esALL[16]; // M2V (max 16 minus AVC count) - //ElementaryStream** esDATA = &esALL[32]; // user data (max 16) - ElementaryStream** esATX = &esALL[48]; // ATRAC3+ (max 16) - //ElementaryStream** esAC3 = &esALL[64]; // AC3 (max 16) - //ElementaryStream** esPCM = &esALL[80]; // LPCM (max 16) - - u32 cb_add = 0; - - while (true) - { - if (Emu.IsStopped() || is_closed) - { - break; - } - - if (!job.try_peek(task) && is_running && stream.addr) - { - // default task (demuxing) (if there is no other work) - be_t code; - be_t len; - - if (!stream.peek(code)) - { - // demuxing finished - is_running = false; - - // callback - auto dmuxMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - dmuxMsg->msgType = CELL_DMUX_MSG_TYPE_DEMUX_DONE; - dmuxMsg->supplementalInfo = stream.userdata; - cbFunc(*this, id, dmuxMsg, cbArg); - lv2_obj::sleep(*this); - - is_working = false; - - stream = {}; - - continue; - } - - switch (code) - { - case PACK_START_CODE: - { - if (!stream.check(14)) - { - fmt::throw_exception("End of stream (PACK_START_CODE)"); - } - stream.skip(14); - break; - } - - case SYSTEM_HEADER_START_CODE: - { - if (!stream.check(18)) - { - fmt::throw_exception("End of stream (SYSTEM_HEADER_START_CODE)"); - } - stream.skip(18); - break; - } - - case PADDING_STREAM: - { - if (!stream.check(6)) - { - 
fmt::throw_exception("End of stream (PADDING_STREAM)"); - } - stream.skip(4); - stream.get(len); - - if (!stream.check(len)) - { - fmt::throw_exception("End of stream (PADDING_STREAM, len=%d)", len); - } - stream.skip(len); - break; - } - - case PRIVATE_STREAM_2: - { - if (!stream.check(6)) - { - fmt::throw_exception("End of stream (PRIVATE_STREAM_2)"); - } - stream.skip(4); - stream.get(len); - - cellDmux.notice("PRIVATE_STREAM_2 (%d)", len); - - if (!stream.check(len)) - { - fmt::throw_exception("End of stream (PRIVATE_STREAM_2, len=%d)", len); - } - stream.skip(len); - break; - } - - case PRIVATE_STREAM_1: - { - // audio and user data stream - DemuxerStream backup = stream; - - if (!stream.check(6)) - { - fmt::throw_exception("End of stream (PRIVATE_STREAM_1)"); - } - stream.skip(4); - stream.get(len); - - if (!stream.check(len)) - { - fmt::throw_exception("End of stream (PRIVATE_STREAM_1, len=%d)", len); - } - - const PesHeader pes(stream); - if (!pes.is_ok) - { - fmt::throw_exception("PesHeader error (PRIVATE_STREAM_1, len=%d)", len); - } - - if (len < pes.size + 4) - { - fmt::throw_exception("End of block (PRIVATE_STREAM_1, PesHeader + fid_minor, len=%d)", len); - } - len -= pes.size + 4; - - u8 fid_minor; - if (!stream.get(fid_minor)) - { - fmt::throw_exception("End of stream (PRIVATE_STREAM1, fid_minor)"); - } - - const u32 ch = fid_minor % 16; - if ((fid_minor & -0x10) == 0 && esATX[ch]) - { - ElementaryStream& es = *esATX[ch]; - if (es.raw_data.size() > 1024 * 1024) - { - stream = backup; - std::this_thread::sleep_for(1ms); // hack - continue; - } - - if (len < 3 || !stream.check(3)) - { - fmt::throw_exception("End of block (ATX, unknown header, len=%d)", len); - } - len -= 3; - stream.skip(3); - - if (pes.has_ts) - { - es.last_dts = pes.dts; - es.last_pts = pes.pts; - } - - es.push(stream, len); - - while (true) - { - auto const size = es.raw_data.size() - es.raw_pos; // size of available new data - auto const data = es.raw_data.data() + es.raw_pos; // 
pointer to available data - - if (size < 8) break; // skip if cannot read ATS header - - if (data[0] != 0x0f || data[1] != 0xd0) - { - fmt::throw_exception("ATX: 0x0fd0 header not found (ats=0x%llx)", *reinterpret_cast*>(data)); - } - - u32 frame_size = (((u32{data[2]} & 0x3) << 8) | u32{data[3]}) * 8 + 8; - - if (size < frame_size + 8) break; // skip non-complete AU - - if (es.isfull(frame_size + 8)) break; // skip if cannot push AU - - es.push_au(frame_size + 8, es.last_dts, es.last_pts, stream.userdata, false /* TODO: set correct value */, 0); - - //cellDmux.notice("ATX AU pushed (ats=0x%llx, frame_size=%d)", *(be_t*)data, frame_size); - - auto esMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND; - esMsg->supplementalInfo = stream.userdata; - es.cbFunc(*this, id, es.id, esMsg, es.cbArg); - lv2_obj::sleep(*this); - } - } - else - { - cellDmux.notice("PRIVATE_STREAM_1 (len=%d, fid_minor=0x%x)", len, fid_minor); - stream.skip(len); - } - break; - } - - case 0x1e0: case 0x1e1: case 0x1e2: case 0x1e3: - case 0x1e4: case 0x1e5: case 0x1e6: case 0x1e7: - case 0x1e8: case 0x1e9: case 0x1ea: case 0x1eb: - case 0x1ec: case 0x1ed: case 0x1ee: case 0x1ef: - { - // video stream (AVC or M2V) - DemuxerStream backup = stream; - - if (!stream.check(6)) - { - fmt::throw_exception("End of stream (video, code=0x%x)", code); - } - stream.skip(4); - stream.get(len); - - if (!stream.check(len)) - { - fmt::throw_exception("End of stream (video, code=0x%x, len=%d)", code, len); - } - - const PesHeader pes(stream); - if (!pes.is_ok) - { - fmt::throw_exception("PesHeader error (video, code=0x%x, len=%d)", code, len); - } - - if (len < pes.size + 3) - { - fmt::throw_exception("End of block (video, code=0x%x, PesHeader)", code); - } - len -= pes.size + 3; - - const u32 ch = code % 16; - if (esAVC[ch]) - { - ElementaryStream& es = *esAVC[ch]; - - const u32 old_size = ::size32(es.raw_data); - if (es.isfull(old_size)) - { - stream = backup; - 
std::this_thread::sleep_for(1ms); // hack - continue; - } - - if ((pes.has_ts && old_size) || old_size >= 0x69800) - { - // push AU if it becomes too big or the next packet contains PTS/DTS - es.push_au(old_size, es.last_dts, es.last_pts, stream.userdata, false /* TODO: set correct value */, 0); - - // callback - auto esMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND; - esMsg->supplementalInfo = stream.userdata; - es.cbFunc(*this, id, es.id, esMsg, es.cbArg); - lv2_obj::sleep(*this); - } - - if (pes.has_ts) - { - // preserve dts/pts for next AU - es.last_dts = pes.dts; - es.last_pts = pes.pts; - } - - // reconstruction of MPEG2-PS stream for vdec module - const u32 size = len + pes.size + 9; - stream = backup; - es.push(stream, size); - } - else - { - cellDmux.notice("Video stream (code=0x%x, len=%d)", code, len); - stream.skip(len); - } - break; - } - - default: - { - if ((code & PACKET_START_CODE_MASK) == PACKET_START_CODE_PREFIX) - { - fmt::throw_exception("Unknown code found (0x%x)", code); - } - - // search - stream.skip(1); - } - } - - continue; - } - - // wait for task if no work - if (!job.pop(task, &is_closed)) - { - break; // Emu is stopped - } - - switch (task.type) - { - case dmuxSetStream: - { - if (task.stream.discontinuity) - { - cellDmux.warning("dmuxSetStream (beginning)"); - for (u32 i = 0; i < std::size(esALL); i++) - { - if (esALL[i]) - { - esALL[i]->reset(); - } - } - } - - stream = task.stream; - //cellDmux.notice("*** stream updated(addr=0x%x, size=0x%x, discont=%d, userdata=0x%llx)", - //stream.addr, stream.size, stream.discontinuity, stream.userdata); - break; - } - - case dmuxResetStream: - case dmuxResetStreamAndWaitDone: - { - // demuxing stopped - if (is_running.exchange(false)) - { - // callback - auto dmuxMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - dmuxMsg->msgType = CELL_DMUX_MSG_TYPE_DEMUX_DONE; - dmuxMsg->supplementalInfo = stream.userdata; - cbFunc(*this, id, dmuxMsg, cbArg); - 
lv2_obj::sleep(*this); - - stream = {}; - - is_working = false; - } - - break; - } - - case dmuxEnableEs: - { - ElementaryStream& es = *task.es.es_ptr; - - // TODO: uncomment when ready to use - //if ((es.fidMajor & -0x10) == 0xe0 && es.fidMinor == 0 && es.sup1 == 1 && !es.sup2) - //{ - // esAVC[es.fidMajor % 16] = task.es.es_ptr; - //} - //else if ((es.fidMajor & -0x10) == 0xe0 && es.fidMinor == 0 && !es.sup1 && !es.sup2) - //{ - // esM2V[es.fidMajor % 16] = task.es.es_ptr; - //} - //else if (es.fidMajor == 0xbd && (es.fidMinor & -0x10) == 0 && !es.sup1 && !es.sup2) - //{ - // esATX[es.fidMinor % 16] = task.es.es_ptr; - //} - //else if (es.fidMajor == 0xbd && (es.fidMinor & -0x10) == 0x20 && !es.sup1 && !es.sup2) - //{ - // esDATA[es.fidMinor % 16] = task.es.es_ptr; - //} - //else if (es.fidMajor == 0xbd && (es.fidMinor & -0x10) == 0x30 && !es.sup1 && !es.sup2) - //{ - // esAC3[es.fidMinor % 16] = task.es.es_ptr; - //} - //else if (es.fidMajor == 0xbd && (es.fidMinor & -0x10) == 0x40 && !es.sup1 && !es.sup2) - //{ - // esPCM[es.fidMinor % 16] = task.es.es_ptr; - //} - //else - { - fmt::throw_exception("dmuxEnableEs: unknown filter (0x%x, 0x%x, 0x%x, 0x%x)", es.fidMajor, es.fidMinor, es.sup1, es.sup2); - } - es.dmux = this; - break; - } - - case dmuxDisableEs: - { - ElementaryStream& es = *task.es.es_ptr; - if (es.dmux != this) - { - fmt::throw_exception("dmuxDisableEs: invalid elementary stream"); - } - - for (u32 i = 0; i < std::size(esALL); i++) - { - if (esALL[i] == &es) - { - esALL[i] = nullptr; - } - } - es.dmux = nullptr; - idm::remove(task.es.es); - break; - } - - case dmuxFlushEs: - { - ElementaryStream& es = *task.es.es_ptr; - - const u32 old_size = ::size32(es.raw_data); - if (old_size && (es.fidMajor & -0x10) == 0xe0) - { - // TODO (it's only for AVC, some ATX data may be lost) - while (es.isfull(old_size)) - { - if (Emu.IsStopped() || is_closed) break; - - std::this_thread::sleep_for(1ms); // hack - } - - es.push_au(old_size, es.last_dts, es.last_pts, 
stream.userdata, false, 0); - - // callback - auto esMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND; - esMsg->supplementalInfo = stream.userdata; - es.cbFunc(*this, id, es.id, esMsg, es.cbArg); - lv2_obj::sleep(*this); - } - - if (!es.raw_data.empty()) - { - cellDmux.error("dmuxFlushEs: 0x%x bytes lost (es_id=%d)", ::size32(es.raw_data), es.id); - } - - // callback - auto esMsg = vm::ptr::make(memAddr + (cb_add ^= 16)); - esMsg->msgType = CELL_DMUX_ES_MSG_TYPE_FLUSH_DONE; - esMsg->supplementalInfo = stream.userdata; - es.cbFunc(*this, id, es.id, esMsg, es.cbArg); - lv2_obj::sleep(*this); - break; - } - - case dmuxResetEs: - { - task.es.es_ptr->reset(); - break; - } - - case dmuxClose: - { - break; - } - - default: - { - fmt::throw_exception("Demuxer thread error: unknown task (0x%x)", +task.type); - } - } - } - - is_finished = true; - } -}; - - -PesHeader::PesHeader(DemuxerStream& stream) - : pts(CODEC_TS_INVALID) - , dts(CODEC_TS_INVALID) - , size(0) - , has_ts(false) - , is_ok(false) -{ - u16 header; - if (!stream.get(header)) - { - fmt::throw_exception("End of stream (header)"); - } - if (!stream.get(size)) - { - fmt::throw_exception("End of stream (size)"); - } - if (!stream.check(size)) - { - fmt::throw_exception("End of stream (size=%d)", size); - } - - u8 pos = 0; - while (pos++ < size) - { - u8 v; - if (!stream.get(v)) - { - return; // should never occur - } - - if (v == 0xff) // skip padding bytes - { - continue; - } - - if ((v & 0xf0) == 0x20 && (size - pos) >= 4) // pts only - { - pos += 4; - pts = stream.get_ts(v); - has_ts = true; - } - else if ((v & 0xf0) == 0x30 && (size - pos) >= 9) // pts and dts - { - pos += 5; - pts = stream.get_ts(v); - stream.get(v); - has_ts = true; - - if ((v & 0xf0) != 0x10) - { - cellDmux.error("PesHeader(): dts not found (v=0x%x, size=%d, pos=%d)", v, size, pos - 1); - stream.skip(size - pos); - return; - } - pos += 4; - dts = stream.get_ts(v); - } - else - { - 
cellDmux.warning("PesHeader(): unknown code (v=0x%x, size=%d, pos=%d)", v, size, pos - 1); - stream.skip(size - pos); - pos = size; - break; - } - } - - is_ok = true; -} - -ElementaryStream::ElementaryStream(Demuxer* dmux, vm::ptr addr, u32 size, u32 fidMajor, u32 fidMinor, u32 sup1, u32 sup2, vm::ptr cbFunc, vm::ptr cbArg, u32 spec) - : put(utils::align(addr.addr(), 128)) - , dmux(dmux) - , memAddr(vm::ptr::make(utils::align(addr.addr(), 128))) - , memSize(size - (addr.addr() - memAddr.addr())) - , fidMajor(fidMajor) - , fidMinor(fidMinor) - , sup1(sup1) - , sup2(sup2) - , cbFunc(cbFunc) - , cbArg(cbArg) - , spec(spec) -{ -} - -bool ElementaryStream::is_full(u32 space) -{ - if (released < put_count) - { - if (entries.is_full()) - { - return true; - } - - u32 first = 0; - if (!entries.peek(first, 0, &dmux->is_closed) || !first) - { - fmt::throw_exception("entries.peek() failed"); - } - else if (first >= put) - { - return first - put < space + 128; - } - else if (put + space + 128 > memAddr.addr() + memSize) - { - return first - memAddr.addr() < space + 128; - } - else - { - return false; - } - } - else - { - return false; - } -} - -bool ElementaryStream::isfull(u32 space) -{ - std::lock_guard lock(m_mutex); - return is_full(space); -} - -void ElementaryStream::push_au(u32 size, u64 dts, u64 pts, u64 userdata, bool rap, u32 specific) -{ - u32 addr; - { - std::lock_guard lock(m_mutex); - ensure(!is_full(size)); - - if (put + size + 128 > memAddr.addr() + memSize) - { - put = memAddr.addr(); - } - - std::memcpy(vm::base(put + 128), raw_data.data(), size); - raw_data.erase(raw_data.begin(), raw_data.begin() + size); - - auto info = vm::ptr::make(put); - info->auAddr.set(put + 128); - info->auSize = size; - info->dts.lower = static_cast(dts); - info->dts.upper = static_cast(dts >> 32); - info->pts.lower = static_cast(pts); - info->pts.upper = static_cast(pts >> 32); - info->isRap = rap; - info->auMaxSize = 0; - info->userData = userdata; - - auto spec = 
vm::ptr::make(put + u32{sizeof(CellDmuxAuInfoEx)}); - *spec = specific; - - auto inf = vm::ptr::make(put + 64); - inf->auAddr.set(put + 128); - inf->auSize = size; - inf->dts.lower = static_cast(dts); - inf->dts.upper = static_cast(dts >> 32); - inf->pts.lower = static_cast(pts); - inf->pts.upper = static_cast(pts >> 32); - inf->auMaxSize = 0; // ????? - inf->userData = userdata; - - addr = put; - - put = utils::align(put + 128 + size, 128); - - put_count++; - } - - ensure(entries.push(addr, &dmux->is_closed)); -} - -void ElementaryStream::push(DemuxerStream& stream, u32 size) -{ - auto const old_size = raw_data.size(); - - raw_data.resize(old_size + size); - - std::memcpy(raw_data.data() + old_size, vm::base(stream.addr), size); // append bytes - - stream.skip(size); -} - -bool ElementaryStream::release() -{ - std::lock_guard lock(m_mutex); - if (released >= put_count) - { - cellDmux.fatal("es::release() error: buffer is empty"); - return false; - } - if (released >= got_count) - { - cellDmux.fatal("es::release() error: buffer has not been seen yet"); - return false; - } - - u32 addr = 0; - if (!entries.pop(addr, &dmux->is_closed) || !addr) - { - cellDmux.fatal("es::release() error: entries.Pop() failed"); - return false; - } - - released++; - return true; -} - -bool ElementaryStream::peek(u32& out_data, bool no_ex, u32& out_spec, bool update_index) -{ - std::lock_guard lock(m_mutex); - if (got_count < released) - { - cellDmux.fatal("es::peek() error: got_count(%d) < released(%d) (put_count=%d)", got_count, released, put_count); - return false; - } - if (got_count >= put_count) - { - return false; - } - - u32 addr = 0; - if (!entries.peek(addr, got_count - released, &dmux->is_closed) || !addr) - { - cellDmux.fatal("es::peek() error: entries.Peek() failed"); - return false; - } - - out_data = no_ex ? 
addr + 64 : addr; - out_spec = addr + sizeof(CellDmuxAuInfoEx); - - if (update_index) - { - got_count++; - } - return true; -} - -void ElementaryStream::reset() -{ - std::lock_guard lock(m_mutex); - put = memAddr.addr(); - entries.clear(); - put_count = 0; - got_count = 0; - released = 0; - raw_data.clear(); - raw_pos = 0; -} - -void dmuxQueryAttr(u32 /* info_addr, may be 0 */, vm::ptr attr) -{ - attr->demuxerVerLower = 0x280000; // TODO: check values - attr->demuxerVerUpper = 0x260000; - attr->memSize = 0x10000; // 0x3e8e6 from ps3 -} - -void dmuxQueryEsAttr(u32 /* info, may be 0 */, vm::cptr esFilterId, u32 /*esSpecificInfo*/, vm::ptr attr) -{ - if (esFilterId->filterIdMajor >= 0xe0) - { - attr->memSize = 0x500000; // 0x45fa49 from ps3 - } - else - { - attr->memSize = 0x7000; // 0x73d9 from ps3 - } - - cellDmux.warning("*** filter(0x%x, 0x%x, 0x%x, 0x%x)", esFilterId->filterIdMajor, esFilterId->filterIdMinor, esFilterId->supplementalInfo1, esFilterId->supplementalInfo2); -} - -error_code cellDmuxQueryAttr(vm::cptr type, vm::ptr attr) -{ - cellDmux.warning("cellDmuxQueryAttr(type=*0x%x, attr=*0x%x)", type, attr); - - if (type->streamType != CELL_DMUX_STREAM_TYPE_PAMF) - { - return CELL_DMUX_ERROR_ARG; - } - - dmuxQueryAttr(0, attr); - return CELL_OK; -} - -error_code cellDmuxQueryAttr2(vm::cptr type2, vm::ptr attr) -{ - cellDmux.warning("cellDmuxQueryAttr2(demuxerType2=*0x%x, demuxerAttr=*0x%x)", type2, attr); - - if (type2->streamType != CELL_DMUX_STREAM_TYPE_PAMF) - { - return CELL_DMUX_ERROR_ARG; - } - - dmuxQueryAttr(type2->streamSpecificInfo, attr); - return CELL_OK; -} - -error_code cellDmuxOpen(vm::cptr type, vm::cptr res, vm::cptr cb, vm::ptr handle) -{ - cellDmux.warning("cellDmuxOpen(type=*0x%x, res=*0x%x, cb=*0x%x, handle=*0x%x)", type, res, cb, handle); - - if (type->streamType != CELL_DMUX_STREAM_TYPE_PAMF) - { - return CELL_DMUX_ERROR_ARG; - } - - // TODO: check demuxerResource and demuxerCb arguments - fmt::throw_exception("cellDmux disabled, use 
LLE."); -} - -error_code cellDmuxOpenEx(vm::cptr type, vm::cptr resEx, vm::cptr cb, vm::ptr handle) -{ - cellDmux.warning("cellDmuxOpenEx(type=*0x%x, resEx=*0x%x, cb=*0x%x, handle=*0x%x)", type, resEx, cb, handle); - - if (type->streamType != CELL_DMUX_STREAM_TYPE_PAMF) - { - return CELL_DMUX_ERROR_ARG; - } - - // TODO: check demuxerResourceEx and demuxerCb arguments - fmt::throw_exception("cellDmux disabled, use LLE."); -} - -error_code cellDmuxOpenExt(vm::cptr type, vm::cptr resEx, vm::cptr cb, vm::ptr handle) -{ - cellDmux.warning("cellDmuxOpenExt(type=*0x%x, resEx=*0x%x, cb=*0x%x, handle=*0x%x)", type, resEx, cb, handle); - - return cellDmuxOpenEx(type, resEx, cb, handle); -} - -error_code cellDmuxOpen2(vm::cptr type2, vm::cptr res2, vm::cptr cb, vm::ptr handle) -{ - cellDmux.warning("cellDmuxOpen2(type2=*0x%x, res2=*0x%x, cb=*0x%x, handle=*0x%x)", type2, res2, cb, handle); - - if (type2->streamType != CELL_DMUX_STREAM_TYPE_PAMF) - { - return CELL_DMUX_ERROR_ARG; - } - - // TODO: check demuxerType2, demuxerResource2 and demuxerCb arguments - fmt::throw_exception("cellDmux disabled, use LLE."); -} - -error_code cellDmuxClose(u32 handle) -{ - cellDmux.warning("cellDmuxClose(handle=0x%x)", handle); - - const auto dmux = idm::get_unlocked(handle); - - if (!dmux) - { - return CELL_DMUX_ERROR_ARG; - } - - dmux->is_closed = true; - dmux->job.try_push(DemuxerTask(dmuxClose)); - - while (!dmux->is_finished) - { - if (Emu.IsStopped()) - { - cellDmux.warning("cellDmuxClose(%d) aborted", handle); - return CELL_OK; - } - - std::this_thread::sleep_for(1ms); // hack - } - - idm::remove(handle); - return CELL_OK; -} - -error_code cellDmuxSetStream(u32 handle, u32 streamAddress, u32 streamSize, b8 discontinuity, u64 userData) -{ - cellDmux.trace("cellDmuxSetStream(handle=0x%x, streamAddress=0x%x, streamSize=%d, discontinuity=%d, userData=0x%llx)", handle, streamAddress, streamSize, discontinuity, userData); - - const auto dmux = idm::get_unlocked(handle); - - if (!dmux) - { - 
return CELL_DMUX_ERROR_ARG; - } - - if (dmux->is_running.exchange(true)) - { - //std::this_thread::sleep_for(1ms); // hack - return CELL_DMUX_ERROR_BUSY; - } - - DemuxerTask task(dmuxSetStream); - auto& info = task.stream; - info.addr = streamAddress; - info.size = streamSize; - info.discontinuity = discontinuity; - info.userdata = userData; - - dmux->job.push(task, &dmux->is_closed); - return CELL_OK; -} - -error_code cellDmuxResetStream(u32 handle) -{ - cellDmux.warning("cellDmuxResetStream(handle=0x%x)", handle); - - const auto dmux = idm::get_unlocked(handle); - - if (!dmux) - { - return CELL_DMUX_ERROR_ARG; - } - - dmux->job.push(DemuxerTask(dmuxResetStream), &dmux->is_closed); - return CELL_OK; -} - -error_code cellDmuxResetStreamAndWaitDone(u32 handle) -{ - cellDmux.warning("cellDmuxResetStreamAndWaitDone(handle=0x%x)", handle); - - const auto dmux = idm::get_unlocked(handle); - - if (!dmux) - { - return CELL_DMUX_ERROR_ARG; - } - - if (!dmux->is_running) + // This is frequently checked in here because the elementary stream could get disabled at any time by a different thread via cellDmuxDisableEs() or cellDmuxClose(). + if (!es_handle->is_enabled) { return CELL_OK; } - dmux->is_working = true; - - dmux->job.push(DemuxerTask(dmuxResetStreamAndWaitDone), &dmux->is_closed); - - while (dmux->is_running && dmux->is_working && !dmux->is_closed) // TODO: ensure that it is safe + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes, 0); ret != CELL_OK) { - if (Emu.IsStopped()) + fatal_err(es_handle->is_enabled, ret); + return 1; + } + + // Check if the access unit queue is full. One slot is reserved for the access unit produced by flushing the stream, so that flushing always succeeds. 
+ if (!es_handle->is_enabled || es_handle->au_queue.allocated_size >= es_handle->au_queue.max_size - !es_handle->flush_started) + { + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes); ret != CELL_OK) { - cellDmux.warning("cellDmuxResetStreamAndWaitDone(%d) aborted", handle); - return CELL_OK; + fatal_err(es_handle->is_enabled, ret); + return 1; } - std::this_thread::sleep_for(1ms); // hack + + return !es_handle->is_enabled ? CELL_OK : not_an_error(1); // Disable error reporting if the queue is full. This is expected to happen frequently. } + DmuxAuInfo& _au_info = get_au_queue_elements(es_handle)[es_handle->au_queue.back].au_info; + + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes); ret != CELL_OK) + { + fatal_err(es_handle->is_enabled, ret); + return 1; + } + + _au_info.info = au_info->info; + std::memcpy(_au_info.specific_info.get_ptr(), au_info->specific_info.get_ptr(), au_info->specific_info_size); + + if (!es_handle->is_enabled) + { + return CELL_OK; + } + + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes, 0); ret != CELL_OK) + { + fatal_err(es_handle->is_enabled, ret); + return CELL_OK; // LLE returns CELL_OK + } + + if (!es_handle->is_enabled) + { + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes); ret != CELL_OK) + { + fatal_err(es_handle->is_enabled, ret); + } + + return CELL_OK; + } + + es_handle->au_queue.back = (es_handle->au_queue.back + 1) % es_handle->au_queue.max_size; + es_handle->au_queue.allocated_size++; + es_handle->au_queue.size++; + + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes); ret != CELL_OK) + { + fatal_err(es_handle->is_enabled, ret); + return CELL_OK; // LLE returns CELL_OK + } + + if (!es_handle->is_enabled) + { + return CELL_OK; + } + + const vm::var es_msg{{ .msgType = CELL_DMUX_ES_MSG_TYPE_AU_FOUND, .supplementalInfo = es_handle->dmux_handle->user_data }}; + es_handle->es_cb.cbFunc(ppu, es_handle->dmux_handle, es_handle, es_msg, es_handle->es_cb.cbArg); 
+ return CELL_OK; } -error_code cellDmuxQueryEsAttr(vm::cptr type, vm::cptr esFilterId, u32 esSpecificInfo, vm::ptr esAttr) +static error_code notify_es_flush_done(ppu_thread& ppu, vm::ptr core_es_handle, vm::ptr es_handle) { - cellDmux.warning("cellDmuxQueryEsAttr(demuxerType=*0x%x, esFilterId=*0x%x, esSpecificInfo=*0x%x, esAttr=*0x%x)", type, esFilterId, esSpecificInfo, esAttr); + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - if (type->streamType != CELL_DMUX_STREAM_TYPE_PAMF) + if (!savestate_lock) { - return CELL_DMUX_ERROR_ARG; + ppu.state += cpu_flag::again; + return {}; } - // TODO: check esFilterId and esSpecificInfo correctly - dmuxQueryEsAttr(0, esFilterId, esSpecificInfo, esAttr); + cellDmux.notice("dmuxEsNotifyFlushDone(unk=*0x%x, es_handle=*0x%x)", core_es_handle, es_handle); + + ensure(!!es_handle); // Not checked on LLE + + if (!es_handle->dmux_handle->_this || !es_handle->is_enabled) + { + return CELL_OK; + } + + es_handle->flush_started = false; + + const vm::var es_msg{{ .msgType = CELL_DMUX_ES_MSG_TYPE_FLUSH_DONE, .supplementalInfo = es_handle->dmux_handle->user_data }}; + es_handle->es_cb.cbFunc(ppu, es_handle->dmux_handle, es_handle, es_msg, es_handle->es_cb.cbArg); + return CELL_OK; } -error_code cellDmuxQueryEsAttr2(vm::cptr type2, vm::cptr esFilterId, u32 esSpecificInfo, vm::ptr esAttr) -{ - cellDmux.warning("cellDmuxQueryEsAttr2(type2=*0x%x, esFilterId=*0x%x, esSpecificInfo=*0x%x, esAttr=*0x%x)", type2, esFilterId, esSpecificInfo, esAttr); - if (type2->streamType != CELL_DMUX_STREAM_TYPE_PAMF) +static error_code query_attr(ppu_thread& ppu, vm::ptr demuxerAttr, vm::cptr streamSpecificInfo) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) { - return CELL_DMUX_ERROR_ARG; + ppu.state += cpu_flag::again; + return {}; } - // TODO: check 
demuxerType2, esFilterId and esSpecificInfo correctly - dmuxQueryEsAttr(type2->streamSpecificInfo, esFilterId, esSpecificInfo, esAttr); + const vm::var pamf_attr; + + if (const error_code ret = get_error(get_core_ops()->queryAttr(ppu, streamSpecificInfo, pamf_attr)); ret != CELL_OK) + { + return ret; + } + + demuxerAttr->memSize = utils::align(sizeof(DmuxContext) + (pamf_attr->maxEnabledEsNum * sizeof(vm::addr_t)) + sizeof(DmuxEsContext), alignof(DmuxContext)) + + pamf_attr->memSize + 0xf; + demuxerAttr->demuxerVerUpper = 0x260000; + demuxerAttr->demuxerVerLower = pamf_attr->version; + return CELL_OK; } -error_code cellDmuxEnableEs(u32 handle, vm::cptr esFilterId, vm::cptr esResourceInfo, vm::cptr esCb, u32 esSpecificInfo, vm::ptr esHandle) +error_code cellDmuxQueryAttr(ppu_thread& ppu, vm::cptr demuxerType, vm::ptr demuxerAttr) { - cellDmux.warning("cellDmuxEnableEs(handle=0x%x, esFilterId=*0x%x, esResourceInfo=*0x%x, esCb=*0x%x, esSpecificInfo=*0x%x, esHandle=*0x%x)", handle, esFilterId, esResourceInfo, esCb, esSpecificInfo, esHandle); + cellDmux.notice("cellDmuxQueryAttr(demuxerType=*0x%x, demuxerAttr=*0x%x)", demuxerType, demuxerAttr); - const auto dmux = idm::get_unlocked(handle); - - if (!dmux) + if (!demuxerType || !demuxerAttr || demuxerType->streamType != CELL_DMUX_STREAM_TYPE_PAMF) { return CELL_DMUX_ERROR_ARG; } - // TODO: check esFilterId, esResourceInfo, esCb and esSpecificInfo correctly + return query_attr(ppu, demuxerAttr, vm::null); +} - const auto es = idm::make_ptr(dmux.get(), esResourceInfo->memAddr, esResourceInfo->memSize, - esFilterId->filterIdMajor, esFilterId->filterIdMinor, esFilterId->supplementalInfo1, esFilterId->supplementalInfo2, - esCb->cbFunc, esCb->cbArg, esSpecificInfo); +error_code cellDmuxQueryAttr2(ppu_thread& ppu, vm::cptr demuxerType2, vm::ptr demuxerAttr) +{ + cellDmux.notice("cellDmuxQueryAttr2(demuxerType2=*0x%x, demuxerAttr=*0x%x)", demuxerType2, demuxerAttr); - *esHandle = es->id; + if (!demuxerType2 || !demuxerAttr || 
demuxerType2->streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } - cellDmux.warning("*** New ES(dmux=0x%x, addr=0x%x, size=0x%x, filter={0x%x, 0x%x, 0x%x, 0x%x}, cb=0x%x, arg=0x%x, spec=0x%x): id = 0x%x", - handle, es->memAddr, es->memSize, es->fidMajor, es->fidMinor, es->sup1, es->sup2, es->cbFunc, es->cbArg, es->spec, es->id); + return query_attr(ppu, demuxerAttr, demuxerType2->streamSpecificInfo); +} - DemuxerTask task(dmuxEnableEs); - task.es.es = es->id; - task.es.es_ptr = es.get(); +static error_code open(ppu_thread& ppu, vm::cptr demuxerType, vm::cptr demuxerResource, vm::cptr demuxerResourceEx, + vm::cptr demuxerCb, vm::cptr streamSpecificInfo, vm::pptr demuxerHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + const vm::var type{{ .streamType = demuxerType->streamType, .streamSpecificInfo = streamSpecificInfo }}; + const vm::var attr; + + if (const error_code ret = cellDmuxQueryAttr2(ppu, type, attr); ret != CELL_OK) + { + return ret; + } + + if (attr->memSize > demuxerResource->memSize) + { + return CELL_DMUX_ERROR_ARG; + } + + const vm::var core_attr; + + if (const error_code ret = get_error(get_core_ops()->queryAttr(ppu, streamSpecificInfo, core_attr)); ret != CELL_OK) + { + return ret; + } + + const auto handle = vm::ptr::make(utils::align(demuxerResource->memAddr.addr(), alignof(DmuxContext))); + const u32 es_handles_size = core_attr->maxEnabledEsNum * sizeof(vm::addr_t); + const auto core_mem_addr = vm::ptr::make(utils::align(handle.addr() + sizeof(DmuxContext) + es_handles_size, 0x10)); + + const vm::var core_resource = + {{ + .memAddr = core_mem_addr, + .memSize = demuxerResource->memSize - (core_mem_addr.addr() - demuxerResource->memAddr.addr()), + .ppuThreadPriority = demuxerResource->ppuThreadPriority, + .ppuThreadStackSize = 
demuxerResource->ppuThreadStackSize, + .spuThreadPriority = demuxerResource->spuThreadPriority, + .numOfSpus = demuxerResource->numOfSpus + }}; + + const vm::var res_spurs; + + if (demuxerResourceEx) + { + res_spurs->spurs = demuxerResourceEx->spurs; + res_spurs->priority = demuxerResourceEx->priority; + res_spurs->maxContention = demuxerResourceEx->maxContention; + } + + const auto demux_done_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(notify_demux_done))); + const auto prog_end_code_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(notify_prog_end_code))); + const auto fatal_err_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(notify_fatal_err))); + const vm::var> cb_demux_done{{ .cbFunc = demux_done_func, .cbArg = handle }}; + const vm::var> cb_prog_end_code{{ .cbFunc = prog_end_code_func, .cbArg = handle }}; + const vm::var> cb_fatal_err{{ .cbFunc = fatal_err_func, .cbArg = handle }}; + + const vm::var> core_handle; + + if (const error_code ret = get_error(get_core_ops()->open(ppu, streamSpecificInfo, core_resource, demuxerResourceEx ? 
+res_spurs : vm::null, + cb_demux_done, cb_prog_end_code, cb_fatal_err, core_handle)); + ret != CELL_OK) + { + return ret; + } + + handle->_this = handle; + handle->_this_size = sizeof(DmuxContext) + es_handles_size; + handle->version = core_attr->version; + handle->dmux_state = DMUX_STOPPED; + handle->dmux_type = *demuxerType; + handle->dmux_cb = *demuxerCb; + handle->stream_is_set = false; + handle->core_handle = *core_handle; + handle->version_ = core_attr->version; + handle->user_data = 0; + handle->max_enabled_es_num = core_attr->maxEnabledEsNum; + handle->enabled_es_num = 0; + + const vm::var mutex_attr = + {{ + .protocol = SYS_SYNC_PRIORITY, + .recursive = SYS_SYNC_NOT_RECURSIVE, + .pshared = SYS_SYNC_NOT_PROCESS_SHARED, + .adaptive = SYS_SYNC_NOT_ADAPTIVE, + .name_u64 = "_dx_mhd"_u64 + }}; + + if (const error_code ret = lv2_syscall(ppu, handle.ptr(&DmuxContext::_dx_mhd), mutex_attr); ret != CELL_OK) + { + return ret; + } + + *demuxerHandle = handle; - dmux->job.push(task, &dmux->is_closed); return CELL_OK; } -error_code cellDmuxDisableEs(u32 esHandle) +error_code cellDmuxOpen(ppu_thread& ppu, vm::cptr demuxerType, vm::cptr demuxerResource, vm::cptr demuxerCb, vm::pptr demuxerHandle) { - cellDmux.warning("cellDmuxDisableEs(esHandle=0x%x)", esHandle); + cellDmux.notice("cellDmuxOpen(demuxerType=*0x%x, demuxerResource=*0x%x, demuxerCb=*0x%x, handle=*0x%x)", demuxerType, demuxerResource, demuxerCb, demuxerHandle); - const auto es = idm::get_unlocked(esHandle); - - if (!es) + if (!demuxerType || demuxerType->streamType != CELL_DMUX_STREAM_TYPE_PAMF + || !demuxerResource || !demuxerResource->memAddr || demuxerResource->memSize == umax || demuxerResource->ppuThreadStackSize == umax + || !demuxerCb || !demuxerCb->cbFunc + || !demuxerHandle) { return CELL_DMUX_ERROR_ARG; } - DemuxerTask task(dmuxDisableEs); - task.es.es = esHandle; - task.es.es_ptr = es.get(); - - es->dmux->job.push(task, &es->dmux->is_closed); - return CELL_OK; + return open(ppu, demuxerType, 
demuxerResource, vm::null, demuxerCb, vm::null, demuxerHandle); } -error_code cellDmuxResetEs(u32 esHandle) +error_code cellDmuxOpenEx(ppu_thread& ppu, vm::cptr demuxerType, vm::cptr demuxerResourceEx, vm::cptr demuxerCb, vm::pptr demuxerHandle) { - cellDmux.trace("cellDmuxResetEs(esHandle=0x%x)", esHandle); + cellDmux.notice("cellDmuxOpenEx(demuxerType=*0x%x, demuxerResourceEx=*0x%x, demuxerCb=*0x%x, demuxerHandle=*0x%x)", demuxerType, demuxerResourceEx, demuxerCb, demuxerHandle); - const auto es = idm::get_unlocked(esHandle); - - if (!es) + if (!demuxerType || demuxerType->streamType != CELL_DMUX_STREAM_TYPE_PAMF + || !demuxerResourceEx || !demuxerResourceEx->memAddr || demuxerResourceEx->memSize == umax || demuxerResourceEx->ppuThreadStackSize == umax + || !demuxerResourceEx->spurs || demuxerResourceEx->maxContention == 0u + || (demuxerResourceEx->priority & 0xf0f0f0f0f0f0f0f0ull) != 0u // Each byte in priority must be less than 0x10 + || !demuxerCb + || !demuxerHandle) { return CELL_DMUX_ERROR_ARG; } - DemuxerTask task(dmuxResetEs); - task.es.es = esHandle; - task.es.es_ptr = es.get(); + const vm::var resource + {{ + .memAddr = demuxerResourceEx->memAddr, + .memSize = demuxerResourceEx->memSize, + .ppuThreadPriority = demuxerResourceEx->ppuThreadPriority, + .ppuThreadStackSize = demuxerResourceEx->ppuThreadStackSize, + .spuThreadPriority = 0xfa, + .numOfSpus = 1 + }}; - es->dmux->job.push(task, &es->dmux->is_closed); - return CELL_OK; + return open(ppu, demuxerType, resource, demuxerResourceEx, demuxerCb, vm::null, demuxerHandle); } -error_code cellDmuxGetAu(u32 esHandle, vm::ptr auInfo, vm::ptr auSpecificInfo) +error_code cellDmuxOpenExt(ppu_thread& ppu, vm::cptr demuxerType, vm::cptr demuxerResourceEx, vm::cptr demuxerCb, vm::pptr demuxerHandle) { - cellDmux.trace("cellDmuxGetAu(esHandle=0x%x, auInfo=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfo, auSpecificInfo); + cellDmux.notice("cellDmuxOpenExt(demuxerType=*0x%x, demuxerResourceEx=*0x%x, 
demuxerCb=*0x%x, demuxerHandle=*0x%x)", demuxerType, demuxerResourceEx, demuxerCb, demuxerHandle); - const auto es = idm::get_unlocked(esHandle); - - if (!es) - { - return CELL_DMUX_ERROR_ARG; - } - - u32 info; - u32 spec; - if (!es->peek(info, true, spec, true)) - { - return CELL_DMUX_ERROR_EMPTY; - } - - *auInfo = info; - *auSpecificInfo = spec; - return CELL_OK; + return cellDmuxOpenEx(ppu, demuxerType, demuxerResourceEx, demuxerCb, demuxerHandle); } -error_code cellDmuxPeekAu(u32 esHandle, vm::ptr auInfo, vm::ptr auSpecificInfo) +error_code cellDmuxOpen2(ppu_thread& ppu, vm::cptr demuxerType2, vm::cptr demuxerResource2, vm::cptr demuxerCb, vm::pptr demuxerHandle) { - cellDmux.trace("cellDmuxPeekAu(esHandle=0x%x, auInfo=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfo, auSpecificInfo); + cellDmux.notice("cellDmuxOpen2(demuxerType2=*0x%x, demuxerResource2=*0x%x, demuxerCb=*0x%x, demuxerHandle=*0x%x)", demuxerType2, demuxerResource2, demuxerCb, demuxerHandle); - const auto es = idm::get_unlocked(esHandle); - - if (!es) + if (!demuxerType2 || demuxerType2->streamType != CELL_DMUX_STREAM_TYPE_PAMF + || !demuxerResource2 + || !demuxerCb || !demuxerCb->cbFunc + || !demuxerHandle) { return CELL_DMUX_ERROR_ARG; } - u32 info; - u32 spec; - if (!es->peek(info, true, spec, false)) + const vm::var type{{ .streamType = CELL_DMUX_STREAM_TYPE_PAMF }}; + + if (demuxerResource2->isResourceEx) { - return CELL_DMUX_ERROR_EMPTY; + if (!demuxerResource2->resourceEx.memAddr || demuxerResource2->resourceEx.memSize == umax || demuxerResource2->resourceEx.ppuThreadStackSize == umax + || !demuxerResource2->resourceEx.spurs || demuxerResource2->resourceEx.maxContention == 0u + || (demuxerResource2->resourceEx.priority & 0xf0f0f0f0f0f0f0f0ull) != 0u) // Each byte in priority must be less than 0x10 + { + return CELL_DMUX_ERROR_ARG; + } + + const vm::var resource + {{ + .memAddr = demuxerResource2->resourceEx.memAddr, + .memSize = demuxerResource2->resourceEx.memSize, + .ppuThreadPriority = 
demuxerResource2->resourceEx.ppuThreadPriority, + .ppuThreadStackSize = demuxerResource2->resourceEx.ppuThreadStackSize, + .spuThreadPriority = 0xfa, + .numOfSpus = 1 + }}; + + return open(ppu, type, resource, demuxerResource2.ptr(&CellDmuxResource2::resourceEx), demuxerCb, demuxerType2->streamSpecificInfo, demuxerHandle); } - *auInfo = info; - *auSpecificInfo = spec; - return CELL_OK; + if (!demuxerResource2->resource.memAddr || demuxerResource2->resource.memSize == umax || demuxerResource2->resource.ppuThreadStackSize == umax) + { + return CELL_DMUX_ERROR_ARG; + } + + return open(ppu, type, demuxerResource2.ptr(&CellDmuxResource2::resource), vm::null, demuxerCb, demuxerType2->streamSpecificInfo, demuxerHandle); } -error_code cellDmuxGetAuEx(u32 esHandle, vm::ptr auInfoEx, vm::ptr auSpecificInfo) +static error_code disable_es(ppu_thread& ppu, DmuxEsContext& esHandle) { - cellDmux.trace("cellDmuxGetAuEx(esHandle=0x%x, auInfoEx=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfoEx, auSpecificInfo); - - const auto es = idm::get_unlocked(esHandle); - - if (!es) + if (const error_code ret = lv2_syscall(ppu, esHandle._dx_mes, 0); ret != CELL_OK) { - return CELL_DMUX_ERROR_ARG; + return ret; } - u32 info; - u32 spec; - if (!es->peek(info, false, spec, true)) + const error_code core_ret = get_core_ops()->disableEs(ppu, esHandle.core_es_handle); + + esHandle.is_enabled = false; + + if (const error_code ret = lv2_syscall(ppu, esHandle._dx_mes); ret != CELL_OK) { - return CELL_DMUX_ERROR_EMPTY; + return ret; } - *auInfoEx = info; - *auSpecificInfo = spec; - return CELL_OK; + error_code ret; + while ((ret = lv2_syscall(ppu, esHandle._dx_mes)) == static_cast(CELL_EBUSY)) + { + lv2_syscall(ppu, 200); + } + + if (ret != CELL_OK) + { + return ret; + } + + esHandle._this = vm::null; + + return get_error(core_ret); } -error_code cellDmuxPeekAuEx(u32 esHandle, vm::ptr auInfoEx, vm::ptr auSpecificInfo) +error_code cellDmuxClose(ppu_thread& ppu, vm::ptr demuxerHandle) { - 
cellDmux.trace("cellDmuxPeekAuEx(esHandle=0x%x, auInfoEx=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfoEx, auSpecificInfo); + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - const auto es = idm::get_unlocked(esHandle); + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } - if (!es) + cellDmux.notice("cellDmuxClose(demuxerHandle=*0x%x)", demuxerHandle); + + if (!demuxerHandle || !demuxerHandle->_this || demuxerHandle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) { return CELL_DMUX_ERROR_ARG; } - u32 info; - u32 spec; - if (!es->peek(info, false, spec, false)) + demuxerHandle->_this = vm::null; + + if (const error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd, 0); ret != CELL_OK) { - return CELL_DMUX_ERROR_EMPTY; + demuxerHandle->_this = demuxerHandle; + return ret; } - *auInfoEx = info; - *auSpecificInfo = spec; - return CELL_OK; + for (const vm::ptr es_handle : get_es_handles(demuxerHandle)) + { + if (const error_code ret = disable_es(ppu, *es_handle); ret != CELL_OK) + { + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + demuxerHandle->_this = demuxerHandle; + return ret; + } + + es_handle->dmux_handle = vm::null; + demuxerHandle->enabled_es_num--; + } + + error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + ret = ret ? ret : get_error(get_core_ops()->close(ppu, demuxerHandle->core_handle)); + ret = ret ? 
ret : lv2_syscall(ppu, demuxerHandle->_dx_mhd); + + if (ret != CELL_OK) + { + demuxerHandle->_this = demuxerHandle; + } + + return ret; } -error_code cellDmuxReleaseAu(u32 esHandle) +error_code cellDmuxSetStream(ppu_thread& ppu, vm::ptr demuxerHandle, vm::cptr streamAddress, u32 streamSize, b8 discontinuity, u64 userData) { - cellDmux.trace("cellDmuxReleaseAu(esHandle=0x%x)", esHandle); + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - const auto es = idm::get_unlocked(esHandle); + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } - if (!es) + cellDmux.trace("cellDmuxSetStream(demuxerHandle=*0x%x, streamAddress=*0x%x, streamSize=0x%x, discontinuity=%d, userData=0x%llx)", + demuxerHandle, streamAddress, streamSize, +discontinuity, userData); + + if (!demuxerHandle || !demuxerHandle->_this || streamSize == 0 || streamSize == umax || demuxerHandle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) { return CELL_DMUX_ERROR_ARG; } - if (!es->release()) + if (!(demuxerHandle->dmux_state & DMUX_STOPPED)) + { + return CELL_DMUX_ERROR_BUSY; + } + + if (const error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + if (const error_code ret = get_error(get_core_ops()->setStream(ppu, demuxerHandle->core_handle, streamAddress, streamSize, discontinuity, userData)); + ret != CELL_OK) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret ? 
mutex_unlock_ret : ret; + } + + demuxerHandle->stream_is_set = true; + demuxerHandle->dmux_state = DMUX_RUNNING; + demuxerHandle->user_data = userData; + + return lv2_syscall(ppu, demuxerHandle->_dx_mhd); +} + +error_code cellDmuxResetStream(ppu_thread& ppu, vm::ptr demuxerHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxResetStream(demuxerHandle=*0x%x)", demuxerHandle); + + if (!demuxerHandle || !demuxerHandle->_this || demuxerHandle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + const u32 dmux_status = demuxerHandle->dmux_state; + + if (const error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); ret != CELL_OK) + { + return ret; + } + + if (!(dmux_status & DMUX_RUNNING) || !demuxerHandle->stream_is_set) { return CELL_DMUX_ERROR_SEQ; } + + if (const error_code ret = get_error(get_core_ops()->resetStream(ppu, demuxerHandle->core_handle)); ret != CELL_OK) + { + return ret; + } + + demuxerHandle->stream_is_set = false; + return CELL_OK; } -error_code cellDmuxFlushEs(u32 esHandle) +error_code cellDmuxResetStreamAndWaitDone(ppu_thread& ppu, vm::ptr demuxerHandle) { - cellDmux.warning("cellDmuxFlushEs(esHandle=0x%x)", esHandle); + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; - const auto es = idm::get_unlocked(esHandle); + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } - if (!es) + cellDmux.notice("cellDmuxResetStreamAndWaitDone(demuxerHandle=*0x%x)", demuxerHandle); + + if (!demuxerHandle || !demuxerHandle->_this || demuxerHandle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) { 
return CELL_DMUX_ERROR_ARG; } - DemuxerTask task(dmuxFlushEs); - task.es.es = esHandle; - task.es.es_ptr = es.get(); + if (const error_code ret = get_error(get_core_ops()->resetStreamAndWaitDone(ppu, demuxerHandle->core_handle)); ret != CELL_OK) + { + return ret; + } + + // LLE doesn't set DmuxContext::stream_is_set to false + + return CELL_OK; +} + +error_code cellDmuxQueryEsAttr(ppu_thread& ppu, vm::cptr demuxerType, vm::cptr esFilterId, vm::cptr esSpecificInfo, vm::ptr esAttr) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxQueryEsAttr(demuxerType=*0x%x, esFilterId=*0x%x, esSpecificInfo=*0x%x, esAttr=*0x%x)", demuxerType, esFilterId, esSpecificInfo, esAttr); + + if (!demuxerType || demuxerType->streamType != CELL_DMUX_STREAM_TYPE_PAMF || !esFilterId || !esAttr) + { + return CELL_DMUX_ERROR_ARG; + } + + const vm::var core_es_attr; + + if (const error_code ret = get_error(get_core_ops()->queryEsAttr(ppu, vm::make_var(*esFilterId), esSpecificInfo, core_es_attr)); + ret != CELL_OK) + { + return ret; + } + + esAttr->memSize = utils::align(sizeof(DmuxEsContext) + ((core_es_attr->auQueueMaxSize + 1) * (core_es_attr->specificInfoSize + sizeof(DmuxAuQueueElement))), alignof(DmuxEsContext)) + + core_es_attr->memSize + 0xf; + + return CELL_OK; +} + +error_code cellDmuxQueryEsAttr2(ppu_thread& ppu, vm::cptr demuxerType2, vm::cptr esFilterId, vm::cptr esSpecificInfo, vm::ptr esAttr) +{ + cellDmux.notice("cellDmuxQueryEsAttr2(demuxerType2=*0x%x, esFilterId=*0x%x, esSpecificInfo=*0x%x, esAttr=*0x%x)", demuxerType2, esFilterId, esSpecificInfo, esAttr); + + ensure(!!demuxerType2); // Not checked on LLE + + const vm::var demuxerType{{ .streamType = demuxerType2->streamType }}; + + return cellDmuxQueryEsAttr(ppu, demuxerType, esFilterId, esSpecificInfo, esAttr); +} + +error_code 
cellDmuxEnableEs(ppu_thread& ppu, vm::ptr demuxerHandle, vm::cptr esFilterId, vm::cptr esResourceInfo, + vm::cptr esCb, vm::cptr esSpecificInfo, vm::pptr esHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxEnableEs(demuxerHandle=*0x%x, esFilterId=*0x%x, esResourceInfo=*0x%x, esCb=*0x%x, esSpecificInfo=*0x%x, esHandle=**0x%x)", + demuxerHandle, esFilterId, esResourceInfo, esCb, esSpecificInfo, esHandle); + + if (!demuxerHandle || !demuxerHandle->_this || demuxerHandle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF + || !esFilterId + || !esResourceInfo || !esResourceInfo->memAddr || esResourceInfo->memSize == umax + || !esCb || !esCb->cbFunc + || !esHandle) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + if (demuxerHandle->enabled_es_num >= demuxerHandle->max_enabled_es_num) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret ? mutex_unlock_ret : CELL_DMUX_ERROR_ARG; + } + + const vm::var es_attr; + + if (const error_code ret = cellDmuxQueryEsAttr(ppu, demuxerHandle.ptr(&DmuxContext::dmux_type), esFilterId, esSpecificInfo, es_attr); ret != CELL_OK) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret ? mutex_unlock_ret : ret; + } + + if (es_attr->memSize > esResourceInfo->memSize) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret ? 
mutex_unlock_ret : CELL_DMUX_ERROR_ARG; + } + + const vm::var es_filter_id{ *esFilterId }; + const vm::var core_es_attr; + + if (const error_code ret = get_error(get_core_ops()->queryEsAttr(ppu, es_filter_id, esSpecificInfo, core_es_attr)); ret != CELL_OK) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret ? mutex_unlock_ret : ret; + } + + core_es_attr->auQueueMaxSize++; // One extra slot for the access unit produced by flushing the stream, so that flushing always succeeds + + const auto es_handle = vm::ptr::make(utils::align(esResourceInfo->memAddr.addr(), alignof(DmuxEsContext))); + const u32 au_queue_elements_size = core_es_attr->auQueueMaxSize * (core_es_attr->specificInfoSize + sizeof(DmuxAuQueueElement)); + const auto core_mem_addr = vm::bptr::make(utils::align(es_handle.addr() + sizeof(DmuxEsContext) + au_queue_elements_size, 0x10)); + + const vm::var core_es_resource + {{ + .memAddr = core_mem_addr, + .memSize = esResourceInfo->memSize - (core_mem_addr.addr() - esResourceInfo->memAddr.addr()) + }}; + + const vm::var mutex_attr = + {{ + .protocol = SYS_SYNC_PRIORITY, + .recursive = SYS_SYNC_NOT_RECURSIVE, + .pshared = SYS_SYNC_NOT_PROCESS_SHARED, + .adaptive = SYS_SYNC_NOT_ADAPTIVE, + .name_u64 = "_dx_mes"_u64 + }}; + + if (const error_code ret = lv2_syscall(ppu, es_handle.ptr(&DmuxEsContext::_dx_mes), mutex_attr); ret != CELL_OK) + { + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + return ret; + } + + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes, 0); ret != CELL_OK) + { + ensure(lv2_syscall(ppu, es_handle->_dx_mes) == CELL_OK); // Not checked on LLE + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + return ret; + } + + const auto au_found_func = vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(notify_es_au_found))); + const auto flush_done_func = 
vm::bptr::make(g_fxo->get().func_addr(FIND_FUNC(notify_es_flush_done))); + const vm::var> cb_au_found{{ .cbFunc = au_found_func, .cbArg = es_handle }}; + const vm::var> cb_flush_done{{ .cbFunc = flush_done_func, .cbArg = es_handle }}; + + const vm::var> core_es_handle; + + if (const error_code ret = get_error(get_core_ops()->enableEs(ppu, demuxerHandle->core_handle, es_filter_id, core_es_resource, cb_au_found, cb_flush_done, + esSpecificInfo, core_es_handle)); + ret != CELL_OK) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, es_handle->_dx_mes); + const error_code mutex_destroy_ret = lv2_syscall(ppu, es_handle->_dx_mes); + + if (mutex_unlock_ret != CELL_OK) + { + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + return mutex_unlock_ret; + } + + if (mutex_destroy_ret != CELL_OK) + { + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + return mutex_destroy_ret; + } + + const error_code mutex_unlock_ret2 = lv2_syscall(ppu, demuxerHandle->_dx_mhd); + return mutex_unlock_ret2 ? 
mutex_unlock_ret2 : ret; + } + + es_handle->is_enabled = true; + es_handle->error_mem_size = 0; + es_handle->error_count = 0; + // es_handle->error_mem_addr is not initialized on LLE + es_handle->_this = es_handle; + es_handle->_this_size = sizeof(DmuxEsContext) + au_queue_elements_size; + es_handle->_this_index = demuxerHandle->enabled_es_num; + es_handle->dmux_handle = demuxerHandle; + es_handle->es_cb = *esCb; + es_handle->core_es_handle = *core_es_handle; + es_handle->flush_started = bf_t, 0, 1>{}; + es_handle->au_queue.max_size = core_es_attr->auQueueMaxSize; + es_handle->au_queue.allocated_size = 0; + es_handle->au_queue.size = 0; + es_handle->au_queue.front = 0; + es_handle->au_queue.back = 0; + es_handle->au_queue.allocated_back = 0; + + const vm::ptr au_queue_elements = get_au_queue_elements(es_handle); + + for (u32 i = 0; i < core_es_attr->auQueueMaxSize; i++) + { + au_queue_elements[i].index = i; + au_queue_elements[i].unk = 0; + au_queue_elements[i].au_info.info.auAddr = vm::null; + au_queue_elements[i].au_info.info.auMaxSize = 0; + au_queue_elements[i].au_info.specific_info.set(au_queue_elements.addr() + (core_es_attr->auQueueMaxSize * static_cast(sizeof(DmuxAuQueueElement))) + (i * core_es_attr->specificInfoSize)); + au_queue_elements[i].au_info.specific_info_size = core_es_attr->specificInfoSize; + } + + demuxerHandle->enabled_es_num++; + *get_es_handles(demuxerHandle).rbegin() = es_handle; + *esHandle = es_handle; + + if (const error_code ret = lv2_syscall(ppu, es_handle->_dx_mes); ret != CELL_OK) + { + ensure(lv2_syscall(ppu, es_handle->_dx_mes) == CELL_OK); // Not checked on LLE + ensure(lv2_syscall(ppu, demuxerHandle->_dx_mhd) == CELL_OK); // Not checked on LLE + return ret; + } + + return lv2_syscall(ppu, demuxerHandle->_dx_mhd); +} + +error_code cellDmuxDisableEs(ppu_thread& ppu, vm::ptr esHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if 
(!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxDisableEs(esHandle=*0x%x)", esHandle); + + if (!esHandle || !esHandle->_this || !esHandle->dmux_handle || esHandle->dmux_handle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + if (const error_code ret = disable_es(ppu, *esHandle); ret != CELL_OK) + { + ensure(lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd) == CELL_OK); // Not checked on LLE + return ret; + } + + const std::span> es_handles = get_es_handles(esHandle->dmux_handle); + + std::shift_left(std::ranges::find(es_handles, static_cast>(esHandle)), es_handles.end(), 1); + + esHandle->dmux_handle->enabled_es_num--; + *es_handles.rbegin() = vm::null; + + return lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd); +} + +error_code cellDmuxResetEs(ppu_thread& ppu, vm::ptr esHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxResetEs(esHandle=*0x%x)", esHandle); + + if (!esHandle || !esHandle->_this || !esHandle->dmux_handle || esHandle->dmux_handle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + const u32 dmux_status = esHandle->dmux_handle->dmux_state; + + if (const error_code ret = lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd); ret != CELL_OK) + { + return ret; + } + + if (dmux_status & DMUX_STOPPED) + { + return CELL_DMUX_ERROR_SEQ; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->_dx_mes, 0); ret != CELL_OK) + { + return ret; + } + + if (const error_code ret = 
get_error(get_core_ops()->resetEs(ppu, esHandle->core_es_handle)); ret != CELL_OK) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, esHandle->_dx_mes); + return mutex_unlock_ret ? mutex_unlock_ret : ret; + } + + const auto au_queue_elements = get_au_queue_elements(esHandle); + + for (s32 i = 0; i < esHandle->au_queue.max_size; i++) + { + au_queue_elements[i].index = i; + au_queue_elements[i].unk = 0; + au_queue_elements[i].au_info.info.auAddr = vm::null; + au_queue_elements[i].au_info.info.auMaxSize = 0; + } + + esHandle->error_mem_size = 0; + esHandle->error_count = 0; + esHandle->au_queue.allocated_size = 0; + esHandle->au_queue.size = 0; + esHandle->au_queue.front = 0; + esHandle->au_queue.back = 0; + esHandle->au_queue.allocated_back = 0; + + return lv2_syscall(ppu, esHandle->_dx_mes); +} + +template +static error_code pop_au(ppu_thread& ppu, vm::ptr esHandle, vm::cpptr auInfo, vm::cpptr auSpecificInfo) +{ + if (!esHandle || !esHandle->_this || !esHandle->dmux_handle || esHandle->dmux_handle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->_dx_mes, 0); ret != CELL_OK) + { + return ret; + } + + if (ppu.state & cpu_flag::again) + { + return {}; + } + + if (esHandle->au_queue.size <= 0) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, esHandle->_dx_mes); + return mutex_unlock_ret ? 
mutex_unlock_ret : CELL_DMUX_ERROR_EMPTY; + } + + const vm::ptr au_info = (get_au_queue_elements(esHandle) + esHandle->au_queue.front).ptr(&DmuxAuQueueElement::au_info); + + if (auInfo) + { + *auInfo = au_info.ptr(&DmuxAuInfo::info); + } + + if (auSpecificInfo) + { + *auSpecificInfo = au_info->specific_info; + } + + if constexpr (!is_peek) + { + esHandle->au_queue.front = (esHandle->au_queue.front + 1) % esHandle->au_queue.max_size; + esHandle->au_queue.size--; + } + + return lv2_syscall(ppu, esHandle->_dx_mes); +} + +error_code cellDmuxGetAu(ppu_thread& ppu, vm::ptr esHandle, vm::cpptr auInfo, vm::cpptr auSpecificInfo) +{ + cellDmux.trace("cellDmuxGetAu(esHandle=*0x%x, auInfo=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfo, auSpecificInfo); + + return pop_au(ppu, esHandle, auInfo, auSpecificInfo); +} + +error_code cellDmuxPeekAu(ppu_thread& ppu, vm::ptr esHandle, vm::cpptr auInfo, vm::cpptr auSpecificInfo) +{ + cellDmux.trace("cellDmuxPeekAu(esHandle=*0x%x, auInfo=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfo, auSpecificInfo); + + return pop_au(ppu, esHandle, auInfo, auSpecificInfo); +} + +error_code cellDmuxGetAuEx(ppu_thread& ppu, vm::ptr esHandle, vm::cpptr auInfoEx, vm::cpptr auSpecificInfo) +{ + cellDmux.trace("cellDmuxGetAuEx(esHandle=*0x%x, auInfoEx=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfoEx, auSpecificInfo); + + return pop_au(ppu, esHandle, auInfoEx, auSpecificInfo); +} + +error_code cellDmuxPeekAuEx(ppu_thread& ppu, vm::ptr esHandle, vm::cpptr auInfoEx, vm::cpptr auSpecificInfo) +{ + cellDmux.trace("cellDmuxPeekAuEx(esHandle=*0x%x, auInfoEx=**0x%x, auSpecificInfo=**0x%x)", esHandle, auInfoEx, auSpecificInfo); + + return pop_au(ppu, esHandle, auInfoEx, auSpecificInfo); +} + +error_code cellDmuxReleaseAu(ppu_thread& ppu, vm::ptr esHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + 
return {}; + } + + cellDmux.trace("cellDmuxReleaseAu(esHandle=*0x%x)", esHandle); + + if (!esHandle || !esHandle->_this || !esHandle->dmux_handle || esHandle->dmux_handle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->_dx_mes, 0); ret != CELL_OK) + { + return ret; + } + + vm::bptr mem_addr; + u32 mem_size; + + if (esHandle->au_queue.allocated_size < 1) + { + if (esHandle->error_count == 0u) + { + const error_code mutex_unlock_ret = lv2_syscall(ppu, esHandle->_dx_mes); + return mutex_unlock_ret ? mutex_unlock_ret : CELL_DMUX_ERROR_SEQ; + } + + mem_addr = esHandle->error_mem_addr; + mem_size = esHandle->error_mem_size; + } + else + { + const DmuxAuInfo& au_info = get_au_queue_elements(esHandle)[esHandle->au_queue.allocated_back].au_info; + + mem_size = + esHandle->error_mem_size += au_info.info.auSize; + + if (esHandle->error_count == 0u) + { + mem_addr = au_info.info.auAddr; + } + else + { + mem_addr = esHandle->error_mem_addr; + } + + esHandle->au_queue.allocated_back = (esHandle->au_queue.allocated_back + 1) % esHandle->au_queue.max_size; + esHandle->au_queue.allocated_size--; + + if (esHandle->au_queue.allocated_size < esHandle->au_queue.size) + { + esHandle->au_queue.front = (esHandle->au_queue.front + 1) % esHandle->au_queue.max_size; + esHandle->au_queue.size--; + } + } + + if (const error_code ret = get_error(get_core_ops()->releaseAu(ppu, esHandle->core_es_handle, mem_addr, mem_size)); ret != CELL_OK) + { + if (esHandle->error_count == 0u) + { + esHandle->error_mem_addr = mem_addr; + } + + esHandle->error_count++; + + const error_code mutex_unlock_ret = lv2_syscall(ppu, esHandle->_dx_mes); + return mutex_unlock_ret ? 
mutex_unlock_ret : ret; + } + + esHandle->error_count = 0; + esHandle->error_mem_size = 0; + + return lv2_syscall(ppu, esHandle->_dx_mes); +} + +error_code cellDmuxFlushEs(ppu_thread& ppu, vm::ptr esHandle) +{ + // Blocking savestate creation due to ppu_thread::fast_call() + const std::unique_lock savestate_lock{ g_fxo->get(), std::try_to_lock }; + + if (!savestate_lock) + { + ppu.state += cpu_flag::again; + return {}; + } + + cellDmux.notice("cellDmuxFlushEs(esHandle=*0x%x)", esHandle); + + if (!esHandle || !esHandle->_this || !esHandle->dmux_handle || esHandle->dmux_handle->dmux_type.streamType != CELL_DMUX_STREAM_TYPE_PAMF) + { + return CELL_DMUX_ERROR_ARG; + } + + if (const error_code ret = lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd, 0); ret != CELL_OK) + { + return ret; + } + + const u32 dmux_state = esHandle->dmux_handle->dmux_state; + + if (const error_code ret = lv2_syscall(ppu, esHandle->dmux_handle->_dx_mhd); ret != CELL_OK) + { + return ret; + } + + if (!(dmux_state & DMUX_STOPPED)) + { + return CELL_DMUX_ERROR_SEQ; + } + + esHandle->flush_started = true; + + if (const error_code ret = get_error(get_core_ops()->flushEs(ppu, esHandle->core_es_handle)); ret != CELL_OK) + { + esHandle->flush_started = false; + return ret; + } - es->dmux->job.push(task, &es->dmux->is_closed); return CELL_OK; } @@ -1382,4 +1258,11 @@ DECLARE(ppu_module_manager::cellDmux)("cellDmux", []() REG_FUNC(cellDmux, cellDmuxPeekAuEx); REG_FUNC(cellDmux, cellDmuxReleaseAu); REG_FUNC(cellDmux, cellDmuxFlushEs); + + REG_HIDDEN_FUNC(notify_demux_done); + REG_HIDDEN_FUNC(notify_fatal_err); + REG_HIDDEN_FUNC(notify_prog_end_code); + + REG_HIDDEN_FUNC(notify_es_au_found); + REG_HIDDEN_FUNC(notify_es_flush_done); }); diff --git a/rpcs3/Emu/Cell/Modules/cellDmux.h b/rpcs3/Emu/Cell/Modules/cellDmux.h index dc17cb3314..3db8c63bee 100644 --- a/rpcs3/Emu/Cell/Modules/cellDmux.h +++ b/rpcs3/Emu/Cell/Modules/cellDmux.h @@ -1,7 +1,8 @@ #pragma once #include "Emu/Memory/vm_ptr.h" -#include 
"cellPamf.h" +#include "Emu/Cell/ErrorCodes.h" +#include "Utilities/BitField.h" // Error Codes enum CellDmuxError :u32 @@ -18,6 +19,10 @@ enum CellDmuxStreamType : s32 CELL_DMUX_STREAM_TYPE_UNDEF = 0, CELL_DMUX_STREAM_TYPE_PAMF = 1, CELL_DMUX_STREAM_TYPE_TERMINATOR = 2, + + // Only used in cellSail + CELL_DMUX_STREAM_TYPE_MP4 = 0x81, + CELL_DMUX_STREAM_TYPE_AVI = 0x82 }; enum CellDmuxMsgType : s32 @@ -48,13 +53,14 @@ struct CellDmuxEsMsg struct CellDmuxType { be_t streamType; // CellDmuxStreamType - be_t reserved[2]; + be_t reserved1; + be_t reserved2; }; struct CellDmuxType2 { - be_t streamType; // CellDmuxStreamType - be_t streamSpecificInfo; + be_t streamType; + vm::bcptr streamSpecificInfo; }; struct CellDmuxResource @@ -73,8 +79,8 @@ struct CellDmuxResourceEx be_t memSize; be_t ppuThreadPriority; be_t ppuThreadStackSize; - be_t spurs_addr; - u8 priority[8]; + vm::bptr spurs; // CellSpurs* + be_t priority; be_t maxContention; }; @@ -85,33 +91,23 @@ struct CellDmuxResourceSpurs be_t maxContention; }; -/* -struct CellDmuxResource2Ex -{ - b8 isResourceEx; //true - CellDmuxResourceEx resourceEx; -}; - -struct CellDmuxResource2NoEx -{ - b8 isResourceEx; //false - CellDmuxResource resource; -}; -*/ - struct CellDmuxResource2 { b8 isResourceEx; - be_t memAddr; - be_t memSize; - be_t ppuThreadPriority; - be_t ppuThreadStackSize; - be_t shit[4]; + + union + { + CellDmuxResource resource; + CellDmuxResourceEx resourceEx; + }; }; -using CellDmuxCbMsg = u32(u32 demuxerHandle, vm::cptr demuxerMsg, vm::ptr cbArg); +struct DmuxContext; +struct DmuxEsContext; -using CellDmuxCbEsMsg = u32(u32 demuxerHandle, u32 esHandle, vm::cptr esMsg, vm::ptr cbArg); +using CellDmuxCbMsg = u32(vm::ptr demuxerHandle, vm::cptr demuxerMsg, vm::ptr cbArg); + +using CellDmuxCbEsMsg = u32(vm::ptr demuxerHandle, vm::ptr esHandle, vm::cptr esMsg, vm::ptr cbArg); // Used for internal callbacks as well template @@ -177,6 +173,70 @@ struct DmuxAuInfo be_t specific_info_size; }; +struct 
DmuxAuQueueElement +{ + be_t index; + u8 unk; // unused + DmuxAuInfo au_info; +}; + +CHECK_SIZE(DmuxAuQueueElement, 0x38); + +enum DmuxState : u32 +{ + DMUX_STOPPED = 1 << 0, + DMUX_RUNNING = 1 << 1, +}; + +struct alignas(0x10) DmuxContext // CellDmuxHandle = DmuxContext* +{ + vm::bptr _this; + be_t _this_size; + be_t version; + be_t dmux_state; + CellDmuxType dmux_type; + CellDmuxCb dmux_cb; + b8 stream_is_set; + vm::bptr core_handle; + be_t version_; // Same value as 'version' + be_t user_data; + be_t max_enabled_es_num; + be_t enabled_es_num; + be_t _dx_mhd; // sys_mutex_t + u8 reserved[0x7c]; +}; + +CHECK_SIZE_ALIGN(DmuxContext, 0xc0, 0x10); + +struct alignas(0x10) DmuxEsContext // CellDmuxEsHandle = DmuxEsContext* +{ + be_t _dx_mes; // sys_mutex_t + be_t is_enabled; + be_t error_mem_size; + be_t error_count; + vm::bptr error_mem_addr; + vm::bptr _this; + be_t _this_size; + be_t _this_index; + vm::bptr dmux_handle; + CellDmuxEsCb es_cb; + vm::bptr core_es_handle; + bf_t, 0, 1> flush_started; + + struct + { + be_t max_size; + be_t allocated_size; + be_t size; + be_t front; + be_t back; + be_t allocated_back; + } + au_queue; +}; + +CHECK_SIZE_ALIGN(DmuxEsContext, 0x50, 0x10); + using DmuxNotifyDemuxDone = error_code(vm::ptr, u32, vm::ptr); using DmuxNotifyFatalErr = error_code(vm::ptr, u32, vm::ptr); using DmuxNotifyProgEndCode = error_code(vm::ptr, vm::ptr); @@ -194,10 +254,10 @@ using CellDmuxCoreOpSetStream = error_code(vm::ptr, vm::cptr, u32, b using CellDmuxCoreOpReleaseAu = error_code(vm::ptr, vm::ptr, u32); using CellDmuxCoreOpQueryEsAttr = error_code(vm::cptr, vm::cptr, vm::ptr); using CellDmuxCoreOpEnableEs = error_code(vm::ptr, vm::cptr, vm::cptr, vm::cptr>, vm::cptr>, vm::cptr, vm::pptr); -using CellDmuxCoreOpDisableEs = u32(vm::ptr); -using CellDmuxCoreOpFlushEs = u32(vm::ptr); -using CellDmuxCoreOpResetEs = u32(vm::ptr); -using CellDmuxCoreOpResetStreamAndWaitDone = u32(vm::ptr); +using CellDmuxCoreOpDisableEs = error_code(vm::ptr); +using 
CellDmuxCoreOpFlushEs = error_code(vm::ptr); +using CellDmuxCoreOpResetEs = error_code(vm::ptr); +using CellDmuxCoreOpResetStreamAndWaitDone = error_code(vm::ptr); struct CellDmuxCoreOps { diff --git a/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp b/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp index 91ee7e2426..ecf59db508 100644 --- a/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp +++ b/rpcs3/Emu/Cell/Modules/cellDmuxPamf.cpp @@ -10,6 +10,7 @@ #include "cellDmuxPamf.h" #include +#include vm::gvar g_cell_dmux_core_ops_pamf; vm::gvar g_cell_dmux_core_ops_raw_es; @@ -998,6 +999,10 @@ void dmux_pamf_spu_context::operator()() // cellSpursMain() ensure(stream_info_queue->pop(stream_info)); set_stream({ stream_info.stream_addr.get_ptr(), stream_info.stream_size }, stream_info.continuity); + + // Delay demuxing a bit + // Prevents White Knight Chronicles II FMVs from freezing, since events are otherwise fired before the game has finished initializing FMV playback + thread_ctrl::wait_for(1'500); } process_next_pack(); @@ -1153,6 +1158,19 @@ void dmux_pamf_spu_context::save(utils::serial& ar) // PPU thread +template +static auto lv2_syscall(ppu_thread& ppu, auto&&... args) +{ + const auto ret = Syscall(ppu, std::forward(args)...); + + if (ppu.test_stopped()) + { + ppu.state += cpu_flag::again; + } + + return ret; +} + template void DmuxPamfContext::send_spu_command_and_wait(ppu_thread& ppu, bool waiting_for_spu_state, auto&&... 
cmd_params) { @@ -1194,7 +1212,7 @@ error_code DmuxPamfContext::wait_au_released_or_stream_reset(ppu_thread& ppu, u6 goto label1_waiting_for_au_released_state; } - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -1213,9 +1231,9 @@ error_code DmuxPamfContext::wait_au_released_or_stream_reset(ppu_thread& ppu, u6 savestate = dmux_pamf_state::waiting_for_au_released; label1_waiting_for_au_released_state: - if (sys_cond_wait(ppu, cond, 0) != CELL_OK) + if (lv2_syscall(ppu, cond, 0) != CELL_OK) { - sys_mutex_unlock(ppu, mutex); + lv2_syscall(ppu, mutex); return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -1233,13 +1251,13 @@ error_code DmuxPamfContext::wait_au_released_or_stream_reset(ppu_thread& ppu, u6 au_released_bitset = 0; - return sys_mutex_unlock(ppu, mutex) != CELL_OK ? static_cast(CELL_DMUX_PAMF_ERROR_FATAL) : CELL_OK; + return lv2_syscall(ppu, mutex) != CELL_OK ? static_cast(CELL_DMUX_PAMF_ERROR_FATAL) : CELL_OK; } template error_code DmuxPamfContext::set_au_reset(ppu_thread& ppu) { - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -1251,7 +1269,7 @@ error_code DmuxPamfContext::set_au_reset(ppu_thread& ppu) std::ranges::for_each(elementary_streams | std::views::filter([](auto es){ return !!es; }), [](auto& reset_next_au) { reset_next_au = reset; }, &DmuxPamfElementaryStream::reset_next_au); - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } template @@ -1357,7 +1375,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) savestate = dmux_pamf_state::starting_demux_done; label4_starting_demux_done_state: - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { savestate = dmux_pamf_state::starting_demux_done_mutex_lock_error; label5_starting_demux_done_mutex_lock_error_state: @@ -1369,7 +1387,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) sequence_state = DmuxPamfSequenceState::resetting; - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::starting_demux_done_mutex_unlock_error; label6_starting_demux_done_mutex_unlock_error_state: @@ -1422,7 +1440,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) { case DmuxPamfEventType::au_found: { - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1437,7 +1455,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) // If the elementary stream of the found access unit is not enabled, don't do anything if (!es || es->_this.get_ptr() != es || es->es_id != event.au_found.user_data) { - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1465,7 +1483,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) es->reset_next_au = false; } - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1491,7 +1509,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) es->au_specific_info[2] = read_from_ptr>(event.au_found.stream_header_buf, 1); } - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1536,7 +1554,7 @@ void DmuxPamfContext::exec(ppu_thread& 
ppu) savestate = dmux_pamf_state::demux_done_mutex_lock; label15_demux_done_mutex_lock_state: - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1551,9 +1569,9 @@ void DmuxPamfContext::exec(ppu_thread& ppu) savestate = dmux_pamf_state::demux_done_cond_signal; label16_demux_done_cond_signal_state: - if (sys_cond_signal_all(ppu, cond) != CELL_OK) + if (lv2_syscall(ppu, cond) != CELL_OK) { - sys_mutex_unlock(ppu, mutex); + lv2_syscall(ppu, mutex); savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1562,7 +1580,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) RETURN_ON_CPU_FLAG_AGAIN(); } - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1577,7 +1595,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) } case DmuxPamfEventType::flush_done: { - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1588,7 +1606,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) DmuxPamfElementaryStream* const es = find_es(event.flush_done.stream_id, event.flush_done.private_stream_id); const bool valid = es && es->_this.get_ptr() == es && es->es_id == event.flush_done.user_data; - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1631,7 +1649,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) savestate = dmux_pamf_state::resuming_demux_mutex_lock; label17_resuming_demux_mutex_lock_state: - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1658,7 +1676,7 @@ void DmuxPamfContext::exec(ppu_thread& ppu) ensure(cmd_result_queue.pop()); } - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) 
+ if (lv2_syscall(ppu, mutex) != CELL_OK) { savestate = dmux_pamf_state::sending_fatal_err; continue; @@ -1964,8 +1982,8 @@ error_code DmuxPamfContext::open(ppu_thread& ppu, const CellDmuxPamfResource& re const vm::var mutex_attr = {{ SYS_SYNC_PRIORITY, SYS_SYNC_NOT_RECURSIVE, SYS_SYNC_NOT_PROCESS_SHARED, SYS_SYNC_NOT_ADAPTIVE, 0, 0, 0, { "_dxpmtx"_u64 } }}; const vm::var cond_attr = {{ SYS_SYNC_NOT_PROCESS_SHARED, 0, 0, { "_dxpcnd"_u64 } }}; - if (sys_mutex_create(ppu, _this.ptr(&DmuxPamfContext::mutex), mutex_attr) != CELL_OK - || sys_cond_create(ppu, _this.ptr(&DmuxPamfContext::cond), _this->mutex, cond_attr) != CELL_OK) + if (lv2_syscall(ppu, _this.ptr(&DmuxPamfContext::mutex), mutex_attr) != CELL_OK + || lv2_syscall(ppu, _this.ptr(&DmuxPamfContext::cond), _this->mutex, cond_attr) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2078,8 +2096,8 @@ error_code DmuxPamfContext::close(ppu_thread& ppu) g_fxo->get().free(0x40000); } - if (sys_cond_destroy(ppu, cond) != CELL_OK - || sys_mutex_destroy(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, cond) != CELL_OK + || lv2_syscall(ppu, mutex) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2117,7 +2135,7 @@ error_code DmuxPamfContext::reset_stream(ppu_thread& ppu) switch (savestate) { case 0: - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2130,7 +2148,7 @@ error_code DmuxPamfContext::reset_stream(ppu_thread& ppu) if (sequence_state != DmuxPamfSequenceState::running) { - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } [[fallthrough]]; @@ -2148,9 +2166,9 @@ error_code DmuxPamfContext::reset_stream(ppu_thread& ppu) [[fallthrough]]; case 2: - if (const error_code ret = sys_cond_signal_to(ppu, cond, static_cast(thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) + if (const error_code ret = lv2_syscall(ppu, cond, static_cast(thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) { - sys_mutex_unlock(ppu, mutex); + lv2_syscall(ppu, mutex); return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2160,7 +2178,7 @@ error_code DmuxPamfContext::reset_stream(ppu_thread& ppu) return {}; } - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; default: fmt::throw_exception("Unexpected savestate value: 0x%x", savestate); @@ -2215,7 +2233,7 @@ error_code _CellDmuxCoreOpCreateThread(ppu_thread& ppu, vm::ptr(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2226,12 +2244,12 @@ error_code DmuxPamfContext::join_thread(ppu_thread& ppu) send_spu_command_and_wait(ppu, false); - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } - return sys_ppu_thread_join(ppu, static_cast(thread_id), +vm::var{}) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, static_cast(thread_id), +vm::var{}) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } error_code _CellDmuxCoreOpJoinThread(ppu_thread& ppu, vm::ptr handle) @@ -2264,7 +2282,7 @@ error_code DmuxPamfContext::set_stream(ppu_thread& ppu, vm::cptr stream_addr if (!waiting_for_spu_state) { - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2279,7 +2297,7 @@ error_code DmuxPamfContext::set_stream(ppu_thread& ppu, vm::cptr stream_addr if (!stream_info_queue.emplace(stream_address, stream_size, user_data, !discontinuity, raw_es)) { - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_BUSY : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_BUSY : CELL_DMUX_PAMF_ERROR_FATAL; } } @@ -2293,7 +2311,7 @@ error_code DmuxPamfContext::set_stream(ppu_thread& ppu, vm::cptr stream_addr sequence_state = DmuxPamfSequenceState::running; - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } template @@ -2320,7 +2338,7 @@ error_code DmuxPamfElementaryStream::release_au(ppu_thread& ppu, vm::ptr au_ switch (savestate) { case 0: - if (sys_mutex_lock(ppu, demuxer->mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, demuxer->mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2346,9 +2364,9 @@ error_code DmuxPamfElementaryStream::release_au(ppu_thread& ppu, vm::ptr au_ [[fallthrough]]; case 2: - if (const error_code ret = sys_cond_signal_to(ppu, demuxer->cond, static_cast(demuxer->thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) + if (const error_code ret = lv2_syscall(ppu, demuxer->cond, static_cast(demuxer->thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) { - sys_mutex_unlock(ppu, demuxer->mutex); + lv2_syscall(ppu, demuxer->mutex); return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2358,7 +2376,7 @@ error_code DmuxPamfElementaryStream::release_au(ppu_thread& ppu, vm::ptr au_ return {}; } - return sys_mutex_unlock(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; default: fmt::throw_exception("Unexpected savestate value: 0x%x", savestate); @@ -2461,7 +2479,7 @@ error_code DmuxPamfContext::enable_es(ppu_thread& ppu, u16 stream_id, u16 privat return CELL_DMUX_PAMF_ERROR_ARG; } - if (const error_code ret = sys_mutex_lock(ppu, mutex, 0); ret != CELL_OK) + if (const error_code ret = lv2_syscall(ppu, mutex, 0); ret != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2476,13 +2494,13 @@ error_code DmuxPamfContext::enable_es(ppu_thread& ppu, u16 stream_id, u16 privat if (enabled_es_num == max_enabled_es_num) { - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_NO_MEMORY : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? 
CELL_DMUX_PAMF_ERROR_NO_MEMORY : CELL_DMUX_PAMF_ERROR_FATAL; } if (find_es(stream_id, private_stream_id)) { // Elementary stream is already enabled - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_ARG : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_ARG : CELL_DMUX_PAMF_ERROR_FATAL; } } @@ -2560,7 +2578,7 @@ error_code DmuxPamfContext::enable_es(ppu_thread& ppu, u16 stream_id, u16 privat enabled_es_num++; - if (sys_mutex_unlock(ppu, mutex) != CELL_OK) + if (lv2_syscall(ppu, mutex) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2573,7 +2591,7 @@ template error_code _CellDmuxCoreOpEnableEs(ppu_thread& ppu, vm::ptr handle, vm::cptr esFilterId, vm::cptr esResource, vm::cptr> notifyAuFound, vm::cptr> notifyFlushDone, vm::cptr esSpecificInfo, vm::pptr esHandle) { - cellDmuxPamf.notice("_CellDmuxCoreOpEnableEs(handle=*0x%x, esFilterId=*0x%x, esResource=*0x%x, notifyAuFound=*0x%x, notifyFlushDone=*0x%x, esSpecificInfo=*0x%x, esHandle)", + cellDmuxPamf.notice("_CellDmuxCoreOpEnableEs(handle=*0x%x, esFilterId=*0x%x, esResource=*0x%x, notifyAuFound=*0x%x, notifyFlushDone=*0x%x, esSpecificInfo=*0x%x, esHandle=**0x%x)", raw_es, handle, esFilterId, esResource, notifyAuFound, notifyFlushDone, esSpecificInfo, esHandle); if (!handle || !esFilterId || !esResource || !esResource->memAddr || esResource->memSize == 0u || !notifyAuFound || !notifyAuFound->cbFunc || !notifyAuFound->cbArg || !notifyFlushDone || !notifyFlushDone->cbFunc || !notifyFlushDone->cbArg) @@ -2614,7 +2632,7 @@ error_code DmuxPamfElementaryStream::disable_es(ppu_thread& ppu) switch (savestate) { case 0: - if (sys_mutex_lock(ppu, dmux->mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, dmux->mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2628,7 +2646,7 @@ error_code DmuxPamfElementaryStream::disable_es(ppu_thread& ppu) if (!dmux->find_es(stream_id, private_stream_id)) { // Elementary stream is already disabled - return 
sys_mutex_unlock(ppu, dmux->mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_ARG : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, dmux->mutex) == CELL_OK ? CELL_DMUX_PAMF_ERROR_ARG : CELL_DMUX_PAMF_ERROR_FATAL; } [[fallthrough]]; @@ -2659,9 +2677,9 @@ error_code DmuxPamfElementaryStream::disable_es(ppu_thread& ppu) [[fallthrough]]; case 2: - if (const error_code ret = sys_cond_signal_to(ppu, dmux->cond, static_cast(dmux->thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) + if (const error_code ret = lv2_syscall(ppu, dmux->cond, static_cast(dmux->thread_id)); ret != CELL_OK && ret != static_cast(CELL_EPERM)) { - sys_mutex_unlock(ppu, dmux->mutex); + lv2_syscall(ppu, dmux->mutex); return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2671,7 +2689,7 @@ error_code DmuxPamfElementaryStream::disable_es(ppu_thread& ppu) return {}; } - return sys_mutex_unlock(ppu, dmux->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, dmux->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; default: fmt::throw_exception("Unexpected savestate value: 0x%x", savestate); @@ -2698,7 +2716,7 @@ error_code DmuxPamfElementaryStream::flush_es(ppu_thread& ppu) const if (!waiting_for_spu_state) { - if (sys_mutex_lock(ppu, demuxer->mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, demuxer->mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2718,7 +2736,7 @@ error_code DmuxPamfElementaryStream::flush_es(ppu_thread& ppu) const return {}; } - return sys_mutex_unlock(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, demuxer->mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } error_code _CellDmuxCoreOpFlushEs(ppu_thread& ppu, vm::ptr esHandle) @@ -2741,7 +2759,7 @@ error_code DmuxPamfElementaryStream::reset_es(ppu_thread& ppu) const if (!waiting_for_spu_state) { - if (sys_mutex_lock(ppu, demuxer->mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, demuxer->mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2761,7 +2779,7 @@ error_code DmuxPamfElementaryStream::reset_es(ppu_thread& ppu) const return {}; } - return sys_mutex_unlock(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, demuxer->mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } error_code _CellDmuxCoreOpResetEs(ppu_thread& ppu, vm::ptr esHandle) @@ -2797,7 +2815,7 @@ error_code DmuxPamfContext::reset_stream_and_wait_done(ppu_thread& ppu) return {}; } - if (sys_mutex_lock(ppu, mutex, 0) != CELL_OK) + if (lv2_syscall(ppu, mutex, 0) != CELL_OK) { return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2809,9 +2827,9 @@ error_code DmuxPamfContext::reset_stream_and_wait_done(ppu_thread& ppu) while (sequence_state != DmuxPamfSequenceState::dormant) { - if (sys_cond_wait(ppu, cond, 0) != CELL_OK) + if (lv2_syscall(ppu, cond, 0) != CELL_OK) { - sys_mutex_unlock(ppu, mutex); + lv2_syscall(ppu, mutex); return CELL_DMUX_PAMF_ERROR_FATAL; } @@ -2821,7 +2839,7 @@ error_code DmuxPamfContext::reset_stream_and_wait_done(ppu_thread& ppu) } } - return sys_mutex_unlock(ppu, mutex) == CELL_OK ? static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; + return lv2_syscall(ppu, mutex) == CELL_OK ? 
static_cast(CELL_OK) : CELL_DMUX_PAMF_ERROR_FATAL; } error_code _CellDmuxCoreOpResetStreamAndWaitDone(ppu_thread& ppu, vm::ptr handle) diff --git a/rpcs3/Emu/Cell/Modules/cellGem.cpp b/rpcs3/Emu/Cell/Modules/cellGem.cpp index d45dace1ca..f9f5ea4100 100644 --- a/rpcs3/Emu/Cell/Modules/cellGem.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGem.cpp @@ -1774,6 +1774,12 @@ public: shared_mutex mutex; + gem_tracker& operator=(thread_state) noexcept + { + wake_up_tracker(); + return *this; + } + private: atomic_t m_wake_up_tracker = 0; atomic_t m_tracker_done = 0; diff --git a/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp b/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp index 97375c4e6d..83b001cc52 100644 --- a/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp +++ b/rpcs3/Emu/Cell/Modules/cellNetCtl.cpp @@ -192,7 +192,46 @@ error_code cellNetCtlDelHandler(s32 hid) error_code cellNetCtlGetInfo(s32 code, vm::ptr info) { - cellNetCtl.warning("cellNetCtlGetInfo(code=0x%x (%s), info=*0x%x)", code, InfoCodeToName(code), info); + bool log_it_once = false; + + switch (code) + { + case CELL_NET_CTL_INFO_ETHER_ADDR: + case CELL_NET_CTL_INFO_DEVICE: + case CELL_NET_CTL_INFO_MTU: + case CELL_NET_CTL_INFO_LINK_TYPE: + case CELL_NET_CTL_INFO_IP_CONFIG: + case CELL_NET_CTL_INFO_IP_ADDRESS: + case CELL_NET_CTL_INFO_NETMASK: + case CELL_NET_CTL_INFO_DEFAULT_ROUTE: + case CELL_NET_CTL_INFO_HTTP_PROXY_CONFIG: + case CELL_NET_CTL_INFO_UPNP_CONFIG: + { + log_it_once = true; + break; + } + default: + { + break; + } + } + + bool log_it = true; + + if (log_it_once && vm::check_addr(info.addr())) + { + struct logged_t + { + std::array, 256> logged_code{}; + }; + + if (g_fxo->get().logged_code[::narrow(code)].exchange(true)) + { + log_it = false; + } + } + + (log_it ? 
cellNetCtl.warning : cellNetCtl.trace)("cellNetCtlGetInfo(code=0x%x (%s), info=*0x%x)", code, InfoCodeToName(code), info); auto& nph = g_fxo->get>(); diff --git a/rpcs3/Emu/Cell/Modules/cellPamf.cpp b/rpcs3/Emu/Cell/Modules/cellPamf.cpp index d63e7bb3e2..4bb383c2d2 100644 --- a/rpcs3/Emu/Cell/Modules/cellPamf.cpp +++ b/rpcs3/Emu/Cell/Modules/cellPamf.cpp @@ -5,14 +5,6 @@ #include #include "cellPamf.h" -const std::function SQUEUE_ALWAYS_EXIT = []() { return true; }; -const std::function SQUEUE_NEVER_EXIT = []() { return false; }; - -bool squeue_test_exit() -{ - return Emu.IsStopped(); -} - LOG_CHANNEL(cellPamf); template<> diff --git a/rpcs3/Emu/Cell/Modules/cellPamf.h b/rpcs3/Emu/Cell/Modules/cellPamf.h index abd89f8852..14608f9100 100644 --- a/rpcs3/Emu/Cell/Modules/cellPamf.h +++ b/rpcs3/Emu/Cell/Modules/cellPamf.h @@ -595,345 +595,3 @@ struct CellPamfReader CHECK_SIZE(CellPamfReader, 128); error_code cellPamfReaderInitialize(vm::ptr pSelf, vm::cptr pAddr, u64 fileSize, u32 attribute); - -#include -#include - -extern const std::function SQUEUE_ALWAYS_EXIT; -extern const std::function SQUEUE_NEVER_EXIT; - -bool squeue_test_exit(); - -// TODO: eliminate this boolshit -template -class squeue_t -{ - struct squeue_sync_var_t - { - struct - { - u32 position : 31; - u32 pop_lock : 1; - }; - struct - { - u32 count : 31; - u32 push_lock : 1; - }; - }; - - atomic_t m_sync; - - mutable std::mutex m_rcv_mutex; - mutable std::mutex m_wcv_mutex; - mutable std::condition_variable m_rcv; - mutable std::condition_variable m_wcv; - - T m_data[sq_size]; - - enum squeue_sync_var_result : u32 - { - SQSVR_OK = 0, - SQSVR_LOCKED = 1, - SQSVR_FAILED = 2, - }; - -public: - squeue_t() - : m_sync(squeue_sync_var_t{}) - { - } - - static u32 get_max_size() - { - return sq_size; - } - - bool is_full() const - { - return m_sync.load().count == sq_size; - } - - bool push(const T& data, const std::function& test_exit) - { - u32 pos = 0; - - while (u32 res = 
m_sync.atomic_op([&pos](squeue_sync_var_t& sync) -> u32 - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - - if (sync.push_lock) - { - return SQSVR_LOCKED; - } - if (sync.count == sq_size) - { - return SQSVR_FAILED; - } - - sync.push_lock = 1; - pos = sync.position + sync.count; - return SQSVR_OK; - })) - { - if (res == SQSVR_FAILED && (test_exit() || squeue_test_exit())) - { - return false; - } - - std::unique_lock wcv_lock(m_wcv_mutex); - m_wcv.wait_for(wcv_lock, std::chrono::milliseconds(1)); - } - - m_data[pos >= sq_size ? pos - sq_size : pos] = data; - - m_sync.atomic_op([](squeue_sync_var_t& sync) - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - ensure(!!sync.push_lock); - sync.push_lock = 0; - sync.count++; - }); - - m_rcv.notify_one(); - m_wcv.notify_one(); - return true; - } - - bool push(const T& data, const volatile bool* do_exit) - { - return push(data, [do_exit]() { return do_exit && *do_exit; }); - } - - bool push(const T& data) - { - return push(data, SQUEUE_NEVER_EXIT); - } - - bool try_push(const T& data) - { - return push(data, SQUEUE_ALWAYS_EXIT); - } - - bool pop(T& data, const std::function& test_exit) - { - u32 pos = 0; - - while (u32 res = m_sync.atomic_op([&pos](squeue_sync_var_t& sync) -> u32 - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - - if (!sync.count) - { - return SQSVR_FAILED; - } - if (sync.pop_lock) - { - return SQSVR_LOCKED; - } - - sync.pop_lock = 1; - pos = sync.position; - return SQSVR_OK; - })) - { - if (res == SQSVR_FAILED && (test_exit() || squeue_test_exit())) - { - return false; - } - - std::unique_lock rcv_lock(m_rcv_mutex); - m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1)); - } - - data = m_data[pos]; - - m_sync.atomic_op([](squeue_sync_var_t& sync) - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - ensure(!!sync.pop_lock); - sync.pop_lock = 0; - sync.position++; - sync.count--; - if (sync.position == sq_size) - { - 
sync.position = 0; - } - }); - - m_rcv.notify_one(); - m_wcv.notify_one(); - return true; - } - - bool pop(T& data, const volatile bool* do_exit) - { - return pop(data, [do_exit]() { return do_exit && *do_exit; }); - } - - bool pop(T& data) - { - return pop(data, SQUEUE_NEVER_EXIT); - } - - bool try_pop(T& data) - { - return pop(data, SQUEUE_ALWAYS_EXIT); - } - - bool peek(T& data, u32 start_pos, const std::function& test_exit) - { - ensure(start_pos < sq_size); - u32 pos = 0; - - while (u32 res = m_sync.atomic_op([&pos, start_pos](squeue_sync_var_t& sync) -> u32 - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - - if (sync.count <= start_pos) - { - return SQSVR_FAILED; - } - if (sync.pop_lock) - { - return SQSVR_LOCKED; - } - - sync.pop_lock = 1; - pos = sync.position + start_pos; - return SQSVR_OK; - })) - { - if (res == SQSVR_FAILED && (test_exit() || squeue_test_exit())) - { - return false; - } - - std::unique_lock rcv_lock(m_rcv_mutex); - m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1)); - } - - data = m_data[pos >= sq_size ? pos - sq_size : pos]; - - m_sync.atomic_op([](squeue_sync_var_t& sync) - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - ensure(!!sync.pop_lock); - sync.pop_lock = 0; - }); - - m_rcv.notify_one(); - return true; - } - - bool peek(T& data, u32 start_pos, const volatile bool* do_exit) - { - return peek(data, start_pos, [do_exit]() { return do_exit && *do_exit; }); - } - - bool peek(T& data, u32 start_pos = 0) - { - return peek(data, start_pos, SQUEUE_NEVER_EXIT); - } - - bool try_peek(T& data, u32 start_pos = 0) - { - return peek(data, start_pos, SQUEUE_ALWAYS_EXIT); - } - - class squeue_data_t - { - T* const m_data; - const u32 m_pos; - const u32 m_count; - - squeue_data_t(T* data, u32 pos, u32 count) - : m_data(data) - , m_pos(pos) - , m_count(count) - { - } - - public: - T& operator [] (u32 index) - { - ensure(index < m_count); - index += m_pos; - index = index < sq_size ? 
index : index - sq_size; - return m_data[index]; - } - }; - - void process(void(*proc)(squeue_data_t data)) - { - u32 pos, count; - - while (m_sync.atomic_op([&pos, &count](squeue_sync_var_t& sync) -> u32 - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - - if (sync.pop_lock || sync.push_lock) - { - return SQSVR_LOCKED; - } - - pos = sync.position; - count = sync.count; - sync.pop_lock = 1; - sync.push_lock = 1; - return SQSVR_OK; - })) - { - std::unique_lock rcv_lock(m_rcv_mutex); - m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1)); - } - - proc(squeue_data_t(m_data, pos, count)); - - m_sync.atomic_op([](squeue_sync_var_t& sync) - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - ensure(!!sync.pop_lock); - ensure(!!sync.push_lock); - sync.pop_lock = 0; - sync.push_lock = 0; - }); - - m_wcv.notify_one(); - m_rcv.notify_one(); - } - - void clear() - { - while (m_sync.atomic_op([](squeue_sync_var_t& sync) -> u32 - { - ensure(sync.count <= sq_size); - ensure(sync.position < sq_size); - - if (sync.pop_lock || sync.push_lock) - { - return SQSVR_LOCKED; - } - - sync.pop_lock = 1; - sync.push_lock = 1; - return SQSVR_OK; - })) - { - std::unique_lock rcv_lock(m_rcv_mutex); - m_rcv.wait_for(rcv_lock, std::chrono::milliseconds(1)); - } - - m_sync.exchange({}); - m_wcv.notify_one(); - m_rcv.notify_one(); - } -}; diff --git a/rpcs3/Emu/Cell/Modules/sceNp.h b/rpcs3/Emu/Cell/Modules/sceNp.h index 1bc0a345db..e6b8bff945 100644 --- a/rpcs3/Emu/Cell/Modules/sceNp.h +++ b/rpcs3/Emu/Cell/Modules/sceNp.h @@ -1267,6 +1267,11 @@ struct SceNpOnlineId { char data[SCE_NET_NP_ONLINEID_MAX_LENGTH + 1]; // char term; char dummy[3]; + + bool operator<(const SceNpOnlineId& other) const + { + return memcmp(data, other.data, sizeof(data)) < 0; + } }; // NP ID structure @@ -1283,6 +1288,11 @@ struct SceNpId }; u8 reserved[8]; + + bool operator<(const SceNpId& other) const + { + return handle < other.handle; + } }; CHECK_SIZE_ALIGN(SceNpId, 0x24, 
1); @@ -1689,12 +1699,22 @@ struct SceNpLobbyId { u8 opt[28]; u8 reserved[8]; + + bool operator<(const SceNpLobbyId& other) const + { + return memcmp(opt, other.opt, sizeof(opt)) < 0; + } }; struct SceNpRoomId { u8 opt[28]; u8 reserved[8]; + + bool operator<(const SceNpRoomId& other) const + { + return memcmp(opt, other.opt, sizeof(opt)) < 0; + } }; struct SceNpMatchingAttr diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 4d690b344d..f5d91cc519 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -1286,7 +1286,7 @@ extern bool ppu_patch(u32 addr, u32 value) { if (addr % 4) { - ppu_log.fatal("Patch failed at 0x%x: unanligned memory address.", addr); + ppu_log.fatal("Patch failed at 0x%x: unaligned memory address.", addr); return false; } @@ -1364,9 +1364,7 @@ void ppu_thread::dump_regs(std::string& ret, std::any& custom_data) const u32 preferred_cr_field_index = 7; }; - dump_registers_data_t* func_data = nullptr; - - func_data = std::any_cast(&custom_data); + dump_registers_data_t* func_data = std::any_cast(&custom_data); if (!func_data) { @@ -2039,9 +2037,9 @@ std::vector> ppu_thread::dump_callstack_list() const return call_stack_list; } -std::string ppu_thread::dump_misc() const +void ppu_thread::dump_misc(std::string& ret, std::any& custom_data) const { - std::string ret = cpu_thread::dump_misc(); + cpu_thread::dump_misc(ret, custom_data); if (ack_suspend) { @@ -2096,7 +2094,6 @@ std::string ppu_thread::dump_misc() const { ret += '\n'; } - return ret; } void ppu_thread::dump_all(std::string& ret) const diff --git a/rpcs3/Emu/Cell/PPUThread.h b/rpcs3/Emu/Cell/PPUThread.h index 97c705aed5..cf5b91c487 100644 --- a/rpcs3/Emu/Cell/PPUThread.h +++ b/rpcs3/Emu/Cell/PPUThread.h @@ -145,7 +145,7 @@ public: virtual void dump_regs(std::string&, std::any& custom_data) const override; virtual std::string dump_callstack() const override; virtual std::vector> dump_callstack_list() const override; - virtual std::string 
dump_misc() const override; + virtual void dump_misc(std::string& ret, std::any& custom_data) const override; virtual void dump_all(std::string&) const override; virtual void cpu_task() override final; virtual void cpu_sleep() override; diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index b12cd9c55d..0205715328 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -340,7 +340,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) const auto ftype = FunctionType::get(get_type(), { get_type(), // Exec base - m_ir->getPtrTy(), // PPU context + get_type(), // PPU context get_type(), // Segment address (for PRX) get_type(), // Memory base get_type(), // r0 @@ -386,7 +386,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) const auto addr_array = new GlobalVariable(*m_module, addr_array_type, false, GlobalValue::PrivateLinkage, ConstantDataArray::get(m_context, vec_addrs)); // Create an array of function pointers - const auto func_table_type = ArrayType::get(m_ir->getPtrTy(), functions.size()); + const auto func_table_type = ArrayType::get(get_type(), functions.size()); const auto init_func_table = ConstantArray::get(func_table_type, functions); const auto func_table = new GlobalVariable(*m_module, func_table_type, false, GlobalVariable::PrivateLinkage, init_func_table); @@ -413,7 +413,7 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) const auto func_pc = ZExt(m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst), get_type()); ptr_inst = dyn_cast(m_ir->CreateGEP(func_table->getValueType(), func_table, {m_ir->getInt64(0), index_value})); - assert(ptr_inst->getResultElementType() == m_ir->getPtrTy()); + assert(ptr_inst->getResultElementType() == get_type()); const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst); const auto pos_32 = m_reloc ? 
m_ir->CreateAdd(func_pc, m_seg0) : func_pc; @@ -622,7 +622,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect) const auto pos = m_ir->CreateShl(indirect, 1); const auto ptr = m_ir->CreatePtrAdd(m_exec, pos); const auto val = m_ir->CreateLoad(get_type(), ptr); - callee = FunctionCallee(type, m_ir->CreateIntToPtr(val, m_ir->getPtrTy())); + callee = FunctionCallee(type, m_ir->CreateIntToPtr(val, get_type())); // Load new segment address const auto seg_base_ptr = m_ir->CreatePtrAdd(m_exec, m_ir->getInt64(vm::g_exec_addr_seg_offset)); @@ -5414,7 +5414,7 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo) void PPUTranslator::build_interpreter() { #define BUILD_VEC_INST(i) { \ - m_function = llvm::cast(m_module->getOrInsertFunction("op_" #i, get_type(), m_ir->getPtrTy()).getCallee()); \ + m_function = llvm::cast(m_module->getOrInsertFunction("op_" #i, get_type(), get_type()).getCallee()); \ std::fill(std::begin(m_globals), std::end(m_globals), nullptr); \ std::fill(std::begin(m_locals), std::end(m_locals), nullptr); \ IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function)); \ diff --git a/rpcs3/Emu/Cell/SPUAnalyser.h b/rpcs3/Emu/Cell/SPUAnalyser.h index 123a629bed..1598551c7d 100644 --- a/rpcs3/Emu/Cell/SPUAnalyser.h +++ b/rpcs3/Emu/Cell/SPUAnalyser.h @@ -13,6 +13,7 @@ struct spu_itype static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values static constexpr struct zregmod_tag{} zregmod{}; // Instructions not modifying any GPR + static constexpr struct pure_tag{} pure{}; // Instructions that always produce the same values as long as arguments are equal enum class type : unsigned char { @@ -158,6 +159,15 @@ struct spu_itype CUFLT, FRDS, // xfloat_tag last + CFLTS, + CFLTU, + FCEQ, + FCMEQ, + FCGT, + FCMGT, // floating_tag last + FSCRWR, + FSCRRD, + DFA, DFS, DFM, @@ -167,20 +177,11 @@ struct spu_itype DFNMA, FESD, - CFLTS, - CFLTU, - FCEQ, 
- FCMEQ, - FCGT, - FCMGT, - FSCRWR, - FSCRRD, - DFCEQ, DFCMEQ, DFCGT, DFCMGT, - DFTSV, // floating_tag last + DFTSV, SHLH, // shiftrot_tag first SHLHI, @@ -248,10 +249,10 @@ struct spu_itype return value >= BR && value <= BISL; } - // Test for floating point instruction + // Test for floating point instruction (32-bit float) friend constexpr bool operator &(type value, floating_tag) { - return value >= FMA && value <= DFTSV; + return value >= FMA && value <= FCMGT; } // Test for 4-op instruction @@ -301,8 +302,16 @@ struct spu_itype { return (value >= HEQ && value <= STQR) || (value >= BR && value <= BIHNZ); } + + // Test for instructions which always produce the same values as long as arguments and immediate values are equal + friend constexpr bool operator &(type value, pure_tag) + { + return (value >= ILH && value <= CLGTI); + } }; +using spu_itype_t = spu_itype::type; + struct spu_iflag { enum @@ -528,6 +537,8 @@ struct spu_iflag } }; +using spu_iflag_t = spu_iflag::flag; + #define NAME(x) static constexpr const char& x = *#x struct spu_iname diff --git a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp index 1b6003036b..53dc0df200 100644 --- a/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPUCommonRecompiler.cpp @@ -52,6 +52,36 @@ struct span_less template inline constexpr span_less s_span_less{}; +template <> +void fmt_class_string::format(std::string& out, u64 arg) +{ + format_enum(out, arg, [](spu_recompiler_base::compare_direction arg) + { + switch (arg) + { + case spu_recompiler_base::CMP_SLESS: return "SLT"; + case spu_recompiler_base::CMP_SGREATER: return "SGT"; + case spu_recompiler_base::CMP_EQUAL: return "IEQ"; + case spu_recompiler_base::CMP_LLESS: return "ULT"; + case spu_recompiler_base::CMP_LGREATER: return "UGT"; + case spu_recompiler_base::CMP_SGREATER_EQUAL: return "SGE"; + case spu_recompiler_base::CMP_SLOWER_EQUAL: return "SLE"; + case spu_recompiler_base::CMP_NOT_EQUAL: return "INE"; + case 
spu_recompiler_base::CMP_LGREATER_EQUAL: return "UGE"; + case spu_recompiler_base::CMP_LLOWER_EQUAL: return "ULE"; + case spu_recompiler_base::CMP_UNKNOWN: + case spu_recompiler_base::CMP_NOT_EQUAL2: + case spu_recompiler_base::CMP_EQUAL2: + default: + { + break; + } + } + + return unknown; + }); +} + // Move 4 args for calling native function from a GHC calling convention function #if defined(ARCH_X64) static u8* move_args_ghc_to_native(u8* raw) @@ -2927,7 +2957,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (g_cfg.core.spu_block_size == spu_block_size_type::safe) { // Stop on special instructions (TODO) - m_targets[pos]; + m_targets[pos].push_back(SPU_LS_SIZE); next_block(); break; } @@ -2948,7 +2978,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s spu_log.error("[0x%x] Invalid interrupt flags (DE)", pos); } - m_targets[pos]; + m_targets[pos].push_back(SPU_LS_SIZE); next_block(); break; } @@ -2981,7 +3011,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s bool is_no_return = false; - if (pos_next >= lsa && pos_next < limit) + if (sl && pos_next >= lsa && pos_next < limit) { const u32 data_next = ls[pos_next / 4]; const auto type_next = g_spu_itype.decode(data_next); @@ -3002,7 +3032,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10); } - if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc) + if (+iflags & +spu_iflag::use_rc) { is_no_return = is_no_return || (op_next.ra >= 4 && op_next.rb < 10); } @@ -3244,6 +3274,12 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s spu_log.notice("[0x%x] At 0x%x: ignoring indirect branch (SYNC)", entry_point, pos); } + if (!(af & vf::is_const)) + { + // Possible unknown target + m_targets[pos].emplace_back(SPU_LS_SIZE); + } + if (type == spu_itype::BI || sl || is_no_return) { if (type == 
spu_itype::BI || g_cfg.core.spu_block_size == spu_block_size_type::safe || is_no_return) @@ -3308,7 +3344,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s is_no_return = is_no_return || (op_next.rb >= 4 && op_next.rb < 10); } - if (type_next & spu_itype::_quadrop && +iflags & +spu_iflag::use_rc) + if (+iflags & +spu_iflag::use_rc) { is_no_return = is_no_return || (op_next.rc >= 4 && op_next.rc < 10); } @@ -3834,17 +3870,26 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s continue; } + bool removed = false; + for (auto it2 = it->second.begin(); it2 != it->second.end();) { if (*it2 < lsa || *it2 >= limit) { it2 = it->second.erase(it2); + removed = true; continue; } it2++; } + if (removed) + { + it->second.emplace_back(SPU_LS_SIZE); + } + + std::sort(it->second.begin(), it->second.end()); it++; } @@ -3895,7 +3940,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto type = g_spu_itype.decode(op.opcode); - u8 reg_save = 255; + u8 reg_save = s_reg_max; if (type == spu_itype::STQD && op.ra == s_reg_sp && !block.reg_mod[op.rt] && !block.reg_use[op.rt]) { @@ -3915,7 +3960,17 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Register reg use only if it happens before reg mod if (!block.reg_mod[reg]) { - block.reg_use.set(reg); + if (type & spu_itype::floating) + { + block.reg_maybe_float.set(reg); + } + + if (type == spu_itype::SHUFB && reg == op.rc) + { + block.reg_maybe_shuffle_mask.set(reg); + } + + block.reg_use[reg]++; if (reg_save != reg && block.reg_save_dom[reg]) { @@ -3932,7 +3987,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s for (u8 reg : {s_reg_mfc_lsa, s_reg_mfc_tag, s_reg_mfc_size}) { if (!block.reg_mod[reg]) - block.reg_use.set(reg); + block.reg_use[reg]++; } } @@ -3986,7 +4041,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (i == s_reg_lr || (i >= 2 && i < 
s_reg_80) || i > s_reg_127) { if (!block.reg_mod[i]) - block.reg_use.set(i); + block.reg_use[i]++; if (!is_tail) { @@ -4863,19 +4918,24 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s return map; }; - struct putllc16_statistics_t + struct stats_t { atomic_t all = 0; atomic_t single = 0; - atomic_t nowrite = 0; std::array, 128> breaking_reason{}; }; - struct rchcnt_statistics_t + struct putllc16_statistics_t : stats_t + { + atomic_t nowrite = 0; + }; + + struct rchcnt_statistics_t : stats_t + { + }; + + struct reduced_statistics_t : stats_t { - atomic_t all = 0; - atomic_t single = 0; - std::array, 128> breaking_reason{}; }; // Pattern structures @@ -4987,6 +5047,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // RDCH/RCHCNT Loop analysis tracker rchcnt_loop_t rchcnt_loop{}; + reduced_loop_t reduced_loop{}; + block_reg_state_iterator(u32 _pc, usz _parent_iterator_index = umax, usz _parent_target_index = 0) noexcept : pc(_pc) , parent_iterator_index(_parent_iterator_index) @@ -4999,6 +5061,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s std::map atomic16_all; // RdAtomicStat location -> atomic loop optimization state std::map rchcnt_loop_all; // RDCH/RCHCNT location -> channel read loop optimization state + std::map reduced_loop_all; std::map getllar_starts; // True for failed loops std::map run_on_block; std::map logged_block; @@ -5007,6 +5070,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s atomic16_t dummy16{}; rchcnt_loop_t dummy_loop{}; + reduced_loop_t dummy_rloop{}; bool likely_putllc_loop = false; bool had_putllc_evaluation = false; @@ -5053,6 +5117,194 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u32 iterator_id_alloc = 0; + auto get_block_targets = [&](u32 pc) -> std::span + { + if (m_block_info[pc / 4] && m_bbs.count(pc)) + { + return ::at32(m_bbs, pc).targets; + } + + return {}; + }; + + auto 
get_block_preds = [&](u32 pc) -> std::span + { + if (m_block_info[pc / 4] && m_bbs.count(pc)) + { + return ::at32(m_bbs, pc).preds; + } + + return {}; + }; + + const auto initiate_patterns = [&](block_reg_state_iterator& block_state_it, u32 bpc, bool is_multi_block) + { + // Initiate patterns (that are initiated on block start) + const auto& bb_body = ::at32(m_bbs, bpc); + + bool invalid = bb_body.size <= 2; + bool valid = true; + + u32 expected_sup_conds = 0; + u32 first_pred_of_loop = SPU_LS_SIZE; + + for (u32 pred : get_block_preds(bpc)) + { + if (is_multi_block ? pred >= bpc : pred == bpc) + { + first_pred_of_loop = std::min(pred, first_pred_of_loop); + } + } + + valid = first_pred_of_loop != SPU_LS_SIZE; + + const auto& bb_connect = ::at32(m_bbs, valid ? first_pred_of_loop : bpc); + + invalid = invalid || !valid; + valid = false; + + // Check loop connector block (must jump to block-next or to loop-start) + u32 targets_count = 0; + + for (u32 target : get_block_targets(first_pred_of_loop)) + { + valid = true; + targets_count++; + + if (first_pred_of_loop == bpc) + { + continue; + } + + if (target != bpc) + { + if (target != first_pred_of_loop + bb_connect.size * 4) + { + invalid = true; + } + } + } + + if (targets_count > 2) + { + invalid = true; + } + + const bool is_two_block_loop = targets_count == 1; + + invalid = invalid || !valid; + valid = false; + + // Check loop body block (must jump to last-block or another location) + + for (u32 block_pc = bpc; !invalid;) + { + targets_count = 0; + + const u32 cond_next = block_pc + ::at32(m_bbs, block_pc).size * 4; + valid = false; + + bool is_end = false; + + for (u32 target : get_block_targets(block_pc)) + { + targets_count++; + + if (target == cond_next) + { + // Conditional branch + valid = true; + } + + if (target <= block_pc && target > bpc) + { + // Branch backwards + invalid = true; + } + + if (target == bpc) + { + is_end = true; + } + } + + // if (bpc != block_pc) + // { + // for (u32 pred : 
get_block_preds(block_pc)) + // { + // if (pred < bpc || pred > first_pred_of_loop + ::at32(m_bbs, first_pred_of_loop).size * 4) + // { + // invalid = true; + // break; + // } + // } + // } + + if (targets_count > 2) + { + invalid = true; + break; + } + + if (cond_next == first_pred_of_loop && is_two_block_loop) + { + valid = true; + break; + } + + if (!valid) + { + break; + } + + if (bpc == first_pred_of_loop || is_end) + { + break; + } + + if (targets_count == 2) + { + expected_sup_conds++; + } + + block_pc = cond_next; + } + + invalid = invalid || !valid; + + if (bb_body.size > 2 && !invalid) + { + // Early filtering of false positives + const spu_opcode_t op{std::bit_cast>(::at32(result.data, (bpc - entry_point) / 4 + bb_body.size - 2))}; + const spu_opcode_t op2{std::bit_cast>(::at32(result.data, (bpc - entry_point) / 4))}; + + switch (g_spu_itype.decode(op.opcode)) + { + case spu_itype::RDCH: invalid = op.ra != SPU_RdDec; break; + case spu_itype::RCHCNT: invalid = true; break; + default: break; + } + + switch (g_spu_itype.decode(op2.opcode)) + { + case spu_itype::RDCH: invalid = invalid || op2.ra != SPU_RdDec; break; + case spu_itype::RCHCNT: invalid = true; break; + default: break; + } + } + + if (valid && !invalid && !reduced_loop_all.count(bpc) && expected_sup_conds == 0) + { + const auto reduced_loop = &block_state_it.reduced_loop; + reduced_loop->discard(); + reduced_loop->active = true; + reduced_loop->loop_pc = bpc; + reduced_loop->loop_end = first_pred_of_loop; + reduced_loop->expected_sup_conds = expected_sup_conds; + reduced_loop->is_two_block_loop = is_two_block_loop; + } + }; + for (u32 wf = 0, wi = 0, wa = entry_point, bpc = wa; wf <= 1;) { const bool is_form_block = wf == 0; @@ -5121,6 +5373,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s auto& vregs = is_form_block ? infos[bpc]->local_state : *true_state_walkby; const auto atomic16 = is_pattern_match ? 
&::at32(reg_state_it, wi).atomic16 : &dummy16; const auto rchcnt_loop = is_pattern_match ? &::at32(reg_state_it, wi).rchcnt_loop : &dummy_loop; + const auto reduced_loop = &::at32(reg_state_it, wi).reduced_loop; const u32 pos = wa; @@ -5244,10 +5497,71 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } }; + const auto break_reduced_loop_pattern = [&](u32 cause, reduced_loop_t previous) + { + if (previous.active && previous.loop_pc != SPU_LS_SIZE && reduced_loop_all.count(previous.loop_pc) == 0) + { + g_fxo->get().breaking_reason[cause]++; + + if (!spu_log.notice) + { + return; + } + + previous.active = false; + previous.failed = true; + + reduced_loop_all[previous.loop_pc] = previous; + + std::string break_error = fmt::format("Reduced loop pattern breakage [%x cause=%u] (read_pc=0x%x)", pos, cause, previous.loop_pc); + + const auto values = sort_breakig_reasons(g_fxo->get().breaking_reason); + + std::string tracing = "Top Breaking Reasons:"; + + usz i = 0; + usz fail_count = 0; + bool switched_to_minimal = false; + + for (auto it = values.begin(); it != values.end(); i++, it++) + { + fail_count += it->second; + + if (i >= 12) + { + continue; + } + + if (i < 8 && it->second > 1) + { + fmt::append(tracing, " [cause=%u, n=%d]", it->first, it->second); + } + else + { + if (!std::exchange(switched_to_minimal, true)) + { + fmt::append(tracing, "; More:"); + } + + fmt::append(tracing, " %u", it->first); + } + } + + fmt::append(tracing, " of %d failures", fail_count); + spu_log.notice("%s\n%s", break_error, tracing); + + std::string block_dump; + this->dump(result, block_dump, previous.loop_pc, previous.loop_end + 1); + + spu_log.notice("SPU Block Dump:\n%s", block_dump); + } + }; + const auto break_all_patterns = [&](u32 cause) { break_putllc16(cause, atomic16->discard()); break_channel_pattern(cause, rchcnt_loop->discard()); + break_reduced_loop_pattern(cause, reduced_loop->discard()); }; const auto calculate_absolute_ls_difference = [](u32 
addr1, u32 addr2) @@ -5309,16 +5623,6 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u32 stackframe_pc = SPU_LS_SIZE; usz entry_index = umax; - auto get_block_targets = [&](u32 pc) -> std::span - { - if (m_block_info[pc / 4] && m_bbs.count(pc)) - { - return m_bbs.at(pc).targets; - } - - return {}; - }; - u32 target_pc = SPU_LS_SIZE; bool insert_entry = false; bool is_code_backdoor = false; @@ -5508,7 +5812,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } - const u32 previous_pc = m_bbs.at(reg_state_it[stackframe_it].pc).size * 4 + reg_state_it[stackframe_it].pc - 4; + const u32 previous_pc = ::at32(m_bbs, reg_state_it[stackframe_it].pc).size * 4 + reg_state_it[stackframe_it].pc - 4; bool may_return = previous_pc + 4 != entry_point + result.data.size() * 4 && (m_ret_info[(previous_pc / 4) + 1] || m_entry_info[previous_pc / 4]); @@ -5537,6 +5841,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Backup analyser information const auto atomic16_info = reg_state_it[stackframe_it].atomic16; const auto rchcnt_loop_info = reg_state_it[stackframe_it].rchcnt_loop; + const auto reduced_loop_info = reg_state_it[stackframe_it].reduced_loop; // Clean from the back possible because it does not affect old indices // Technically should always do a full cleanup at the moment @@ -5562,6 +5867,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s spu_log.trace("Emplacing: block_id=%d, pc=0x%x, target_it=%d/%d, new_pc=0x%x (has_it=%d)", reg_state_it[stackframe_it].iterator_id, stackframe_pc, entry_index + 1, target_size, target_pc, atomic16_info.active); auto& next = reg_state_it.emplace_back(target_pc, stackframe_it, 0); + initiate_patterns(next, target_pc, true); + if (!is_code_backdoor) { // Restore analyser information (if not an entry) @@ -5569,6 +5876,9 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (previous_pc != 
rchcnt_loop_info.branch_pc || target_pc == rchcnt_loop_info.branch_target) next.rchcnt_loop = rchcnt_loop_info; + + if (previous_pc + 4 == target_pc && reduced_loop_info.loop_pc != reduced_loop_info.loop_end && reduced_loop_info.active && target_pc <= reduced_loop_info.loop_end) + next.reduced_loop = reduced_loop_info; } else { @@ -5604,15 +5914,30 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (!infos.empty()) { - reg_state_it.emplace_back(::at32(infos, entry_point)->pc).iterator_id = iterator_id_alloc++;; + reg_state_it.emplace_back(::at32(infos, entry_point)->pc).iterator_id = iterator_id_alloc++; + + initiate_patterns(reg_state_it.back(), ::at32(infos, entry_point)->pc, true); } } } + const auto prev_wi = wi - 1; + if (prev_wi != umax && ::at32(reg_state_it, prev_wi).reduced_loop.active) + { + const auto reduced_loop = &::at32(reg_state_it, prev_wi).reduced_loop; + + for (const auto& [reg_num, reg] : reduced_loop->regs) + { + + } + } + if (wi < reg_state_it.size()) { wa = ::at32(reg_state_it, wi).pc; bpc = wa; + + initiate_patterns(::at32(reg_state_it, wi), bpc, false); } }; @@ -5737,7 +6062,8 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s if (!is_form_block) { // Call for external code - break_all_patterns(25); + break_putllc16(25, atomic16->discard()); + break_channel_pattern(25, rchcnt_loop->discard()); } } @@ -5762,6 +6088,149 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const auto op = spu_opcode_t{data}; const auto type = g_spu_itype.decode(data); + if (reduced_loop->active && !(type & spu_itype::zregmod)) + { + const u32 op_rt = type & spu_itype::_quadrop ? 
+op.rt4 : +op.rt; + + u32 ra = s_reg_max, rb = s_reg_max, rc = s_reg_max; + + if (::at32(m_use_ra, pos / 4)) + { + ra = op.ra; + } + + if (::at32(m_use_rb, pos / 4)) + { + rb = op.rb; + } + + if (::at32(m_use_rc, pos / 4)) + { + rc = op.rc; + } + + bool is_move_register_op = false; + + switch (type) + { + case spu_itype::SHLQBYI: + { + is_move_register_op = op.i7 == 0; + break; + } + // Technically only ORI is needed but I am taking into account possible third-party SPU compilers or hand-written assembly + case spu_itype::ORI: + case spu_itype::ORHI: + case spu_itype::ORBI: + case spu_itype::AI: + case spu_itype::AHI: + case spu_itype::XORI: + case spu_itype::XORHI: + case spu_itype::XORBI: + { + is_move_register_op = op.si10 == 0; + break; + } + case spu_itype::ANDI: + case spu_itype::ANDHI: + case spu_itype::ANDBI: + { + is_move_register_op = op.si10 == -1; + break; + } + default: + { + break; + } + } + + u32 reg_pos = SPU_LS_SIZE; + + auto org = reduced_loop->get_reg(op_rt); + + u32 reg_first = s_reg_max; + + for (u32 reg : {ra, rb, rc}) + { + if (reg != s_reg_max && reg != reg_first) + { + const auto arg = reduced_loop->find_reg(reg); + + if (arg && arg->modified >= 1) + { + reg_first = reg; + + if (reg_first != s_reg_max && !is_move_register_op) + { + // Multiple origins + org.add_instruction_modifier(spu_itype::UNK, op.opcode); + break; + } + } + } + } + + if (reg_first == s_reg_max) + { + org = {}; + + if (!is_move_register_op) + { + org.add_instruction_modifier(type, op.opcode); + } + } + else if (reg_first == rb) + { + std::swap(ra, rb); + } + else if (reg_first == rc) + { + std::swap(ra, rc); + } + + for (u32 reg : {ra, rb, rc}) + { + if (reg != s_reg_max) + { + const auto arg = reduced_loop->find_reg(reg); + + if (arg && arg->regs.count() != 0) + { + if (reg_first == reg) + { + org = *arg; + + if (!is_move_register_op) + { + org.add_instruction_modifier(type, op.opcode); + } + + continue; + } + + org.join_with_this(*arg); + } + else + { + 
org.add_register_origin(reg); + } + } + } + + if (type & spu_itype::memory || type == spu_itype::RDCH || type == spu_itype::RCHCNT) + { + // Register external origin + org.add_register_origin(s_reg_max); + } + + *ensure(reduced_loop->find_reg(op_rt)) = org; + } + + if (reduced_loop->active && ((type & spu_itype::memory) || type == spu_itype::STOP || type == spu_itype::STOPD)) + { + reduced_loop->is_constant_expression = false; + } + // For debugging if (false && likely_putllc_loop && is_pattern_match) { @@ -5848,12 +6317,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } - if (type == spu_itype::SYNC) - { - // Remember - sync = true; - } - + break_reduced_loop_pattern(19, reduced_loop->discard()); break; } @@ -5861,10 +6325,6 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s case spu_itype::BI: case spu_itype::BISL: case spu_itype::BISLED: - case spu_itype::BIZ: - case spu_itype::BINZ: - case spu_itype::BIHZ: - case spu_itype::BIHNZ: { if (op.e || op.d) { @@ -5880,8 +6340,86 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s break; } + case spu_itype::BR: case spu_itype::BRA: { + if (reduced_loop->active) + { + if (!reduced_loop->is_two_block_loop || !reduced_loop->has_cond_state) + { + break_reduced_loop_pattern(20, reduced_loop->discard()); + break; + } + + for (const auto& [reg_num, reg] : reduced_loop->regs) + { + if (reg.is_loop_dictator(reg_num)) + { + if (reg.is_non_predictable_loop_dictator(reg_num)) + { + //break_reduced_loop_pattern(13, reduced_loop->discard()); + reduced_loop->is_constant_expression = false; + } + + reduced_loop->loop_dicts.set(reg_num); + } + } + + std::array reg_use{}; + std::bitset reg_maybe_float{}; + std::bitset reg_mod{}; + + for (auto it = m_bbs.find(reduced_loop->loop_pc); it != m_bbs.end() && it->first <= bpc; it++) + { + for (u32 i = 0; i < s_reg_max; i++) + { + if (!reg_mod[i]) + { + reg_use[i] += it->second.reg_use[i]; + } + } + + 
reg_maybe_float |= it->second.reg_maybe_float; + reg_mod |= it->second.reg_mod; + + // Note: update when sup_conds are implemented + if (it->first == bpc && it->first != reduced_loop->loop_pc) + { + reduced_loop->loop_may_update |= it->second.reg_mod; + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + if (!::at32(reduced_loop->loop_dicts, i)) + { + if (reg_use[i] && reg_mod[i]) + { + reduced_loop->is_constant_expression = false; + reduced_loop->loop_writes.set(i); + reduced_loop->loop_may_update.reset(i); + } + else if (reg_use[i]) + { + reduced_loop->loop_args.set(i); + + if (reg_use[i] >= 3 && reg_maybe_float[i]) + { + reduced_loop->gpr_not_nans.set(i); + } + } + } + else + { + // Cleanup + reduced_loop->loop_may_update.reset(i); + } + } + + reduced_loop_all.emplace(reduced_loop->loop_pc, *reduced_loop); + reduced_loop->discard(); + } + break; } @@ -5891,7 +6429,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const u32 next_pc = spu_branch_target(pos, 1); const u32 target = spu_branch_target(pos, op.i16); - if (rchcnt_loop->active) + while (rchcnt_loop->active) { const reg_state_t& rt = vregs[op.rt]; @@ -5907,16 +6445,710 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s rchcnt_loop->conditioned = true; rchcnt_loop->branch_pc = pos; rchcnt_loop->branch_target = rchcnt_loop->product_test_negate != (type == spu_itype::BRZ) ? 
target : next_pc; + } + + break; + } + + [[fallthrough]]; + } + case spu_itype::BRHZ: + case spu_itype::BRHNZ: + + case spu_itype::BIZ: + case spu_itype::BINZ: + case spu_itype::BIHZ: + case spu_itype::BIHNZ: + { + if (type == spu_itype::spu_itype::BIZ || type == spu_itype::BINZ || type == spu_itype::BIHZ || type == spu_itype::BIHNZ) + { + if (op.e || op.d) + { + break_all_patterns(27); break; } } - break; - } - case spu_itype::BR: - case spu_itype::BRHZ: - case spu_itype::BRHNZ: - { + const bool is_u16_jump = type == spu_itype::BRHZ || type == spu_itype::BRHNZ || type == spu_itype::BIHZ || type == spu_itype::BIHNZ; + const bool is_jump_zero = (type == spu_itype::BRZ || type == spu_itype::BRHZ || type == spu_itype::BIZ || type == spu_itype::BIHZ) ^ reduced_loop->is_two_block_loop; + + while (reduced_loop->active) + { + if (reduced_loop->expected_sup_conds) + { + break_reduced_loop_pattern(50, reduced_loop->discard()); + break; + } + + const u32 op_rt = op.rt; + + const auto reg = reduced_loop->find_reg(op_rt); + + if (!reg/* || reg->modified == 0*/) // See special case regarding branch with direct comparison with 0 + { + break_reduced_loop_pattern(1, reduced_loop->discard()); + break; + } + + bool should_have_argument_dictator = false; + bool should_have_argument_increment = false; + bool cond_val_incr_before_cond = false; + bool ends_with_comparison = false; + + bool pattern_ok1 = true; + + switch (reg->mod1_type) + { + case spu_itype::A: + { + should_have_argument_increment = true; + [[fallthrough]]; + } + case spu_itype::AI: + case spu_itype::AHI: + { + cond_val_incr_before_cond = true; + pattern_ok1 = true; + break; + } + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + ends_with_comparison = true; + should_have_argument_dictator = true; + break; + } + case spu_itype::CEQI: + case 
spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + ends_with_comparison = true; + pattern_ok1 = true; + break; + } + default: + { + if (reg->modified == 0) + { + // Special case: target may be sourced from another register which would be the loop dictator + break; + } + + pattern_ok1 = false; + break; + } + } + + if (!pattern_ok1) + { + break_reduced_loop_pattern(9, reduced_loop->discard()); + break; + } + + if (reg->modified >= 2) + { + switch (reg->mod2_type) + { + case spu_itype::A: + { + should_have_argument_increment = true; + [[fallthrough]]; + } + case spu_itype::AI: + case spu_itype::AHI: + { + if (cond_val_incr_before_cond) + { + // AI twice + break_reduced_loop_pattern(8, reduced_loop->discard()); + pattern_ok1 = false; + break; + } + + cond_val_incr_before_cond = false; + pattern_ok1 = true; + break; + } + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + if (!cond_val_incr_before_cond) + { + // Double comparison + break_reduced_loop_pattern(19, reduced_loop->discard()); + pattern_ok1 = false; + break; + } + + pattern_ok1 = true; + ends_with_comparison = true; + should_have_argument_dictator = true; + break; + } + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + if (!cond_val_incr_before_cond) + { + // Double comparison + break_reduced_loop_pattern(19, reduced_loop->discard()); + pattern_ok1 = false; + break; + } + + ends_with_comparison = true; + pattern_ok1 = true; + break; + } + default: + { + pattern_ok1 = false; + break; + } + } + } + + if (!pattern_ok1) + { + 
break_reduced_loop_pattern(10, reduced_loop->discard()); + break; + } + + bool found_loop_dictator = false; + bool found_loop_argument_for_dictator = false; + u32 null_regs_found = 0; + + for (u32 i = 0; i < reg->regs.size() && reduced_loop->active; i++) + { + if (::at32(reg->regs, i)) + { + if (0) if (i == op_rt || reg->modified == 0) + { + // Special case: direct comparison with zero for 32-bits (the only supported form by SPU) + + if (is_jump_zero) + { + // Infinite or single-time "loop" + break_reduced_loop_pattern(3, reduced_loop->discard()); + break; + } + + if (reg->modified >= 2) + { + break_reduced_loop_pattern(22, reduced_loop->discard()); + break; + } + + reduced_loop->cond_val_mask = u32{umax}; + reduced_loop->cond_val_min = 0; + reduced_loop->cond_val_size = u32{umax}; + + auto comp_reg = i == op_rt ? reg : reduced_loop->find_reg(i); + + if (!comp_reg || !comp_reg->is_predictable_loop_dictator(i)) + { + break_reduced_loop_pattern(4, reduced_loop->discard()); + break; + } + + ensure(reg->modified == 1 || i != op_rt); + + reduced_loop->cond_val_incr = static_cast(comp_reg->IMM); + reduced_loop->cond_val_incr_before_cond = reg->modified == 1; + reduced_loop->cond_val_register_idx = i; + reduced_loop->cond_val_compare = CMP_NOT_EQUAL; + reduced_loop->cond_val_is_immediate = true; + + found_loop_dictator = true; + break; + } + + auto reg_org = reduced_loop->find_reg(i); + u32 reg_index = i; + + if (reg_org && !cond_val_incr_before_cond && reg_org->modified == 0 && reg_org->regs.count() - 1u <= 1u && !::at32(reg_org->regs, i)) + { + for (u32 j = 0; j <= s_reg_127; j++) + { + if (::at32(reg_org->regs, j)) + { + if (const auto reg_found = reduced_loop->find_reg(j)) + { + if (reg_found->modified) + { + reg_org = reg_found; + reg_index = j; + break; + } + } + } + } + } + + if (!reg_org || reg_org->is_null(reg_index)) + { + // if (found_loop_dictator && !reduced_loop->cond_val_incr_is_immediate) + // { + // ensure(reduced_loop->cond_val_incr < s_reg_max); + + // 
} + // if (!should_have_argument_dictator) + // { + // break_reduced_loop_pattern(11, reduced_loop->discard()); + // break; + // } + + // if (found_loop_argument_for_dictator) + // { + // break_reduced_loop_pattern(6, reduced_loop->discard()); + // break; + // } + + // found_loop_argument_for_dictator = true; + // reduced_loop->cond_val_is_immediate = false; + + // if (found_loop_dictator) + // { + // ensure(i == reduced_loop->cond_val_register_argument_idx); + // } + // else + // { + // reduced_loop->cond_val_register_argument_idx = i; + // } + + // if (found_loop_dictator && reg->regs.count() == 2) + // { + // break; + // } + + null_regs_found++; + continue; + } + + if (found_loop_dictator) + { + break_reduced_loop_pattern(13, reduced_loop->discard()); + break; + } + + found_loop_dictator = true; + + if (!reg_org->is_predictable_loop_dictator(i)) + { + break_reduced_loop_pattern(7, reduced_loop->discard()); + break; + } + + if (reg_index != i && ::at32(reg->regs, reg_index)) + { + // Unimplemented + break_reduced_loop_pattern(30, reduced_loop->discard()); + break; + } + + u32 cond_val_incr = static_cast(reg_org->IMM); + + if (reg_org->mod1_type == spu_itype::AI || reg_org->mod1_type == spu_itype::AHI) + { + reduced_loop->cond_val_incr_is_immediate = true; + reduced_loop->cond_val_incr = static_cast(reg_org->IMM); + } + else if (reg_org->mod1_type == spu_itype::A) + { + reduced_loop->cond_val_incr_is_immediate = false; + + const u32 op_ra = spu_opcode_t{reg_org->IMM}.ra; + const u32 op_rb = spu_opcode_t{reg_org->IMM}.rb; + + if (!(op_ra == reg_index || op_rb == reg_index)) + { + break_reduced_loop_pattern(25, reduced_loop->discard()); + break; + } + + const u32 incr_arg_reg = reg_index == op_ra ? 
op_rb : op_ra; + + if (!reduced_loop->is_reg_null(incr_arg_reg)) + { + break_reduced_loop_pattern(26, reduced_loop->discard()); + break; + } + + reduced_loop->cond_val_incr = incr_arg_reg; + } + else + { + break_reduced_loop_pattern(28, reduced_loop->discard()); + break; + } + + reduced_loop->cond_val_incr_before_cond = cond_val_incr_before_cond; + + u64 cmp_mask = 0; + compare_direction cmp_direction{}; + + if (!ends_with_comparison) + { + if (is_jump_zero) + { + // Infinite or single-time "loop" + break_reduced_loop_pattern(3, reduced_loop->discard()); + break; + } + + cmp_mask = is_u16_jump ? u16{umax} : u32{umax}; + reduced_loop->cond_val_min = 0; + reduced_loop->cond_val_is_immediate = true; + cmp_direction = CMP_NOT_EQUAL; + } + else if (!should_have_argument_dictator) + { + reduced_loop->cond_val_min = reg->IMM; + reduced_loop->cond_val_is_immediate = true; + + const auto cmp_optype = reg->reverse1_type() == spu_itype::XSBH ? reg->reverse2_type() : reg->reverse1_type(); + + switch (cmp_optype) + { + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + { + cmp_direction = CMP_EQUAL; + break; + } + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + { + cmp_direction = CMP_SGREATER; + break; + } + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + cmp_direction = CMP_LGREATER; + break; + } + default: + { + break_reduced_loop_pattern(21, reduced_loop->discard()); + } + } + + switch (cmp_optype) + { + case spu_itype::CEQI: + case spu_itype::CGTI: + case spu_itype::CLGTI: + { + cmp_mask = u32{umax}; + break; + } + case spu_itype::CLGTHI: + case spu_itype::CEQHI: + case spu_itype::CGTHI: + { + cmp_mask = u16{umax}; + break; + } + case spu_itype::CEQBI: + case spu_itype::CGTBI: + case spu_itype::CLGTBI: + { + cmp_mask = u8{umax}; + break; + } + default: break_reduced_loop_pattern(21, reduced_loop->discard()); + } + + if (is_jump_zero) + { + cmp_direction = compare_direction{cmp_direction ^ 
CMP_NEGATE_FLAG}; + } + + if (cmp_direction == CMP_EQUAL2 || cmp_direction == CMP_NOT_EQUAL2) + { + // Fixup (no sense in remembering the turnaround for euqality comparison) + cmp_direction = compare_direction{cmp_direction & ~CMP_TURNAROUND_FLAG}; + } + } + else + { + const u32 op_ra = spu_opcode_t{reg->IMM}.ra; + const u32 op_rb = spu_opcode_t{reg->IMM}.rb; + + if (!(op_ra == reg_index || op_rb == reg_index)) + { + break_reduced_loop_pattern(20, reduced_loop->discard()); + break; + } + + const auto cmp_optype = reg->reverse1_type() == spu_itype::XSBH ? reg->reverse2_type() : reg->reverse1_type(); + + switch (cmp_optype) + { + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + { + cmp_direction = CMP_EQUAL; + break; + } + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + { + cmp_direction = CMP_SGREATER; + break; + } + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + cmp_direction = CMP_LGREATER; + break; + } + default: ensure(false); + } + + switch (cmp_optype) + { + case spu_itype::CEQ: + case spu_itype::CGT: + case spu_itype::CLGT: + { + cmp_mask = u32{umax}; + break; + } + case spu_itype::CLGTH: + case spu_itype::CEQH: + case spu_itype::CGTH: + { + cmp_mask = u16{umax}; + break; + } + case spu_itype::CEQB: + case spu_itype::CGTB: + case spu_itype::CLGTB: + { + cmp_mask = u8{umax}; + break; + } + default: ensure(false); + } + + if (op_ra != i) + { + // Compare is on the oppsoite direction + // This variation exists only via register mode (due to lack of SPU opcodes) + cmp_direction = compare_direction{cmp_direction ^ CMP_TURNAROUND_FLAG}; + } + + if (is_jump_zero) + { + cmp_direction = compare_direction{cmp_direction ^ CMP_NEGATE_FLAG}; + } + + if (cmp_direction == CMP_EQUAL2 || cmp_direction == CMP_NOT_EQUAL2) + { + // Fixup (no sense in remembering the turnaround for euqality comparison) + cmp_direction = compare_direction{cmp_direction & ~CMP_TURNAROUND_FLAG}; + } + + // The loop dictator 
is the register that is not the argument + const u32 loop_arg_reg = reg_index == op_ra ? op_rb : op_ra; + const u32 loop_dict_reg = reg_index == op_ra ? op_ra : op_rb; + reduced_loop->cond_val_is_immediate = false; + + if (found_loop_argument_for_dictator) + { + ensure(loop_arg_reg == reduced_loop->cond_val_register_argument_idx); + } + else + { + reduced_loop->cond_val_register_argument_idx = loop_arg_reg; + } + + if (!reduced_loop->is_reg_null(loop_arg_reg)) + { + break_reduced_loop_pattern(27, reduced_loop->discard()); + break; + } + + found_loop_argument_for_dictator = true; + } + + if (cmp_direction == CMP_EQUAL) + { + // Infinite or single-time "loop" + break_reduced_loop_pattern(18, reduced_loop->discard()); + break; + } + + if (cmp_mask == u16{umax} && !is_u16_jump) + { + break_reduced_loop_pattern(14, reduced_loop->discard()); + break; + } + + if (cmp_mask == u8{umax}) + { + bool instructions_ok = false; + + if (is_u16_jump) + { + // If ANDI(0xff) is used, although unlikely, it fine as well for 16-bits + instructions_ok = FN(x == spu_itype::XSBH || x == spu_itype::ANDI)(!cond_val_incr_before_cond ? reg->mod2_type : reg->mod3_type); + } + else + { + instructions_ok = FN(x == spu_itype::ANDI)(!cond_val_incr_before_cond ? 
reg->mod2_type : reg->mod3_type); + } + + if (!instructions_ok) + { + break_reduced_loop_pattern(15, reduced_loop->discard()); + break; + } + } + + reduced_loop->cond_val_compare = cmp_direction; + reduced_loop->cond_val_mask = cmp_mask; + reduced_loop->cond_val_register_idx = reg_index; + + // if (!should_have_argument_dictator && reg->regs.count() == 1) + // { + // break; + // } + + // if (found_loop_argument_for_dictator && reg->regs.count() == 2) + // { + // break; + // } + } + } + + if (!found_loop_dictator) + { + break_reduced_loop_pattern(16, reduced_loop->discard()); + } + + if (should_have_argument_dictator && !found_loop_argument_for_dictator) + { + break_reduced_loop_pattern(17, reduced_loop->discard()); + } + + if (reduced_loop->active) + { + ensure(reduced_loop->cond_val_register_idx != umax); + + if (reduced_loop->is_two_block_loop) + { + reduced_loop->has_cond_state = true; + break; + } + + for (const auto& [reg_num, reg] : reduced_loop->regs) + { + if (reg.is_loop_dictator(reg_num)) + { + if (reg.is_non_predictable_loop_dictator(reg_num)) + { + //break_reduced_loop_pattern(13, reduced_loop->discard()); + reduced_loop->is_constant_expression = false; + } + + reduced_loop->loop_dicts.set(reg_num); + } + } + + std::array reg_use{}; + std::bitset reg_maybe_float{}; + std::bitset reg_mod{}; + + for (auto it = m_bbs.find(reduced_loop->loop_pc); it != m_bbs.end() && it->first <= bpc; it++) + { + for (u32 i = 0; i < s_reg_max; i++) + { + if (!reg_mod[i]) + { + reg_use[i] += it->second.reg_use[i]; + } + } + + reg_maybe_float |= it->second.reg_maybe_float; + reg_mod |= it->second.reg_mod; + + // Note: update when sup_conds are implemented + if (it->first == bpc && it->first != reduced_loop->loop_pc) + { + reduced_loop->loop_may_update |= it->second.reg_mod; + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + if (!::at32(reduced_loop->loop_dicts, i)) + { + if (reg_use[i] && reg_mod[i]) + { + reduced_loop->is_constant_expression = false; + 
reduced_loop->loop_writes.set(i); + reduced_loop->loop_may_update.reset(i); + } + else if (reg_use[i]) + { + reduced_loop->loop_args.set(i); + + if (reg_use[i] >= 3 && reg_maybe_float[i]) + { + reduced_loop->gpr_not_nans.set(i); + } + } + } + else + { + // Cleanup + reduced_loop->loop_may_update.reset(i); + } + } + + reduced_loop_all.emplace(reduced_loop->loop_pc, *reduced_loop); + reduced_loop->discard(); + } + + break; + } + break; } @@ -5929,17 +7161,49 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s case spu_itype::HLGTI: case spu_itype::LNOP: case spu_itype::NOP: - case spu_itype::MTSPR: case spu_itype::FSCRWR: { // Do nothing break; } - + + case spu_itype::MTSPR: + { + break_all_patterns(99); + break; + } + case spu_itype::WRCH: { break_channel_pattern(56, rchcnt_loop->discard()); + if (reduced_loop->active) + { + switch (op.ra) + { + case MFC_EAL: + case MFC_LSA: + case MFC_TagID: + case MFC_Size: + case MFC_EAH: + case SPU_WrDec: + case SPU_WrSRR0: + case SPU_WrEventAck: + case SPU_Set_Bkmk_Tag: + case SPU_PM_Start_Ev: + case SPU_PM_Stop_Ev: + case MFC_WrTagMask: + { + break; + } + default: + { + break_reduced_loop_pattern(18, reduced_loop->discard()); + break; + } + } + } + switch (op.ra) { case MFC_EAL: @@ -6202,6 +7466,14 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s const bool is_read = type == spu_itype::RDCH; bool invalidate = true; + if (!is_read || op.ra != SPU_RdDec) + { + if (reduced_loop->active) + { + break_reduced_loop_pattern(17, reduced_loop->discard()); + } + } + const auto it = rchcnt_loop_all.find(pos); if (it != rchcnt_loop_all.end()) @@ -7111,17 +8383,17 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s u32 ra = s_reg_max, rb = s_reg_max, rc = s_reg_max; - if (m_use_ra.test(pos / 4)) + if (::at32(m_use_ra, pos / 4)) { ra = op.ra; } - if (m_use_rb.test(pos / 4)) + if (::at32(m_use_rb, pos / 4)) { rb = op.rb; } - if (type & spu_itype::_quadrop && 
m_use_rc.test(pos / 4)) + if (::at32(m_use_rc, pos / 4)) { rc = op.rc; } @@ -7169,6 +8441,11 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s { for (u32 next_target : ::at32(m_targets, pos)) { + if (next_target == SPU_LS_SIZE) + { + continue; + } + add_block(next_target); } @@ -7353,6 +8630,76 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s } } + for (const auto& [loop_pc, pattern] : reduced_loop_all) + { + if (!pattern.active || pattern.loop_pc == SPU_LS_SIZE) + { + continue; + } + + if (inst_attr attr = m_inst_attrs[(loop_pc - entry_point) / 4]; attr == inst_attr::none) + { + const u64 hash = loop_pc / 4 + read_from_ptr>(func_hash.data()); + + add_pattern(inst_attr::reduced_loop, loop_pc - result.entry_point, 0, std::make_shared(pattern)); + + std::string regs = "{"; + + for (const auto& [reg_num, reg] : pattern.regs) + { + if (reg.is_loop_dictator(reg_num)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u", reg_num); + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + if (::at32(pattern.loop_writes, i)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u-w", i); + } + + if (::at32(pattern.loop_args, i)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u-r", i); + } + + if (::at32(pattern.loop_may_update, i)) + { + if (regs.size() != 1) + { + regs += ","; + } + + fmt::append(regs, " r%u-m", i); + } + } + + regs += " }"; + + spu_log.success("Reduced Loop Pattern Detected! (REGS: %s, DICT: r%d, ARG: %s, Incr: %s (%s), CMP/Size: %s/%u, loop_pc=0x%x, 0x%x-%s)", regs, pattern.cond_val_register_idx + , pattern.cond_val_is_immediate ? fmt::format("0x%x", pattern.cond_val_min) : fmt::format("r%d", pattern.cond_val_register_argument_idx) + , pattern.cond_val_incr_is_immediate ? fmt::format("%d", static_cast(pattern.cond_val_incr)) : fmt::format("r%d", pattern.cond_val_incr), pattern.cond_val_incr_before_cond ? 
"BEFORE" : "AFTER" + , pattern.cond_val_compare, std::popcount(pattern.cond_val_mask), loop_pc, entry_point, func_hash); + } + } + if (likely_putllc_loop && !had_putllc_evaluation) { spu_log.notice("Likely missed PUTLLC16 patterns. (entry=0x%x)", entry_point); @@ -7363,7 +8710,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s // Blocks starting from 0x0 or invalid instruction won't be compiled, may need special interpreter fallback } - if (!m_patterns.empty()) + if (!m_patterns.empty() && g_cfg.core.spu_debug) { std::string out_dump; dump(result, out_dump); @@ -7386,7 +8733,7 @@ spu_program spu_recompiler_base::analyse(const be_t* ls, u32 entry_point, s return result; } -void spu_recompiler_base::dump(const spu_program& result, std::string& out) +void spu_recompiler_base::dump(const spu_program& result, std::string& out, u32 block_min, u32 block_max) { SPUDisAsm dis_asm(cpu_disasm_mode::dump, reinterpret_cast(result.data.data()), result.lower_bound); @@ -7409,10 +8756,18 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out) hash = "N/A"; } - fmt::append(out, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", result.entry_point, result.data.size(), hash); + if (block_min == 0) + { + fmt::append(out, "========== SPU BLOCK 0x%05x (size %u, %s) ==========\n\n", result.entry_point, result.data.size(), hash); + } for (auto& bb : m_bbs) { + if (bb.first < block_min || bb.first >= block_max) + { + continue; + } + if (m_block_info[bb.first / 4]) { fmt::append(out, "A: [0x%05x] %s [%s]\n", bb.first, m_entry_info[bb.first / 4] ? (m_ret_info[bb.first / 4] ? 
"Chunk" : "Entry") : "Block", spu_block_hash{(hash_start & -65536) + bb.first / 4}); @@ -8435,9 +9790,9 @@ std::array& block_reg_info::evaluate_start_state(const s return walkby_state; } -void spu_recompiler_base::add_pattern(inst_attr attr, u32 start, u64 info) +void spu_recompiler_base::add_pattern(inst_attr attr, u32 start, u64 info, std::shared_ptr info_ptr) { - m_patterns[start] = pattern_info{info}; + m_patterns[start] = pattern_info{info, info_ptr}; m_inst_attrs[start / 4] = attr; } diff --git a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp index 856a039e5e..6837baaa97 100644 --- a/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp +++ b/rpcs3/Emu/Cell/SPULLVMRecompiler.cpp @@ -60,6 +60,7 @@ const extern spu_decoder g_spu_iflag; #pragma GCC diagnostic pop #endif +#pragma optimize("", off) #ifdef ARCH_ARM64 #include "Emu/CPU/Backends/AArch64/AArch64JIT.h" #endif @@ -132,6 +133,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator llvm::MDNode* m_md_unlikely; llvm::MDNode* m_md_likely; + llvm::MDNode* m_md_spu_memory_domain; + llvm::MDNode* m_md_spu_context_domain; struct block_info { @@ -139,7 +142,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator spu_recompiler_base::block_info* bb{}; // Current block's entry block - llvm::BasicBlock* block; + llvm::BasicBlock* block{}; // Final block (for PHI nodes, set after completion) llvm::BasicBlock* block_end{}; @@ -150,11 +153,15 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Current register values std::array reg{}; + // Opimization: restoring register state for registers that would be rewritten in other blocks + std::array reg_save_and_restore{}; + // PHI nodes created for this block (if any) std::array phi{}; // Store instructions std::array store{}; + bool block_wide_reg_store_elimination = false; // Store reordering/elimination protection std::array store_context_last_id = fill_array(0); // Protects 
against illegal forward ordering @@ -189,10 +196,13 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator }; // Current block - block_info* m_block; + block_info* m_block = nullptr; // Current function or chunk - function_info* m_finfo; + function_info* m_finfo = nullptr; + + // Reduced Loop Pattern information (if available) + reduced_loop_t* m_reduced_loop_info = nullptr; // All blocks in the current function chunk std::unordered_map> m_blocks; @@ -364,7 +374,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator { if (i != s_reg_lr && i != s_reg_sp && (i < s_reg_80 || i > s_reg_127)) { - m_block->reg[i] = m_ir->CreateLoad(get_reg_type(i), init_reg_fixed(i)); + m_block->reg[i] = get_reg_fixed(i, get_reg_type(i)); } } @@ -549,6 +559,40 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator return _ptr(m_thread, ::offset32(offset_args...)); } + template + T* spu_mem_attr(T* inst) + { + if (auto load_inst = llvm::dyn_cast(inst)) + { + load_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_context_domain); + load_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_memory_domain); + } + else if (auto store_inst = llvm::dyn_cast(inst)) + { + store_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_context_domain); + store_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_memory_domain); + } + + return inst; + } + + template + T* spu_context_attr(T* inst) + { + if (auto load_inst = llvm::dyn_cast(inst)) + { + load_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_context_domain); + load_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_memory_domain); + } + else if (auto store_inst = llvm::dyn_cast(inst)) + { + store_inst->setMetadata(llvm::LLVMContext::MD_alias_scope, m_md_spu_context_domain); + store_inst->setMetadata(llvm::LLVMContext::MD_noalias, m_md_spu_memory_domain); + } + + return inst; + } + // Return default register type 
llvm::Type* get_reg_type(u32 index) { @@ -709,8 +753,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (!reg) { + if (m_block && m_block->block_wide_reg_store_elimination) + { + fmt::throw_exception("Unexpected load: [%s] at 0x%x (gpr=r%d)", m_hash, m_pos, index); + } + // Load register value if necessary reg = m_finfo && m_finfo->load[index] ? m_finfo->load[index] : m_ir->CreateLoad(get_reg_type(index), init_reg_fixed(index)); + spu_context_attr(reg); } if (reg->getType() == get_type()) @@ -920,6 +970,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (m_block) { + if (m_block->block_wide_reg_store_elimination) + { + // Don't save registers for the current block iteration + // Affected optimizations: + // 1. Single-block reduced loop + return; + } + // Keep the store's location in history of gpr preservaions m_block->store_context_last_id[index] = m_block->store_context_ctr[index]; m_block->store_context_first_id[index] = std::min(m_block->store_context_first_id[index], m_block->store_context_ctr[index]); @@ -935,7 +993,9 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator } // Write register to the context - _store = m_ir->CreateStore(is_xfloat ? double_to_xfloat(saved_value) : m_ir->CreateBitCast(value, get_reg_type(index)), addr); + _store = m_ir->CreateStore(is_xfloat ? double_to_xfloat(saved_value) : bitcast(value, get_reg_type(index)), addr); + + spu_context_attr(_store); } template @@ -1046,7 +1106,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator // Update PC for current or explicitly specified instruction address void update_pc(u32 target = -1) { - m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? target : m_pos), 0x3fffc), spu_ptr(&spu_thread::pc))->setVolatile(true); + spu_context_attr(m_ir->CreateStore(m_ir->CreateAnd(get_pc(target + 1 ? 
target : m_pos), 0x3fffc), spu_ptr(&spu_thread::pc)))->setVolatile(true); } // Call cpu_thread::check_state if necessary and return or continue (full check) @@ -1055,7 +1115,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator const auto pstate = spu_ptr(&spu_thread::state); const auto _body = llvm::BasicBlock::Create(m_context, "", m_function); const auto check = llvm::BasicBlock::Create(m_context, "", m_function); - m_ir->CreateCondBr(m_ir->CreateICmpEQ(m_ir->CreateLoad(get_type(), pstate, true), m_ir->getInt32(0)), _body, check, m_md_likely); + m_ir->CreateCondBr(m_ir->CreateICmpEQ(spu_context_attr(m_ir->CreateLoad(get_type(), pstate, true)), m_ir->getInt32(0)), _body, check, m_md_likely); m_ir->SetInsertPoint(check); update_pc(addr); @@ -1066,14 +1126,14 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator if (may_be_unsafe_for_savestate) { - m_ir->CreateStore(m_ir->getInt8(1), spu_ptr(&spu_thread::unsavable))->setVolatile(true); + spu_context_attr(m_ir->CreateStore(m_ir->getInt8(1), spu_ptr(&spu_thread::unsavable)))->setVolatile(true); } m_ir->CreateCall(m_test_state, {m_thread}); if (may_be_unsafe_for_savestate) { - m_ir->CreateStore(m_ir->getInt8(0), spu_ptr(&spu_thread::unsavable))->setVolatile(true); + spu_context_attr(m_ir->CreateStore(m_ir->getInt8(0), spu_ptr(&spu_thread::unsavable)))->setVolatile(true); } m_ir->CreateBr(_body); @@ -1509,6 +1569,16 @@ public: m_md_likely = llvm::MDTuple::get(m_context, {md_name, md_high, md_low}); m_md_unlikely = llvm::MDTuple::get(m_context, {md_name, md_low, md_high}); + const auto domain = llvm::MDNode::getDistinct(m_context, {llvm::MDString::get(m_context, "SPU_mem")}); + const auto scope = llvm::MDNode::get(m_context, {llvm::MDString::get(m_context, "SPU_mem_scope"), domain}); + + m_md_spu_memory_domain = llvm::MDNode::get(m_context, scope); + + const auto domain2 = llvm::MDNode::getDistinct(m_context, {llvm::MDString::get(m_context, "SPU_ctx")}); + const 
auto scope2 = llvm::MDNode::get(m_context, {llvm::MDString::get(m_context, "SPU_ctx_scope"), domain2}); + + m_md_spu_context_domain = llvm::MDNode::get(m_context, scope2); + // Initialize transform passes clear_transforms(); #ifdef ARCH_ARM64 @@ -1678,7 +1748,7 @@ public: // Emit state check const auto pstate = spu_ptr(&spu_thread::state); - m_ir->CreateCondBr(m_ir->CreateICmpNE(m_ir->CreateLoad(get_type(), pstate), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely); + m_ir->CreateCondBr(m_ir->CreateICmpNE(spu_context_attr(m_ir->CreateLoad(get_type(), pstate)), m_ir->getInt32(0)), label_stop, label_test, m_md_unlikely); // Emit code check u32 check_iterations = 0; @@ -2059,6 +2129,43 @@ public: bool need_check = false; m_block->bb = &bb; + // [1gJ45f2-0x00a40]: 16.4982% (113258) + // [ZsQTud1-0x0924c]: 6.1202% (42014) + // [ZsQTud1-0x08e54]: 5.6610% (38862) + // [0000000-0x3fffc]: 4.3764% (30043) + // [Zh4tpJM-0x00bcc]: 3.7908% (26023) + // [CFt8hXu-0x063b8]: 3.6177% (24835) + // [8YJCUjv-0x0ad18]: 3.2417% (22254) + // [Try3XHn-0x0f018]: 2.3721% (16284) + // [s6ti9iu-0x07678]: 1.8464% (12675) + // [oyxkAPv-0x0c22c]: 1.7776% (12203) + // [Q0jLqH4-0x00324]: 1.6015% (10994) + static const std::array, 4> to_nop + { + { } + }; + + bool found_block = false; + + for (auto& [hash, pos] : to_nop) + { + if (m_hash.find(hash) <= 2 && baddr == pos) + { + found_block = true; + break; + } + } + + if (found_block) + { + for (u32 i = 0; i < 100; i++) + { + auto value = m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::last_getllar_lsa)); + auto mod_val = m_ir->CreateFDiv(value, llvm::ConstantFP::get(value->getType(), 1.1 + i)); + m_ir->CreateStore(value, spu_ptr(&spu_thread::last_getllar_lsa)); + } + } + if (!bb.preds.empty()) { // Initialize registers and build PHI nodes if necessary @@ -2174,6 +2281,528 @@ public: check_state(baddr); } + const bool is_reduced_loop = m_inst_attrs[(baddr - start) / 4] == inst_attr::reduced_loop; + m_reduced_loop_info = is_reduced_loop ? 
std::static_pointer_cast(ensure(m_patterns.at(baddr - start).info_ptr)).get() : nullptr; + + BasicBlock* block_optimization_phi_parent = nullptr; + const auto block_optimization_inner = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b-loop-it-0x%x", m_pos), m_function) : nullptr; + const auto block_optimization_exit_early = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b-loop-exit-0x%x", m_pos), m_function) : nullptr; + const auto block_optimization_next = is_reduced_loop ? BasicBlock::Create(m_context, fmt::format("b2-0x%x", m_pos), m_function) : nullptr; + + std::array reduced_loop_phi_nodes{}; + std::array reduced_loop_init_regs{}; + + // Reserve additional iteration for rare case where GPR may not be rewritten after the iteration + // So that it would have to be rewritten by future code + // This avoids using additional PHI connectors + const u32 reserve_iterations = m_reduced_loop_info && m_reduced_loop_info->loop_may_update.count() != 0 ? 3 : 2; + + for (u32 i = 0; i < s_reg_max; i++) + { + if (m_reduced_loop_info && m_reduced_loop_info->loop_may_update.test(i)) + { + m_block->reg_save_and_restore[i] = m_block->reg[i]; + } + } + + auto make_reduced_loop_condition = [&](llvm::BasicBlock* optimization_block, bool is_second_time) + { + llvm::ICmpInst::Predicate compare{}; + + switch (m_reduced_loop_info->cond_val_compare) + { + case CMP_SLESS: compare = ICmpInst::ICMP_SLT; break; + case CMP_SGREATER: compare = ICmpInst::ICMP_SGT; break; + case CMP_EQUAL: compare = ICmpInst::ICMP_EQ; break; + case CMP_LLESS: compare = ICmpInst::ICMP_ULT; break; + case CMP_LGREATER: compare = ICmpInst::ICMP_UGT; break; + case CMP_SGREATER_EQUAL: compare = ICmpInst::ICMP_SGE; break; + case CMP_SLOWER_EQUAL: compare = ICmpInst::ICMP_SLE; break; + case CMP_NOT_EQUAL: compare = ICmpInst::ICMP_NE; break; + case CMP_LGREATER_EQUAL: compare = ICmpInst::ICMP_UGE; break; + case CMP_LLOWER_EQUAL: compare = ICmpInst::ICMP_ULE; break; + { + break; + } + case 
CMP_UNKNOWN: + case CMP_NOT_EQUAL2: + case CMP_EQUAL2: + default: + { + ensure(false); + break; + } + } + + llvm::Value* loop_dictator_before_adjustment{}; + llvm::Value* loop_dictator_after_adjustment{}; + + spu_opcode_t reg_target{}; + reg_target.rt = static_cast(m_reduced_loop_info->cond_val_register_idx); + + if (reg_target.rt != m_reduced_loop_info->cond_val_register_idx) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition register index: 0x%llx", m_reduced_loop_info->cond_val_register_idx); + } + + if (!m_block->reg[reg_target.rt]) + { + m_block->reg[reg_target.rt] = reduced_loop_init_regs[reg_target.rt]; + } + + switch (m_reduced_loop_info->cond_val_mask) + { + case u8{umax}: + { + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + case u16{umax}: + { + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + case u32{umax}: + { + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + case u64{umax}: + { + ensure(false); // TODO + loop_dictator_before_adjustment = get_scalar(get_vr(reg_target.rt)).eval(m_ir); + break; + } + default: + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition bit mask: 0x%llx", m_reduced_loop_info->cond_val_mask); + } + } + + const u32 type_bits = std::popcount(m_reduced_loop_info->cond_val_mask); + + llvm::Value* cond_val_incr = nullptr; + + if (m_reduced_loop_info->cond_val_incr_is_immediate) + { + cond_val_incr = m_ir->getIntN(type_bits, m_reduced_loop_info->cond_val_incr & m_reduced_loop_info->cond_val_mask); + } + else + { + spu_opcode_t reg_incr{}; + reg_incr.rt = static_cast(m_reduced_loop_info->cond_val_incr); + + if (reg_incr.rt != m_reduced_loop_info->cond_val_incr) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal increment arguemnt register index: 0x%llx", m_reduced_loop_info->cond_val_incr); + } + switch (m_reduced_loop_info->cond_val_mask) + { 
+ case u8{umax}: + { + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + case u16{umax}: + { + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + case u32{umax}: + { + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + case u64{umax}: + { + ensure(false); // TODO + cond_val_incr = get_scalar(get_vr(reg_incr.rt)).eval(m_ir); + break; + } + } + } + + if (m_reduced_loop_info->cond_val_incr_before_cond && !m_reduced_loop_info->cond_val_incr_before_cond_taken_in_account) + { + loop_dictator_after_adjustment = m_ir->CreateAdd(loop_dictator_before_adjustment, cond_val_incr); + } + else + { + loop_dictator_after_adjustment = loop_dictator_before_adjustment; + } + + llvm::Value* loop_argument = nullptr; + + if (m_reduced_loop_info->cond_val_is_immediate) + { + loop_argument = m_ir->CreateTrunc(m_ir->getInt64(m_reduced_loop_info->cond_val_min & m_reduced_loop_info->cond_val_mask), loop_dictator_before_adjustment->getType()); + } + else + { + spu_opcode_t reg_target2{}; + reg_target2.rt = static_cast(m_reduced_loop_info->cond_val_register_argument_idx); + + if (reg_target2.rt != m_reduced_loop_info->cond_val_register_argument_idx) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Illegal condition arguemnt register index: 0x%llx", m_reduced_loop_info->cond_val_register_argument_idx); + } + + switch (m_reduced_loop_info->cond_val_mask) + { + case u8{umax}: + { + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + case u16{umax}: + { + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + case u32{umax}: + { + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + case u64{umax}: + { + ensure(false); // TODO + loop_argument = get_scalar(get_vr(reg_target2.rt)).eval(m_ir); + break; + } + } + } + + llvm::Value* condition = nullptr; + + if (reserve_iterations == 1) + { + condition = m_ir->CreateICmp(compare, 
loop_dictator_after_adjustment, loop_argument); + } + // else if ((m_reduced_loop_info->cond_val_compare == CMP_LGREATER || (m_reduced_loop_info->cond_val_compare == CMP_LGREATER_EQUAL && m_reduced_loop_info->cond_val_is_immediate && m_reduced_loop_info->cond_val_incr)) && cond_val_incr->getSExtValue() < 0) + // { + // const auto cond_val_incr_multiplied = m_ir->CreateMul(cond_val_incr, reserve_iterations - 1); + // condition = m_ir->CreateICmp(compare, select(m_ir->CreateICmpUGE(cond_val_incr_multiplied, loop_dictator_after_adjustment), m_ir->CreateAdd(loop_dictator_after_adjustment, cond_val_incr_multiplied), m_ir->getIntN(type_bits, 0)), loop_argument); + // } + else + { + //debugtrap(); + + llvm::Value* prev_it = loop_dictator_after_adjustment; + + for (u32 i = 0; i < reserve_iterations; i++) + { + if (i) + { + prev_it = m_ir->CreateAdd(prev_it, cond_val_incr); + } + + const auto also_cond = m_ir->CreateICmp(compare, prev_it, loop_argument); + condition = condition ? m_ir->CreateAnd(condition, also_cond) : also_cond; + } + } + + if (!is_second_time) + { + for (u32 i = 0, count = 0, prev_i = umax;; i++) + { + const bool is_last = !(count <= 20 && i < s_reg_max); + + if (is_last || m_reduced_loop_info->is_gpr_not_NaN_hint(i)) + { + count++; + + if (prev_i == umax) + { + if (!is_last) + { + prev_i = i; + continue; + } + + break; + } + + auto access_gpr = [&](u32 index) + { + spu_opcode_t op_arg{}; + op_arg.ra = index; + return get_vr(op_arg.ra); + }; + + // OR LSB to convert infinity to NaN + llvm::Value* arg1 = bitcast(access_gpr(prev_i) | splat(1)).eval(m_ir); + llvm::Value* arg2 = is_last ? 
arg1 : bitcast(access_gpr(i) | splat(1)).eval(m_ir); + + llvm::Value* acc = m_ir->CreateSExt(m_ir->CreateFCmpUNO(arg1, arg2), get_type()); + + // Pattern for PTEST + acc = m_ir->CreateBitCast(acc, get_type()); + + llvm::Value* elem = m_ir->CreateExtractElement(acc, u64{0}); + + for (u64 i = 1; i < 2; i++) + { + elem = m_ir->CreateOr(elem, m_ir->CreateExtractElement(acc, i)); + } + + // Compare result with zero + const auto cond_nans = m_ir->CreateICmpEQ(elem, m_ir->getInt64(0)); + condition = m_ir->CreateAnd(cond_nans, condition); + prev_i = umax; + } + + if (is_last) + { + break; + } + } + + // TODO: Optimze so constant evalatuated cases will not be checked + const bool is_cond_need_runtime_verify = compare == ICmpInst::ICMP_NE && (!m_reduced_loop_info->cond_val_is_immediate || m_reduced_loop_info->cond_val_incr % 2 == 0); + + if (is_cond_need_runtime_verify) + { + // Verify that it is actually possible to finish the loop and it is not an infinite loop + + // First: create a mask of the bits that definitely do not change between iterations (0 results in umax which is accurate here) + const auto no_change_bits = m_ir->CreateAnd(m_ir->CreateNot(cond_val_incr), m_ir->CreateSub(cond_val_incr, m_ir->getIntN(type_bits, 1))); + + // Compare that when the mask applied to both the result and the original value is the same + const auto cond_verify = m_ir->CreateICmpEQ(m_ir->CreateAnd(loop_dictator_after_adjustment, no_change_bits), m_ir->CreateAnd(loop_argument, no_change_bits)); + + // Amend condition + condition = m_ir->CreateAnd(cond_verify, condition); + } + } + + m_ir->CreateCondBr(condition, optimization_block, block_optimization_next); + }; + + if (is_reduced_loop) + { + for (u32 i = 0; i < s_reg_max; i++) + { + llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? 
get_type() : get_reg_type(i); + + if (i < m_reduced_loop_info->loop_dicts.size() && (m_reduced_loop_info->loop_dicts.test(i) || m_reduced_loop_info->loop_writes.test(i))) + { + // Connect registers which are used and then modified by the block + auto value = m_block->reg[i]; + + if (!value || value->getType() != type) + { + value = get_reg_fixed(i, type); + } + + reduced_loop_init_regs[i] = value; + } + else if (i < m_reduced_loop_info->loop_dicts.size() && m_reduced_loop_info->loop_args.test(i)) + { + // Load registers used as arguments of the loop + if (!m_block->reg[i]) + { + m_block->reg[i] = get_reg_fixed(i, type); + } + } + } + + const auto prev_insert_block = m_ir->GetInsertBlock(); + + block_optimization_phi_parent = prev_insert_block; + + make_reduced_loop_condition(block_optimization_inner, false); + m_ir->SetInsertPoint(block_optimization_inner); + + for (u32 i = 0; i < s_reg_max; i++) + { + if (auto init_val = reduced_loop_init_regs[i]) + { + llvm::Type* type = g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate && bb.reg_maybe_xf[i] ? 
get_type() : get_reg_type(i); + + const auto _phi = m_ir->CreatePHI(init_val->getType(), 2, fmt::format("reduced_0x%05x_r%u", baddr, i)); + _phi->addIncoming(init_val, prev_insert_block); + + reduced_loop_phi_nodes[i] = _phi; + m_block->reg[i] = _phi; + } + } + + m_block->block_wide_reg_store_elimination = true; + } + + // Instructions emitting optimizations: Loop iteration is not the last + m_pos = baddr; + + // Masked opcodde -> register modification times + std::map>> masked_times; + std::array reg_states{}; + u32 s_reg_state{1}; + + for (u32 iteration_emit = 0; is_reduced_loop; m_pos += 4) + { + if (m_pos != baddr && m_block_info[m_pos / 4] && m_reduced_loop_info->loop_end < m_pos) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(1) too early at 0x%x", m_pos); + } + + if (!(m_pos >= start && m_pos < end)) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(2) too early at 0x%x", m_pos); + } + + if (m_ir->GetInsertBlock()->getTerminator()) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: Exit(3) too early at 0x%x", m_pos); + } + + const u32 op = std::bit_cast>(func.data[(m_pos - start) / 4]); + const auto itype = g_spu_itype.decode(op); + + if (itype & spu_itype::branch) + { + bool branches_back = false; + + for (u32 dest : op_branch_targets(m_pos, spu_opcode_t{op})) + { + branches_back = branches_back || dest == baddr; + } + + if (!branches_back) + { + continue; + } + + iteration_emit++; + + if (iteration_emit < 2) + { + // Reset mpos (with fixup) + m_pos = baddr - 4; + continue; + } + + // Optimization block body + const auto block_inner = m_ir->GetInsertBlock(); + + std::array block_reg_results{}; + + for (u32 i = 0; i < s_reg_max; i++) + { + if (auto phi = reduced_loop_phi_nodes[i]) + { + const auto type = phi->getType() == get_type() ? 
get_type() : get_reg_type(i); + block_reg_results[i] = ensure(get_reg_fixed(i, type)); + phi->addIncoming(block_reg_results[i], block_inner); + } + } + + ensure(!!m_block->reg[m_reduced_loop_info->cond_val_register_idx]); + make_reduced_loop_condition(block_optimization_inner, true); + m_ir->SetInsertPoint(block_optimization_next); + m_block->block_wide_reg_store_elimination = false; + + for (u32 i = 0; i < s_reg_max; i++) + { + if (const auto loop_value = block_reg_results[i]) + { + const auto phi = m_ir->CreatePHI(loop_value->getType(), 2, fmt::format("redres_0x%05x_r%u", baddr, i)); + + phi->addIncoming(loop_value, block_inner); + phi->addIncoming(reduced_loop_init_regs[i], block_optimization_phi_parent); + m_block->reg[i] = phi; + } + } + + + break; + } + + if (!op) + { + fmt::throw_exception("LLVM: Reduced Loop Pattern: [%s] Unexpected fallthrough to 0x%x (chunk=0x%x, entry=0x%x)", m_hash, m_pos, m_entry, m_function_queue[0]); + } + + const auto [reg_rt, reg_access, masked_op] = op_register_targets(m_pos, spu_opcode_t{op}); + + bool erased = false; + + const auto inst_times = std::array{reg_states[reg_access[0]], reg_states[reg_access[1]], reg_states[reg_access[2]]}; + + // Try to reuse the reult of the previous iteration (if argumnent registers have not been modified) + if (reg_rt < 128 && masked_times.count(masked_op) && masked_times[masked_op].first && m_inst_attrs[(m_pos - start) / 4] == inst_attr::none) + { + auto times = masked_times[masked_op].second; + + bool is_ok = true; + for (u32 regi = 0; regi < 3; regi++) + { + if (reg_access[regi] < 128 && times[regi] != inst_times[regi]) + { + is_ok = false; + } + } + + if (is_ok) + { + m_block->reg[reg_rt] = masked_times[masked_op].first; + erased = true; + } + } + + if (reg_rt < 128) + { + reg_states[reg_rt] = s_reg_state++; + } + + if (erased) + { + continue; + } + + m_next_op = 0; + + masked_times[masked_op] = {}; + + switch (m_inst_attrs[(m_pos - start) / 4]) + { + case inst_attr::putllc0: + { + 
putllc0_pattern(func, m_patterns.at(m_pos - start).info); + continue; + } + case inst_attr::putllc16: + { + putllc16_pattern(func, m_patterns.at(m_pos - start).info); + continue; + } + case inst_attr::omit: + { + // TODO + continue; + } + default: break; + } + + // Execute recompiler function (TODO) + (this->*decode(op))({op}); + + if (reg_rt < 128 && itype & spu_itype::pure && reg_rt != reg_access[0] && reg_rt != reg_access[1] && reg_rt != reg_access[2]) + { + masked_times[masked_op] = {ensure(m_block->reg[reg_rt]), inst_times}; + } + } + + for (u32 i = 0; i < s_reg_max; i++) + { + if (m_reduced_loop_info && m_reduced_loop_info->loop_may_update.test(i)) + { + m_block->reg[i] = m_block->reg_save_and_restore[i]; + } + } + + m_reduced_loop_info = nullptr; + // Emit instructions for (m_pos = baddr; m_pos >= start && m_pos < end && !m_ir->GetInsertBlock()->getTerminator(); m_pos += 4) { @@ -2640,6 +3269,8 @@ public: m_ir->SetInsertPoint(ins); auto si = llvm::cast(m_ir->Insert(bs->clone())); + spu_context_attr(si); + if (b2->store[i] == nullptr) { // Protect against backwards ordering now @@ -2705,7 +3336,7 @@ public: continue; m_ir->SetInsertPoint(ins); - m_ir->Insert(bs->clone()); + m_ir->Insert(spu_context_attr(bs->clone())); } bs->eraseFromParent(); @@ -2955,7 +3586,7 @@ public: // Create interpreter table const auto if_type = get_ftype(); - m_function_table = new GlobalVariable(*m_module, ArrayType::get(m_ir->getPtrTy(), 1ull << m_interp_magn), true, GlobalValue::InternalLinkage, nullptr); + m_function_table = new GlobalVariable(*m_module, ArrayType::get(get_type(), 1ull << m_interp_magn), true, GlobalValue::InternalLinkage, nullptr); init_luts(); @@ -2999,7 +3630,7 @@ public: m_ir->CreateStore(m_ir->CreateCall(get_intrinsic(Intrinsic::read_register), {rsp_name}), native_sp); // Decode (shift) and load function pointer - const auto first = m_ir->CreateLoad(m_ir->getPtrTy(), m_ir->CreateGEP(m_ir->getPtrTy(), m_interp_table, m_ir->CreateLShr(m_interp_op, 32u - 
m_interp_magn))); + const auto first = m_ir->CreateLoad(get_type(), m_ir->CreateGEP(get_type(), m_interp_table, m_ir->CreateLShr(m_interp_op, 32u - m_interp_magn))); const auto call0 = m_ir->CreateCall(if_type, first, {m_lsptr, m_thread, m_interp_pc, m_interp_op, m_interp_table, m_interp_7f0, m_interp_regs}); call0->setCallingConv(CallingConv::GHC); m_ir->CreateRetVoid(); @@ -3143,7 +3774,7 @@ public: const auto next_pc = itype & spu_itype::branch ? m_interp_pc : m_interp_pc_next; const auto be32_op = m_ir->CreateLoad(get_type(), _ptr(m_lsptr, m_ir->CreateZExt(next_pc, get_type()))); const auto next_op = m_ir->CreateCall(get_intrinsic(Intrinsic::bswap), {be32_op}); - const auto next_if = m_ir->CreateLoad(m_ir->getPtrTy(), m_ir->CreateGEP(m_ir->getPtrTy(), m_interp_table, m_ir->CreateLShr(next_op, 32u - m_interp_magn))); + const auto next_if = m_ir->CreateLoad(get_type(), m_ir->CreateGEP(get_type(), m_interp_table, m_ir->CreateLShr(next_op, 32u - m_interp_magn))); llvm::cast(next_if)->setVolatile(true); if (!(itype & spu_itype::branch)) @@ -3268,7 +3899,7 @@ public: } } - m_function_table->setInitializer(ConstantArray::get(ArrayType::get(m_ir->getPtrTy(), 1ull << m_interp_magn), iptrs)); + m_function_table->setInitializer(ConstantArray::get(ArrayType::get(get_type(), 1ull << m_interp_magn), iptrs)); m_function_table = nullptr; for (auto& f : *_module) @@ -5756,11 +6387,59 @@ public: void CEQI(spu_opcode_t op) { + // CEQHI following a comparison instruction (compare-equal negation) + if (!m_interp_magn && !op.si10 && match_vr(op.ra, [&](auto c, auto MT) + { + using VT = typename decltype(MT)::type; + using VT_HALF = s16[8]; + + if (auto [ok, a, b] = match_expr(c, bitcast(sext(match() == match())) << 16 >> 16); ok && m_block->block_wide_reg_store_elimination) + { + set_vr(op.rt, bitcast(sext(a != b)) << 16 >> 16); + return true; + } + + if (auto [ok, a, b] = match_expr(c, sext(MT == MT)); ok) + { + set_vr(op.rt, sext(a != b)); + return true; + } + + return false; + 
})) + { + return; + } + set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); } void CEQHI(spu_opcode_t op) { + // CEQHI following a comparison instruction (compare-equal negation) + if (!m_interp_magn && !op.si10 && match_vr(op.ra, [&](auto c, auto MT) + { + using VT = typename decltype(MT)::type; + using VT_HALF = s8[16]; + + if (auto [ok, a, b] = match_expr(c, bitcast(sext(match() == match())) << 8 >> 8); ok && m_block->block_wide_reg_store_elimination) + { + set_vr(op.rt, bitcast(sext(a != b)) << 8 >> 8); + return true; + } + + if (auto [ok, a, b] = match_expr(c, sext(match() == match())); ok) + { + set_vr(op.rt, sext(a != b)); + return true; + } + + return false; + })) + { + return; + } + set_vr(op.rt, sext(get_vr(op.ra) == get_imm(op.si10))); } @@ -6337,8 +7016,13 @@ public: return eval(bitcast(min(bitcast(v),splat(0xff7fffff)))); } - value_t clamp_smax(value_t v) + value_t clamp_smax(value_t v, u32 gpr = s_reg_max) { + if (m_reduced_loop_info && gpr < s_reg_max && m_reduced_loop_info->is_gpr_not_NaN_hint(gpr)) + { + return v; + } + if (m_use_avx512) { if (is_input_positive(v)) @@ -6358,16 +7042,6 @@ public: return eval(clamp_positive_smax(clamp_negative_smax(v))); } - // FMA favouring zeros - value_t xmuladd(value_t a, value_t b, value_t c) - { - const auto ma = eval(sext(fcmp_uno(a != fsplat(0.)))); - const auto mb = eval(sext(fcmp_uno(b != fsplat(0.)))); - const auto ca = eval(bitcast(bitcast(a) & mb)); - const auto cb = eval(bitcast(bitcast(b) & ma)); - return eval(fmuladd(ca, cb, c)); - } - // Checks for postive and negative zero, or Denormal (treated as zero) // If sign is +-1 check equality againts all sign bits bool is_spu_float_zero(v128 a, int sign = 0) @@ -6454,12 +7128,6 @@ public: set_vr(op.rt, frsqest(get_vr(op.ra))); } - template - static llvm_calli fcgt(T&& a, U&& b) - { - return {"spu_fcgt", {std::forward(a), std::forward(b)}}; - } - void FCGT(spu_opcode_t op) { if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) @@ -6468,11 
+7136,8 @@ public: return; } - register_intrinsic("spu_fcgt", [&](llvm::CallInst* ci) + const auto fcgt = [&](value_t a, value_t b) { - const auto a = value(ci->getOperand(0)); - const auto b = value(ci->getOperand(1)); - const value_t ab[2]{a, b}; std::bitset<2> safe_int_compare(0); @@ -6504,6 +7169,16 @@ public: } } + if (m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra)) + { + safe_finite_compare.set(0); + } + + if (m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb)) + { + safe_finite_compare.set(1); + } + if (safe_int_compare.any()) { return eval(sext(bitcast(a) > bitcast(b))); @@ -6523,7 +7198,7 @@ public: const auto bi = eval(bitcast(b)); return eval(sext(fcmp_uno(a != b) & select((ai & bi) >= 0, ai > bi, ai < bi))); - }); + }; set_vr(op.rt, fcgt(get_vr(op.ra), get_vr(op.rb))); } @@ -6620,12 +7295,6 @@ public: set_vr(op.rt, fa(get_vr(op.ra), get_vr(op.rb))); } - template - static llvm_calli fs(T&& a, U&& b) - { - return {"spu_fs", {std::forward(a), std::forward(b)}}; - } - void FS(spu_opcode_t op) { if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::accurate) @@ -6634,29 +7303,26 @@ public: return; } - register_intrinsic("spu_fs", [&](llvm::CallInst* ci) + const auto fs = [&](value_t a, value_t b) { - const auto a = value(ci->getOperand(0)); - const auto b = value(ci->getOperand(1)); - if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { - const auto bc = clamp_smax(b); // for #4478 + const auto bc = clamp_smax(b, op.rb); // for #4478 return eval(a - bc); } else { return eval(a - b); } - }); + }; set_vr(op.rt, fs(get_vr(op.ra), get_vr(op.rb))); } - template - static llvm_calli fm(T&& a, U&& b) + template , typename W = llvm_place_stealer_t> + static auto fm(T&& a, U&& b, V&& a_not_nan = match_stealer(), W&& b_not_nan = match_stealer()) { - return llvm_calli{"spu_fm", {std::forward(a), std::forward(b)}}.set_order_equality_hint(1, 1); + return llvm_calli{"spu_fm", {std::forward(a), std::forward(b), 
a_not_nan, b_not_nan}}.set_order_equality_hint(1, 1, 2, 3); } void FM(spu_opcode_t op) @@ -6671,14 +7337,27 @@ public: { const auto a = value(ci->getOperand(0)); const auto b = value(ci->getOperand(1)); + const bool a_notnan = llvm::cast(ci->getOperand(2))->getZExtValue() != 0; + const bool b_notnan = llvm::cast(ci->getOperand(3))->getZExtValue() != 0; if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { - if (a.value == b.value) + if (a.value == b.value || (a_notnan && b_notnan)) { return eval(a * b); } + if (a_notnan) + { + const auto ma = sext(fcmp_uno(a != fsplat(0.))); + return eval(bitcast(bitcast(a * b) & ma)); + } + else if (b_notnan) + { + const auto mb = sext(fcmp_uno(b != fsplat(0.))); + return eval(bitcast(bitcast(a * b) & mb)); + } + const auto ma = sext(fcmp_uno(a != fsplat(0.))); const auto mb = sext(fcmp_uno(b != fsplat(0.))); return eval(bitcast(bitcast(a * b) & ma & mb)); @@ -6689,10 +7368,13 @@ public: } }); + const u32 a_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra) ? 1 : 0; + const u32 b_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb) ? 
1 : 0; + if (op.ra == op.rb && !m_interp_magn) { const auto a = get_vr(op.ra); - set_vr(op.rt, fm(a, a)); + set_vr(op.rt, fm(a, a, splat(a_notnan), splat(a_notnan))); return; } @@ -6731,7 +7413,7 @@ public: } } - set_vr(op.rt, fm(a, b)); + set_vr(op.rt, fm(a, b, splat(a_notnan), splat(b_notnan))); } template @@ -7024,10 +7706,10 @@ public: set_vr(op.rt4, fnms(get_vr(op.ra), get_vr(op.rb), get_vr(op.rc))); } - template - static llvm_calli fma(T&& a, U&& b, V&& c) + template , typename X = llvm_place_stealer_t> + static llvm_calli fma(T&& a, U&& b, V&& c, W&& d = match_stealer(), X&& e = match_stealer()) { - return llvm_calli{"spu_fma", {std::forward(a), std::forward(b), std::forward(c)}}.set_order_equality_hint(1, 1, 0); + return llvm_calli{"spu_fma", {std::forward(a), std::forward(b), std::forward(c), std::forward(d), std::forward(e)}}.set_order_equality_hint(1, 1, 2, 3, 4); } template @@ -7046,14 +7728,35 @@ public: return; } + register_intrinsic("spu_fma", [&](llvm::CallInst* ci) { const auto a = value(ci->getOperand(0)); const auto b = value(ci->getOperand(1)); const auto c = value(ci->getOperand(2)); - + const bool a_notnan = llvm::cast(ci->getOperand(3))->getZExtValue() != 0; + const bool b_notnan = llvm::cast(ci->getOperand(4))->getZExtValue() != 0; + if (g_cfg.core.spu_xfloat_accuracy == xfloat_accuracy::approximate) { + if (a.value == b.value || (a_notnan && b_notnan)) + { + return fma32x4(a, b, c); + } + + if (a_notnan) + { + const auto ma = sext(fcmp_uno(a != fsplat(0.))); + const auto cb = bitcast(bitcast(b) & ma); + return fma32x4(a, eval(cb), c); + } + else if (b_notnan) + { + const auto mb = sext(fcmp_uno(b != fsplat(0.))); + const auto ca = bitcast(bitcast(a) & mb); + return fma32x4(eval(ca), b, c); + } + const auto ma = sext(fcmp_uno(a != fsplat(0.))); const auto mb = sext(fcmp_uno(b != fsplat(0.))); const auto ca = bitcast(bitcast(a) & mb); @@ -7102,6 +7805,9 @@ public: const auto [a, b, c] = get_vrs(op.ra, op.rb, op.rc); static const auto MT = 
match(); + const u32 a_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.ra) ? 1 : 0; + const u32 b_notnan = m_reduced_loop_info && m_reduced_loop_info->is_gpr_not_NaN_hint(op.rb) ? 1 : 0; + auto check_sqrt_pattern_for_float = [&](f32 float_value) -> bool { auto match_fnms = [&](f32 float_value) @@ -7297,7 +8003,13 @@ public: spu_log.todo("[%s:0x%05x] Unmatched spu_rsqrte(c) found in FMA", m_hash, m_pos); } - set_vr(op.rt4, fma(a, b, c)); + if (!m_interp_magn && op.ra == op.rb) + { + set_vr(op.rt4, fma(a, a, c, splat(a_notnan), splat(a_notnan))); + return; + } + + set_vr(op.rt4, fma(a, b, c, splat(a_notnan), splat(b_notnan))); } template @@ -7720,13 +8432,13 @@ public: void make_store_ls(value_t addr, value_t data) { const auto bswapped = byteswap(data); - m_ir->CreateStore(bswapped.eval(m_ir), _ptr(m_lsptr, addr.value)); + spu_mem_attr(m_ir->CreateStore(bswapped.eval(m_ir), _ptr(m_lsptr, addr.value))); } auto make_load_ls(value_t addr) { value_t data; - data.value = m_ir->CreateLoad(get_type(), _ptr(m_lsptr, addr.value)); + data.value = spu_mem_attr(m_ir->CreateLoad(get_type(), _ptr(m_lsptr, addr.value))); return byteswap(data); } @@ -7741,12 +8453,18 @@ public: { data._u32[3] %= SPU_LS_SIZE; - if (data._u32[3] % 0x10 == 0) + if (const u32 remainder = data._u32[3] % 0x10; remainder == 0) { value_t addr = eval(splat(data._u32[3]) + zext(extract(pair.second, 3) & 0x3fff0)); make_store_ls(addr, get_vr(op.rt)); return; } + else + { + value_t addr = eval(splat(data._u32[3] - remainder) + zext((extract(pair.second, 3) + remainder) & 0x3fff0)); + make_store_ls(addr, get_vr(op.rt)); + return; + } } } @@ -7765,12 +8483,18 @@ public: { data._u32[3] %= SPU_LS_SIZE; - if (data._u32[3] % 0x10 == 0) + if (const u32 remainder = data._u32[3] % 0x10; remainder == 0) { value_t addr = eval(splat(data._u32[3]) + zext(extract(pair.second, 3) & 0x3fff0)); set_vr(op.rt, make_load_ls(addr)); return; } + else + { + value_t addr = eval(splat(data._u32[3] - 
remainder) + zext((extract(pair.second, 3) + remainder) & 0x3fff0)); + set_vr(op.rt, make_load_ls(addr)); + return; + } } } @@ -7824,13 +8548,51 @@ public: } } - value_t addr = eval(zext(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm(op.si10) << 4)); + const auto a = get_vr(op.ra); + + if (auto [ok, x, y] = match_expr(a, match() + match()); ok) + { + if (auto [ok1, data] = get_const_vector(x.value, m_pos + 1); ok1 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(y, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + make_store_ls(addr, get_vr(op.rt)); + return; + } + + if (auto [ok2, data] = get_const_vector(y.value, m_pos + 2); ok2 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(x, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + make_store_ls(addr, get_vr(op.rt)); + return; + } + } + + value_t addr = eval(zext(extract(a, 3) & 0x3fff0) + (get_imm(op.si10) << 4)); make_store_ls(addr, get_vr(op.rt)); } void LQD(spu_opcode_t op) { - value_t addr = eval(zext(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm(op.si10) << 4)); + const auto a = get_vr(op.ra); + + if (auto [ok, x1, y1] = match_expr(a, match() + match()); ok) + { + if (auto [ok1, data] = get_const_vector(x1.value, m_pos + 1); ok1 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(y1, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + set_vr(op.rt, make_load_ls(addr)); + return; + } + + if (auto [ok2, data] = get_const_vector(y1.value, m_pos + 2); ok2 && data._u32[3] % 16 == 0) + { + value_t addr = eval(zext(extract(x1, 3) & 0x3fff0) + ((get_imm(op.si10) << 4) + splat(data._u32[3] & 0x3fff0))); + set_vr(op.rt, make_load_ls(addr)); + return; + } + } + + value_t addr = eval(zext(extract(a, 3) & 0x3fff0) + (get_imm(op.si10) << 4)); set_vr(op.rt, make_load_ls(addr)); } diff --git a/rpcs3/Emu/Cell/SPUOpcodes.h b/rpcs3/Emu/Cell/SPUOpcodes.h index cea4513e3f..42d76792a2 100644 --- 
a/rpcs3/Emu/Cell/SPUOpcodes.h +++ b/rpcs3/Emu/Cell/SPUOpcodes.h @@ -24,6 +24,20 @@ union spu_opcode_t bf_t i16; // 9..24 bf_t si16; // 9..24, signed bf_t i18; // 7..24 + + // For 16-bit instructions in the context of 32-bits + u32 duplicate_si10() const + { + const u32 _16 = static_cast(static_cast(si10)); + return (_16 << 16) | _16; + } + + // For 8-bit instructions in the context of 32-bits + u32 duplicate_duplicate_si10() const + { + const u32 _8 = static_cast(si10 & 0xff); + return (_8 << 24) | (_8 << 16) | (_8 << 8) | _8; + } }; constexpr u32 spu_branch_target(u32 pc, u32 imm = 0) @@ -42,6 +56,7 @@ constexpr u32 spu_decode(u32 inst) } std::array op_branch_targets(u32 pc, spu_opcode_t op); +std::tuple, u32> op_register_targets(u32 /*pc*/, spu_opcode_t op); // SPU decoder object. D provides functions. T is function pointer type returned. template diff --git a/rpcs3/Emu/Cell/SPURecompiler.h b/rpcs3/Emu/Cell/SPURecompiler.h index 57d842e69d..6c629571d9 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.h +++ b/rpcs3/Emu/Cell/SPURecompiler.h @@ -4,12 +4,24 @@ #include "Utilities/lockless.h" #include "Utilities/address_range.h" #include "SPUThread.h" +#include "SPUAnalyser.h" #include #include #include #include #include +// std::bitset +template + requires requires(std::remove_cvref_t& x, T&& y) { x.count(); x.test(y); x.flip(y); } +[[nodiscard]] constexpr bool at32(CT&& container, T&& index, std::source_location src_loc = std::source_location::current()) +{ + const usz csv = container.size(); + if (csv <= std::forward(index)) [[unlikely]] + fmt::raw_range_error(src_loc, format_object_simplified(index), csv); + return container[std::forward(index)]; +} + // Helper class class spu_cache { @@ -201,6 +213,25 @@ public: __bitset_enum_max }; + enum compare_direction : u32 + { + CMP_TURNAROUND_FLAG = 0x1, + CMP_NEGATE_FLAG = 0x100, + CMP_SLESS = 0, + CMP_SGREATER = CMP_SLESS | CMP_TURNAROUND_FLAG, + CMP_EQUAL, + CMP_EQUAL2 = CMP_EQUAL | CMP_TURNAROUND_FLAG, + CMP_LLESS, + 
CMP_LGREATER = CMP_LLESS | CMP_TURNAROUND_FLAG, + CMP_SGREATER_EQUAL = CMP_SLESS | CMP_NEGATE_FLAG, + CMP_SLOWER_EQUAL = CMP_SGREATER | CMP_NEGATE_FLAG, + CMP_NOT_EQUAL = CMP_EQUAL | CMP_NEGATE_FLAG, + CMP_NOT_EQUAL2 = CMP_NOT_EQUAL | CMP_TURNAROUND_FLAG, + CMP_LGREATER_EQUAL = CMP_LLESS | CMP_NEGATE_FLAG, + CMP_LLOWER_EQUAL = CMP_LGREATER | CMP_NEGATE_FLAG, + CMP_UNKNOWN, + }; + struct reg_state_t { bs_t flag{+vf::is_null}; @@ -273,6 +304,406 @@ public: static u32 alloc_tag(bool reset = false) noexcept; }; + struct reduced_loop_t + { + bool active = false; // Single block loop detected + bool failed = false; + u32 loop_pc = SPU_LS_SIZE; + u32 loop_end = SPU_LS_SIZE; + + // False: single-block loop + // True: loop with a trailing block of aftermath (iteration update) stuff (like for (u32 i = 0; i < 10; /*update*/ i++)) + bool is_two_block_loop = false; + bool has_cond_state = false; + + // Loop stay-in state requirement + u64 cond_val_mask = umax; + u64 cond_val_min = 0; + u64 cond_val_size = 0; + compare_direction cond_val_compare{}; + u64 cond_val_incr = 0; + bool cond_val_incr_is_immediate = false; + u64 cond_val_register_argument_idx = umax; + u64 cond_val_register_idx = umax; + bool cond_val_incr_before_cond = false; + bool cond_val_incr_before_cond_taken_in_account = false; + bool cond_val_is_immediate = false; + + // Loop attributes + bool is_constant_expression = false; + bool is_secret = false; + + struct supplemental_condition_t + { + u64 immediate_value = umax; + u64 type_size = 0; + compare_direction val_compare{}; + }; + + // Supplemental loop condition: + // Inner conditions that depend on extrnal values (not produced inside the loop) + // all should evaluate to false in order for the optimization to work (at the moment) + // So succeeding can be treated linearly + u64 expected_sup_conds = 0; + u64 current_sup_conds_index = 0; + std::vector sup_conds; + + void take_cond_val_incr_before_cond_into_account() + { + if (cond_val_is_immediate && 
cond_val_incr_before_cond_taken_in_account && !cond_val_incr_before_cond_taken_in_account) + { + cond_val_min -= cond_val_incr; + cond_val_min &= cond_val_mask; + cond_val_incr_before_cond_taken_in_account = true; + } + } + + std::bitset loop_args; + std::bitset loop_dicts; + std::bitset loop_writes; + std::bitset loop_may_update; + std::bitset gpr_not_nans; + + struct origin_t + { + std::bitset regs{}; + u32 modified = 0; + spu_itype_t mod1_type = spu_itype::UNK; + spu_itype_t mod2_type = spu_itype::UNK; + spu_itype_t mod3_type = spu_itype::UNK; + u32 IMM = 0; + +private: + // Internal, please access using fixed order + spu_itype_t access_type(u32 i) const + { + if (i > modified) + { + return spu_itype::UNK; + } + + switch (i) + { + case 1: return mod1_type; + case 2: return mod2_type; + case 3: return mod3_type; + default: return spu_itype::UNK; + } + + return spu_itype::UNK; + } +public: + + spu_itype_t reverse1_type() + { + return access_type(modified); + } + + spu_itype_t reverse2_type() + { + return access_type(modified - 1); + } + + spu_itype_t reverse3_type() + { + return access_type(modified - 2); + } + + origin_t& join_with_this(const origin_t& rhs) + { + regs |= rhs.regs; + return *this; + } + + origin_t& join_with_this(u32 rhs) + { + regs.set(rhs); + return *this; + } + + origin_t& add_register_origin(u32 reg_val) + { + regs.set(reg_val); + return *this; + } + + bool is_single_reg_access(u32 reg_val) const + { + if (!modified) + { + return true; + } + + return regs.count() == 1 && ::at32(regs, reg_val); + } + + bool is_loop_dictator(u32 reg_val, bool test_predictable = false, bool should_predictable = true) const + { + if (!modified) + { + return false; + } + + if (regs.count() >= 1 && ::at32(regs, reg_val)) + { + if (!test_predictable) + { + return true; + } + + if (modified > 1) + { + return should_predictable ^ true; + } + + switch (mod1_type) + { + case spu_itype::A: + { + if (regs.count() == 2) + { + return should_predictable; + } + + return 
should_predictable ^ true; + } + case spu_itype::AI: + case spu_itype::AHI: + { + if (IMM && regs.count() == 1) + { + return should_predictable; + } + + return should_predictable ^ true; + } + default: break; + } + + return should_predictable ^ true; + } + + return false; + } + + bool is_predictable_loop_dictator(u32 reg_val) const + { + return is_loop_dictator(reg_val, true, true); + } + + bool is_non_predictable_loop_dictator(u32 reg_val) const + { + return is_loop_dictator(reg_val, true, false); + } + + bool is_null(u32 reg_val) const noexcept + { + if (modified) + { + return false; + } + + if (regs.count() - (::at32(regs, reg_val) ? 1 : 0)) + { + return false; + } + + return true; + } + + origin_t& add_instruction_modifier(spu_itype_t inst_type, u32 imm = 0) + { + if (inst_type == spu_itype::UNK) + { + mod1_type = spu_itype::UNK; + mod2_type = spu_itype::UNK; + mod3_type = spu_itype::UNK; + IMM = umax; + modified = 1; + return *this; + } + + if (modified == 1) + { + if (modified == 3) + { + mod1_type = spu_itype::UNK; + mod2_type = spu_itype::UNK; + mod3_type = spu_itype::UNK; + IMM = umax; + modified = 1; + return *this; + } + + bool is_ok = false; + switch (inst_type) + { + case spu_itype::XSBH: + { + const auto prev_type = modified == 1 ? mod1_type : mod2_type; + is_ok &= mod1_type == spu_itype::CEQB || mod1_type == spu_itype::CEQBI || mod1_type == spu_itype::CGTB || mod1_type == spu_itype::CGTBI || mod1_type == spu_itype::CLGTB || mod1_type == spu_itype::CLGTBI; + break; + } + case spu_itype::ANDI: + { + const auto prev_type = modified == 1 ? 
mod1_type : mod2_type; + is_ok &= mod1_type == spu_itype::CEQB || mod1_type == spu_itype::CEQBI || mod1_type == spu_itype::CGTB || mod1_type == spu_itype::CGTBI || mod1_type == spu_itype::CLGTB || mod1_type == spu_itype::CLGTBI; + is_ok &= (spu_opcode_t{imm}.si10 & 0xff) == 0xff; + break; + } + case spu_itype::CEQ: + case spu_itype::CEQH: + case spu_itype::CEQB: + case spu_itype::CGT: + case spu_itype::CGTH: + case spu_itype::CGTB: + case spu_itype::CLGT: + case spu_itype::CLGTH: + case spu_itype::CLGTB: + { + is_ok = modified == 1 && (mod1_type == spu_itype::AI || mod1_type == spu_itype::AHI); + IMM = imm; + break; + } + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + is_ok = modified == 1 && (mod1_type == spu_itype::AI || mod1_type == spu_itype::AHI); + IMM = spu_opcode_t{imm}.si10; + break; + } + } + + if (!is_ok) + { + mod1_type = spu_itype::UNK; + mod2_type = spu_itype::UNK; + mod3_type = spu_itype::UNK; + IMM = umax; + modified = 1; + return *this; + } + + (modified == 1 ? 
mod2_type : mod3_type) = inst_type; + modified++; + return *this; + } + + mod1_type = inst_type; + modified = 1; + + switch (inst_type) + { + case spu_itype::AHI: + { + IMM = spu_opcode_t{imm}.duplicate_si10(); + return *this; + } + case spu_itype::AI: + case spu_itype::ORI: + case spu_itype::XORI: + case spu_itype::ANDI: + + case spu_itype::CEQI: + case spu_itype::CEQHI: + case spu_itype::CEQBI: + case spu_itype::CGTI: + case spu_itype::CGTHI: + case spu_itype::CGTBI: + case spu_itype::CLGTI: + case spu_itype::CLGTHI: + case spu_itype::CLGTBI: + { + IMM = spu_opcode_t{imm}.si10; + return *this; + } + case spu_itype::ILA: + { + IMM = spu_opcode_t{imm}.i18; + return *this; + } + case spu_itype::IOHL: + case spu_itype::ILH: + case spu_itype::ILHU: + { + IMM = spu_opcode_t{imm}.i16; + return *this; + } + default: + { + IMM = imm; + break; + } + } + + return *this; + } + }; + + static origin_t make_reg(u32 reg_val) noexcept + { + origin_t org{}; + org.add_register_origin(reg_val); + return org; + } + + const origin_t* find_reg(u32 reg_val) const noexcept + { + for (auto& pair : regs) + { + if (pair.first == reg_val) + { + return &pair.second; + } + } + + return nullptr; + } + + origin_t* find_reg(u32 reg_val) noexcept + { + return const_cast(std::as_const(*this).find_reg(reg_val)); + } + + bool is_reg_null(u32 reg_val) const noexcept + { + if (const auto reg_found = find_reg(reg_val)) + { + return reg_found->is_null(reg_val); + } + + return true; + } + + bool is_gpr_not_NaN_hint(u32 i) const noexcept + { + return ::at32(gpr_not_nans, i); + } + + origin_t get_reg(u32 reg_val) noexcept + { + const auto org = find_reg(reg_val); + return org ? 
*org : regs.emplace_back(reg_val, std::remove_reference_t{}).second; + } + + std::vector> regs; + + // Return old state for error reporting + reduced_loop_t discard() + { + const reduced_loop_t old = *this; + *this = reduced_loop_t{}; + return old; + } + }; + protected: spu_runtime* m_spurt{}; @@ -326,8 +757,14 @@ protected: // Set if the initial register value in this block may be xfloat std::bitset reg_maybe_xf{}; - // Bit mask of the registers used (before modified) - std::bitset reg_use{}; + // Set if register is used in floating pont instruction + std::bitset reg_maybe_float{}; + + // Set if register is used as shuffle mask + std::bitset reg_maybe_shuffle_mask{}; + + // Number of times registers are used (before modified) + std::array reg_use{}; // Bit mask of the trivial (u32 x 4) constant value resulting in this block std::bitset reg_const{}; @@ -391,18 +828,23 @@ protected: putllc16, putllc0, rchcnt_loop, + reduced_loop, }; std::vector m_inst_attrs; struct pattern_info { - u64 info; + // Info via integral + u64 info{}; + + // Info via additional erased-typed pointer + std::shared_ptr info_ptr; }; - std::unordered_map m_patterns; + std::map m_patterns; - void add_pattern(inst_attr attr, u32 start, u64 info); + void add_pattern(inst_attr attr, u32 start, u64 info, std::shared_ptr info_ptr = nullptr); private: // For private use @@ -435,7 +877,7 @@ public: spu_program analyse(const be_t* ls, u32 entry_point, std::map>* out_target_list = nullptr); // Print analyser internal state - void dump(const spu_program& result, std::string& out); + void dump(const spu_program& result, std::string& out, u32 block_min = 0, u32 block_max = SPU_LS_SIZE); // Get SPU Runtime spu_runtime& get_runtime() diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index b9a77d7696..60e0f99cca 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -495,7 +495,8 @@ void do_cell_atomic_128_store(u32 addr, const void* to_write); extern 
thread_local u64 g_tls_fault_spu; -const spu_decoder s_spu_itype; +const extern spu_decoder g_spu_itype; +const extern spu_decoder g_spu_iflag; namespace vm { @@ -598,7 +599,7 @@ std::array op_branch_targets(u32 pc, spu_opcode_t op) { std::array res{spu_branch_target(pc + 4), umax}; - switch (const auto type = s_spu_itype.decode(op.opcode)) + switch (const auto type = g_spu_itype.decode(op.opcode)) { case spu_itype::BR: case spu_itype::BRA: @@ -639,6 +640,54 @@ std::array op_branch_targets(u32 pc, spu_opcode_t op) return res; } +std::tuple, u32> op_register_targets(u32 /*pc*/, spu_opcode_t op) +{ + std::tuple, u32> result{u32{umax}, std::array{128, 128, 128}, op.opcode}; + + const auto type = g_spu_itype.decode(op.opcode); + + if (type & spu_itype::zregmod) + { + std::get<2>(result) = 0; + return result; + } + + std::get<0>(result) = type & spu_itype::_quadrop ? op.rt4 : op.rt; + + spu_opcode_t op_masked = op; + + if (type & spu_itype::_quadrop) + { + op_masked.rt4 = 0; + } + else + { + op_masked.rt = 0; + } + + std::get<2>(result) = op_masked.opcode; + + if (auto iflags = g_spu_iflag.decode(op.opcode)) + { + if (+iflags & +spu_iflag::use_ra) + { + std::get<1>(result)[0] = op.ra; + } + + if (+iflags & +spu_iflag::use_rb) + { + std::get<1>(result)[1] = op.rb; + } + + if (+iflags & +spu_iflag::use_rc) + { + std::get<1>(result)[2] = op.rc; + } + } + + return result; +} + void spu_int_ctrl_t::set(u64 ints) { // leave only enabled interrupts @@ -988,7 +1037,7 @@ std::vector> spu_thread::dump_callstack_list() const passed[i / 4] = true; const spu_opcode_t op{_ref(i)}; - const auto type = s_spu_itype.decode(op.opcode); + const auto type = g_spu_itype.decode(op.opcode); if (start == 0 && type == spu_itype::STQD && op.ra == 1u && op.rt == 0u) { @@ -1090,11 +1139,62 @@ std::vector> spu_thread::dump_callstack_list() const return call_stack_list; } -std::string spu_thread::dump_misc() const +void spu_thread::dump_misc(std::string& ret, std::any& custom_data) const { - 
std::string ret = cpu_thread::dump_misc(); + cpu_thread::dump_misc(ret, custom_data); - fmt::append(ret, "Block Weight: %u (Retreats: %u)", block_counter, block_failure); + struct dump_misc_data_t + { + u32 cpu_id = umax; + u64 last_read_time = umax; + u64 last_block_counter = umax; + u64 update_count = 0; + + std::pair update(u64 current_block_counter, u64 current_timestamp = get_system_time()) + { + const u64 diff_time = current_timestamp <= last_read_time ? 0 : current_timestamp - last_read_time; + const u64 diff_block = current_block_counter <= last_block_counter ? 0 : current_block_counter - last_block_counter; + + if (last_read_time == umax || update_count >= 1000) + { + last_read_time = current_timestamp; + last_block_counter = current_block_counter; + update_count = 0; + } + else if (diff_time >= 100000 && diff_block >= 100) + { + // Update values to measure rate (but not fully so rate can be measured later) + last_read_time += diff_time / 10 * 9; + last_block_counter += diff_block / 10 * 9; + update_count++; + } + + return {diff_time, diff_block}; + } + }; + + dump_misc_data_t* func_data = std::any_cast(&custom_data); + + if (!func_data) + { + custom_data.reset(); + custom_data = std::make_any(); + func_data = ensure(std::any_cast(&custom_data)); + } + + if (func_data->cpu_id != this->id) + { + *func_data = {}; + func_data->cpu_id = this->id; + } + + const u64 current_block_counter = atomic_storage::load(block_counter); + + const auto [diff_time, diff_block] = func_data->update(current_block_counter); + + const u64 rate_of_diff = diff_block ? 
std::max(1, utils::rational_mul(diff_block, 1'000'000, std::max(diff_time, 1))) : 0; + + fmt::append(ret, "Block Weight: log10(%u/second): %.1f (Retreats: %u)", rate_of_diff, std::log10(std::max(rate_of_diff, 10)), block_failure); if (u64 hash = atomic_storage::load(block_hash)) { @@ -1145,8 +1245,6 @@ std::string spu_thread::dump_misc() const break; } } - - return ret; } void spu_thread::cpu_on_stop() @@ -3761,7 +3859,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add const u32 addr0 = spu_branch_target(addr); const spu_opcode_t op{read_from_ptr>(ls_ptr, addr0 - base_addr)}; - const auto type = s_spu_itype.decode(op.opcode); + const auto type = g_spu_itype.decode(op.opcode); if (type == spu_itype::UNK || !op.opcode) { @@ -3907,7 +4005,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add // Test the validity of a single instruction of the optional target // This function can't be too slow and is unlikely to improve results by a great deal const u32 op0 = read_from_ptr>(ls_ptr, route_pc - base_addr); - const spu_itype::type type0 = s_spu_itype.decode(op0); + const spu_itype::type type0 = g_spu_itype.decode(op0); if (type0 == spu_itype::UNK || !op0) { @@ -6878,7 +6976,7 @@ spu_exec_object spu_thread::capture_memory_as_elf(std::span>(all_data, pc0 - 4); // Try to find function entry (if they are placed sequentially search for BI $LR of previous function) - if (!op || op == 0x35000000u || s_spu_itype.decode(op) == spu_itype::UNK) + if (!op || op == 0x35000000u || g_spu_itype.decode(op) == spu_itype::UNK) { if (is_exec_code(pc0, { all_data.data(), SPU_LS_SIZE })) break; diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 9596f7b006..889d6f291c 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -630,7 +630,7 @@ public: virtual void dump_regs(std::string&, std::any& custom_data) const override; virtual std::string dump_callstack() const override; virtual std::vector> 
dump_callstack_list() const override; - virtual std::string dump_misc() const override; + virtual void dump_misc(std::string& ret, std::any& custom_data) const override; virtual void cpu_task() override final; virtual void cpu_on_stop() override; virtual void cpu_return() override; diff --git a/rpcs3/Emu/Cell/lv2/sys_fs.cpp b/rpcs3/Emu/Cell/lv2/sys_fs.cpp index ffd31227a8..2534f6a8c1 100644 --- a/rpcs3/Emu/Cell/lv2/sys_fs.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_fs.cpp @@ -140,6 +140,32 @@ bool verify_mself(const fs::file& mself_file) return true; } +// TODO: May not be thread-safe (or even, process-safe) +bool has_non_directory_components(std::string_view path) +{ + std::string path0{path}; + + while (true) + { + const std::string sub_path = fs::get_parent_dir(path0); + + if (sub_path.size() >= path0.size()) + { + break; + } + + fs::stat_t stat{}; + if (fs::get_stat(sub_path, stat)) + { + return !stat.is_directory; + } + + path0 = std::move(sub_path); + } + + return false; +} + lv2_fs_mount_info_map::lv2_fs_mount_info_map() { for (auto mp = &g_mp_sys_dev_root; mp; mp = mp->next) // Scan and keep track of pre-mounted devices @@ -899,11 +925,18 @@ lv2_file::open_raw_result_t lv2_file::open_raw(const std::string& local_path, s3 switch (auto error = fs::g_tls_error) { + case fs::error::notdir: return {CELL_ENOTDIR}; case fs::error::noent: return {CELL_ENOENT}; - default: sys_fs.error("lv2_file::open(): unknown error %s", error); break; - } + default: + { + if (has_non_directory_components(local_path)) + { + return {CELL_ENOTDIR}; + } - return {CELL_EIO}; + fmt::throw_exception("unknown error %s", error); + } + } } if (flags & CELL_FS_O_MSELF && !verify_mself(file)) @@ -1374,8 +1407,12 @@ error_code sys_fs_opendir(ppu_thread& ppu, vm::cptr path, vm::ptr fd) } default: { - sys_fs.error("sys_fs_opendir(): unknown error %s", error); - return {CELL_EIO, path}; + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } + + fmt::throw_exception("unknown 
error %s", error); } } } @@ -1558,6 +1595,10 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: + { + return { CELL_ENOTDIR, path}; + } case fs::error::noent: { // Try to analyse split file (TODO) @@ -1597,8 +1638,12 @@ error_code sys_fs_stat(ppu_thread& ppu, vm::cptr path, vm::ptr } default: { - sys_fs.error("sys_fs_stat(): unknown error %s", error); - return {CELL_EIO, path}; + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } + + fmt::throw_exception("unknown error %s", error); } } } @@ -1724,6 +1769,10 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: + { + return { CELL_ENOTDIR, path}; + } case fs::error::noent: { return {mp == &g_mp_sys_dev_hdd1 ? sys_fs.warning : sys_fs.error, CELL_ENOENT, path}; @@ -1732,10 +1781,16 @@ error_code sys_fs_mkdir(ppu_thread& ppu, vm::cptr path, s32 mode) { return {sys_fs.warning, CELL_EEXIST, path}; } - default: sys_fs.error("sys_fs_mkdir(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } - return {CELL_EIO, path}; // ??? + fmt::throw_exception("unknown error %s", error); + } + } } sys_fs.notice("sys_fs_mkdir(): directory %s created", path); @@ -1795,12 +1850,19 @@ error_code sys_fs_rename(ppu_thread& ppu, vm::cptr from, vm::cptr to { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: return {CELL_ENOTDIR, from}; case fs::error::noent: return {CELL_ENOENT, from}; case fs::error::exist: return {CELL_EEXIST, to}; - default: sys_fs.error("sys_fs_rename(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_from)) + { + return {CELL_ENOTDIR, from}; + } - return {CELL_EIO, from}; // ??? 
+ fmt::throw_exception("unknown error %s", error); + } + } } sys_fs.notice("sys_fs_rename(): %s renamed to %s", from, to); @@ -1850,12 +1912,19 @@ error_code sys_fs_rmdir(ppu_thread& ppu, vm::cptr path) { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: return {CELL_ENOTDIR, path}; case fs::error::noent: return {CELL_ENOENT, path}; case fs::error::notempty: return {CELL_ENOTEMPTY, path}; - default: sys_fs.error("sys_fs_rmdir(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } - return {CELL_EIO, path}; // ??? + fmt::throw_exception("unknown error %s", error); + } + } } sys_fs.notice("sys_fs_rmdir(): directory %s removed", path); @@ -1906,14 +1975,24 @@ error_code sys_fs_unlink(ppu_thread& ppu, vm::cptr path) { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: + { + return { CELL_ENOTDIR, path }; + } case fs::error::noent: { return {mp == &g_mp_sys_dev_hdd1 ? sys_fs.warning : sys_fs.error, CELL_ENOENT, path}; } - default: sys_fs.error("sys_fs_unlink(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } - return {CELL_EIO, path}; // ??? + fmt::throw_exception("unknown error %s", error); + } + } } sys_fs.notice("sys_fs_unlink(): file %s deleted", path); @@ -2632,10 +2711,8 @@ error_code sys_fs_lseek(ppu_thread& ppu, u32 fd, s64 offset, s32 whence, vm::ptr switch (auto error = fs::g_tls_error) { case fs::error::inval: return {CELL_EINVAL, "fd=%u, offset=0x%x, whence=%d", fd, offset, whence}; - default: sys_fs.error("sys_fs_lseek(): unknown error %s", error); + default: fmt::throw_exception("unknown error %s", error); } - - return CELL_EIO; // ??? 
} lock.unlock(); @@ -2751,10 +2828,16 @@ error_code sys_fs_get_block_size(ppu_thread& ppu, vm::cptr path, vm::ptr(ppu.test_stopped()); @@ -2805,14 +2888,24 @@ error_code sys_fs_truncate(ppu_thread& ppu, vm::cptr path, u64 size) { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: + { + return { CELL_ENOTDIR, path}; + } case fs::error::noent: { return {mp == &g_mp_sys_dev_hdd1 ? sys_fs.warning : sys_fs.error, CELL_ENOENT, path}; } - default: sys_fs.error("sys_fs_truncate(): unknown error %s", error); - } + default: + { + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } - return {CELL_EIO, path}; // ??? + fmt::throw_exception("unknown error %s", error); + } + } } return CELL_OK; @@ -2858,10 +2951,11 @@ error_code sys_fs_ftruncate(ppu_thread& ppu, u32 fd, u64 size) switch (auto error = fs::g_tls_error) { case fs::error::ok: - default: sys_fs.error("sys_fs_ftruncate(): unknown error %s", error); + default: + { + fmt::throw_exception("unknown error %s", error); + } } - - return CELL_EIO; // ??? 
} return CELL_OK; @@ -2907,6 +3001,10 @@ error_code sys_fs_chmod(ppu_thread&, vm::cptr path, s32 mode) { switch (auto error = fs::g_tls_error) { + case fs::error::notdir: + { + return { CELL_ENOTDIR, path}; + } case fs::error::noent: { // Try to locate split files @@ -2920,8 +3018,12 @@ error_code sys_fs_chmod(ppu_thread&, vm::cptr path, s32 mode) } default: { - sys_fs.error("sys_fs_chmod(): unknown error %s", error); - return {CELL_EIO, path}; + if (has_non_directory_components(local_path)) + { + return { CELL_ENOTDIR, path }; + } + + fmt::throw_exception("unknown error %s", error); } } } @@ -3053,14 +3155,24 @@ error_code sys_fs_utime(ppu_thread& ppu, vm::cptr path, vm::cptr g_prx_list { "libcelpenc.sprx", 0 }, { "libddpdec.sprx", 0 }, { "libdivxdec.sprx", 0 }, - { "libdmux.sprx", 0 }, - { "libdmuxpamf.sprx", 0 }, + { "libdmux.sprx", 1 }, + { "libdmuxpamf.sprx", 1 }, { "libdtslbrdec.sprx", 0 }, { "libfiber.sprx", 0 }, { "libfont.sprx", 0 }, diff --git a/rpcs3/Emu/Io/GameTablet.cpp b/rpcs3/Emu/Io/GameTablet.cpp index 4c2330d908..1d0fcf24fe 100644 --- a/rpcs3/Emu/Io/GameTablet.cpp +++ b/rpcs3/Emu/Io/GameTablet.cpp @@ -229,6 +229,9 @@ void usb_device_gametablet::interrupt_transfer(u32 buf_size, u8* buf, u32 /*endp case CELL_PAD_CTRL_LEFT: left = true; break; + case CELL_PAD_CTRL_PS: + gt.btn_ps |= 1; + break; default: break; } @@ -249,9 +252,6 @@ void usb_device_gametablet::interrupt_transfer(u32 buf_size, u8* buf, u32 /*endp case CELL_PAD_CTRL_TRIANGLE: gt.btn_triangle |= 1; break; - case CELL_PAD_CTRL_PS: - gt.btn_ps |= 1; - break; default: break; } diff --git a/rpcs3/Emu/Io/recording_config.h b/rpcs3/Emu/Io/recording_config.h index 127d24015f..ef73149f5e 100644 --- a/rpcs3/Emu/Io/recording_config.h +++ b/rpcs3/Emu/Io/recording_config.h @@ -13,13 +13,13 @@ struct cfg_recording final : cfg::node node_video(cfg::node* _this) : cfg::node(_this, "Video") {} cfg::uint<0, 60> framerate{this, "Framerate", 30}; - cfg::uint<0, 7680> width{this, "Width", 1280}; - 
cfg::uint<0, 4320> height{this, "Height", 720}; + cfg::uint<640, 7680> width{this, "Width", 1280}; + cfg::uint<360, 4320> height{this, "Height", 720}; cfg::uint<0, 192> pixel_format{this, "AVPixelFormat", 0}; // AVPixelFormat::AV_PIX_FMT_YUV420P cfg::uint<0, 0xFFFF> video_codec{this, "AVCodecID", 12}; // AVCodecID::AV_CODEC_ID_MPEG4 - cfg::uint<0, 25000000> video_bps{this, "Video Bitrate", 4000000}; - cfg::uint<0, 5> max_b_frames{this, "Max B-Frames", 2}; - cfg::uint<0, 20> gop_size{this, "Group of Pictures Size", 12}; + cfg::uint<1'000'000, 60'000'000> video_bps{this, "Video Bitrate", 4'000'000}; + cfg::uint<0, 3> max_b_frames{this, "Max B-Frames", 2}; + cfg::uint<1, 120> gop_size{this, "Group of Pictures Size", 30}; } video{ this }; @@ -28,7 +28,7 @@ struct cfg_recording final : cfg::node node_audio(cfg::node* _this) : cfg::node(_this, "Audio") {} cfg::uint<0x10000, 0x17000> audio_codec{this, "AVCodecID", 86018}; // AVCodecID::AV_CODEC_ID_AAC - cfg::uint<0, 25000000> audio_bps{this, "Audio Bitrate", 320000}; + cfg::uint<64'000, 320'000> audio_bps{this, "Audio Bitrate", 192'000}; } audio{ this }; diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 4476930607..4b3aaa0605 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -132,7 +132,7 @@ namespace rsx free_rsx_memory(Traits::get(sink)); } - Traits::clone_surface(cmd, sink, region.source, new_address, region); + Traits::clone_surface(cmd, sink, region.source, new_address, region, region.source->resolution_scaling_config); allocate_rsx_memory(Traits::get(sink)); if (invalidated) [[unlikely]] @@ -398,6 +398,7 @@ namespace rsx surface_antialiasing antialias, usz width, usz height, usz pitch, u8 bpp, + const rsx::surface_scaling_config_t& scaling_config, Args&&... 
extra_params) { surface_storage_type old_surface_storage; @@ -448,7 +449,7 @@ namespace rsx } } - if (Traits::surface_matches_properties(surface, format, width, height, antialias)) + if (Traits::surface_matches_properties(surface, format, width, height, antialias, scaling_config)) { if (!pitch_compatible) { @@ -495,7 +496,7 @@ namespace rsx for (auto It = invalidated_resources.begin(); It != invalidated_resources.end(); It++) { auto &surface = *It; - if (Traits::surface_matches_properties(surface, format, width, height, antialias, true)) + if (Traits::surface_matches_properties(surface, format, width, height, antialias, scaling_config, true)) { new_surface_storage = std::move(surface); Traits::notify_surface_reused(new_surface_storage); @@ -531,7 +532,7 @@ namespace rsx if (!new_surface) { ensure(store); - new_surface_storage = Traits::create_new_surface(address, format, width, height, pitch, antialias, std::forward(extra_params)...); + new_surface_storage = Traits::create_new_surface(address, format, width, height, pitch, antialias, scaling_config, std::forward(extra_params)...); new_surface = Traits::get(new_surface_storage); Traits::prepare_surface_for_drawing(command_list, new_surface); allocate_rsx_memory(new_surface); @@ -842,11 +843,13 @@ namespace rsx surface_color_format color_format, surface_antialiasing antialias, usz width, usz height, usz pitch, + const rsx::surface_scaling_config_t& scaling_config, Args&&... extra_params) { return bind_surface_address( command_list, address, color_format, antialias, width, height, pitch, get_format_block_size_in_bytes(color_format), + scaling_config, std::forward(extra_params)...); } @@ -857,12 +860,14 @@ namespace rsx surface_depth_format2 depth_format, surface_antialiasing antialias, usz width, usz height, usz pitch, + const rsx::surface_scaling_config_t& scaling_config, Args&&... 
extra_params) { return bind_surface_address( command_list, address, depth_format, antialias, width, height, pitch, get_format_block_size_in_bytes(depth_format), + scaling_config, std::forward(extra_params)...); } @@ -969,6 +974,7 @@ namespace rsx surface_raster_type raster_type, const std::array &surface_addresses, u32 address_z, const std::array &surface_pitch, u32 zeta_pitch, + const rsx::surface_scaling_config_t& scaling_config, Args&&... extra_params) { u32 clip_width = clip_horizontal_reg; @@ -998,7 +1004,7 @@ namespace rsx m_bound_render_targets[surface_index] = std::make_pair(surface_addresses[surface_index], bind_address_as_render_targets(command_list, surface_addresses[surface_index], color_format, antialias, - clip_width, clip_height, surface_pitch[surface_index], std::forward(extra_params)...)); + clip_width, clip_height, surface_pitch[surface_index], scaling_config, std::forward(extra_params)...)); m_bound_render_target_ids.push_back(surface_index); } @@ -1014,7 +1020,7 @@ namespace rsx { m_bound_depth_stencil = std::make_pair(address_z, bind_address_as_depth_stencil(command_list, address_z, depth_format, antialias, - clip_width, clip_height, zeta_pitch, std::forward(extra_params)...)); + clip_width, clip_height, zeta_pitch, scaling_config, std::forward(extra_params)...)); } else { @@ -1463,5 +1469,113 @@ namespace rsx } } } + + void sync_scaling_config(command_list_type cmd, const rsx::surface_scaling_config_t& active_config) + { + auto process_list_function = [&](surface_ranged_map& data, const utils::address_range32& range) + { + std::vector surfaces_to_clone; + + for (auto It = data.begin_range(range); It != data.end();) + { + auto surface = Traits::get(It->second); + if (surface->get_resolution_scaling_config() == active_config) + { + ++It; + continue; + } + + // Perform a test scaling and check if anything is different after scaling + // There are many cases where this will avoid creating new surfaces + const auto [new_w, new_h] = 
rsx::apply_resolution_scale( + active_config, + surface->template get_surface_width<>(), + surface->template get_surface_height<>()); + + if (new_w == surface->width() && new_h == surface->height()) + { + // Not affected by resolution scale. Just update the details and move on. + surface->resolution_scaling_config = active_config; + ++It; + continue; + } + + surfaces_to_clone.push_back(surface); + + // Invalidate the previous surface + invalidate(It->second); + It = data.erase(It); + } + + for (auto& surface : surfaces_to_clone) + { + // Enqueue the memory transfer + surface_storage_type sink{}; + deferred_clipped_region copy{}; + copy.width = surface->template get_surface_width<>(); + copy.height = surface->template get_surface_height<>(); + copy.transfer_scale_x = 1.f; + copy.transfer_scale_y = 1.f; + copy.target = nullptr; + copy.source = surface; + + Traits::clone_surface(cmd, sink, surface, surface->base_addr, copy, active_config); + allocate_rsx_memory(Traits::get(sink)); + + // Replace with the new one + auto new_surface = Traits::get(sink); + ensure(copy.target == new_surface); + data.emplace(surface->get_memory_range(), std::move(sink)); + + // Force barrier to reduce VRAM pressure + new_surface->memory_barrier(cmd, rsx::surface_access::memory_read); + } + }; + + const auto rtt_bind_backup = m_bound_render_targets; + const auto dsv_bind_backup = m_bound_depth_stencil; + + // Unbind everything. We'll restore it later + for (auto& rtt_bind : m_bound_render_targets) + { + rtt_bind = {}; + } + + m_bound_depth_stencil = {}; + + process_list_function(m_render_targets_storage, m_render_targets_memory_range); + process_list_function(m_depth_stencil_storage, m_depth_stencil_memory_range); + + // Restore bindings. 
+ for (int i = 0; i < 4; ++i) + { + const auto address = rtt_bind_backup[i].first; + if (!address) + { + continue; + } + + auto rtt = m_render_targets_storage.find(address); + ensure(rtt != m_render_targets_storage.end()); + + m_bound_render_targets[i] = + { + address, + Traits::get(rtt->second) + }; + } + + if (const auto ds_address = dsv_bind_backup.first) + { + auto ds = m_depth_stencil_storage.find(ds_address); + ensure(ds != m_depth_stencil_storage.end()); + + m_bound_depth_stencil = + { + ds_address, + Traits::get(ds->second) + }; + } + } }; } diff --git a/rpcs3/Emu/RSX/Common/surface_utils.h b/rpcs3/Emu/RSX/Common/surface_utils.h index bf7dee2db3..8ec97600bc 100644 --- a/rpcs3/Emu/RSX/Common/surface_utils.h +++ b/rpcs3/Emu/RSX/Common/surface_utils.h @@ -88,18 +88,18 @@ namespace rsx auto dst_h = std::get<3>(region); // Apply resolution scale if needed - if (g_cfg.video.resolution_scale_percent != 100) - { - auto src = static_cast(source); + auto src = static_cast(source); + std::tie(src_w, src_h) = rsx::apply_resolution_scale( + src->resolution_scaling_config, + src_w, src_h, + src->template get_surface_width(), + src->template get_surface_height()); - std::tie(src_w, src_h) = rsx::apply_resolution_scale(src_w, src_h, - src->template get_surface_width(), - src->template get_surface_height()); - - std::tie(dst_w, dst_h) = rsx::apply_resolution_scale(dst_w, dst_h, - target_surface->template get_surface_width(), - target_surface->template get_surface_height()); - } + std::tie(dst_w, dst_h) = rsx::apply_resolution_scale( + target_surface->resolution_scaling_config, + dst_w, dst_h, + target_surface->template get_surface_width(), + target_surface->template get_surface_height()); width = src_w; height = src_h; @@ -146,6 +146,9 @@ namespace rsx u8 samples_x = 1; u8 samples_y = 1; + // Scaling configuration + surface_scaling_config_t resolution_scaling_config; + rsx::address_range32 memory_range; std::unique_ptr> resolve_surface; @@ -303,6 +306,11 @@ namespace rsx 
format_info.gcm_depth_format = format; } + void set_resolution_scaling_config(const surface_scaling_config_t& config) + { + resolution_scaling_config = config; + } + inline rsx::surface_color_format get_surface_color_format() const { return format_info.gcm_color_format; @@ -323,6 +331,11 @@ namespace rsx ); } + inline const rsx::surface_scaling_config_t& get_resolution_scaling_config() const + { + return resolution_scaling_config; + } + inline bool dirty() const { return (state_flags != rsx::surface_state_flags::ready) || !old_contents.empty(); @@ -541,10 +554,16 @@ namespace rsx } // Apply resolution scale if needed - if (g_cfg.video.resolution_scale_percent != 100) + if (resolution_scaling_config.scale_percent != 100 || + region.source->resolution_scaling_config.scale_percent != 100) { - auto [src_width, src_height] = rsx::apply_resolution_scale(slice.width, slice.height, slice.source->width(), slice.source->height()); - auto [dst_width, dst_height] = rsx::apply_resolution_scale(slice.width, slice.height, slice.target->width(), slice.target->height()); + const auto& src_res_scale = region.source->resolution_scaling_config; + const auto& dst_res_scale = resolution_scaling_config; + const auto src_surface = ensure(dynamic_cast(slice.source)); + const auto dst_surface = ensure(dynamic_cast(slice.target)); + + auto [src_width, src_height] = rsx::apply_resolution_scale(src_res_scale, slice.width, slice.height, src_surface->get_surface_width(), src_surface->get_surface_height()); + auto [dst_width, dst_height] = rsx::apply_resolution_scale(dst_res_scale, slice.width, slice.height, dst_surface->get_surface_width(), dst_surface->get_surface_height()); slice.transfer_scale_x *= f32(dst_width) / src_width; slice.transfer_scale_y *= f32(dst_height) / src_height; @@ -552,8 +571,8 @@ namespace rsx slice.width = src_width; slice.height = src_height; - std::tie(slice.src_x, slice.src_y) = rsx::apply_resolution_scale(slice.src_x, slice.src_y, slice.source->width(), 
slice.source->height()); - std::tie(slice.dst_x, slice.dst_y) = rsx::apply_resolution_scale(slice.dst_x, slice.dst_y, slice.target->width(), slice.target->height()); + std::tie(slice.src_x, slice.src_y) = rsx::apply_resolution_scale(src_res_scale, slice.src_x, slice.src_y, src_surface->get_surface_width(), src_surface->get_surface_height()); + std::tie(slice.dst_x, slice.dst_y) = rsx::apply_resolution_scale(dst_res_scale, slice.dst_x, slice.dst_y, dst_surface->get_surface_width(), dst_surface->get_surface_height()); } } diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index dfe8043bd3..8aed0ccc34 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2413,9 +2413,13 @@ namespace rsx // 2. The image has to have been generated on the GPU (fbo or blit target only) rsx::simple_array sections; - const bool use_upscaling = (result.upload_context == rsx::texture_upload_context::framebuffer_storage && g_cfg.video.resolution_scale_percent != 100); + const bool use_upscaling = (result.upload_context == rsx::texture_upload_context::framebuffer_storage); + auto to_surface_type = [](const copy_region_descriptor& rgn) -> typename surface_store_type::surface_type + { + return static_cast(rgn.src); + }; - if (!helpers::append_mipmap_level(sections, result, attributes, 0, use_upscaling, attributes)) [[unlikely]] + if (!helpers::append_mipmap_level(to_surface_type, sections, result, attributes, 0, use_upscaling, attributes)) [[unlikely]] { // Abort if mip0 is not compatible return result; @@ -2445,7 +2449,7 @@ namespace rsx options, range, extended_dimension, m_rtts, std::forward(extras)...); if (!ret.validate() || - !helpers::append_mipmap_level(sections, ret, attr2, subsurface, use_upscaling, attributes)) + !helpers::append_mipmap_level(to_surface_type, sections, ret, attr2, subsurface, use_upscaling, attributes)) { // Abort break; @@ -2778,7 +2782,7 @@ namespace rsx surf->template 
get_surface_height() != surf->height()) { // Must go through a scaling operation due to resolution scaling being present - ensure(g_cfg.video.resolution_scale_percent != 100); + ensure(src_subres.surface->resolution_scaling_config.scale_percent != 100); use_null_region = false; } } @@ -3389,8 +3393,8 @@ namespace rsx { const auto surface_width = src_subres.surface->template get_surface_width(); const auto surface_height = src_subres.surface->template get_surface_height(); - std::tie(src_area.x1, src_area.y1) = rsx::apply_resolution_scale(src_area.x1, src_area.y1, surface_width, surface_height); - std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale(src_area.x2, src_area.y2, surface_width, surface_height); + std::tie(src_area.x1, src_area.y1) = rsx::apply_resolution_scale(src_subres.surface->resolution_scaling_config, src_area.x1, src_area.y1, surface_width, surface_height); + std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale(src_subres.surface->resolution_scaling_config, src_area.x2, src_area.y2, surface_width, surface_height); // The resource is of surface type; possibly disabled AA emulation src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_read, src_area); @@ -3400,8 +3404,8 @@ namespace rsx { const auto surface_width = dst_subres.surface->template get_surface_width(); const auto surface_height = dst_subres.surface->template get_surface_height(); - std::tie(dst_area.x1, dst_area.y1) = rsx::apply_resolution_scale(dst_area.x1, dst_area.y1, surface_width, surface_height); - std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale(dst_area.x2, dst_area.y2, surface_width, surface_height); + std::tie(dst_area.x1, dst_area.y1) = rsx::apply_resolution_scale(dst_subres.surface->resolution_scaling_config, dst_area.x1, dst_area.y1, surface_width, surface_height); + std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale(dst_subres.surface->resolution_scaling_config, dst_area.x2, dst_area.y2, 
surface_width, surface_height); // The resource is of surface type; possibly disabled AA emulation dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_write, dst_area); diff --git a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h index 1560f40ad4..8898830034 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_helpers.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_helpers.h @@ -357,11 +357,11 @@ namespace rsx const auto surface_width = section.surface->template get_surface_width(); const auto surface_height = section.surface->template get_surface_height(); - const auto [src_width, src_height] = rsx::apply_resolution_scale(section.src_area.width, h, surface_width, surface_height); - const auto [dst_width, dst_height] = rsx::apply_resolution_scale(section.dst_area.width, h, attr.width, attr.height); + const auto [src_width, src_height] = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, section.src_area.width, h, surface_width, surface_height); + const auto [dst_width, dst_height] = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, section.dst_area.width, h, attr.width, attr.height); - std::tie(src_x, src_y) = rsx::apply_resolution_scale(src_x, src_y, surface_width, surface_height); - std::tie(dst_x, dst_y) = rsx::apply_resolution_scale(dst_x, dst_y, attr.width, attr.height); + std::tie(src_x, src_y) = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, src_x, src_y, surface_width, surface_height); + std::tie(dst_x, dst_y) = rsx::apply_resolution_scale(section.surface->resolution_scaling_config, dst_x, dst_y, attr.width, attr.height); section.surface->memory_barrier(cmd, rsx::surface_access::transfer_read); @@ -430,8 +430,10 @@ namespace rsx if (scaling) { // Since output is upscaled, also upscale on dst - const auto [_dst_x, _dst_y] = rsx::apply_resolution_scale(static_cast(dst_offset.x), static_cast(dst_y - dst_slice_begin), attr.width, 
attr.height); - const auto [_dst_w, _dst_h] = rsx::apply_resolution_scale(dst_w, height, attr.width, attr.height); + + const auto& scaling_config = rsx::get_current_renderer()->resolution_scaling_config; + const auto [_dst_x, _dst_y] = rsx::apply_resolution_scale(scaling_config, static_cast(dst_offset.x), static_cast(dst_y - dst_slice_begin), attr.width, attr.height); + const auto [_dst_w, _dst_h] = rsx::apply_resolution_scale(scaling_config, dst_w, height, attr.width, attr.height); out.push_back ({ @@ -660,10 +662,10 @@ namespace rsx bool is_depth = texptr->is_depth_surface(); auto attr2 = attr; - if (rsx::get_resolution_scale_percent() != 100) + if (texptr->resolution_scaling_config.scale_percent != 100) { - const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(attr.width, attr.height, surface_width, surface_height); - const auto [unused, scaled_slice_h] = rsx::apply_resolution_scale(RSX_SURFACE_DIMENSION_IGNORED, attr.slice_h, surface_width, surface_height); + const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(texptr->resolution_scaling_config, attr.width, attr.height, surface_width, surface_height); + const auto [unused, scaled_slice_h] = rsx::apply_resolution_scale(texptr->resolution_scaling_config, RSX_SURFACE_DIMENSION_IGNORED, attr.slice_h, surface_width, surface_height); attr2.width = scaled_w; attr2.height = scaled_h; attr2.slice_h = scaled_slice_h; @@ -841,7 +843,8 @@ namespace rsx } // If this method was called, there is no easy solution, likely means atlas gather is needed - const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(attr2.width, attr2.height); + const auto& scaling_config = rsx::get_current_renderer()->resolution_scaling_config; + const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(scaling_config, attr2.width, attr2.height); const auto format_class = classify_format(attr2.gcm_format); const auto upload_context = (fbos.empty()) ? 
texture_upload_context::shader_read : texture_upload_context::framebuffer_storage; @@ -892,14 +895,15 @@ namespace rsx return result; } - template + template bool append_mipmap_level( + to_surface_type_converter&& as_surface_type, // Cast function to surface type rsx::simple_array& sections, // Destination list - const sampled_image_descriptor& level, // Descriptor for the image level being checked - const image_section_attributes_t& attr, // Attributes of image level - u8 mipmap_level, // Level index - bool apply_upscaling, // Whether to upscale the results or not - const image_section_attributes_t& level0_attr) // Attributes of the first mipmap level + const sampled_image_descriptor& level, // Descriptor for the image level being checked + const image_section_attributes_t& attr, // Attributes of image level + u8 mipmap_level, // Level index + bool apply_upscaling, // Whether to upscale the results or not + const image_section_attributes_t& level0_attr) // Attributes of the first mipmap level { if (level.image_handle) { @@ -916,7 +920,8 @@ namespace rsx // Calculate transfer dimensions from attr if (level.upload_context == rsx::texture_upload_context::framebuffer_storage) [[likely]] { - std::tie(mip.src_w, mip.src_h) = rsx::apply_resolution_scale(attr.width, attr.height); + auto rtv = as_surface_type(mip); + std::tie(mip.src_w, mip.src_h) = rsx::apply_resolution_scale(rtv->resolution_scaling_config, attr.width, attr.height); } else { @@ -964,7 +969,9 @@ namespace rsx if (apply_upscaling) { auto& mip = sections.back(); - std::tie(mip.dst_w, mip.dst_h) = rsx::apply_resolution_scale(mip.dst_w, mip.dst_h, level0_attr.width, level0_attr.height); + std::tie(mip.dst_w, mip.dst_h) = rsx::apply_resolution_scale( + as_surface_type(mip)->resolution_scaling_config, + mip.dst_w, mip.dst_h, level0_attr.width, level0_attr.height); } return true; diff --git a/rpcs3/Emu/RSX/Core/RSXDisplay.cpp b/rpcs3/Emu/RSX/Core/RSXDisplay.cpp index e263a945ef..f86c6dea04 100644 --- 
a/rpcs3/Emu/RSX/Core/RSXDisplay.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDisplay.cpp @@ -49,7 +49,7 @@ namespace rsx } } - std::string framebuffer_statistics_t::to_string(bool squash) const + std::string framebuffer_statistics_t::to_string(const surface_scaling_config_t& scaling_config, bool squash) const { // Format is sorted by sample count struct sorted_message_t @@ -70,7 +70,7 @@ namespace rsx for (const auto& [aa_mode, stat] : data) { auto real_stat = stat; - std::tie(real_stat.width, real_stat.height) = apply_resolution_scale(stat.width, stat.height); + std::tie(real_stat.width, real_stat.height) = apply_resolution_scale(scaling_config, stat.width, stat.height); real_stats.push_back(real_stat); sorted_message_t msg; diff --git a/rpcs3/Emu/RSX/Core/RSXDisplay.h b/rpcs3/Emu/RSX/Core/RSXDisplay.h index 77407f2f19..6a04374e1b 100644 --- a/rpcs3/Emu/RSX/Core/RSXDisplay.h +++ b/rpcs3/Emu/RSX/Core/RSXDisplay.h @@ -12,6 +12,8 @@ namespace rsx { enum class surface_antialiasing : u8; + struct surface_scaling_config_t; + struct framebuffer_dimensions_t { u16 width; @@ -42,7 +44,7 @@ namespace rsx void add(u16 width, u16 height, rsx::surface_antialiasing aa); // Returns a formatted string representing the statistics collected over the frame. - std::string to_string(bool squash) const; + std::string to_string(const surface_scaling_config_t& scaling_config, bool squash) const; }; struct frame_statistics_t diff --git a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp index 53f6ce31e7..8ef45e1f06 100644 --- a/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp +++ b/rpcs3/Emu/RSX/Core/RSXDrawCommands.cpp @@ -699,7 +699,9 @@ namespace rsx const auto window_origin = REGS(m_ctx)->shader_window_origin(); const u32 window_height = REGS(m_ctx)->shader_window_height(); const auto pixel_center = REGS(m_ctx)->pixel_center(); - const f32 resolution_scale = (window_height <= static_cast(g_cfg.video.min_scalable_dimension)) ? 
1.f : rsx::get_resolution_scale(); + const f32 resolution_scale = (window_height <= RSX(m_ctx)->resolution_scaling_config.min_scalable_dimension) + ? 1.f + : RSX(m_ctx)->resolution_scaling_config.scale_factor(); payload.wpos_scale = (window_origin == rsx::window_origin::top) ? (1.f / resolution_scale) : (-1.f / resolution_scale); payload.wpos_bias[0] = 0.f; diff --git a/rpcs3/Emu/RSX/GL/GLDraw.cpp b/rpcs3/Emu/RSX/GL/GLDraw.cpp index 3de2585523..d0c2e233e9 100644 --- a/rpcs3/Emu/RSX/GL/GLDraw.cpp +++ b/rpcs3/Emu/RSX/GL/GLDraw.cpp @@ -227,7 +227,7 @@ void GLGSRender::update_draw_state() case rsx::primitive_type::lines: case rsx::primitive_type::line_loop: case rsx::primitive_type::line_strip: - gl_state.line_width(rsx::method_registers.line_width() * rsx::get_resolution_scale()); + gl_state.line_width(rsx::method_registers.line_width() * resolution_scaling_config.scale_factor()); gl_state.enable(rsx::method_registers.line_smooth_enabled(), GL_LINE_SMOOTH); break; default: diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 20553eeb00..c1acabd601 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -73,6 +73,7 @@ void GLGSRender::set_viewport() { // NOTE: scale offset matrix already contains the viewport transformation const auto [clip_width, clip_height] = rsx::apply_resolution_scale( + resolution_scaling_config, rsx::method_registers.surface_clip_width(), rsx::method_registers.surface_clip_height()); glViewport(0, 0, clip_width, clip_height); @@ -936,7 +937,7 @@ void GLGSRender::load_program_env() m_draw_processor.fill_scale_offset_data(buf, false); m_draw_processor.fill_user_clip_data(buf + 64); *(reinterpret_cast(buf + 68)) = rsx::method_registers.transform_branch_bits(); - *(reinterpret_cast(buf + 72)) = rsx::method_registers.point_size() * rsx::get_resolution_scale(); + *(reinterpret_cast(buf + 72)) = rsx::method_registers.point_size() * resolution_scaling_config.scale_factor(); 
*(reinterpret_cast(buf + 76)) = rsx::method_registers.clip_min(); *(reinterpret_cast(buf + 80)) = rsx::method_registers.clip_max(); diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.cpp b/rpcs3/Emu/RSX/GL/GLOverlays.cpp index 9e588d5df2..a758804e4f 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.cpp +++ b/rpcs3/Emu/RSX/GL/GLOverlays.cpp @@ -399,8 +399,15 @@ namespace gl void ui_overlay_renderer::run(gl::command_context& cmd_, const areau& viewport, GLuint target, rsx::overlays::overlay& ui, bool flip_vertically) { - program_handle.uniforms["viewport"] = color4f(static_cast(viewport.width()), static_cast(viewport.height()), static_cast(viewport.x1), static_cast(viewport.y1)); - program_handle.uniforms["ui_scale"] = color4f(static_cast(ui.virtual_width), static_cast(ui.virtual_height), 1.f, 1.f); + ui.set_render_viewport( + static_cast(std::min(viewport.width(), std::numeric_limits::max())), + static_cast(std::min(viewport.height(), std::numeric_limits::max())) + ); + const auto ui_scale = color4f(static_cast(ui.virtual_width), static_cast(ui.virtual_height), 1.f, 1.f); + const auto ui_viewport = color4f(static_cast(viewport.width()), static_cast(viewport.height()), static_cast(viewport.x1), static_cast(viewport.y1)); + + program_handle.uniforms["viewport"] = ui_viewport; + program_handle.uniforms["ui_scale"] = ui_scale; saved_sampler_state save_30(30, m_sampler); saved_sampler_state save_31(31, m_sampler); @@ -458,12 +465,24 @@ namespace gl .texture_mode(texture_mode) .clip_fragments(cmd.config.clip_region) .pulse_glow(cmd.config.pulse_glow) + .set_sdf(cmd.config.sdf_config.func) .get(); program_handle.uniforms["timestamp"] = cmd.config.get_sinus_value(); program_handle.uniforms["albedo"] = cmd.config.color; program_handle.uniforms["clip_bounds"] = cmd.config.clip_rect; program_handle.uniforms["blur_intensity"] = static_cast(cmd.config.blur_strength); + + if (cmd.config.sdf_config.func != rsx::overlays::sdf_function::none) + { + auto sdf_config = cmd.config.sdf_config; + 
sdf_config.transform(static_cast(viewport).flipped_vertical(), {ui_scale.x, ui_scale.y}); + + program_handle.uniforms["sdf_params"] = color4f(sdf_config.hx, sdf_config.hy, sdf_config.br, sdf_config.bw); + program_handle.uniforms["sdf_origin"] = color2f(sdf_config.cx, sdf_config.cy); + program_handle.uniforms["sdf_border_color"] = sdf_config.border_color; + } + overlay_pass::run(cmd_, viewport, target, gl::image_aspect::color, true); } diff --git a/rpcs3/Emu/RSX/GL/GLPresent.cpp b/rpcs3/Emu/RSX/GL/GLPresent.cpp index 68a570e359..2aa11868ee 100644 --- a/rpcs3/Emu/RSX/GL/GLPresent.cpp +++ b/rpcs3/Emu/RSX/GL/GLPresent.cpp @@ -95,6 +95,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons image = section.surface->get_surface(rsx::surface_access::transfer_read); std::tie(info->width, info->height) = rsx::apply_resolution_scale( + resolution_scaling_config, std::min(surface_width, info->width), std::min(surface_height, info->height)); } @@ -225,7 +226,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) if (avconfig.stereo_enabled) [[unlikely]] { - const auto [unused, min_expected_height] = rsx::apply_resolution_scale(RSX_SURFACE_DIMENSION_IGNORED, buffer_height + 30); + const auto [unused, min_expected_height] = rsx::apply_resolution_scale(resolution_scaling_config, RSX_SURFACE_DIMENSION_IGNORED, buffer_height + 30); if (image_to_flip->height() < min_expected_height) { // Get image for second eye @@ -240,7 +241,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) else { // Account for possible insets - const auto [unused2, scaled_buffer_height] = rsx::apply_resolution_scale(RSX_SURFACE_DIMENSION_IGNORED, buffer_height); + const auto [unused2, scaled_buffer_height] = rsx::apply_resolution_scale(resolution_scaling_config, RSX_SURFACE_DIMENSION_IGNORED, buffer_height); buffer_height = std::min(image_to_flip->height() - min_expected_height, scaled_buffer_height); } } @@ -316,9 +317,11 @@ void GLGSRender::flip(const 
rsx::display_flip_info_t& info) // Lock to avoid modification during run-update chain std::lock_guard lock(*m_overlay_manager); + const areau display_area = {0, 0, static_cast(m_frame->client_width()), static_cast(m_frame->client_height())}; for (const auto& view : m_overlay_manager->get_views()) { - m_ui_renderer.run(cmd, aspect_ratio, target, *view.get(), flip_vertically); + const areau render_area = view->use_window_space ? display_area : aspect_ratio; + m_ui_renderer.run(cmd, render_area, target, *view.get(), flip_vertically); } } }; @@ -477,7 +480,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) "Texture uploads: %11u (%u from CPU - %02u%%, %u copies avoided)\n" "Vertex cache hits: %9u/%u (%u%%)\n" "Program cache lookup ellision: %u/%u (%u%%)", - info.stats.framebuffer_stats.to_string(!backend_config.supports_hw_msaa), + info.stats.framebuffer_stats.to_string(resolution_scaling_config, !backend_config.supports_hw_msaa), get_load(), info.stats.draw_calls, info.stats.setup_time, info.stats.vertex_upload_time, info.stats.textures_upload_time, info.stats.draw_exec_time, num_dirty_textures, texture_memory_size, num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate, @@ -514,6 +517,19 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info) m_frame->flip(m_context); rsx::thread::flip(info); + // Data sync + const rsx::surface_scaling_config_t active_res_scaling_config = + { + .scale_percent = static_cast(g_cfg.video.resolution_scale_percent), + .min_scalable_dimension = static_cast(g_cfg.video.min_scalable_dimension), + }; + + if (active_res_scaling_config != this->resolution_scaling_config) + { + m_rtts.sync_scaling_config(cmd, active_res_scaling_config); + this->resolution_scaling_config = active_res_scaling_config; + } + // Cleanup m_gl_texture_cache.on_frame_end(); m_vertex_cache->purge(); diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index a61fb4b365..34c2ca72d3 
100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -141,7 +141,8 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool /* m_framebuffer_layout.width, m_framebuffer_layout.height, m_framebuffer_layout.target, m_framebuffer_layout.aa_mode, m_framebuffer_layout.raster_type, m_framebuffer_layout.color_addresses, m_framebuffer_layout.zeta_address, - m_framebuffer_layout.actual_color_pitch, m_framebuffer_layout.actual_zeta_pitch); + m_framebuffer_layout.actual_color_pitch, m_framebuffer_layout.actual_zeta_pitch, + resolution_scaling_config); std::array color_targets; GLuint depth_stencil_target; @@ -448,7 +449,7 @@ void gl::render_target::load_memory(gl::command_context& cmd) subres.data = { vm::get_super_ptr(base_addr), static_cast::size_type>(rsx_pitch * surface_height * samples_y) }; // TODO: MSAA support - if (g_cfg.video.resolution_scale_percent == 100 && spp == 1) [[likely]] + if (resolution_scaling_config.scale_percent == 100 && spp == 1) [[likely]] { gl::upload_texture(cmd, this, get_gcm_format(), is_swizzled, { subres }); } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.h b/rpcs3/Emu/RSX/GL/GLRenderTargets.h index ee3bc03956..f5e2252b92 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.h +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.h @@ -98,7 +98,7 @@ namespace gl bool matches_dimensions(u16 _width, u16 _height) const { //Use forward scaling to account for rounding and clamping errors - const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(_width, _height); + const auto [scaled_w, scaled_h] = rsx::apply_resolution_scale(resolution_scaling_config, _width, _height); return (scaled_w == width()) && (scaled_h == height()); } @@ -138,11 +138,12 @@ struct gl_render_target_traits u32 address, rsx::surface_color_format surface_color_format, usz width, usz height, usz pitch, - rsx::surface_antialiasing antialias + rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& 
resolution_scaling_config ) { auto format = rsx::internals::surface_color_format_to_gl(surface_color_format); - const auto [width_, height_] = rsx::apply_resolution_scale(static_cast(width), static_cast(height)); + const auto [width_, height_] = rsx::apply_resolution_scale(resolution_scaling_config, static_cast(width), static_cast(height)); u8 samples; rsx::surface_sample_layout sample_layout; @@ -162,6 +163,7 @@ struct gl_render_target_traits result->set_name(fmt::format("RTV_%u@0x%x", result->id(), address)); result->set_aa_mode(antialias); + result->set_resolution_scaling_config(resolution_scaling_config); result->set_native_pitch(static_cast(width) * get_format_block_size_in_bytes(surface_color_format) * result->samples_x); result->set_surface_dimensions(static_cast(width), static_cast(height), static_cast(pitch)); result->set_format(surface_color_format); @@ -182,11 +184,12 @@ struct gl_render_target_traits u32 address, rsx::surface_depth_format2 surface_depth_format, usz width, usz height, usz pitch, - rsx::surface_antialiasing antialias + rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& resolution_scaling_config ) { auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format); - const auto [width_, height_] = rsx::apply_resolution_scale(static_cast(width), static_cast(height)); + const auto [width_, height_] = rsx::apply_resolution_scale(resolution_scaling_config, static_cast(width), static_cast(height)); u8 samples; rsx::surface_sample_layout sample_layout; @@ -206,6 +209,7 @@ struct gl_render_target_traits result->set_name(fmt::format("DSV_%u@0x%x", result->id(), address)); result->set_aa_mode(antialias); + result->set_resolution_scaling_config(resolution_scaling_config); result->set_surface_dimensions(static_cast(width), static_cast(height), static_cast(pitch)); result->set_format(surface_depth_format); result->set_native_pitch(static_cast(width) * get_format_block_size_in_bytes(surface_depth_format) * 
result->samples_x); @@ -225,13 +229,17 @@ struct gl_render_target_traits void clone_surface( gl::command_context& cmd, std::unique_ptr& sink, gl::render_target* ref, - u32 address, barrier_descriptor_t& prev) + u32 address, barrier_descriptor_t& prev, + const rsx::surface_scaling_config_t& scaling_config) { if (!sink) { auto internal_format = static_cast(ref->get_internal_format()); - const auto [new_w, new_h] = rsx::apply_resolution_scale(prev.width, prev.height, - ref->get_surface_width(), ref->get_surface_height()); + const auto [new_w, new_h] = rsx::apply_resolution_scale( + scaling_config, + prev.width, prev.height, + ref->get_surface_width(), + ref->get_surface_height()); sink = std::make_unique(new_w, new_h, ref->samples(), internal_format, ref->format_class()); sink->add_ref(); @@ -240,6 +248,9 @@ struct gl_render_target_traits sink->state_flags = rsx::surface_state_flags::erase_bkgnd; sink->format_info = ref->format_info; + sink->sample_layout = ref->sample_layout; + sink->resolution_scaling_config = scaling_config; + sink->set_name(fmt::format("SINK_%u@0x%x", sink->id(), address)); sink->set_spp(ref->get_spp()); sink->set_native_pitch(static_cast(prev.width) * ref->get_bpp() * ref->samples_x); @@ -375,6 +386,7 @@ struct gl_render_target_traits gl::texture::internal_format format, usz width, usz height, rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& scaling_config, bool check_refs = false) { if (check_refs && surface->has_refs()) @@ -382,7 +394,8 @@ struct gl_render_target_traits return surface->get_internal_format() == format && surface->get_spp() == get_format_sample_count(antialias) && - surface->matches_dimensions(static_cast(width), static_cast(height)); + surface->matches_dimensions(static_cast(width), static_cast(height)) && + surface->resolution_scaling_config == scaling_config; } static @@ -391,10 +404,11 @@ struct gl_render_target_traits rsx::surface_color_format format, usz width, usz height, 
rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& scaling_config, bool check_refs=false) { const auto internal_fmt = rsx::internals::surface_color_format_to_gl(format).internal_format; - return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, check_refs); + return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, scaling_config, check_refs); } static @@ -403,10 +417,11 @@ struct gl_render_target_traits rsx::surface_depth_format2 format, usz width, usz height, rsx::surface_antialiasing antialias, + const rsx::surface_scaling_config_t& scaling_config, bool check_refs = false) { const auto internal_fmt = rsx::internals::surface_depth_format_to_gl(format).internal_format; - return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, check_refs); + return int_surface_matches_properties(surface, internal_fmt, width, height, antialias, scaling_config, check_refs); } static diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 64e451e579..6f566b9a82 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -717,169 +717,168 @@ namespace gl } } } + + return; } - else + + std::pair upload_scratch_mem = {}, compute_scratch_mem = {}; + image_memory_requirements mem_info; + pixel_buffer_layout mem_layout; + + std::span dst_buffer = staging_buffer; + u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); + u64 image_linear_size = staging_buffer.size(); + + const auto min_required_buffer_size = std::max(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000); + + if (driver_caps.ARB_compute_shader_supported) { - std::pair upload_scratch_mem = {}, compute_scratch_mem = {}; - image_memory_requirements mem_info; - pixel_buffer_layout mem_layout; - - std::span dst_buffer = staging_buffer; - u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); - u64 image_linear_size = 
staging_buffer.size(); - - const auto min_required_buffer_size = std::max(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000); - - if (driver_caps.ARB_compute_shader_supported) + if (g_upload_transfer_buffer.size() < static_cast(min_required_buffer_size)) { - if (g_upload_transfer_buffer.size() < static_cast(min_required_buffer_size)) - { - g_upload_transfer_buffer.remove(); - g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size); - } - - if (g_compute_decode_buffer.size() < min_required_buffer_size) - { - g_compute_decode_buffer.remove(); - g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); - } + g_upload_transfer_buffer.remove(); + g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size); } - for (const rsx::subresource_layout& layout : input_layouts) + if (g_compute_decode_buffer.size() < min_required_buffer_size) { - if (driver_caps.ARB_compute_shader_supported) + g_compute_decode_buffer.remove(); + g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); + } + } + + for (const rsx::subresource_layout& layout : input_layouts) + { + if (driver_caps.ARB_compute_shader_supported) + { + u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); + + // The preceding branch returns early, so "is_compressed_host_format()" is always false here. + // Handle emulated compressed formats with host unpack (R8G8 compressed) + row_pitch = std::max(row_pitch, dst->pitch()); + + // FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch. 
+ image_linear_size = row_pitch * layout.height_in_texel * layout.depth; + + compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast(image_linear_size), 256) }; + compute_scratch_mem.first = reinterpret_cast(static_cast(compute_scratch_mem.second)); + + g_upload_transfer_buffer.reserve_storage_on_heap(static_cast(image_linear_size)); + upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast(image_linear_size), 256); + dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; + } + + rsx::io_buffer io_buf = dst_buffer; + caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024); + auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); + + // Define upload region + coord3u region; + region.x = 0; + region.y = 0; + region.z = layout.layer; + region.width = layout.width_in_texel; + region.height = layout.height_in_texel; + region.depth = layout.depth; + + if (!driver_caps.ARB_compute_shader_supported) + { + unpack_settings.swap_bytes(op.require_swap); + dst->copy_from(staging_buffer, static_cast(gl_format), static_cast(gl_type), layout.level, region, unpack_settings); + continue; + } + + // 0. Preconf + mem_layout.alignment = static_cast(caps.alignment); + mem_layout.swap_bytes = op.require_swap; + mem_layout.format = gl_format; + mem_layout.type = gl_type; + mem_layout.block_size = block_size_in_bytes; + + // 2. 
Upload memory to GPU + if (!op.require_deswizzle) + { + g_upload_transfer_buffer.unmap(); + g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size); + } + else + { + // 2.1 Copy data to deswizzle buf + if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size) { - u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); - - // We're in the "else" branch, so "is_compressed_host_format()" is always false. - // Handle emulated compressed formats with host unpack (R8G8 compressed) - row_pitch = std::max(row_pitch, dst->pitch()); - - // FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch. - image_linear_size = row_pitch * layout.height_in_texel * layout.depth; - - compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast(image_linear_size), 256) }; - compute_scratch_mem.first = reinterpret_cast(static_cast(compute_scratch_mem.second)); - - g_upload_transfer_buffer.reserve_storage_on_heap(static_cast(image_linear_size)); - upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast(image_linear_size), 256); - dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; + g_deswizzle_scratch_buffer.remove(); + g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); } - rsx::io_buffer io_buf = dst_buffer; - caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024); - auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); + u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast(image_linear_size), 256); + g_upload_transfer_buffer.unmap(); + g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast(image_linear_size)); - // Define upload region - coord3u 
region; - region.x = 0; - region.y = 0; - region.z = layout.layer; - region.width = layout.width_in_texel; - region.height = layout.height_in_texel; - region.depth = layout.depth; + // 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem + const auto block_size = op.element_size * op.block_length; - if (driver_caps.ARB_compute_shader_supported) + if (op.require_swap) { - // 0. Preconf - mem_layout.alignment = static_cast(caps.alignment); - mem_layout.swap_bytes = op.require_swap; - mem_layout.format = gl_format; - mem_layout.type = gl_type; - mem_layout.block_size = block_size_in_bytes; + mem_layout.swap_bytes = false; - // 2. Upload memory to GPU - if (!op.require_deswizzle) + switch (op.element_size) { - g_upload_transfer_buffer.unmap(); - g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size); + case 1: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 2: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 4: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + default: + fmt::throw_exception("Unimplemented element size deswizzle"); } - else - { - // 2.1 Copy data to deswizzle buf - if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size) - { - 
g_deswizzle_scratch_buffer.remove(); - g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); - } - - u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast(image_linear_size), 256); - g_upload_transfer_buffer.unmap(); - g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast(image_linear_size)); - - // 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem - const auto block_size = op.element_size * op.block_length; - - if (op.require_swap) - { - mem_layout.swap_bytes = false; - - switch (op.element_size) - { - case 1: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 2: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 4: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - default: - fmt::throw_exception("Unimplemented element size deswizzle"); - } - } - else - { - switch (op.element_size) - { - case 1: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 2: - do_deswizzle_transformation(cmd, block_size, - 
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 4: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - default: - fmt::throw_exception("Unimplemented element size deswizzle"); - } - } - - // Barrier - g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast(image_linear_size)); - } - - // 3. Update configuration - mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes; - mem_info.image_size_in_bytes = image_linear_size; - mem_info.memory_required = 0; - - // 4. Dispatch compute routines - copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info); - - // Barrier - g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast(image_linear_size)); } else { - unpack_settings.swap_bytes(op.require_swap); - dst->copy_from(staging_buffer, static_cast(gl_format), static_cast(gl_type), layout.level, region, unpack_settings); + switch (op.element_size) + { + case 1: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 2: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 4: + do_deswizzle_transformation(cmd, 
block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + default: + fmt::throw_exception("Unimplemented element size deswizzle"); + } } + + // Barrier + g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast(image_linear_size)); } + + // 3. Update configuration + mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes; + mem_info.image_size_in_bytes = image_linear_size; + mem_info.memory_required = 0; + + // 4. Dispatch compute routines + copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info); + + // Barrier + g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast(image_linear_size)); } } diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index 4a17dae081..6d0bbb18a3 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -14,20 +14,20 @@ namespace gl { struct pixel_buffer_layout { - GLenum format; - GLenum type; - u32 row_length; - u8 block_size; - bool swap_bytes; - u8 alignment; + GLenum format = GL_RGBA; + GLenum type = GL_UNSIGNED_BYTE; + u32 row_length = 0; + u8 block_size = 0; + bool swap_bytes = false; + u8 alignment = 0; u8 reserved; }; struct image_memory_requirements { - u64 image_size_in_texels; - u64 image_size_in_bytes; - u64 memory_required; + u64 image_size_in_texels = 0; + u64 image_size_in_bytes = 0; + u64 memory_required = 0; }; struct clear_cmd_info diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index a4b931186f..27b455374e 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -287,6 +287,7 @@ namespace gl u32 transfer_width = width; u32 transfer_height = height; u32 transfer_x = 0, transfer_y = 0; + u16 
resolution_scale_percent = 100; if (context == rsx::texture_upload_context::framebuffer_storage) { @@ -295,9 +296,10 @@ namespace gl target_texture = surface->get_surface(rsx::surface_access::transfer_read); transfer_width *= surface->samples_x; transfer_height *= surface->samples_y; + resolution_scale_percent = surface->resolution_scaling_config.scale_percent; } - if ((rsx::get_resolution_scale_percent() != 100 && context == rsx::texture_upload_context::framebuffer_storage) || + if ((resolution_scale_percent != 100 && context == rsx::texture_upload_context::framebuffer_storage) || (vram_texture->pitch() != rsx_pitch)) { areai src_area = { 0, 0, 0, 0 }; diff --git a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp index 43c8a5d9f4..3cc40efed1 100644 --- a/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp +++ b/rpcs3/Emu/RSX/NV47/HW/nv4097.cpp @@ -633,9 +633,17 @@ namespace rsx case 2: break; default: - rsx_log.error("Unknown render mode %d", mode); + { + struct logged_t + { + atomic_t logged_cause[256]{}; + }; + + const auto& is_error = ::at32(g_fxo->get().logged_cause, mode).try_inc(10); + (is_error ? 
rsx_log.error : rsx_log.trace)("Unknown render mode %d", mode); return; } + } const u32 offset = arg & 0xffffff; auto address_ptr = util::get_report_data_impl(ctx, offset); diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.h b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.h index bd9b0e1068..88e1c53324 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.h +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_components.h @@ -16,7 +16,7 @@ namespace rsx { static constexpr u16 menu_entry_height = 40; static constexpr u16 menu_entry_margin = 30; - static constexpr u16 menu_checkbox_size = 20; + static constexpr u16 menu_checkbox_size = 24; static constexpr u16 element_height = 25; enum class page_navigation diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp index b359fb562c..4d0681193f 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_savestate.cpp @@ -26,7 +26,7 @@ namespace rsx { if (!suspend_mode) { - Emu.after_kill_callback = []() { Emu.Restart(); }; + Emu.after_kill_callback = []() { Emu.Restart(true, false); }; // Make sure we keep the game window opened Emu.SetContinuousMode(true); diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.cpp b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.cpp index 3d32aaf844..9147b7e5c5 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.cpp +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.cpp @@ -152,16 +152,16 @@ namespace rsx home_menu_settings_video::home_menu_settings_video(s16 x, s16 y, u16 width, u16 height, bool use_separators, home_menu_page* parent) : home_menu_settings_page(x, y, width, height, use_separators, parent, get_localized_string(localized_string_id::HOME_MENU_SETTINGS_VIDEO)) { + 
add_unsigned_slider(&g_cfg.video.resolution_scale_percent, localized_string_id::HOME_MENU_SETTINGS_VIDEO_RESOLUTION_SCALE_PERCENT, "%", 25); + add_unsigned_slider(&g_cfg.video.min_scalable_dimension, localized_string_id::HOME_MENU_SETTINGS_VIDEO_RESOLUTION_SCALE_THRESHOLD, "px", 1); + add_dropdown(&g_cfg.video.vsync, localized_string_id::HOME_MENU_SETTINGS_VIDEO_VSYNC); add_dropdown(&g_cfg.video.frame_limit, localized_string_id::HOME_MENU_SETTINGS_VIDEO_FRAME_LIMIT); add_unsigned_slider(&g_cfg.video.anisotropic_level_override, localized_string_id::HOME_MENU_SETTINGS_VIDEO_ANISOTROPIC_OVERRIDE, "x", 2, {{0, "Auto"}}, {14}); add_dropdown(&g_cfg.video.output_scaling, localized_string_id::HOME_MENU_SETTINGS_VIDEO_OUTPUT_SCALING); - if (g_cfg.video.renderer == video_renderer::vulkan && g_cfg.video.output_scaling == output_scaling_mode::fsr) - { - add_unsigned_slider(&g_cfg.video.rcas_sharpening_intensity, localized_string_id::HOME_MENU_SETTINGS_VIDEO_RCAS_SHARPENING, " %", 1); - } + add_unsigned_slider(&g_cfg.video.rcas_sharpening_intensity, localized_string_id::HOME_MENU_SETTINGS_VIDEO_RCAS_SHARPENING, " %", 1); add_checkbox(&g_cfg.video.stretch_to_display_area, localized_string_id::HOME_MENU_SETTINGS_VIDEO_STRETCH_TO_DISPLAY); @@ -221,6 +221,7 @@ namespace rsx home_menu_settings_overlays::home_menu_settings_overlays(s16 x, s16 y, u16 width, u16 height, bool use_separators, home_menu_page* parent) : home_menu_settings_page(x, y, width, height, use_separators, parent, get_localized_string(localized_string_id::HOME_MENU_SETTINGS_OVERLAYS)) { + add_checkbox(&g_cfg.misc.play_music_during_boot, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_PLAY_MUSIC_DURING_BOOT); add_checkbox(&g_cfg.misc.show_trophy_popups, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_TROPHY_POPUPS); add_checkbox(&g_cfg.misc.show_rpcn_popups, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_RPCN_POPUPS); add_checkbox(&g_cfg.misc.show_shader_compilation_hint, 
localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_SHADER_COMPILATION_HINT); @@ -229,6 +230,7 @@ namespace rsx add_checkbox(&g_cfg.misc.show_pressure_intensity_toggle_hint, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_PRESSURE_INTENSITY_TOGGLE_HINT); add_checkbox(&g_cfg.misc.show_analog_limiter_toggle_hint, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_ANALOG_LIMITER_TOGGLE_HINT); add_checkbox(&g_cfg.misc.show_mouse_and_keyboard_toggle_hint, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_MOUSE_AND_KB_TOGGLE_HINT); + add_checkbox(&g_cfg.misc.show_fatal_error_hints, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_SHOW_FATAL_ERROR_HINTS); add_checkbox(&g_cfg.video.record_with_overlays, localized_string_id::HOME_MENU_SETTINGS_OVERLAYS_RECORD_WITH_OVERLAYS); apply_layout(); @@ -252,10 +254,11 @@ namespace rsx add_dropdown(&g_cfg.video.perf_overlay.position, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_POSITION); add_checkbox(&g_cfg.video.perf_overlay.center_x, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_CENTER_X); add_checkbox(&g_cfg.video.perf_overlay.center_y, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_CENTER_Y); - add_unsigned_slider(&g_cfg.video.perf_overlay.margin_x, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_MARGIN_X, " px", 1); - add_unsigned_slider(&g_cfg.video.perf_overlay.margin_y, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_MARGIN_Y, " px", 1); + add_float_slider(&g_cfg.video.perf_overlay.margin_x, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_MARGIN_X, " %", 0.25f); + add_float_slider(&g_cfg.video.perf_overlay.margin_y, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_MARGIN_Y, " %", 0.25f); add_unsigned_slider(&g_cfg.video.perf_overlay.font_size, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_FONT_SIZE, " px", 1); add_unsigned_slider(&g_cfg.video.perf_overlay.opacity, 
localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_OPACITY, " %", 1); + add_checkbox(&g_cfg.video.perf_overlay.perf_overlay_use_window_space, localized_string_id::HOME_MENU_SETTINGS_PERFORMANCE_OVERLAY_USE_WINDOW_SPACE); apply_layout(); } diff --git a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.h b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.h index 13f47eb41e..ba8b730d44 100644 --- a/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.h +++ b/rpcs3/Emu/RSX/Overlays/HomeMenu/overlay_home_menu_settings.h @@ -10,8 +10,6 @@ namespace rsx { namespace overlays { - void play_sound(sound_effect sound, std::optional volume); - struct home_menu_settings : public home_menu_page { public: diff --git a/rpcs3/Emu/RSX/Overlays/overlay_checkbox.cpp b/rpcs3/Emu/RSX/Overlays/overlay_checkbox.cpp index f7cb19237e..304c07a1f6 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_checkbox.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_checkbox.cpp @@ -84,7 +84,7 @@ namespace rsx::overlays void switchbox::set_size(u16 w, u16 h) { - const u16 dim = std::min(w, h); + const u16 dim = std::max(std::min(w, h), 14); box_layout::set_size(w, h); clear_items(); @@ -93,21 +93,17 @@ namespace rsx::overlays m_back_ellipse = nullptr; m_front_circle = nullptr; - if (dim < 4) - { - return; - } - auto ellipse_part = std::make_unique(); - auto circle_part = std::make_unique(); + auto circle_part = std::make_unique(); - ellipse_part->set_size(dim * 2, dim / 2); - ellipse_part->set_pos(0, dim / 4); - ellipse_part->radius = dim / 4; + ellipse_part->set_size(dim * 2, dim); + ellipse_part->set_padding(1); + ellipse_part->set_pos(0, 0); + ellipse_part->border_radius = (dim - 4) / 2; // Avoid perfect capsule shape since we want a border and perfect capsules can have a false border along the midline due to subgroup shenanigans circle_part->set_size(dim, dim); + circle_part->set_padding(4); circle_part->set_pos(0, 0); - circle_part->radius = dim / 2; m_back_ellipse = 
add_element(ellipse_part); m_front_circle = add_element(circle_part); @@ -130,13 +126,18 @@ namespace rsx::overlays if (m_is_checked) { - m_back_ellipse->back_color = this->fore_color * 0.5f; + m_back_ellipse->border_color.a = 0.f; + m_back_ellipse->border_size = 0; + m_back_ellipse->back_color = this->fore_color * 0.75f; m_back_ellipse->back_color.a = 1.f; - m_front_circle->back_color = this->fore_color; + m_front_circle->back_color = color4f(1.f); m_front_circle->set_pos(this->x + m_front_circle->w, this->y); } else { + m_back_ellipse->border_color = this->back_color * 0.75f; + m_back_ellipse->border_color.a = 1.f; + m_back_ellipse->border_size = 1; m_back_ellipse->back_color = this->back_color * 0.5f; m_back_ellipse->back_color.a = 1.f; m_front_circle->back_color = this->back_color; diff --git a/rpcs3/Emu/RSX/Overlays/overlay_controls.cpp b/rpcs3/Emu/RSX/Overlays/overlay_controls.cpp index 02b0ff5a68..5208efd747 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_controls.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_controls.cpp @@ -54,6 +54,43 @@ namespace rsx return result; } + void compiled_resource::sdf_config_t::transform(const areaf& target_viewport, const sizef& virtual_viewport) + { + const f32 scale_x = target_viewport.width() / virtual_viewport.width; + const f32 scale_y = target_viewport.height() / virtual_viewport.height; + + // Ideally the average should match the x and y scaling but arithmetic drift shifts the values around a bit. + // Also we need a way to define perfect circles when the aspect ratio is not respected. + const f32 scale_av = (scale_x + scale_y) / 2; + + hx *= scale_x; + hy *= scale_y; + br *= scale_av; + bw *= scale_av; + + // Border radius clamp + br = std::min({ br, hx, hy }); + + // Compute the function's origin. Account for flipped viewports as well. 
+ if (target_viewport.x2 < target_viewport.x1) + { + cx = target_viewport.width() - (cx * scale_x) + target_viewport.x2; + } + else + { + cx = cx * scale_x + target_viewport.x1; + } + + if (target_viewport.y2 < target_viewport.y1) + { + cy = target_viewport.height() - (cy * scale_y) + target_viewport.y2; + } + else + { + cy = cy * scale_y + target_viewport.y1; + } + } + image_info::image_info(const std::string& filename, bool grayscaled) { fs::file f(filename, fs::read + fs::isfile); @@ -259,6 +296,12 @@ namespace rsx { v += vertex(x_offset, y_offset, 0.f, 0.f); } + + if (draw_commands[n].config.sdf_config.func != sdf_function::none) + { + draw_commands[n].config.sdf_config.cx += x_offset; + draw_commands[n].config.sdf_config.cy += y_offset; + } } } @@ -275,6 +318,12 @@ namespace rsx v += vertex(x_offset, y_offset, 0.f, 0.f); } + if (draw_commands[n].config.sdf_config.func != sdf_function::none) + { + draw_commands[n].config.sdf_config.cx += x_offset; + draw_commands[n].config.sdf_config.cy += y_offset; + } + draw_commands[n].config.clip_rect = clip_rect; draw_commands[n].config.clip_region = true; } @@ -382,25 +431,25 @@ namespace rsx m_is_compiled = false; } - void overlay_element::set_text(const std::string& text) + void overlay_element::set_text(std::string_view text) { std::u32string new_text = utf8_to_u32string(text); const bool is_dirty = this->text != new_text; - this->text = std::move(new_text); if (is_dirty) { + this->text = std::move(new_text); m_is_compiled = false; } } - void overlay_element::set_unicode_text(const std::u32string& text) + void overlay_element::set_unicode_text(std::u32string_view text) { const bool is_dirty = this->text != text; - this->text = text; if (is_dirty) { + this->text = text; m_is_compiled = false; } } @@ -585,6 +634,25 @@ namespace rsx return result; } + void overlay_element::configure_sdf(compiled_resource::command_config& config, sdf_function func) + { + const f32 rx = static_cast(x) + padding_left; + const f32 rw = 
static_cast(w) - (padding_left + padding_right); + const f32 ry = static_cast(y) + padding_top; + const f32 rh = static_cast(h) - (padding_top + padding_bottom); + + config.sdf_config.func = func; + config.sdf_config.cx = rx + (rw / 2.f); + config.sdf_config.cy = ry + (rh / 2.f); + config.sdf_config.hx = rw / 2.f; + config.sdf_config.hy = rh / 2.f; + config.sdf_config.br = 0.f; + config.sdf_config.bw = border_size; + config.sdf_config.border_color = border_color; + + config.disable_vertex_snap = true; + } + compiled_resource& overlay_element::get_compiled() { if (is_compiled()) @@ -609,6 +677,14 @@ namespace rsx config.pulse_sinus_offset = pulse_sinus_offset; config.pulse_speed_modifier = pulse_speed_modifier; + if (border_size != 0 && + border_color.a > 0.f && + w > border_size && + h > border_size) + { + configure_sdf(config, sdf_function::box); + } + auto& verts = compiled_resources_temp.draw_commands.front().verts; verts.resize(4); @@ -1095,82 +1171,33 @@ namespace rsx return compiled_resources; } -#ifdef __APPLE__ - if (true) -#else - if (radius == 0 || radius > (w / 2)) -#endif + overlay_element::get_compiled(); + auto& config = compiled_resources.draw_commands.front().config; + configure_sdf(config, sdf_function::rounded_box); + config.sdf_config.br = std::min({ static_cast(border_radius), config.sdf_config.hx, config.sdf_config.hy }); + + m_is_compiled = true; + return compiled_resources; + } + + compiled_resource& ellipse::get_compiled() + { + if (is_compiled()) + { + return compiled_resources; + } + + compiled_resources.clear(); + + if (!is_visible()) { - // Invalid radius - compiled_resources = overlay_element::get_compiled(); m_is_compiled = true; return compiled_resources; } - compiled_resource compiled_resources_temp = {}; - compiled_resources_temp.append({}); // Bg horizontal mid - compiled_resources_temp.append({}); // Bg horizontal top - compiled_resources_temp.append({}); // Bg horizontal bottom - compiled_resources_temp.append({}); // Bg 
upper-left - compiled_resources_temp.append({}); // Bg lower-left - compiled_resources_temp.append({}); // Bg upper-right - compiled_resources_temp.append({}); // Bg lower-right - - for (auto& draw_cmd : compiled_resources_temp.draw_commands) - { - auto& config = draw_cmd.config; - config.color = back_color; - config.disable_vertex_snap = true; - config.pulse_glow = pulse_effect_enabled; - config.pulse_sinus_offset = pulse_sinus_offset; - config.pulse_speed_modifier = pulse_speed_modifier; - } - - auto& bg0 = compiled_resources_temp.draw_commands[0]; - auto& bg1 = compiled_resources_temp.draw_commands[1]; - auto& bg2 = compiled_resources_temp.draw_commands[2]; - - bg0.verts.emplace_back(f32(x), f32(y + radius), 0.f, 0.f); - bg0.verts.emplace_back(f32(x + w), f32(y + radius), 0.f, 0.f); - bg0.verts.emplace_back(f32(x), f32(y + h) - radius, 0.f, 0.f); - bg0.verts.emplace_back(f32(x + w), f32(y + h) - radius, 0.f, 0.f); - - bg1.verts.emplace_back(f32(x + radius), f32(y), 0.f, 0.f); - bg1.verts.emplace_back(f32(x + w) - radius, f32(y), 0.f, 0.f); - bg1.verts.emplace_back(f32(x + radius), f32(y + radius), 0.f, 0.f); - bg1.verts.emplace_back(f32(x + w) - radius, f32(y + radius), 0.f, 0.f); - - bg2.verts.emplace_back(f32(x + radius), f32(y + h) - radius, 0.f, 0.f); - bg2.verts.emplace_back(f32(x + w) - radius, f32(y + h) - radius, 0.f, 0.f); - bg2.verts.emplace_back(f32(x + radius), f32(y + h), 0.f, 0.f); - bg2.verts.emplace_back(f32(x + w) - radius, f32(y + h), 0.f, 0.f); - - // Generate the quadrants - const f32 corners[4][2] = - { - { f32(x + radius), f32(y + radius) }, - { f32(x + radius), f32(y + h) - radius }, - { f32(x + w) - radius, f32(y + radius) }, - { f32(x + w) - radius, f32(y + h) - radius } - }; - - const f32 radius_f = static_cast(radius); - const f32 scale[4][2] = - { - { -radius_f, -radius_f }, - { -radius_f, +radius_f }, - { +radius_f, -radius_f }, - { +radius_f, +radius_f } - }; - - for (int i = 0; i < 4; ++i) - { - auto& command = 
compiled_resources_temp.draw_commands[i + 3]; - command.config.primitives = rsx::overlays::primitive_type::triangle_fan; - command.verts = generate_unit_quadrant(num_control_points, corners[i], scale[i]); - } - - compiled_resources.add(std::move(compiled_resources_temp), margin_left, margin_top); + rounded_rect::get_compiled(); + auto& config = compiled_resources.draw_commands.front().config; + configure_sdf(config, sdf_function::ellipse); m_is_compiled = true; return compiled_resources; diff --git a/rpcs3/Emu/RSX/Overlays/overlay_controls.h b/rpcs3/Emu/RSX/Overlays/overlay_controls.h index dcfe33b199..8fa835f595 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_controls.h +++ b/rpcs3/Emu/RSX/Overlays/overlay_controls.h @@ -31,6 +31,14 @@ namespace rsx triangle_fan = 4 }; + enum class sdf_function : u8 + { + none = 0, + ellipse, + box, + rounded_box, + }; + struct image_info_base { int w = 0, h = 0, channels = 0; @@ -95,6 +103,25 @@ namespace rsx struct compiled_resource { + struct sdf_config_t + { + sdf_function func = sdf_function::none; + + f32 cx; // Center x + f32 cy; // Center y + f32 hx; // Half-size in X + f32 hy; // Half-size in Y + f32 br; // Border radius + f32 bw; // Border width + + color4f border_color; + + // Transform a SDF definition from one reference frame to another + // Target viewport - your actual render area + // Virtual viewport - the internal design viewport + void transform(const areaf& target_viewport, const sizef& virtual_viewport); + }; + struct command_config { primitive_type primitives = primitive_type::quad_list; @@ -105,6 +132,8 @@ namespace rsx f32 pulse_sinus_offset = 0.0f; // The current pulse offset f32 pulse_speed_modifier = 0.005f; + sdf_config_t sdf_config; + areaf clip_rect = {}; bool clip_region = false; @@ -171,6 +200,9 @@ namespace rsx f32 pulse_sinus_offset = 0.0f; // The current pulse offset f32 pulse_speed_modifier = 0.005f; + u8 border_size = 0; + color4f border_color = { 0.f, 0.f, 0.f, 1.f }; + // Analog to 
command_config::get_sinus_value // Apply modifier for sinus pulse. Resets the pulse. For example: // 0 -> reset to 0.5 rising @@ -210,8 +242,8 @@ namespace rsx // NOTE: Functions as a simple position offset. Top left corner is the anchor. virtual void set_margin(u16 left, u16 top); virtual void set_margin(u16 margin); - virtual void set_text(const std::string& text); - virtual void set_unicode_text(const std::u32string& text); + virtual void set_text(std::string_view text); + virtual void set_unicode_text(std::u32string_view text); void set_text(localized_string_id id); void set_text(const localized_string& container); virtual void set_font(const char* font_name, u16 font_size); @@ -237,6 +269,8 @@ namespace rsx protected: bool m_is_compiled = false; // Only use m_is_compiled as a getter in is_compiled() if possible + + void configure_sdf(compiled_resource::command_config& config, sdf_function func); }; struct layout_container : public overlay_element @@ -316,13 +350,18 @@ namespace rsx struct rounded_rect : public overlay_element { - u8 radius = 5; - u8 num_control_points = 8; // Smoothness control + u16 border_radius = 5; using overlay_element::overlay_element; compiled_resource& get_compiled() override; }; + struct ellipse : public rounded_rect + { + using rounded_rect::rounded_rect; + compiled_resource& get_compiled() override; + }; + struct image_view : public overlay_element { protected: diff --git a/rpcs3/Emu/RSX/Overlays/overlay_edit_text.cpp b/rpcs3/Emu/RSX/Overlays/overlay_edit_text.cpp index ba3d138f96..4fad1f65e9 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_edit_text.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_edit_text.cpp @@ -95,12 +95,12 @@ namespace rsx } } - void edit_text::set_text(const std::string& text) + void edit_text::set_text(std::string_view text) { set_unicode_text(utf8_to_u32string(text)); } - void edit_text::set_unicode_text(const std::u32string& text) + void edit_text::set_unicode_text(std::u32string_view text) { value = text; diff --git 
a/rpcs3/Emu/RSX/Overlays/overlay_edit_text.hpp b/rpcs3/Emu/RSX/Overlays/overlay_edit_text.hpp index 624580e4f4..7d4f6d93b5 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_edit_text.hpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_edit_text.hpp @@ -26,8 +26,8 @@ namespace rsx using label::label; - void set_text(const std::string& text) override; - void set_unicode_text(const std::u32string& text) override; + void set_text(std::string_view text) override; + void set_unicode_text(std::u32string_view text) override; void set_placeholder(const std::u32string& placeholder_text); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_list_view.cpp b/rpcs3/Emu/RSX/Overlays/overlay_list_view.cpp index 23c45d29bb..943da6271b 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_list_view.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_list_view.cpp @@ -17,7 +17,7 @@ namespace rsx scroll_indicator_grip->set_pos(1, 0); scroll_indicator_grip->set_size(5, 5); - scroll_indicator_grip->radius = 2; + scroll_indicator_grip->border_radius = 2; scroll_indicator_track->set_size(7, height); m_scroll_indicator = std::make_unique(); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_message.cpp b/rpcs3/Emu/RSX/Overlays/overlay_message.cpp index 304e54de1e..1c49401d02 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_message.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_message.cpp @@ -90,7 +90,7 @@ namespace rsx return m_loc_id == id; } - bool message_item::text_matches(const std::u32string& text) const + bool message_item::text_matches(std::u32string_view text) const { return m_text.text == text; } diff --git a/rpcs3/Emu/RSX/Overlays/overlay_message.h b/rpcs3/Emu/RSX/Overlays/overlay_message.h index 219103e843..e8cb1a3285 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_message.h +++ b/rpcs3/Emu/RSX/Overlays/overlay_message.h @@ -31,7 +31,7 @@ namespace rsx compiled_resource& get_compiled() override; bool id_matches(localized_string_id id) const; - bool text_matches(const std::u32string& text) const; + bool text_matches(std::u32string_view text) 
const; void set_label_text(const std::string& text); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp index 8b68357208..635833c9d8 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.cpp @@ -6,6 +6,7 @@ #include "Emu/Cell/PPUThread.h" #include +#include #include #include "util/cpu_stats.hpp" @@ -93,7 +94,6 @@ namespace rsx { // left, top, right, bottom const areau padding { m_padding, m_padding - std::min(4, m_padding), m_padding, m_padding }; - const positionu margin { m_margin_x, m_margin_y }; positionu pos; u16 graph_width = 0; @@ -116,6 +116,26 @@ namespace rsx graph_height += m_padding; } + const u16 overlay_width = std::max(m_body.w, graph_width); + const u16 overlay_height = static_cast(m_body.h + graph_height); + const auto percent_to_margin_px = [](f32 margin_percent, u16 virtual_size, u16 overlay_size) -> u32 + { + if (overlay_size >= virtual_size) + { + return 0; + } + + const u32 max_margin = virtual_size - overlay_size; + const u32 margin_px = static_cast(std::lround((std::clamp(margin_percent, 0.0f, 100.0f) / 100.0f) * max_margin)); + return std::min(margin_px, max_margin); + }; + + const positionu margin + { + percent_to_margin_px(m_margin_x, m_virtual_width, overlay_width), + percent_to_margin_px(m_margin_y, m_virtual_height, overlay_height) + }; + switch (m_quadrant) { case screen_quadrant::top_left: @@ -123,27 +143,27 @@ namespace rsx pos.y = margin.y; break; case screen_quadrant::top_right: - pos.x = virtual_width - std::max(m_body.w, graph_width) - margin.x; + pos.x = m_virtual_width - overlay_width - margin.x; pos.y = margin.y; break; case screen_quadrant::bottom_left: pos.x = margin.x; - pos.y = virtual_height - m_body.h - graph_height - margin.y; + pos.y = m_virtual_height - overlay_height - margin.y; break; case screen_quadrant::bottom_right: - pos.x = virtual_width - std::max(m_body.w, graph_width) - margin.x; - pos.y = 
virtual_height - m_body.h - graph_height - margin.y; + pos.x = m_virtual_width - overlay_width - margin.x; + pos.y = m_virtual_height - overlay_height - margin.y; break; } if (m_center_x) { - pos.x = (virtual_width - std::max(m_body.w, graph_width)) / 2; + pos.x = overlay_width >= m_virtual_width ? 0 : (m_virtual_width - overlay_width) / 2; } if (m_center_y) { - pos.y = (virtual_height - m_body.h - graph_height) / 2; + pos.y = overlay_height >= m_virtual_height ? 0 : (m_virtual_height - overlay_height) / 2; } elm.set_pos(pos.x, pos.y); @@ -381,7 +401,7 @@ namespace rsx m_force_repaint = true; } - void perf_metrics_overlay::set_margins(u32 margin_x, u32 margin_y, bool center_x, bool center_y) + void perf_metrics_overlay::set_margins(f32 margin_x, f32 margin_y, bool center_x, bool center_y) { if (m_margin_x == margin_x && m_margin_y == margin_y && m_center_x == center_x && m_center_y == center_y) return; @@ -431,6 +451,38 @@ namespace rsx m_force_update = true; } + void perf_metrics_overlay::set_render_viewport(u16 width, u16 height) + { + u16 new_virtual_width = virtual_width; + u16 new_virtual_height = virtual_height; + + if (use_window_space && width > 0 && height > 0) + { + const double scale_x = static_cast(width) / virtual_width; + const double scale_y = static_cast(height) / virtual_height; + const double scale = std::min(scale_x, scale_y); + + new_virtual_width = static_cast(std::min( + static_cast(std::lround(width / scale)), + std::numeric_limits::max())); + + new_virtual_height = static_cast(std::min( + static_cast(std::lround(height / scale)), + std::numeric_limits::max())); + } + + if (m_virtual_width == new_virtual_width && m_virtual_height == new_virtual_height) + return; + + m_virtual_width = new_virtual_width; + m_virtual_height = new_virtual_height; + + if (m_is_initialised) + { + reset_transforms(); + } + } + void perf_metrics_overlay::update(u64 /*timestamp_us*/) { const auto elapsed_update = m_update_timer.GetElapsedTimeInMilliSec(); @@ -896,7 
+948,8 @@ namespace rsx perf_overlay->set_update_interval(perf_settings.update_interval); perf_overlay->set_font(perf_settings.font); perf_overlay->set_font_size(perf_settings.font_size); - perf_overlay->set_margins(perf_settings.margin_x, perf_settings.margin_y, perf_settings.center_x.get(), perf_settings.center_y.get()); + perf_overlay->set_margins(static_cast(perf_settings.margin_x.get()), static_cast(perf_settings.margin_y.get()), perf_settings.center_x.get(), perf_settings.center_y.get()); + perf_overlay->use_window_space = perf_settings.perf_overlay_use_window_space.get(); perf_overlay->set_opacity(perf_settings.opacity / 100.f); perf_overlay->set_body_colors(perf_settings.color_body, perf_settings.background_body); perf_overlay->set_title_colors(perf_settings.color_title, perf_settings.background_title); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.h b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.h index 4a121b5231..2b676e591b 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.h +++ b/rpcs3/Emu/RSX/Overlays/overlay_perf_metrics.h @@ -37,10 +37,12 @@ namespace rsx u32 m_frames{}; std::string m_font{}; u16 m_font_size{}; - u32 m_margin_x{}; // horizontal distance to the screen border relative to the screen_quadrant in px - u32 m_margin_y{}; // vertical distance to the screen border relative to the screen_quadrant in px + f32 m_margin_x{}; // horizontal distance to the screen border relative to the screen_quadrant in percent of the window width + f32 m_margin_y{}; // vertical distance to the screen border relative to the screen_quadrant in percent of the window height u32 m_padding{}; // space between overlay elements f32 m_opacity{}; // 0..1 + u16 m_virtual_width{virtual_width}; + u16 m_virtual_height{virtual_height}; bool m_center_x{}; // center the overlay horizontally bool m_center_y{}; // center the overlay vertically @@ -96,11 +98,14 @@ namespace rsx void set_update_interval(u32 update_interval); void set_font(std::string font); void 
set_font_size(u16 font_size); - void set_margins(u32 margin_x, u32 margin_y, bool center_x, bool center_y); + void set_margins(f32 margin_x, f32 margin_y, bool center_x, bool center_y); void set_opacity(f32 opacity); void set_body_colors(std::string color, std::string background); void set_title_colors(std::string color, std::string background); void force_next_update(); + void set_render_viewport(u16 width, u16 height) override; + u16 get_virtual_width() const override { return m_virtual_width; } + u16 get_virtual_height() const override { return m_virtual_height; } void update(u64 timestamp_us) override; diff --git a/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.cpp b/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.cpp index 860f54544a..b9140f2a69 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.cpp @@ -58,7 +58,7 @@ namespace rsx set_pos(x + dx, y + dy); } - void progress_bar::set_text(const std::string& str) + void progress_bar::set_text(std::string_view str) { text_view.set_text(str); text_view.align_text(text_align::center); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.hpp b/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.hpp index 6ed6b73c77..d622796dae 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.hpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_progress_bar.hpp @@ -24,7 +24,7 @@ namespace rsx void set_pos(s16 _x, s16 _y) override; void set_size(u16 _w, u16 _h) override; void translate(s16 dx, s16 dy) override; - void set_text(const std::string& str) override; + void set_text(std::string_view str) override; compiled_resource& get_compiled() override; }; diff --git a/rpcs3/Emu/RSX/Overlays/overlay_select.cpp b/rpcs3/Emu/RSX/Overlays/overlay_select.cpp index 5ef6eb254c..6e0d4a4c6c 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_select.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_select.cpp @@ -144,7 +144,7 @@ namespace rsx::overlays auto background = std::make_unique(); background->set_size(w, h); - 
background->radius = std::min(h / 4, 5); + background->border_radius = std::min(h / 4, 5); background->back_color = color4f(0.3f, 0.3f, 0.3f, 1.0f); const u16 arrow_size = std::min(h / 2, max_dropdown_arrow_dimension); diff --git a/rpcs3/Emu/RSX/Overlays/overlay_slider.cpp b/rpcs3/Emu/RSX/Overlays/overlay_slider.cpp index bf00563ef5..4a0d607212 100644 --- a/rpcs3/Emu/RSX/Overlays/overlay_slider.cpp +++ b/rpcs3/Emu/RSX/Overlays/overlay_slider.cpp @@ -8,7 +8,7 @@ namespace rsx::overlays constexpr u16 slider_indicator_radius = 8; constexpr u16 slider_indicator_dia = slider_indicator_radius * 2; constexpr const char* slider_label_font_family = "Arial"; - constexpr int slider_label_font_size = 10; + constexpr int slider_label_font_size = 11; void slider::init() { @@ -29,25 +29,27 @@ namespace rsx::overlays // Base components auto background = std::make_unique(); auto foreground = std::make_unique(); - auto indicator = std::make_unique(); + auto indicator = std::make_unique(); auto value_label = std::make_unique