From 3e1f5a0bfb402a5f175d84ac62ff5ff7b4687457 Mon Sep 17 00:00:00 2001
From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com>
Date: Wed, 21 Jan 2026 09:36:09 -0600
Subject: [PATCH 01/10] Kernel.Vmm: Handle sparse physical memory usage + other
 fixes (#3932)

* Initial work

* Bug fixing

deadlocks and broken unmaps

* Fix more bugs

broken memory pools

* More bug fixing

Still plenty more to fix though

* Even more bug fixing

Finally got Final Fantasy XV back to running, haven't found any more bugs yet.

* More bugfixing

* Update memory.cpp

* Rewrite start

* Fix for oversized unmaps

* Oops

* Update address_space.cpp

* Clang

* Mac fix?

* Track VMA physical areas based on start in VMA

Allows me to simplify some logic, and should (finally) allow merging VMAs in memory code.

* Merge VMAs, fix some bugs

Finally possible thanks to address space + phys tracking changes

* Clang

* Oops

* Oops2

* Oops3

* Bugfixing

* SDK check for coalescing

Just to rule out any issues from games that wouldn't see coalescing in the first place.

* More ReleaseDirectMemory fixes

I really suck at logic some days

* Merge physical areas within VMAs

In games that perform a lot of similar mappings, you can wind up with 1000+ phys areas in one vma.
This should reduce some of the overhead that might cause.

* Hopefully fix Mac compile

Why must their uint64_t be different?

* Mac pt.2

Oops
---
 src/core/address_space.cpp              | 311 ++++++----
 src/core/address_space.h                |  22 +-
 src/core/devtools/widget/memory_map.cpp |   7 +-
 src/core/devtools/widget/memory_map.h   |   4 +-
 src/core/libraries/kernel/memory.cpp    |   1 +
 src/core/memory.cpp                     | 743 ++++++++++++++----------
 src/core/memory.h                       | 149 ++---
 7 files changed, 703 insertions(+), 534 deletions(-)
diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp
index 3f063ea76..422c67e17 100644
--- a/src/core/address_space.cpp
+++ b/src/core/address_space.cpp
@@ -93,7 +93,10 @@ static u64 BackingSize = ORBIS_KERNEL_TOTAL_MEM_DEV_PRO;
 
 struct MemoryRegion {
     VAddr base;
-    size_t size;
+    PAddr phys_base;
+    u64 size;
+    u32 prot;
+    s32 fd;
     bool is_mapped;
 };
 
@@ -159,7 +162,8 @@ struct AddressSpace::Impl {
             // Restrict region size to avoid overly fragmenting the virtual memory space.
             if (info.State == MEM_FREE && info.RegionSize > 0x1000000) {
                 VAddr addr = Common::AlignUp(reinterpret_cast<VAddr>(info.BaseAddress), alignment);
-                regions.emplace(addr, MemoryRegion{addr, size, false});
+                regions.emplace(addr,
+                                MemoryRegion{addr, PAddr(-1), size, PAGE_NOACCESS, -1, false});
             }
         }
 
@@ -207,29 +211,32 @@ struct AddressSpace::Impl {
     ~Impl() {
         if (virtual_base) {
             if (!VirtualFree(virtual_base, 0, MEM_RELEASE)) {
-                LOG_CRITICAL(Render, "Failed to free virtual memory");
+                LOG_CRITICAL(Core, "Failed to free virtual memory");
             }
         }
         if (backing_base) {
             if (!UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) {
-                LOG_CRITICAL(Render, "Failed to unmap backing memory placeholder");
+                LOG_CRITICAL(Core, "Failed to unmap backing memory placeholder");
             }
             if (!VirtualFreeEx(process, backing_base, 0, MEM_RELEASE)) {
-                LOG_CRITICAL(Render, "Failed to free backing memory");
+                LOG_CRITICAL(Core, "Failed to free backing memory");
             }
         }
         if (!CloseHandle(backing_handle)) {
-            LOG_CRITICAL(Render, "Failed to free backing memory file handle");
+            LOG_CRITICAL(Core, "Failed to free backing memory file handle");
         }
     }
 
-    void* Map(VAddr virtual_addr, PAddr phys_addr, size_t size, ULONG prot, uintptr_t fd = 0) {
-        // Before mapping we must carve a placeholder with the exact properties of our mapping.
-        auto* region = EnsureSplitRegionForMapping(virtual_addr, size);
-        region->is_mapped = true;
+    void* MapRegion(MemoryRegion* region) {
+        VAddr virtual_addr = region->base;
+        PAddr phys_addr = region->phys_base;
+        u64 size = region->size;
+        ULONG prot = region->prot;
+        s32 fd = region->fd;
+
         void* ptr = nullptr;
         if (phys_addr != -1) {
-            HANDLE backing = fd ? reinterpret_cast<HANDLE>(fd) : backing_handle;
+            HANDLE backing = fd != -1 ? reinterpret_cast<HANDLE>(fd) : backing_handle;
             if (fd && prot == PAGE_READONLY) {
                 DWORD resultvar;
                 ptr = VirtualAlloc2(process, reinterpret_cast<PVOID>(virtual_addr), size,
@@ -257,110 +264,136 @@ struct AddressSpace::Impl {
         return ptr;
     }
 
-    void Unmap(VAddr virtual_addr, size_t size, bool has_backing) {
-        bool ret;
-        if (has_backing) {
+    void UnmapRegion(MemoryRegion* region) {
+        VAddr virtual_addr = region->base;
+        PAddr phys_base = region->phys_base;
+        u64 size = region->size;
+
+        bool ret = false;
+        if (phys_base != -1) {
             ret = UnmapViewOfFile2(process, reinterpret_cast<PVOID>(virtual_addr),
                                    MEM_PRESERVE_PLACEHOLDER);
         } else {
             ret = VirtualFreeEx(process, reinterpret_cast<PVOID>(virtual_addr), size,
                                 MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER);
         }
-        ASSERT_MSG(ret, "Unmap operation on virtual_addr={:#X} failed: {}", virtual_addr,
+        ASSERT_MSG(ret, "Unmap on virtual_addr {:#x}, size {:#x} failed: {}", virtual_addr, size,
                    Common::GetLastErrorMsg());
-
-        // The unmap call will create a new placeholder region. We need to see if we can coalesce it
-        // with neighbors.
-        JoinRegionsAfterUnmap(virtual_addr, size);
     }
 
-    // The following code is inspired from Dolphin's MemArena
-    // https://github.com/dolphin-emu/dolphin/blob/deee3ee4/Source/Core/Common/MemArenaWin.cpp#L212
-    MemoryRegion* EnsureSplitRegionForMapping(VAddr address, size_t size) {
-        // Find closest region that is <= the given address by using upper bound and decrementing
-        auto it = regions.upper_bound(address);
-        ASSERT_MSG(it != regions.begin(), "Invalid address {:#x}", address);
-        --it;
-        ASSERT_MSG(!it->second.is_mapped,
-                   "Attempt to map {:#x} with size {:#x} which overlaps with {:#x} mapping",
-                   address, size, it->second.base);
-        auto& [base, region] = *it;
+    void SplitRegion(VAddr virtual_addr, u64 size) {
+        // First, get the region this range covers
+        auto it = std::prev(regions.upper_bound(virtual_addr));
 
-        const VAddr mapping_address = region.base;
-        const size_t region_size = region.size;
-        if (mapping_address == address) {
-            // If this region is already split up correctly we don't have to do anything
-            if (region_size == size) {
-                return &region;
+        // All unmapped areas will coalesce, so there should be a region
+        // containing the full requested range. If not, then something is mapped here.
+        ASSERT_MSG(it->second.base + it->second.size >= virtual_addr + size,
+                   "Cannot fit region into one placeholder");
+
+        // If the region is mapped, we need to unmap first before we can modify the placeholders.
+        if (it->second.is_mapped) {
+            ASSERT_MSG(it->second.phys_base != -1 || !it->second.is_mapped,
+                       "Cannot split unbacked mapping");
+            UnmapRegion(&it->second);
+        }
+
+        // We need to split this region to create a matching placeholder.
+        if (it->second.base != virtual_addr) {
+            // Requested address is not the start of the containing region,
+            // create a new region to represent the memory before the requested range.
+            auto& region = it->second;
+            u64 base_offset = virtual_addr - region.base;
+            u64 next_region_size = region.size - base_offset;
+            PAddr next_region_phys_base = -1;
+            if (region.is_mapped) {
+                next_region_phys_base = region.phys_base + base_offset;
             }
+            region.size = base_offset;
 
-            ASSERT_MSG(region_size >= size,
-                       "Region with address {:#x} and size {:#x} can't fit {:#x}", mapping_address,
-                       region_size, size);
-
-            // Split the placeholder.
-            if (!VirtualFreeEx(process, LPVOID(address), size,
+            // Use VirtualFreeEx to create the split.
+            if (!VirtualFreeEx(process, LPVOID(region.base), region.size,
                                MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) {
                 UNREACHABLE_MSG("Region splitting failed: {}", Common::GetLastErrorMsg());
-                return nullptr;
             }
 
-            // Update tracked mappings and return the first of the two
+            // If the mapping was mapped, remap the region.
+            if (region.is_mapped) {
+                MapRegion(&region);
+            }
+
+            // Store a new region matching the removed area.
+            it = regions.emplace_hint(std::next(it), virtual_addr,
+                                      MemoryRegion(virtual_addr, next_region_phys_base,
+                                                   next_region_size, region.prot, region.fd,
+                                                   region.is_mapped));
+        }
+
+        // At this point, the region's base will match virtual_addr.
+        // Now check for a size difference.
+        if (it->second.size != size) {
+            // The requested size is smaller than the current region placeholder.
+            // Update region to match the requested region,
+            // then make a new region to represent the remaining space.
+            auto& region = it->second;
+            VAddr next_region_addr = region.base + size;
+            u64 next_region_size = region.size - size;
+            PAddr next_region_phys_base = -1;
+            if (region.is_mapped) {
+                next_region_phys_base = region.phys_base + size;
+            }
             region.size = size;
-            const VAddr new_mapping_start = address + size;
-            regions.emplace_hint(std::next(it), new_mapping_start,
-                                 MemoryRegion(new_mapping_start, region_size - size, false));
-            return &region;
+
+            // Store the new region matching the remaining space
+            regions.emplace_hint(std::next(it), next_region_addr,
+                                 MemoryRegion(next_region_addr, next_region_phys_base,
+                                              next_region_size, region.prot, region.fd,
+                                              region.is_mapped));
+
+            // Use VirtualFreeEx to create the split.
+            if (!VirtualFreeEx(process, LPVOID(region.base), region.size,
+                               MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) {
+                UNREACHABLE_MSG("Region splitting failed: {}", Common::GetLastErrorMsg());
+            }
+
+            // If these regions were mapped, then map the unmapped area beyond the requested range.
+            if (region.is_mapped) {
+                MapRegion(&std::next(it)->second);
+            }
         }
 
-        ASSERT(mapping_address < address);
-
-        // Is there enough space to map this?
-        const size_t offset_in_region = address - mapping_address;
-        const size_t minimum_size = size + offset_in_region;
-        ASSERT(region_size >= minimum_size);
-
-        // Split the placeholder.
-        if (!VirtualFreeEx(process, LPVOID(address), size,
-                           MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) {
-            UNREACHABLE_MSG("Region splitting failed: {}", Common::GetLastErrorMsg());
-            return nullptr;
-        }
-
-        // Do we now have two regions or three regions?
-        if (region_size == minimum_size) {
-            // Split into two; update tracked mappings and return the second one
-            region.size = offset_in_region;
-            it = regions.emplace_hint(std::next(it), address, MemoryRegion(address, size, false));
-            return &it->second;
-        } else {
-            // Split into three; update tracked mappings and return the middle one
-            region.size = offset_in_region;
-            const VAddr middle_mapping_start = address;
-            const size_t middle_mapping_size = size;
-            const VAddr after_mapping_start = address + size;
-            const size_t after_mapping_size = region_size - minimum_size;
-            it = regions.emplace_hint(std::next(it), after_mapping_start,
-                                      MemoryRegion(after_mapping_start, after_mapping_size, false));
-            it = regions.emplace_hint(
-                it, middle_mapping_start,
-                MemoryRegion(middle_mapping_start, middle_mapping_size, false));
-            return &it->second;
+        // If the requested region was mapped, remap it.
+        if (it->second.is_mapped) {
+            MapRegion(&it->second);
         }
     }
 
-    void JoinRegionsAfterUnmap(VAddr address, size_t size) {
-        // There should be a mapping that matches the request exactly, find it
-        auto it = regions.find(address);
-        ASSERT_MSG(it != regions.end() && it->second.size == size,
-                   "Invalid address/size given to unmap.");
+    void* Map(VAddr virtual_addr, PAddr phys_addr, u64 size, ULONG prot, s32 fd = -1) {
+        // Carve a placeholder covering exactly [virtual_addr, virtual_addr + size).
+        SplitRegion(virtual_addr, size);
+
+        // Look up the tracked region that now starts at or before virtual_addr.
+        auto it = std::prev(regions.upper_bound(virtual_addr));
         auto& [base, region] = *it;
-        region.is_mapped = false;
+        // A region that is already mapped must never be mapped over.
+        ASSERT_MSG(!region.is_mapped, "Cannot overwrite mapped region");
+
+        // Record the mapping parameters in the region, then let MapRegion perform the mapping.
+        region.is_mapped = true;
+        region.phys_base = phys_addr;
+        region.prot = prot;
+        region.fd = fd;
+        return MapRegion(&region);
+    }
+
+    void CoalesceFreeRegions(VAddr virtual_addr) {
+        // First, get the region to update
+        auto it = std::prev(regions.upper_bound(virtual_addr));
+        ASSERT_MSG(!it->second.is_mapped, "Cannot coalesce mapped regions");
 
         // Check if a placeholder exists right before us.
         auto it_prev = it != regions.begin() ? std::prev(it) : regions.end();
         if (it_prev != regions.end() && !it_prev->second.is_mapped) {
-            const size_t total_size = it_prev->second.size + size;
+            const u64 total_size = it_prev->second.size + it->second.size;
             if (!VirtualFreeEx(process, LPVOID(it_prev->first), total_size,
                                MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS)) {
                 UNREACHABLE_MSG("Region coalescing failed: {}", Common::GetLastErrorMsg());
@@ -374,7 +407,7 @@ struct AddressSpace::Impl {
         // Check if a placeholder exists right after us.
         auto it_next = std::next(it);
         if (it_next != regions.end() && !it_next->second.is_mapped) {
-            const size_t total_size = it->second.size + it_next->second.size;
+            const u64 total_size = it->second.size + it_next->second.size;
             if (!VirtualFreeEx(process, LPVOID(it->first), total_size,
                                MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS)) {
                 UNREACHABLE_MSG("Region coalescing failed: {}", Common::GetLastErrorMsg());
@@ -385,7 +418,47 @@ struct AddressSpace::Impl {
         }
     }
 
-    void Protect(VAddr virtual_addr, size_t size, bool read, bool write, bool execute) {
+    void Unmap(VAddr virtual_addr, u64 size) {
+        // Unmap the requested range one tracked region at a time; it may span several regions.
+        u64 remaining_size = size;
+        VAddr current_addr = virtual_addr;
+        while (remaining_size > 0) {
+            // Find the tracked region whose base is at or below the current address.
+            auto it = std::prev(regions.upper_bound(current_addr));
+
+            // Clamp this step to the end of the current region, and split the region first
+            // whenever the step does not cover it exactly, so the unmap matches a placeholder.
+            u64 base_offset = current_addr - it->second.base;
+            u64 size_to_unmap = std::min<u64>(it->second.size - base_offset, remaining_size);
+            if (current_addr != it->second.base || size_to_unmap != it->second.size) {
+                SplitRegion(current_addr, size_to_unmap);
+            }
+
+            // SplitRegion may have inserted map entries, so look the region up again.
+            it = std::prev(regions.upper_bound(current_addr));
+            auto& [base, region] = *it;
+
+            // Only regions that are actually mapped need an OS-level unmap.
+            if (region.is_mapped) {
+                UnmapRegion(&region);
+            }
+
+            // Reset the bookkeeping so the region is tracked as a free placeholder.
+            region.is_mapped = false;
+            region.fd = -1;
+            region.phys_base = -1;
+            region.prot = PAGE_NOACCESS;
+
+            // Merge the freed placeholder with any free neighbours.
+            CoalesceFreeRegions(current_addr);
+
+            // Advance to the part of the range that is still left.
+            remaining_size -= size_to_unmap;
+            current_addr += size_to_unmap;
+        }
+    }
+
+    void Protect(VAddr virtual_addr, u64 size, bool read, bool write, bool execute) {
         DWORD new_flags{};
 
         if (write && !read) {
@@ -415,7 +488,7 @@ struct AddressSpace::Impl {
 
         // If no flags are assigned, then something's gone wrong.
         if (new_flags == 0) {
-            LOG_CRITICAL(Common_Memory,
+            LOG_CRITICAL(Core,
                          "Unsupported protection flag combination for address {:#x}, size {}, "
                          "read={}, write={}, execute={}",
                          virtual_addr, size, read, write, execute);
@@ -429,8 +502,8 @@ struct AddressSpace::Impl {
                 continue;
             }
             const auto& region = it->second;
-            const size_t range_addr = std::max(region.base, virtual_addr);
-            const size_t range_size = std::min(region.base + region.size, virtual_end) - range_addr;
+            const u64 range_addr = std::max(region.base, virtual_addr);
+            const u64 range_size = std::min(region.base + region.size, virtual_end) - range_addr;
             DWORD old_flags{};
             if (!VirtualProtectEx(process, LPVOID(range_addr), range_size, new_flags, &old_flags)) {
                 UNREACHABLE_MSG(
@@ -453,11 +526,11 @@ struct AddressSpace::Impl {
     u8* backing_base{};
     u8* virtual_base{};
     u8* system_managed_base{};
-    size_t system_managed_size{};
+    u64 system_managed_size{};
     u8* system_reserved_base{};
-    size_t system_reserved_size{};
+    u64 system_reserved_size{};
     u8* user_base{};
-    size_t user_size{};
+    u64 user_size{};
     std::map<VAddr, MemoryRegion> regions;
 };
 #else
@@ -601,7 +674,7 @@ struct AddressSpace::Impl {
         }
     }
 
-    void* Map(VAddr virtual_addr, PAddr phys_addr, size_t size, PosixPageProtection prot,
+    void* Map(VAddr virtual_addr, PAddr phys_addr, u64 size, PosixPageProtection prot,
               int fd = -1) {
         m_free_regions.subtract({virtual_addr, virtual_addr + size});
         const int handle = phys_addr != -1 ? (fd == -1 ? backing_fd : fd) : -1;
@@ -613,10 +686,10 @@ struct AddressSpace::Impl {
         return ret;
     }
 
-    void Unmap(VAddr virtual_addr, size_t size, bool) {
+    void Unmap(VAddr virtual_addr, u64 size, bool) {
         // Check to see if we are adjacent to any regions.
-        auto start_address = virtual_addr;
-        auto end_address = start_address + size;
+        VAddr start_address = virtual_addr;
+        VAddr end_address = start_address + size;
         auto it = m_free_regions.find({start_address - 1, end_address + 1});
 
         // If we are, join with them, ensuring we stay in bounds.
@@ -634,7 +707,7 @@ struct AddressSpace::Impl {
         ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
     }
 
-    void Protect(VAddr virtual_addr, size_t size, bool read, bool write, bool execute) {
+    void Protect(VAddr virtual_addr, u64 size, bool read, bool write, bool execute) {
         int flags = PROT_NONE;
         if (read) {
             flags |= PROT_READ;
@@ -654,11 +727,11 @@ struct AddressSpace::Impl {
     int backing_fd;
     u8* backing_base{};
     u8* system_managed_base{};
-    size_t system_managed_size{};
+    u64 system_managed_size{};
     u8* system_reserved_base{};
-    size_t system_reserved_size{};
+    u64 system_reserved_size{};
     u8* user_base{};
-    size_t user_size{};
+    u64 user_size{};
     boost::icl::interval_set<VAddr> m_free_regions;
 };
 #endif
@@ -675,8 +748,7 @@ AddressSpace::AddressSpace() : impl{std::make_unique<Impl>()} {
 
 AddressSpace::~AddressSpace() = default;
 
-void* AddressSpace::Map(VAddr virtual_addr, size_t size, u64 alignment, PAddr phys_addr,
-                        bool is_exec) {
+void* AddressSpace::Map(VAddr virtual_addr, u64 size, PAddr phys_addr, bool is_exec) {
 #if ARCH_X86_64
     const auto prot = is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
 #else
@@ -687,8 +759,7 @@ void* AddressSpace::Map(VAddr virtual_addr, size_t size, u64 alignment, PAddr ph
     return impl->Map(virtual_addr, phys_addr, size, prot);
 }
 
-void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot,
-                            uintptr_t fd) {
+void* AddressSpace::MapFile(VAddr virtual_addr, u64 size, u64 offset, u32 prot, uintptr_t fd) {
 #ifdef _WIN32
     return impl->Map(virtual_addr, offset, size,
                      ToWindowsProt(std::bit_cast<Core::MemoryProt>(prot)), fd);
@@ -698,31 +769,15 @@ void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32
 #endif
 }
 
-void AddressSpace::Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma,
-                         PAddr phys_base, bool is_exec, bool has_backing, bool readonly_file) {
+void AddressSpace::Unmap(VAddr virtual_addr, u64 size, bool has_backing) {
 #ifdef _WIN32
-    // There does not appear to be comparable support for partial unmapping on Windows.
-    // Unfortunately, a least one title was found to require this. The workaround is to unmap
-    // the entire allocation and remap the portions outside of the requested unmapping range.
-    impl->Unmap(virtual_addr, size, has_backing && !readonly_file);
-
-    // TODO: Determine if any titles require partial unmapping support for un-backed allocations.
-    ASSERT_MSG(has_backing || (start_in_vma == 0 && end_in_vma == size),
-               "Partial unmapping of un-backed allocations is not supported");
-
-    if (start_in_vma != 0) {
-        Map(virtual_addr, start_in_vma, 0, phys_base, is_exec);
-    }
-
-    if (end_in_vma != size) {
-        Map(virtual_addr + end_in_vma, size - end_in_vma, 0, phys_base + end_in_vma, is_exec);
-    }
+    impl->Unmap(virtual_addr, size);
 #else
-    impl->Unmap(virtual_addr + start_in_vma, end_in_vma - start_in_vma, has_backing);
+    impl->Unmap(virtual_addr, size, has_backing);
 #endif
 }
 
-void AddressSpace::Protect(VAddr virtual_addr, size_t size, MemoryPermission perms) {
+void AddressSpace::Protect(VAddr virtual_addr, u64 size, MemoryPermission perms) {
     const bool read = True(perms & MemoryPermission::Read);
     const bool write = True(perms & MemoryPermission::Write);
     const bool execute = True(perms & MemoryPermission::Execute);
diff --git a/src/core/address_space.h b/src/core/address_space.h
index 5c50039bd..fa47bb47e 100644
--- a/src/core/address_space.h
+++ b/src/core/address_space.h
@@ -39,7 +39,7 @@ public:
     [[nodiscard]] const u8* SystemManagedVirtualBase() const noexcept {
         return system_managed_base;
     }
-    [[nodiscard]] size_t SystemManagedVirtualSize() const noexcept {
+    [[nodiscard]] u64 SystemManagedVirtualSize() const noexcept {
         return system_managed_size;
     }
 
@@ -49,7 +49,7 @@ public:
     [[nodiscard]] const u8* SystemReservedVirtualBase() const noexcept {
         return system_reserved_base;
     }
-    [[nodiscard]] size_t SystemReservedVirtualSize() const noexcept {
+    [[nodiscard]] u64 SystemReservedVirtualSize() const noexcept {
         return system_reserved_size;
     }
 
@@ -59,7 +59,7 @@ public:
     [[nodiscard]] const u8* UserVirtualBase() const noexcept {
         return user_base;
     }
-    [[nodiscard]] size_t UserVirtualSize() const noexcept {
+    [[nodiscard]] u64 UserVirtualSize() const noexcept {
         return user_size;
     }
 
@@ -73,17 +73,15 @@ public:
-     *                  If zero is provided the mapping is considered as private.
+     *                  If -1 is provided the mapping is considered as private.
      * @return A pointer to the mapped memory.
      */
-    void* Map(VAddr virtual_addr, size_t size, u64 alignment = 0, PAddr phys_addr = -1,
-              bool exec = false);
+    void* Map(VAddr virtual_addr, u64 size, PAddr phys_addr = -1, bool exec = false);
 
     /// Memory maps a specified file descriptor.
-    void* MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot, uintptr_t fd);
+    void* MapFile(VAddr virtual_addr, u64 size, u64 offset, u32 prot, uintptr_t fd);
 
     /// Unmaps specified virtual memory area.
-    void Unmap(VAddr virtual_addr, size_t size, VAddr start_in_vma, VAddr end_in_vma,
-               PAddr phys_base, bool is_exec, bool has_backing, bool readonly_file);
+    void Unmap(VAddr virtual_addr, u64 size, bool has_backing);
 
-    void Protect(VAddr virtual_addr, size_t size, MemoryPermission perms);
+    void Protect(VAddr virtual_addr, u64 size, MemoryPermission perms);
 
     // Returns an interval set containing all usable regions.
     boost::icl::interval_set<VAddr> GetUsableRegions();
@@ -93,11 +91,11 @@ private:
     std::unique_ptr<Impl> impl;
     u8* backing_base{};
     u8* system_managed_base{};
-    size_t system_managed_size{};
+    u64 system_managed_size{};
     u8* system_reserved_base{};
-    size_t system_reserved_size{};
+    u64 system_reserved_size{};
     u8* user_base{};
-    size_t user_size{};
+    u64 user_size{};
 };
 
 } // namespace Core
diff --git a/src/core/devtools/widget/memory_map.cpp b/src/core/devtools/widget/memory_map.cpp
index 278c6595c..d1d1eb410 100644
--- a/src/core/devtools/widget/memory_map.cpp
+++ b/src/core/devtools/widget/memory_map.cpp
@@ -32,7 +32,7 @@ bool MemoryMapViewer::Iterator::DrawLine() {
         TableNextColumn();
         Text("%s", magic_enum::enum_name(m.prot).data());
         TableNextColumn();
-        if (m.is_exec) {
+        if (True(m.prot & MemoryProt::CpuExec)) {
             Text("X");
         }
         TableNextColumn();
@@ -44,7 +44,7 @@ bool MemoryMapViewer::Iterator::DrawLine() {
         return false;
     }
     auto m = dmem.it->second;
-    if (m.dma_type == DMAType::Free) {
+    if (m.dma_type == PhysicalMemoryType::Free) {
         ++dmem.it;
         return DrawLine();
     }
@@ -56,7 +56,8 @@ bool MemoryMapViewer::Iterator::DrawLine() {
     auto type = static_cast<::Libraries::Kernel::MemoryTypes>(m.memory_type);
     Text("%s", magic_enum::enum_name(type).data());
     TableNextColumn();
-    Text("%d", m.dma_type == DMAType::Pooled || m.dma_type == DMAType::Committed);
+    Text("%d",
+         m.dma_type == PhysicalMemoryType::Pooled || m.dma_type == PhysicalMemoryType::Committed);
     ++dmem.it;
     return true;
 }
diff --git a/src/core/devtools/widget/memory_map.h b/src/core/devtools/widget/memory_map.h
index cc7697c8c..3bbec4643 100644
--- a/src/core/devtools/widget/memory_map.h
+++ b/src/core/devtools/widget/memory_map.h
@@ -11,8 +11,8 @@ class MemoryMapViewer {
     struct Iterator {
         bool is_vma;
         struct {
-            MemoryManager::DMemMap::iterator it;
-            MemoryManager::DMemMap::iterator end;
+            MemoryManager::PhysMap::iterator it;
+            MemoryManager::PhysMap::iterator end;
         } dmem;
         struct {
             MemoryManager::VMAMap::iterator it;
diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp
index 62903ff72..3aec8193a 100644
--- a/src/core/libraries/kernel/memory.cpp
+++ b/src/core/libraries/kernel/memory.cpp
@@ -102,6 +102,7 @@ s32 PS4_SYSV_ABI sceKernelReleaseDirectMemory(u64 start, u64 len) {
     if (len == 0) {
         return ORBIS_OK;
     }
+    LOG_INFO(Kernel_Vmm, "called start = {:#x}, len = {:#x}", start, len);
     auto* memory = Core::Memory::Instance();
     memory->Free(start, len);
     return ORBIS_OK;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 3a4a16933..4567475cd 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -5,6 +5,7 @@
 #include "common/assert.h"
 #include "common/config.h"
 #include "common/debug.h"
+#include "common/elf_info.h"
 #include "core/file_sys/fs.h"
 #include "core/libraries/kernel/memory.h"
 #include "core/libraries/kernel/orbis_error.h"
@@ -25,6 +26,9 @@ MemoryManager::MemoryManager() {
                         VirtualMemoryArea{region.lower(), region.upper() - region.lower()});
         LOG_INFO(Kernel_Vmm, "{:#x} - {:#x}", region.lower(), region.upper());
     }
+
+    ASSERT_MSG(Libraries::Kernel::sceKernelGetCompiledSdkVersion(&sdk_version) == 0,
+               "Failed to get compiled SDK version");
 }
 
 MemoryManager::~MemoryManager() = default;
@@ -55,14 +59,14 @@ void MemoryManager::SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1
     // Insert an area that covers the direct memory physical address block.
     // Note that this should never be called after direct memory allocations have been made.
     dmem_map.clear();
-    dmem_map.emplace(0, DirectMemoryArea{0, total_direct_size});
+    dmem_map.emplace(0, PhysicalMemoryArea{0, total_direct_size});
 
     // Insert an area that covers the flexible memory physical address block.
     // Note that this should never be called after flexible memory allocations have been made.
     const auto remaining_physical_space = total_size - total_direct_size;
     fmem_map.clear();
     fmem_map.emplace(total_direct_size,
-                     FlexibleMemoryArea{total_direct_size, remaining_physical_space});
+                     PhysicalMemoryArea{total_direct_size, remaining_physical_space});
 
     LOG_INFO(Kernel_Vmm, "Configured memory regions: flexible size = {:#x}, direct size = {:#x}",
              total_flexible_size, total_direct_size);
@@ -115,6 +119,7 @@ void MemoryManager::SetPrtArea(u32 id, VAddr address, u64 size) {
 void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) {
     ASSERT_MSG(IsValidMapping(virtual_addr), "Attempted to access invalid address {:#x}",
                virtual_addr);
+    mutex.lock_shared();
 
     auto vma = FindVMA(virtual_addr);
     while (size) {
@@ -129,23 +134,46 @@ void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) {
         dest += copy_size;
         ++vma;
     }
+
+    mutex.unlock_shared();
 }
 
-bool MemoryManager::TryWriteBacking(void* address, const void* data, u32 num_bytes) {
+// Copies `size` bytes from `data` into the physical backing of the guest range at `address`.
+// Returns false, writing nothing, if any VMA overlapping the range lacks physical backing.
+bool MemoryManager::TryWriteBacking(void* address, const void* data, u64 size) {
     const VAddr virtual_addr = std::bit_cast<VAddr>(address);
-    ASSERT_MSG(IsValidMapping(virtual_addr, num_bytes), "Attempted to access invalid address {:#x}",
+    ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}",
                virtual_addr);
-    const auto& vma = FindVMA(virtual_addr)->second;
-    if (!HasPhysicalBacking(vma)) {
-        return false;
+    mutex.lock_shared();
+    // Validate every overlapping VMA up front so that a failed call has no side effects.
+    const VAddr write_end = virtual_addr + size;
+    const auto first_vma = FindVMA(virtual_addr);
+    auto last_vma = first_vma;
+    for (; last_vma != vma_map.end() && last_vma->second.base < write_end; ++last_vma) {
+        if (!HasPhysicalBacking(last_vma->second)) {
+            mutex.unlock_shared();
+            return false;
+        }
     }
-    u8* backing = impl.BackingBase() + vma.phys_base + (virtual_addr - vma.base);
-    memcpy(backing, data, num_bytes);
+    const u8* src = static_cast<const u8*>(data);
+    for (auto it = first_vma; it != last_vma; ++it) {
+        for (const auto& [offset, area] : it->second.phys_areas) {
+            // Areas are keyed by their offset within the VMA; clamp the write to each one.
+            const VAddr area_start = it->second.base + offset;
+            const VAddr lo = std::max<VAddr>(virtual_addr, area_start);
+            const VAddr hi = std::min<VAddr>(write_end, area_start + area.size);
+            if (lo < hi) {
+                u8* backing = impl.BackingBase() + area.base + (lo - area_start);
+                memcpy(backing, src + (lo - virtual_addr), hi - lo);
+            }
+        }
+    }
+    mutex.unlock_shared();
     return true;
 }
 
 PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, u64 size, u64 alignment) {
-    std::scoped_lock lk{mutex};
+    mutex.lock();
     alignment = alignment > 0 ? alignment : 64_KB;
 
     auto dmem_area = FindDmemArea(search_start);
@@ -155,7 +183,7 @@ PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, u64 size,
     auto mapping_end = mapping_start + size;
 
     // Find the first free, large enough dmem area in the range.
-    while (dmem_area->second.dma_type != DMAType::Free ||
+    while (dmem_area->second.dma_type != PhysicalMemoryType::Free ||
            dmem_area->second.GetEnd() < mapping_end) {
         // The current dmem_area isn't suitable, move to the next one.
         dmem_area++;
@@ -171,33 +199,34 @@ PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, u64 size,
     if (dmem_area == dmem_map.end()) {
         // There are no suitable mappings in this range
         LOG_ERROR(Kernel_Vmm, "Unable to find free direct memory area: size = {:#x}", size);
+        mutex.unlock();
         return -1;
     }
 
     // Add the allocated region to the list and commit its pages.
-    auto& area = CarveDmemArea(mapping_start, size)->second;
-    area.dma_type = DMAType::Pooled;
+    auto& area = CarvePhysArea(dmem_map, mapping_start, size)->second;
+    area.dma_type = PhysicalMemoryType::Pooled;
     area.memory_type = 3;
 
     // Track how much dmem was allocated for pools.
     pool_budget += size;
 
+    mutex.unlock();
     return mapping_start;
 }
 
 PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, u64 size, u64 alignment,
                               s32 memory_type) {
-    std::scoped_lock lk{mutex};
+    mutex.lock();
     alignment = alignment > 0 ? alignment : 16_KB;
 
     auto dmem_area = FindDmemArea(search_start);
-    auto mapping_start = search_start > dmem_area->second.base
-                             ? Common::AlignUp(search_start, alignment)
-                             : Common::AlignUp(dmem_area->second.base, alignment);
+    auto mapping_start =
+        Common::AlignUp(std::max<PAddr>(search_start, dmem_area->second.base), alignment);
     auto mapping_end = mapping_start + size;
 
     // Find the first free, large enough dmem area in the range.
-    while (dmem_area->second.dma_type != DMAType::Free ||
+    while (dmem_area->second.dma_type != PhysicalMemoryType::Free ||
            dmem_area->second.GetEnd() < mapping_end) {
         // The current dmem_area isn't suitable, move to the next one.
         dmem_area++;
@@ -213,19 +242,22 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, u64 size, u6
     if (dmem_area == dmem_map.end()) {
         // There are no suitable mappings in this range
         LOG_ERROR(Kernel_Vmm, "Unable to find free direct memory area: size = {:#x}", size);
+        mutex.unlock();
         return -1;
     }
 
     // Add the allocated region to the list and commit its pages.
-    auto& area = CarveDmemArea(mapping_start, size)->second;
+    auto& area = CarvePhysArea(dmem_map, mapping_start, size)->second;
     area.memory_type = memory_type;
-    area.dma_type = DMAType::Allocated;
+    area.dma_type = PhysicalMemoryType::Allocated;
     MergeAdjacent(dmem_map, dmem_area);
+
+    mutex.unlock();
     return mapping_start;
 }
 
 void MemoryManager::Free(PAddr phys_addr, u64 size) {
-    std::scoped_lock lk{mutex};
+    mutex.lock();
 
     // Release any dmem mappings that reference this physical block.
     std::vector<std::pair<VAddr, u64>> remove_list;
@@ -233,19 +265,21 @@ void MemoryManager::Free(PAddr phys_addr, u64 size) {
         if (mapping.type != VMAType::Direct) {
             continue;
         }
-        if (mapping.phys_base <= phys_addr && phys_addr < mapping.phys_base + mapping.size) {
-            const auto vma_start_offset = phys_addr - mapping.phys_base;
-            const auto addr_in_vma = mapping.base + vma_start_offset;
-            const auto size_in_vma =
-                mapping.size - vma_start_offset > size ? size : mapping.size - vma_start_offset;
+        for (auto& [offset_in_vma, phys_mapping] : mapping.phys_areas) {
+            if (phys_addr + size > phys_mapping.base &&
+                phys_addr < phys_mapping.base + phys_mapping.size) {
+                const u64 phys_offset =
+                    std::max<u64>(phys_mapping.base, phys_addr) - phys_mapping.base;
+                const VAddr addr_in_vma = mapping.base + offset_in_vma + phys_offset;
+                const u64 unmap_size = std::min<u64>(phys_mapping.size - phys_offset, size);
 
-            LOG_INFO(Kernel_Vmm, "Unmaping direct mapping {:#x} with size {:#x}", addr_in_vma,
-                     size_in_vma);
-            // Unmaping might erase from vma_map. We can't do it here.
-            remove_list.emplace_back(addr_in_vma, size_in_vma);
+                // Unmapping might erase from vma_map. We can't do it here.
+                remove_list.emplace_back(addr_in_vma, unmap_size);
+            }
         }
     }
     for (const auto& [addr, size] : remove_list) {
+        LOG_INFO(Kernel_Vmm, "Unmapping direct mapping {:#x} with size {:#x}", addr, size);
         UnmapMemoryImpl(addr, size);
     }
 
@@ -255,15 +289,13 @@ void MemoryManager::Free(PAddr phys_addr, u64 size) {
     auto dmem_area = FindDmemArea(phys_addr);
     while (dmem_area != dmem_map.end() && remaining_size > 0) {
         // Carve a free dmem area in place of this one.
-        const auto start_phys_addr =
-            phys_addr > dmem_area->second.base ? phys_addr : dmem_area->second.base;
+        const auto start_phys_addr = std::max<PAddr>(phys_addr, dmem_area->second.base);
         const auto offset_in_dma = start_phys_addr - dmem_area->second.base;
-        const auto size_in_dma = dmem_area->second.size - offset_in_dma > remaining_size
-                                     ? remaining_size
-                                     : dmem_area->second.size - offset_in_dma;
-        const auto dmem_handle = CarveDmemArea(start_phys_addr, size_in_dma);
+        const auto size_in_dma =
+            std::min<u64>(dmem_area->second.size - offset_in_dma, remaining_size);
+        const auto dmem_handle = CarvePhysArea(dmem_map, start_phys_addr, size_in_dma);
         auto& new_dmem_area = dmem_handle->second;
-        new_dmem_area.dma_type = DMAType::Free;
+        new_dmem_area.dma_type = PhysicalMemoryType::Free;
         new_dmem_area.memory_type = 0;
 
         // Merge the new dmem_area with dmem_map
@@ -274,12 +306,14 @@ void MemoryManager::Free(PAddr phys_addr, u64 size) {
         remaining_size -= size_in_dma;
         dmem_area = FindDmemArea(phys_addr_to_search);
     }
+
+    mutex.unlock();
 }
 
 s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32 mtype) {
     ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}",
                virtual_addr);
-    std::scoped_lock lk{mutex};
+    mutex.lock();
 
     // Input addresses to PoolCommit are treated as fixed, and have a constant alignment.
     const u64 alignment = 64_KB;
@@ -289,6 +323,7 @@ s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32
     if (vma.type != VMAType::PoolReserved) {
         // If we're attempting to commit non-pooled memory, return EINVAL
         LOG_ERROR(Kernel_Vmm, "Attempting to commit non-pooled memory at {:#x}", mapped_addr);
+        mutex.unlock();
         return ORBIS_KERNEL_ERROR_EINVAL;
     }
 
@@ -297,12 +332,14 @@ s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32
         LOG_ERROR(Kernel_Vmm,
                   "Pooled region {:#x} to {:#x} is not large enough to commit from {:#x} to {:#x}",
                   vma.base, vma.base + vma.size, mapped_addr, mapped_addr + size);
+        mutex.unlock();
         return ORBIS_KERNEL_ERROR_EINVAL;
     }
 
     if (pool_budget <= size) {
         // If there isn't enough pooled memory to perform the mapping, return ENOMEM
         LOG_ERROR(Kernel_Vmm, "Not enough pooled memory to perform mapping");
+        mutex.unlock();
         return ORBIS_KERNEL_ERROR_ENOMEM;
     } else {
         // Track how much pooled memory this commit will take
@@ -314,48 +351,54 @@ s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32
         prot |= MemoryProt::CpuRead;
     }
 
+    // Create the virtual mapping for the commit
+    const auto new_vma_handle = CarveVMA(virtual_addr, size);
+    auto& new_vma = new_vma_handle->second;
+    new_vma.disallow_merge = false;
+    new_vma.prot = prot;
+    new_vma.name = "anon";
+    new_vma.type = Core::VMAType::Pooled;
+    new_vma.phys_areas.clear();
+
     // Find suitable physical addresses
     auto handle = dmem_map.begin();
     u64 remaining_size = size;
     VAddr current_addr = mapped_addr;
-    while (handle != dmem_map.end() && remaining_size != 0) {
-        if (handle->second.dma_type != DMAType::Pooled) {
+    while (handle != dmem_map.end() && remaining_size > 0) {
+        if (handle->second.dma_type != PhysicalMemoryType::Pooled) {
             // Non-pooled means it's either not for pool use, or already committed.
             handle++;
             continue;
         }
 
         // On PS4, commits can make sparse physical mappings.
-        // For now, it's easier to create separate memory mappings for each physical mapping.
         u64 size_to_map = std::min<u64>(remaining_size, handle->second.size);
 
-        // Carve out the new VMA representing this mapping
-        const auto new_vma_handle = CarveVMA(current_addr, size_to_map);
-        auto& new_vma = new_vma_handle->second;
-        new_vma.disallow_merge = false;
-        new_vma.prot = prot;
-        new_vma.name = "anon";
-        new_vma.type = Core::VMAType::Pooled;
-        new_vma.is_exec = false;
-
         // Use the start of this area as the physical backing for this mapping.
-        const auto new_dmem_handle = CarveDmemArea(handle->second.base, size_to_map);
+        const auto new_dmem_handle = CarvePhysArea(dmem_map, handle->second.base, size_to_map);
         auto& new_dmem_area = new_dmem_handle->second;
-        new_dmem_area.dma_type = DMAType::Committed;
+        new_dmem_area.dma_type = PhysicalMemoryType::Committed;
         new_dmem_area.memory_type = mtype;
-        new_vma.phys_base = new_dmem_area.base;
-        handle = MergeAdjacent(dmem_map, new_dmem_handle);
 
-        // Perform the mapping
-        void* out_addr = impl.Map(current_addr, size_to_map, alignment, new_vma.phys_base, false);
+        // Add the dmem area to this vma, merge it with any similar tracked areas.
+        new_vma.phys_areas[current_addr - mapped_addr] = new_dmem_handle->second;
+        MergeAdjacent(new_vma.phys_areas, new_vma.phys_areas.find(current_addr - mapped_addr));
+
+        // Perform an address space mapping for each physical area
+        void* out_addr = impl.Map(current_addr, size_to_map, new_dmem_area.base);
         TRACK_ALLOC(out_addr, size_to_map, "VMEM");
 
+        handle = MergeAdjacent(dmem_map, new_dmem_handle);
         current_addr += size_to_map;
         remaining_size -= size_to_map;
         handle++;
     }
-    ASSERT_MSG(remaining_size == 0, "Unable to map physical memory");
+    ASSERT_MSG(remaining_size == 0, "Failed to commit pooled memory");
 
+    // Merge this VMA with similar nearby areas
+    MergeAdjacent(vma_map, new_vma_handle);
+
+    mutex.unlock();
     if (IsValidGpuMapping(mapped_addr, size)) {
         rasterizer->MapMemory(mapped_addr, size);
     }
@@ -376,13 +419,15 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
         return ORBIS_KERNEL_ERROR_EINVAL;
     }
 
-    std::scoped_lock lk{mutex};
+    mutex.lock();
 
+    PhysHandle dmem_area;
     // Validate the requested physical address range
     if (phys_addr != -1) {
         if (total_direct_size < phys_addr + size) {
             LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at physical address {:#x}", size,
                       phys_addr);
+            mutex.unlock();
             return ORBIS_KERNEL_ERROR_ENOMEM;
         }
 
@@ -390,49 +435,24 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
         auto dmem_area = FindDmemArea(phys_addr);
         while (dmem_area != dmem_map.end() && dmem_area->second.base < phys_addr + size) {
             // If any requested dmem area is not allocated, return an error.
-            if (dmem_area->second.dma_type != DMAType::Allocated &&
-                dmem_area->second.dma_type != DMAType::Mapped) {
+            if (dmem_area->second.dma_type != PhysicalMemoryType::Allocated &&
+                dmem_area->second.dma_type != PhysicalMemoryType::Mapped) {
                 LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at physical address {:#x}", size,
                           phys_addr);
+                mutex.unlock();
                 return ORBIS_KERNEL_ERROR_ENOMEM;
             }
 
             // If we need to perform extra validation, then check for Mapped dmem areas too.
-            if (validate_dmem && dmem_area->second.dma_type == DMAType::Mapped) {
+            if (validate_dmem && dmem_area->second.dma_type == PhysicalMemoryType::Mapped) {
                 LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at physical address {:#x}", size,
                           phys_addr);
+                mutex.unlock();
                 return ORBIS_KERNEL_ERROR_EBUSY;
             }
 
             dmem_area++;
         }
-
-        // If the prior loop succeeds, we need to loop through again and carve out mapped dmas.
-        // This needs to be a separate loop to avoid modifying dmem map during failed calls.
-        auto phys_addr_to_search = phys_addr;
-        auto remaining_size = size;
-        dmem_area = FindDmemArea(phys_addr);
-        while (dmem_area != dmem_map.end() && remaining_size > 0) {
-            // Carve a new dmem area in place of this one with the appropriate type.
-            // Ensure the carved area only covers the current dmem area.
-            const auto start_phys_addr =
-                phys_addr > dmem_area->second.base ? phys_addr : dmem_area->second.base;
-            const auto offset_in_dma = start_phys_addr - dmem_area->second.base;
-            const auto size_in_dma = dmem_area->second.size - offset_in_dma > remaining_size
-                                         ? remaining_size
-                                         : dmem_area->second.size - offset_in_dma;
-            const auto dmem_handle = CarveDmemArea(start_phys_addr, size_in_dma);
-            auto& new_dmem_area = dmem_handle->second;
-            new_dmem_area.dma_type = DMAType::Mapped;
-
-            // Merge the new dmem_area with dmem_map
-            MergeAdjacent(dmem_map, dmem_handle);
-
-            // Get the next relevant dmem area.
-            phys_addr_to_search = phys_addr + size_in_dma;
-            remaining_size -= size_in_dma;
-            dmem_area = FindDmemArea(phys_addr_to_search);
-        }
     }
 
     // Limit the minimum address to SystemManagedVirtualBase to prevent hardware-specific issues.
@@ -463,6 +483,7 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
         auto remaining_size = vma.base + vma.size - mapped_addr;
         if (!vma.IsFree() || remaining_size < size) {
             LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at address {:#x}", size, mapped_addr);
+            mutex.unlock();
             return ORBIS_KERNEL_ERROR_ENOMEM;
         }
     } else {
@@ -473,6 +494,7 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
         mapped_addr = SearchFree(mapped_addr, size, alignment);
         if (mapped_addr == -1) {
             // No suitable memory areas to map to
+            mutex.unlock();
             return ORBIS_KERNEL_ERROR_ENOMEM;
         }
     }
@@ -480,62 +502,109 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
     // Create a memory area representing this mapping.
     const auto new_vma_handle = CarveVMA(mapped_addr, size);
     auto& new_vma = new_vma_handle->second;
-
-    // If type is Flexible, we need to track how much flexible memory is used here.
-    // We also need to determine a reasonable physical base to perform this mapping at.
-    if (type == VMAType::Flexible) {
-        flexible_usage += size;
-
-        // Find a suitable physical address
-        auto handle = fmem_map.begin();
-        while (handle != fmem_map.end() &&
-               (!handle->second.is_free || handle->second.size < size)) {
-            handle++;
-        }
-
-        // Some games will end up fragmenting the flexible address space.
-        ASSERT_MSG(handle != fmem_map.end() && handle->second.is_free,
-                   "No suitable physical memory areas to map");
-
-        // We'll use the start of this area as the physical backing for this mapping.
-        const auto new_fmem_handle = CarveFmemArea(handle->second.base, size);
-        auto& new_fmem_area = new_fmem_handle->second;
-        new_fmem_area.is_free = false;
-        phys_addr = new_fmem_area.base;
-        MergeAdjacent(fmem_map, new_fmem_handle);
-    }
-
+    const bool is_exec = True(prot & MemoryProt::CpuExec);
     if (True(prot & MemoryProt::CpuWrite)) {
         // On PS4, read is appended to write mappings.
         prot |= MemoryProt::CpuRead;
     }
 
-    const bool is_exec = True(prot & MemoryProt::CpuExec);
     new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
     new_vma.prot = prot;
     new_vma.name = name;
     new_vma.type = type;
-    new_vma.phys_base = phys_addr == -1 ? 0 : phys_addr;
-    new_vma.is_exec = is_exec;
+    new_vma.phys_areas.clear();
 
-    if (type == VMAType::Reserved) {
-        // Technically this should be done for direct and flexible mappings too,
-        // But some Windows-specific limitations make that hard to accomplish.
+    // If type is Flexible, we need to track how much flexible memory is used here.
+    // We also need to determine a reasonable physical base to perform this mapping at.
+    if (type == VMAType::Flexible) {
+        // Find suitable physical addresses
+        auto handle = fmem_map.begin();
+        u64 remaining_size = size;
+        VAddr current_addr = mapped_addr;
+        while (handle != fmem_map.end() && remaining_size != 0) {
+            if (handle->second.dma_type != PhysicalMemoryType::Free) {
+                // If the handle isn't free, we cannot use it.
+                handle++;
+                continue;
+            }
+
+            // Determine the size we can map here.
+            u64 size_to_map = std::min<u64>(remaining_size, handle->second.size);
+
+            // Create a physical area
+            const auto new_fmem_handle = CarvePhysArea(fmem_map, handle->second.base, size_to_map);
+            auto& new_fmem_area = new_fmem_handle->second;
+            new_fmem_area.dma_type = PhysicalMemoryType::Flexible;
+
+            // Add the new area to the vma, merge it with any similar tracked areas.
+            new_vma.phys_areas[current_addr - mapped_addr] = new_fmem_handle->second;
+            MergeAdjacent(new_vma.phys_areas, new_vma.phys_areas.find(current_addr - mapped_addr));
+
+            // Perform an address space mapping for each physical area
+            void* out_addr = impl.Map(current_addr, size_to_map, new_fmem_area.base, is_exec);
+            TRACK_ALLOC(out_addr, size_to_map, "VMEM");
+
+            handle = MergeAdjacent(fmem_map, new_fmem_handle);
+            current_addr += size_to_map;
+            remaining_size -= size_to_map;
+            flexible_usage += size_to_map;
+            handle++;
+        }
+        ASSERT_MSG(remaining_size == 0, "Failed to map physical memory");
+    } else if (type == VMAType::Direct) {
+        // Map the physical memory for this direct memory mapping.
+        auto phys_addr_to_search = phys_addr;
+        u64 remaining_size = size;
+        dmem_area = FindDmemArea(phys_addr);
+        while (dmem_area != dmem_map.end() && remaining_size > 0) {
+            // Carve a new dmem area in place of this one with the appropriate type.
+            // Ensure the carved area only covers the current dmem area.
+            const auto start_phys_addr = std::max<PAddr>(phys_addr, dmem_area->second.base);
+            const auto offset_in_dma = start_phys_addr - dmem_area->second.base;
+            const auto size_in_dma =
+                std::min<u64>(dmem_area->second.size - offset_in_dma, remaining_size);
+            const auto dmem_handle = CarvePhysArea(dmem_map, start_phys_addr, size_in_dma);
+            auto& new_dmem_area = dmem_handle->second;
+            new_dmem_area.dma_type = PhysicalMemoryType::Mapped;
+
+            // Add the dmem area to this vma, merge it with any similar tracked areas.
+            new_vma.phys_areas[phys_addr_to_search - phys_addr] = dmem_handle->second;
+            MergeAdjacent(new_vma.phys_areas,
+                          new_vma.phys_areas.find(phys_addr_to_search - phys_addr));
+
+            // Merge the new dmem_area with dmem_map
+            MergeAdjacent(dmem_map, dmem_handle);
+
+            // Get the next relevant dmem area.
+            phys_addr_to_search = phys_addr + size_in_dma;
+            remaining_size -= size_in_dma;
+            dmem_area = FindDmemArea(phys_addr_to_search);
+        }
+        ASSERT_MSG(remaining_size == 0, "Failed to map physical memory");
+    }
+
+    if (new_vma.type != VMAType::Direct || sdk_version >= Common::ElfInfo::FW_20) {
+        // Merge this VMA with similar nearby areas
+        // Direct memory mappings only coalesce on SDK version 2.00 or later.
         MergeAdjacent(vma_map, new_vma_handle);
     }
 
-    if (type == VMAType::Reserved || type == VMAType::PoolReserved) {
-        // For Reserved/PoolReserved mappings, we don't perform any address space allocations.
-        // Just set out_addr to mapped_addr instead.
-        *out_addr = std::bit_cast<void*>(mapped_addr);
-    } else {
+    *out_addr = std::bit_cast<void*>(mapped_addr);
+    if (type != VMAType::Reserved && type != VMAType::PoolReserved) {
+        // Flexible address space mappings were already performed while carving flexible memory.
+        if (type != VMAType::Flexible) {
+            impl.Map(mapped_addr, size, phys_addr, is_exec);
+        }
+        TRACK_ALLOC(*out_addr, size, "VMEM");
+
+        mutex.unlock();
+
         // If this is not a reservation, then map to GPU and address space
         if (IsValidGpuMapping(mapped_addr, size)) {
             rasterizer->MapMemory(mapped_addr, size);
         }
-        *out_addr = impl.Map(mapped_addr, size, alignment, phys_addr, is_exec);
-
-        TRACK_ALLOC(*out_addr, size, "VMEM");
+    } else {
+        mutex.unlock();
     }
 
     return ORBIS_OK;
@@ -547,13 +616,14 @@ s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, Memory
     ASSERT_MSG(IsValidMapping(mapped_addr, size), "Attempted to access invalid address {:#x}",
                mapped_addr);
 
-    std::scoped_lock lk{mutex};
+    mutex.lock();
 
     // Find first free area to map the file.
     if (False(flags & MemoryMapFlags::Fixed)) {
         mapped_addr = SearchFree(mapped_addr, size, 1);
         if (mapped_addr == -1) {
             // No suitable memory areas to map to
+            mutex.unlock();
             return ORBIS_KERNEL_ERROR_ENOMEM;
         }
     }
@@ -571,11 +641,13 @@ s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, Memory
     auto file = h->GetFile(fd);
     if (file == nullptr) {
         LOG_WARNING(Kernel_Vmm, "Invalid file for mmap, fd {}", fd);
+        mutex.unlock();
         return ORBIS_KERNEL_ERROR_EBADF;
     }
 
     if (file->type != Core::FileSys::FileType::Regular) {
         LOG_WARNING(Kernel_Vmm, "Unsupported file type for mmap, fd {}", fd);
+        mutex.unlock();
         return ORBIS_KERNEL_ERROR_EBADF;
     }
 
@@ -612,6 +684,8 @@ s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, Memory
     new_vma.fd = fd;
     new_vma.type = VMAType::File;
 
+    mutex.unlock();
+
     *out_addr = std::bit_cast<void*>(mapped_addr);
     return ORBIS_OK;
 }
@@ -619,13 +693,14 @@ s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, Memory
 s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
     ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}",
                virtual_addr);
-    std::scoped_lock lk{mutex};
+    mutex.lock();
 
     // Do an initial search to ensure this decommit is valid.
     auto it = FindVMA(virtual_addr);
     while (it != vma_map.end() && it->second.base + it->second.size <= virtual_addr + size) {
         if (it->second.type != VMAType::PoolReserved && it->second.type != VMAType::Pooled) {
             LOG_ERROR(Kernel_Vmm, "Attempting to decommit non-pooled memory!");
+            mutex.unlock();
             return ORBIS_KERNEL_ERROR_EINVAL;
         }
         it++;
@@ -635,36 +710,46 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
     u64 remaining_size = size;
     VAddr current_addr = virtual_addr;
     while (remaining_size != 0) {
-        const auto it = FindVMA(current_addr);
-        const auto& vma_base = it->second;
-        const bool is_exec = vma_base.is_exec;
+        const auto handle = FindVMA(current_addr);
+        const auto& vma_base = handle->second;
         const auto start_in_vma = current_addr - vma_base.base;
         const auto size_in_vma = std::min<u64>(remaining_size, vma_base.size - start_in_vma);
 
         if (vma_base.type == VMAType::Pooled) {
             // We always map PoolCommitted memory to GPU, so unmap when decomitting.
             if (IsValidGpuMapping(current_addr, size_in_vma)) {
+                mutex.unlock();
                 rasterizer->UnmapMemory(current_addr, size_in_vma);
+                mutex.lock();
             }
 
             // Track how much pooled memory is decommitted
             pool_budget += size_in_vma;
 
             // Re-pool the direct memory used by this mapping
-            const auto unmap_phys_base = vma_base.phys_base + start_in_vma;
-            const auto new_dmem_handle = CarveDmemArea(unmap_phys_base, size_in_vma);
-            auto& new_dmem_area = new_dmem_handle->second;
-            new_dmem_area.dma_type = DMAType::Pooled;
+            u64 size_to_free = size_in_vma;
+            auto phys_handle = std::prev(vma_base.phys_areas.upper_bound(start_in_vma));
+            while (phys_handle != vma_base.phys_areas.end() && size_to_free > 0) {
+                // Calculate physical memory offset, address, and size
+                u64 dma_offset =
+                    std::max<PAddr>(phys_handle->first, start_in_vma) - phys_handle->first;
+                PAddr phys_addr = phys_handle->second.base + dma_offset;
+                u64 size_in_dma =
+                    std::min<u64>(size_to_free, phys_handle->second.size - dma_offset);
 
-            // Coalesce with nearby direct memory areas.
-            MergeAdjacent(dmem_map, new_dmem_handle);
-        }
+                // Create a new dmem area reflecting the pooled region
+                const auto new_dmem_handle = CarvePhysArea(dmem_map, phys_addr, size_in_dma);
+                auto& new_dmem_area = new_dmem_handle->second;
+                new_dmem_area.dma_type = PhysicalMemoryType::Pooled;
 
-        if (vma_base.type != VMAType::PoolReserved) {
-            // Unmap the memory region.
-            impl.Unmap(vma_base.base, vma_base.size, start_in_vma, start_in_vma + size_in_vma,
-                       vma_base.phys_base, vma_base.is_exec, true, false);
-            TRACK_FREE(virtual_addr, "VMEM");
+                // Coalesce with nearby direct memory areas.
+                MergeAdjacent(dmem_map, new_dmem_handle);
+
+                // Increment loop
+                size_to_free -= size_in_dma;
+                phys_handle++;
+            }
+            ASSERT_MSG(size_to_free == 0, "Failed to decommit pooled memory");
         }
 
         // Mark region as pool reserved and attempt to coalesce it with neighbours.
@@ -672,108 +757,116 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
         auto& vma = new_it->second;
         vma.type = VMAType::PoolReserved;
         vma.prot = MemoryProt::NoAccess;
-        vma.phys_base = 0;
         vma.disallow_merge = false;
         vma.name = "anon";
+        vma.phys_areas.clear();
         MergeAdjacent(vma_map, new_it);
 
         current_addr += size_in_vma;
         remaining_size -= size_in_vma;
     }
 
+    // Unmap from address space
+    impl.Unmap(virtual_addr, size, true);
+    TRACK_FREE(virtual_addr, "VMEM");
+
+    mutex.unlock();
     return ORBIS_OK;
 }
 
 s32 MemoryManager::UnmapMemory(VAddr virtual_addr, u64 size) {
-    std::scoped_lock lk{mutex};
     if (size == 0) {
         return ORBIS_OK;
     }
+    mutex.lock();
     virtual_addr = Common::AlignDown(virtual_addr, 16_KB);
     size = Common::AlignUp(size, 16_KB);
     ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}",
                virtual_addr);
-    return UnmapMemoryImpl(virtual_addr, size);
+    u64 bytes_unmapped = UnmapMemoryImpl(virtual_addr, size);
+    mutex.unlock();
+    return bytes_unmapped;
 }
 
 u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma_base, u64 size) {
-    const auto vma_base_addr = vma_base.base;
-    const auto vma_base_size = vma_base.size;
-    const auto type = vma_base.type;
-    const auto phys_base = vma_base.phys_base;
-    const bool is_exec = vma_base.is_exec;
-    const auto start_in_vma = virtual_addr - vma_base_addr;
-    const auto adjusted_size =
-        vma_base_size - start_in_vma < size ? vma_base_size - start_in_vma : size;
-    const bool has_backing = HasPhysicalBacking(vma_base) || type == VMAType::File;
-    const auto prot = vma_base.prot;
-    const bool readonly_file = prot == MemoryProt::CpuRead && type == VMAType::File;
+    const auto start_in_vma = virtual_addr - vma_base.base;
+    const auto size_in_vma = std::min<u64>(vma_base.size - start_in_vma, size);
+    const auto vma_type = vma_base.type;
+    const bool has_backing = HasPhysicalBacking(vma_base) || vma_base.type == VMAType::File;
+    const bool readonly_file =
+        vma_base.prot == MemoryProt::CpuRead && vma_base.type == VMAType::File;
+    const bool is_exec = True(vma_base.prot & MemoryProt::CpuExec);
 
-    if (type == VMAType::Free) {
-        return adjusted_size;
+    if (vma_base.type == VMAType::Free || vma_base.type == VMAType::Pooled) {
+        return size_in_vma;
     }
 
-    if (type == VMAType::Direct) {
-        // Unmap all direct memory areas covered by this unmap.
-        auto phys_addr = phys_base + start_in_vma;
-        auto remaining_size = adjusted_size;
-        DMemHandle dmem_handle = FindDmemArea(phys_addr);
-        while (dmem_handle != dmem_map.end() && remaining_size > 0) {
-            const auto start_in_dma = phys_addr - dmem_handle->second.base;
-            const auto size_in_dma = dmem_handle->second.size - start_in_dma > remaining_size
-                                         ? remaining_size
-                                         : dmem_handle->second.size - start_in_dma;
-            dmem_handle = CarveDmemArea(phys_addr, size_in_dma);
-            auto& dmem_area = dmem_handle->second;
-            dmem_area.dma_type = DMAType::Allocated;
-            remaining_size -= dmem_area.size;
-            phys_addr += dmem_area.size;
+    PAddr phys_base = 0;
+    VAddr current_addr = virtual_addr;
+    if (vma_base.phys_areas.size() > 0) {
+        u64 size_to_free = size_in_vma;
+        auto phys_handle = std::prev(vma_base.phys_areas.upper_bound(start_in_vma));
+        while (phys_handle != vma_base.phys_areas.end() && size_to_free > 0) {
+            // Calculate physical memory offset, address, and size
+            u64 dma_offset = std::max<PAddr>(phys_handle->first, start_in_vma) - phys_handle->first;
+            PAddr phys_addr = phys_handle->second.base + dma_offset;
+            u64 size_in_dma = std::min<u64>(size_to_free, phys_handle->second.size - dma_offset);
 
-            // Check if we can coalesce any dmem areas.
-            MergeAdjacent(dmem_map, dmem_handle);
-            dmem_handle = FindDmemArea(phys_addr);
+            // Mark the backing range unmapped: dmem becomes Allocated, fmem becomes Free
+            if (vma_type == VMAType::Direct) {
+                const auto new_dmem_handle = CarvePhysArea(dmem_map, phys_addr, size_in_dma);
+                auto& new_dmem_area = new_dmem_handle->second;
+                new_dmem_area.dma_type = PhysicalMemoryType::Allocated;
+
+                // Coalesce with nearby direct memory areas.
+                MergeAdjacent(dmem_map, new_dmem_handle);
+            } else if (vma_type == VMAType::Flexible) {
+                // Mark the range as free in fmem_map
+                const auto new_fmem_handle = CarvePhysArea(fmem_map, phys_addr, size_in_dma);
+                auto& new_fmem_area = new_fmem_handle->second;
+                new_fmem_area.dma_type = PhysicalMemoryType::Free;
+
+                // Coalesce with nearby flexible memory areas.
+                MergeAdjacent(fmem_map, new_fmem_handle);
+
+                // Zero out the old memory data
+                const auto unmap_hardware_address = impl.BackingBase() + phys_addr;
+                std::memset(unmap_hardware_address, 0, size_in_dma);
+
+                // Update flexible usage
+                flexible_usage -= size_in_dma;
+            }
+
+            // Advance to the next physical area
+            size_to_free -= size_in_dma;
+            phys_handle++;
         }
-    }
-
-    if (type == VMAType::Flexible) {
-        flexible_usage -= adjusted_size;
-
-        // Now that there is a physical backing used for flexible memory,
-        // manually erase the contents before unmapping to prevent possible issues.
-        const auto unmap_hardware_address = impl.BackingBase() + phys_base + start_in_vma;
-        std::memset(unmap_hardware_address, 0, adjusted_size);
-
-        // Address space unmap needs the physical_base from the start of the vma,
-        // so calculate the phys_base to unmap from here.
-        const auto unmap_phys_base = phys_base + start_in_vma;
-        const auto new_fmem_handle = CarveFmemArea(unmap_phys_base, adjusted_size);
-        auto& new_fmem_area = new_fmem_handle->second;
-        new_fmem_area.is_free = true;
-        MergeAdjacent(fmem_map, new_fmem_handle);
+        ASSERT_MSG(size_to_free == 0, "Failed to unmap physical memory");
     }
 
     // Mark region as free and attempt to coalesce it with neighbours.
-    const auto new_it = CarveVMA(virtual_addr, adjusted_size);
+    const auto new_it = CarveVMA(virtual_addr, size_in_vma);
     auto& vma = new_it->second;
     vma.type = VMAType::Free;
     vma.prot = MemoryProt::NoAccess;
-    vma.phys_base = 0;
+    vma.phys_areas.clear();
     vma.disallow_merge = false;
     vma.name = "";
     MergeAdjacent(vma_map, new_it);
 
-    if (type != VMAType::Reserved && type != VMAType::PoolReserved) {
+    if (vma_type != VMAType::Reserved && vma_type != VMAType::PoolReserved) {
+        // Unmap the memory region.
+        impl.Unmap(virtual_addr, size_in_vma, has_backing);
+        TRACK_FREE(virtual_addr, "VMEM");
+
         // If this mapping has GPU access, unmap from GPU.
         if (IsValidGpuMapping(virtual_addr, size)) {
+            mutex.unlock();
             rasterizer->UnmapMemory(virtual_addr, size);
+            mutex.lock();
         }
-
-        // Unmap the memory region.
-        impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + adjusted_size,
-                   phys_base, is_exec, has_backing, readonly_file);
-        TRACK_FREE(virtual_addr, "VMEM");
     }
-    return adjusted_size;
+    return size_in_vma;
 }
 
 s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, u64 size) {
@@ -792,12 +885,13 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, u64 size) {
 
 s32 MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* prot) {
     ASSERT_MSG(IsValidMapping(addr), "Attempted to access invalid address {:#x}", addr);
-    std::scoped_lock lk{mutex};
+    mutex.lock_shared();
 
     const auto it = FindVMA(addr);
     const auto& vma = it->second;
     if (vma.IsFree()) {
         LOG_ERROR(Kernel_Vmm, "Address {:#x} is not mapped", addr);
+        mutex.unlock_shared();
         return ORBIS_KERNEL_ERROR_EACCES;
     }
 
@@ -810,6 +904,8 @@ s32 MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr
     if (prot != nullptr) {
         *prot = static_cast<u32>(vma.prot);
     }
+
+    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
@@ -872,8 +968,6 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 siz
 }
 
 s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) {
-    std::scoped_lock lk{mutex};
-
     // If size is zero, then there's nothing to protect
     if (size == 0) {
         return ORBIS_OK;
@@ -887,6 +981,7 @@ s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) {
         MemoryProt::CpuReadWrite | MemoryProt::CpuExec | MemoryProt::GpuReadWrite;
     MemoryProt valid_flags = prot & flag_mask;
 
+    mutex.lock();
     // Protect all VMAs between addr and addr + size.
     s64 protected_bytes = 0;
     while (protected_bytes < size) {
@@ -899,18 +994,18 @@ s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) {
         auto result = ProtectBytes(addr + protected_bytes, vma_base, size - protected_bytes, prot);
         if (result < 0) {
             // ProtectBytes returned an error, return it
+            mutex.unlock();
             return result;
         }
         protected_bytes += result;
     }
 
+    mutex.unlock();
     return ORBIS_OK;
 }
 
 s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags,
                                 ::Libraries::Kernel::OrbisVirtualQueryInfo* info) {
-    std::scoped_lock lk{mutex};
-
     // FindVMA on addresses before the vma_map return garbage data.
     auto query_addr =
         addr < impl.SystemManagedVirtualBase() ? impl.SystemManagedVirtualBase() : addr;
@@ -918,6 +1013,8 @@ s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags,
         LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region");
         return ORBIS_KERNEL_ERROR_EACCES;
     }
+
+    mutex.lock_shared();
     auto it = FindVMA(query_addr);
 
     while (it != vma_map.end() && it->second.type == VMAType::Free && flags == 1) {
@@ -925,6 +1022,7 @@ s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags,
     }
     if (it == vma_map.end() || it->second.type == VMAType::Free) {
         LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region");
+        mutex.unlock_shared();
         return ORBIS_KERNEL_ERROR_EACCES;
     }
 
@@ -938,9 +1036,12 @@ s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags,
     info->is_stack = vma.type == VMAType::Stack ? 1 : 0;
     info->is_pooled = vma.type == VMAType::PoolReserved || vma.type == VMAType::Pooled ? 1 : 0;
     info->is_committed = vma.IsMapped() ? 1 : 0;
-    if (vma.type == VMAType::Direct || vma.type == VMAType::Pooled) {
-        // Offset is only assigned for direct and pooled mappings.
-        info->offset = vma.phys_base;
+    info->memory_type = 0;
+    if (vma.type == VMAType::Direct) {
+        // Offset and memory type are only assigned for direct mappings.
+        ASSERT_MSG(vma.phys_areas.size() > 0, "No physical backing for direct mapping?");
+        info->offset = vma.phys_areas.begin()->second.base;
+        info->memory_type = vma.phys_areas.begin()->second.memory_type;
     }
     if (vma.type == VMAType::Reserved || vma.type == VMAType::PoolReserved) {
         // Protection is hidden from reserved mappings.
@@ -949,34 +1050,27 @@ s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags,
 
     strncpy(info->name, vma.name.data(), ::Libraries::Kernel::ORBIS_KERNEL_MAXIMUM_NAME_LENGTH);
 
-    if (vma.type == VMAType::Direct) {
-        const auto dmem_it = FindDmemArea(vma.phys_base);
-        ASSERT_MSG(vma.phys_base <= dmem_it->second.GetEnd(), "vma.phys_base is not in dmem_map!");
-        info->memory_type = dmem_it->second.memory_type;
-    } else {
-        info->memory_type = ::Libraries::Kernel::ORBIS_KERNEL_WB_ONION;
-    }
-
+    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
 s32 MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next,
                                      ::Libraries::Kernel::OrbisQueryInfo* out_info) {
-    std::scoped_lock lk{mutex};
-
     if (addr >= total_direct_size) {
         LOG_WARNING(Kernel_Vmm, "Unable to find allocated direct memory region to query!");
         return ORBIS_KERNEL_ERROR_EACCES;
     }
 
+    mutex.lock_shared();
     auto dmem_area = FindDmemArea(addr);
-    while (dmem_area != dmem_map.end() && dmem_area->second.dma_type == DMAType::Free &&
+    while (dmem_area != dmem_map.end() && dmem_area->second.dma_type == PhysicalMemoryType::Free &&
            find_next) {
         dmem_area++;
     }
 
-    if (dmem_area == dmem_map.end() || dmem_area->second.dma_type == DMAType::Free) {
+    if (dmem_area == dmem_map.end() || dmem_area->second.dma_type == PhysicalMemoryType::Free) {
         LOG_WARNING(Kernel_Vmm, "Unable to find allocated direct memory region to query!");
+        mutex.unlock_shared();
         return ORBIS_KERNEL_ERROR_EACCES;
     }
 
@@ -986,25 +1080,26 @@ s32 MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next,
     // Loop through all sequential mapped or allocated dmem areas
     // to determine the hardware accurate end.
     while (dmem_area != dmem_map.end() && dmem_area->second.memory_type == out_info->memoryType &&
-           (dmem_area->second.dma_type == DMAType::Mapped ||
-            dmem_area->second.dma_type == DMAType::Allocated)) {
+           (dmem_area->second.dma_type == PhysicalMemoryType::Mapped ||
+            dmem_area->second.dma_type == PhysicalMemoryType::Allocated)) {
         out_info->end = dmem_area->second.GetEnd();
         dmem_area++;
     }
 
+    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
 s32 MemoryManager::DirectQueryAvailable(PAddr search_start, PAddr search_end, u64 alignment,
                                         PAddr* phys_addr_out, u64* size_out) {
-    std::scoped_lock lk{mutex};
+    mutex.lock_shared();
 
     auto dmem_area = FindDmemArea(search_start);
     PAddr paddr{};
     u64 max_size{};
 
     while (dmem_area != dmem_map.end()) {
-        if (dmem_area->second.dma_type != DMAType::Free) {
+        if (dmem_area->second.dma_type != PhysicalMemoryType::Free) {
             dmem_area++;
             continue;
         }
@@ -1037,18 +1132,20 @@ s32 MemoryManager::DirectQueryAvailable(PAddr search_start, PAddr search_end, u6
         dmem_area++;
     }
 
+    mutex.unlock_shared();
     *phys_addr_out = paddr;
     *size_out = max_size;
     return ORBIS_OK;
 }
 
 s32 MemoryManager::SetDirectMemoryType(VAddr addr, u64 size, s32 memory_type) {
-    std::scoped_lock lk{mutex};
+    mutex.lock();
 
     ASSERT_MSG(IsValidMapping(addr, size), "Attempted to access invalid address {:#x}", addr);
 
     // Search through all VMAs covered by the provided range.
     // We aren't modifying these VMAs, so it's safe to iterate through them.
+    VAddr current_addr = addr;
     auto remaining_size = size;
     auto vma_handle = FindVMA(addr);
     while (vma_handle != vma_map.end() && vma_handle->second.base < addr + size) {
@@ -1056,40 +1153,42 @@ s32 MemoryManager::SetDirectMemoryType(VAddr addr, u64 size, s32 memory_type) {
         if (vma_handle->second.type == VMAType::Direct ||
             vma_handle->second.type == VMAType::Pooled) {
             // Calculate position in vma
-            const auto start_in_vma = addr - vma_handle->second.base;
+            const auto start_in_vma = current_addr - vma_handle->second.base;
             const auto size_in_vma = vma_handle->second.size - start_in_vma;
-            auto phys_addr = vma_handle->second.phys_base + start_in_vma;
-            auto size_to_modify = remaining_size > size_in_vma ? size_in_vma : remaining_size;
+            const auto base_phys_addr = vma_handle->second.phys_areas.begin()->second.base;
+            auto size_to_modify = std::min<u64>(remaining_size, size_in_vma);
+            for (auto& phys_handle : vma_handle->second.phys_areas) {
+                if (size_to_modify == 0) {
+                    break;
+                }
 
-            // Loop through remaining dmem areas until the physical addresses represented
-            // are all adjusted.
-            DMemHandle dmem_handle = FindDmemArea(phys_addr);
-            while (dmem_handle != dmem_map.end() && size_in_vma >= size_to_modify &&
-                   size_to_modify > 0) {
-                const auto start_in_dma = phys_addr - dmem_handle->second.base;
-                const auto size_in_dma = dmem_handle->second.size - start_in_dma > size_to_modify
-                                             ? size_to_modify
-                                             : dmem_handle->second.size - start_in_dma;
-                dmem_handle = CarveDmemArea(phys_addr, size_in_dma);
+                const auto current_phys_addr =
+                    std::max<PAddr>(base_phys_addr, phys_handle.second.base);
+                if (current_phys_addr >= phys_handle.second.base + phys_handle.second.size) {
+                    continue;
+                }
+                const auto start_in_dma = current_phys_addr - phys_handle.second.base;
+                const auto size_in_dma = phys_handle.second.size - start_in_dma;
+
+                phys_handle.second.memory_type = memory_type;
+
+                auto dmem_handle = CarvePhysArea(dmem_map, current_phys_addr, size_in_dma);
                 auto& dmem_area = dmem_handle->second;
                 dmem_area.memory_type = memory_type;
                 size_to_modify -= dmem_area.size;
-                phys_addr += dmem_area.size;
-
-                // Check if we can coalesce any dmem areas now that the types are different.
                 MergeAdjacent(dmem_map, dmem_handle);
-                dmem_handle = FindDmemArea(phys_addr);
             }
         }
         remaining_size -= vma_handle->second.size;
         vma_handle++;
     }
 
+    mutex.unlock();
     return ORBIS_OK;
 }
 
 void MemoryManager::NameVirtualRange(VAddr virtual_addr, u64 size, std::string_view name) {
-    std::scoped_lock lk{mutex};
+    mutex.lock();
 
     // Sizes are aligned up to the nearest 16_KB
     auto aligned_size = Common::AlignUp(size, 16_KB);
@@ -1116,6 +1215,8 @@ void MemoryManager::NameVirtualRange(VAddr virtual_addr, u64 size, std::string_v
         current_addr += it->second.size;
         it = FindVMA(current_addr);
     }
+
+    mutex.unlock();
 }
 
 s32 MemoryManager::GetDirectMemoryType(PAddr addr, s32* directMemoryTypeOut,
@@ -1125,22 +1226,27 @@ s32 MemoryManager::GetDirectMemoryType(PAddr addr, s32* directMemoryTypeOut,
         return ORBIS_KERNEL_ERROR_ENOENT;
     }
 
+    mutex.lock_shared();
     const auto& dmem_area = FindDmemArea(addr)->second;
-    if (dmem_area.dma_type == DMAType::Free) {
+    if (dmem_area.dma_type == PhysicalMemoryType::Free) {
         LOG_ERROR(Kernel_Vmm, "Unable to find allocated direct memory region to check type!");
+        mutex.unlock_shared();
         return ORBIS_KERNEL_ERROR_ENOENT;
     }
 
     *directMemoryStartOut = reinterpret_cast<void*>(dmem_area.base);
     *directMemoryEndOut = reinterpret_cast<void*>(dmem_area.GetEnd());
     *directMemoryTypeOut = dmem_area.memory_type;
+    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
 s32 MemoryManager::IsStack(VAddr addr, void** start, void** end) {
     ASSERT_MSG(IsValidMapping(addr), "Attempted to access invalid address {:#x}", addr);
+    mutex.lock_shared();
     const auto& vma = FindVMA(addr)->second;
     if (vma.IsFree()) {
+        mutex.unlock_shared();
         return ORBIS_KERNEL_ERROR_EACCES;
     }
 
@@ -1159,11 +1265,12 @@ s32 MemoryManager::IsStack(VAddr addr, void** start, void** end) {
         *end = reinterpret_cast<void*>(stack_end);
     }
 
+    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
 s32 MemoryManager::GetMemoryPoolStats(::Libraries::Kernel::OrbisKernelMemoryPoolBlockStats* stats) {
-    std::scoped_lock lk{mutex};
+    mutex.lock_shared();
 
     // Run through dmem_map, determine how much physical memory is currently committed
     constexpr u64 block_size = 64_KB;
@@ -1171,7 +1278,7 @@ s32 MemoryManager::GetMemoryPoolStats(::Libraries::Kernel::OrbisKernelMemoryPool
 
     auto dma_handle = dmem_map.begin();
     while (dma_handle != dmem_map.end()) {
-        if (dma_handle->second.dma_type == DMAType::Committed) {
+        if (dma_handle->second.dma_type == PhysicalMemoryType::Committed) {
             committed_size += dma_handle->second.size;
         }
         dma_handle++;
@@ -1182,6 +1289,8 @@ s32 MemoryManager::GetMemoryPoolStats(::Libraries::Kernel::OrbisKernelMemoryPool
     // TODO: Determine how "cached blocks" work
     stats->allocated_cached_blocks = 0;
     stats->available_cached_blocks = 0;
+
+    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
@@ -1251,6 +1360,52 @@ VAddr MemoryManager::SearchFree(VAddr virtual_addr, u64 size, u32 alignment) {
     return -1;
 }
 
+MemoryManager::VMAHandle MemoryManager::MergeAdjacent(VMAMap& handle_map, VMAHandle iter) {
+    const auto next_vma = std::next(iter);
+    if (next_vma != handle_map.end() && iter->second.CanMergeWith(next_vma->second)) {
+        u64 base_offset = iter->second.size;
+        iter->second.size += next_vma->second.size;
+        for (auto& area : next_vma->second.phys_areas) {
+            iter->second.phys_areas[base_offset + area.first] = area.second;
+        }
+        handle_map.erase(next_vma);
+    }
+
+    if (iter != handle_map.begin()) {
+        auto prev_vma = std::prev(iter);
+        if (prev_vma->second.CanMergeWith(iter->second)) {
+            u64 base_offset = prev_vma->second.size;
+            prev_vma->second.size += iter->second.size;
+            for (auto& area : iter->second.phys_areas) {
+                prev_vma->second.phys_areas[base_offset + area.first] = area.second;
+            }
+            handle_map.erase(iter);
+            iter = prev_vma;
+        }
+    }
+
+    return iter;
+}
+
+MemoryManager::PhysHandle MemoryManager::MergeAdjacent(PhysMap& handle_map, PhysHandle iter) {
+    const auto next_vma = std::next(iter);
+    if (next_vma != handle_map.end() && iter->second.CanMergeWith(next_vma->second)) {
+        iter->second.size += next_vma->second.size;
+        handle_map.erase(next_vma);
+    }
+
+    if (iter != handle_map.begin()) {
+        auto prev_vma = std::prev(iter);
+        if (prev_vma->second.CanMergeWith(iter->second)) {
+            prev_vma->second.size += iter->second.size;
+            handle_map.erase(iter);
+            iter = prev_vma;
+        }
+    }
+
+    return iter;
+}
+
 MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, u64 size) {
     auto vma_handle = FindVMA(virtual_addr);
 
@@ -1279,11 +1434,11 @@ MemoryManager::VMAHandle MemoryManager::CarveVMA(VAddr virtual_addr, u64 size) {
     return vma_handle;
 }
 
-MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, u64 size) {
-    auto dmem_handle = FindDmemArea(addr);
-    ASSERT_MSG(addr <= dmem_handle->second.GetEnd(), "Physical address not in dmem_map");
+MemoryManager::PhysHandle MemoryManager::CarvePhysArea(PhysMap& map, PAddr addr, u64 size) {
+    auto pmem_handle = std::prev(map.upper_bound(addr));
+    ASSERT_MSG(addr <= pmem_handle->second.GetEnd(), "Physical address not in map");
 
-    const DirectMemoryArea& area = dmem_handle->second;
+    const PhysicalMemoryArea& area = pmem_handle->second;
     ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region");
 
     const PAddr start_in_area = addr - area.base;
@@ -1293,38 +1448,14 @@ MemoryManager::DMemHandle MemoryManager::CarveDmemArea(PAddr addr, u64 size) {
 
     if (end_in_vma != area.size) {
         // Split VMA at the end of the allocated region
-        Split(dmem_handle, end_in_vma);
+        Split(map, pmem_handle, end_in_vma);
     }
     if (start_in_area != 0) {
         // Split VMA at the start of the allocated region
-        dmem_handle = Split(dmem_handle, start_in_area);
+        pmem_handle = Split(map, pmem_handle, start_in_area);
     }
 
-    return dmem_handle;
-}
-
-MemoryManager::FMemHandle MemoryManager::CarveFmemArea(PAddr addr, u64 size) {
-    auto fmem_handle = FindFmemArea(addr);
-    ASSERT_MSG(addr <= fmem_handle->second.GetEnd(), "Physical address not in fmem_map");
-
-    const FlexibleMemoryArea& area = fmem_handle->second;
-    ASSERT_MSG(area.base <= addr, "Adding an allocation to already allocated region");
-
-    const PAddr start_in_area = addr - area.base;
-    const PAddr end_in_vma = start_in_area + size;
-    ASSERT_MSG(end_in_vma <= area.size, "Mapping cannot fit inside free region: size = {:#x}",
-               size);
-
-    if (end_in_vma != area.size) {
-        // Split VMA at the end of the allocated region
-        Split(fmem_handle, end_in_vma);
-    }
-    if (start_in_area != 0) {
-        // Split VMA at the start of the allocated region
-        fmem_handle = Split(fmem_handle, start_in_area);
-    }
-
-    return fmem_handle;
+    return pmem_handle;
 }
 
 MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, u64 offset_in_vma) {
@@ -1337,13 +1468,43 @@ MemoryManager::VMAHandle MemoryManager::Split(VMAHandle vma_handle, u64 offset_i
     new_vma.size -= offset_in_vma;
 
     if (HasPhysicalBacking(new_vma)) {
-        new_vma.phys_base += offset_in_vma;
+        // Split the physical areas map between the old and new VMAs
+        new_vma.phys_areas.clear();
+
+        std::map<uintptr_t, PhysicalMemoryArea> old_vma_phys_areas;
+        for (auto& [offset, region] : old_vma.phys_areas) {
+            // Fully contained in first VMA
+            if (offset + region.size <= offset_in_vma) {
+                old_vma_phys_areas[offset] = region;
+            }
+            // Split between both VMAs
+            if (offset < offset_in_vma && offset + region.size > offset_in_vma) {
+                // Create region in old VMA
+                u64 size_in_old = offset_in_vma - offset;
+                old_vma_phys_areas[offset] = PhysicalMemoryArea{
+                    region.base, size_in_old, region.memory_type, region.dma_type};
+                // Create region in new VMA
+                PAddr new_base = region.base + size_in_old;
+                u64 size_in_new = region.size - size_in_old;
+                new_vma.phys_areas[0] =
+                    PhysicalMemoryArea{new_base, size_in_new, region.memory_type, region.dma_type};
+            }
+            // Fully contained in new VMA
+            if (offset >= offset_in_vma) {
+                new_vma.phys_areas[offset - offset_in_vma] = region;
+            }
+        }
+
+        // Set old_vma's physical areas map to the newly created map
+        old_vma.phys_areas = old_vma_phys_areas;
     }
+
     return vma_map.emplace_hint(std::next(vma_handle), new_vma.base, new_vma);
 }
 
-MemoryManager::DMemHandle MemoryManager::Split(DMemHandle dmem_handle, u64 offset_in_area) {
-    auto& old_area = dmem_handle->second;
+MemoryManager::PhysHandle MemoryManager::Split(PhysMap& map, PhysHandle phys_handle,
+                                               u64 offset_in_area) {
+    auto& old_area = phys_handle->second;
     ASSERT(offset_in_area < old_area.size && offset_in_area > 0);
 
     auto new_area = old_area;
@@ -1352,19 +1513,7 @@ MemoryManager::DMemHandle MemoryManager::Split(DMemHandle dmem_handle, u64 offse
     new_area.base += offset_in_area;
     new_area.size -= offset_in_area;
 
-    return dmem_map.emplace_hint(std::next(dmem_handle), new_area.base, new_area);
-}
-
-MemoryManager::FMemHandle MemoryManager::Split(FMemHandle fmem_handle, u64 offset_in_area) {
-    auto& old_area = fmem_handle->second;
-    ASSERT(offset_in_area < old_area.size && offset_in_area > 0);
-
-    auto new_area = old_area;
-    old_area.size = offset_in_area;
-    new_area.base += offset_in_area;
-    new_area.size -= offset_in_area;
-
-    return fmem_map.emplace_hint(std::next(fmem_handle), new_area.base, new_area);
+    return map.emplace_hint(std::next(phys_handle), new_area.base, new_area);
 }
 
 } // namespace Core
diff --git a/src/core/memory.h b/src/core/memory.h
index 7ebf9d34c..0664ed592 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -8,6 +8,7 @@
 #include <string>
 #include <string_view>
 #include "common/enum.h"
+#include "common/shared_first_mutex.h"
 #include "common/singleton.h"
 #include "common/types.h"
 #include "core/address_space.h"
@@ -54,12 +55,37 @@ enum class MemoryMapFlags : u32 {
 };
 DECLARE_ENUM_FLAG_OPERATORS(MemoryMapFlags)
 
-enum class DMAType : u32 {
+enum class PhysicalMemoryType : u32 {
     Free = 0,
     Allocated = 1,
     Mapped = 2,
     Pooled = 3,
     Committed = 4,
+    Flexible = 5,
+};
+
+struct PhysicalMemoryArea {
+    PAddr base = 0;
+    u64 size = 0;
+    s32 memory_type = 0;
+    PhysicalMemoryType dma_type = PhysicalMemoryType::Free;
+
+    PAddr GetEnd() const {
+        return base + size;
+    }
+
+    bool CanMergeWith(const PhysicalMemoryArea& next) const {
+        if (base + size != next.base) {
+            return false;
+        }
+        if (memory_type != next.memory_type) {
+            return false;
+        }
+        if (dma_type != next.dma_type) {
+            return false;
+        }
+        return true;
+    }
 };
 
 enum class VMAType : u32 {
@@ -74,60 +100,15 @@ enum class VMAType : u32 {
     File = 8,
 };
 
-struct DirectMemoryArea {
-    PAddr base = 0;
-    u64 size = 0;
-    s32 memory_type = 0;
-    DMAType dma_type = DMAType::Free;
-
-    PAddr GetEnd() const {
-        return base + size;
-    }
-
-    bool CanMergeWith(const DirectMemoryArea& next) const {
-        if (base + size != next.base) {
-            return false;
-        }
-        if (memory_type != next.memory_type) {
-            return false;
-        }
-        if (dma_type != next.dma_type) {
-            return false;
-        }
-        return true;
-    }
-};
-
-struct FlexibleMemoryArea {
-    PAddr base = 0;
-    u64 size = 0;
-    bool is_free = true;
-
-    PAddr GetEnd() const {
-        return base + size;
-    }
-
-    bool CanMergeWith(const FlexibleMemoryArea& next) const {
-        if (base + size != next.base) {
-            return false;
-        }
-        if (is_free != next.is_free) {
-            return false;
-        }
-        return true;
-    }
-};
-
 struct VirtualMemoryArea {
     VAddr base = 0;
     u64 size = 0;
-    PAddr phys_base = 0;
+    std::map<uintptr_t, PhysicalMemoryArea> phys_areas;
     VMAType type = VMAType::Free;
     MemoryProt prot = MemoryProt::NoAccess;
-    bool disallow_merge = false;
     std::string name = "";
-    uintptr_t fd = 0;
-    bool is_exec = false;
+    s32 fd = 0;
+    bool disallow_merge = false;
 
     bool Contains(VAddr addr, u64 size) const {
         return addr >= base && (addr + size) <= (base + this->size);
@@ -141,30 +122,32 @@ struct VirtualMemoryArea {
         return type != VMAType::Free && type != VMAType::Reserved && type != VMAType::PoolReserved;
     }
 
-    bool CanMergeWith(const VirtualMemoryArea& next) const {
+    bool CanMergeWith(VirtualMemoryArea& next) {
         if (disallow_merge || next.disallow_merge) {
             return false;
         }
         if (base + size != next.base) {
             return false;
         }
-        if ((type == VMAType::Direct || type == VMAType::Flexible || type == VMAType::Pooled) &&
-            phys_base + size != next.phys_base) {
-            return false;
+        if (type == VMAType::Direct && next.type == VMAType::Direct) {
+            auto& last_phys = std::prev(phys_areas.end())->second;
+            auto& first_next_phys = next.phys_areas.begin()->second;
+            if (last_phys.base + last_phys.size != first_next_phys.base ||
+                last_phys.memory_type != first_next_phys.memory_type) {
+                return false;
+            }
         }
         if (prot != next.prot || type != next.type) {
             return false;
         }
+
         return true;
     }
 };
 
 class MemoryManager {
-    using DMemMap = std::map<PAddr, DirectMemoryArea>;
-    using DMemHandle = DMemMap::iterator;
-
-    using FMemMap = std::map<PAddr, FlexibleMemoryArea>;
-    using FMemHandle = FMemMap::iterator;
+    using PhysMap = std::map<PAddr, PhysicalMemoryArea>;
+    using PhysHandle = PhysMap::iterator;
 
     using VMAMap = std::map<VAddr, VirtualMemoryArea>;
     using VMAHandle = VMAMap::iterator;
@@ -220,10 +203,11 @@ public:
         // Now make sure the full address range is contained in vma_map.
         auto vma_handle = FindVMA(virtual_addr);
         auto addr_to_check = virtual_addr;
-        s64 size_to_validate = size;
+        u64 size_to_validate = size;
         while (vma_handle != vma_map.end() && size_to_validate > 0) {
             const auto offset_in_vma = addr_to_check - vma_handle->second.base;
-            const auto size_in_vma = vma_handle->second.size - offset_in_vma;
+            const auto size_in_vma =
+                std::min<u64>(vma_handle->second.size - offset_in_vma, size_to_validate);
             size_to_validate -= size_in_vma;
             addr_to_check += size_in_vma;
             vma_handle++;
@@ -245,7 +229,7 @@ public:
 
     void CopySparseMemory(VAddr source, u8* dest, u64 size);
 
-    bool TryWriteBacking(void* address, const void* data, u32 num_bytes);
+    bool TryWriteBacking(void* address, const void* data, u64 size);
 
     void SetupMemoryRegions(u64 flexible_size, bool use_extended_mem1, bool use_extended_mem2);
 
@@ -300,34 +284,14 @@ private:
         return std::prev(vma_map.upper_bound(target));
     }
 
-    DMemHandle FindDmemArea(PAddr target) {
+    PhysHandle FindDmemArea(PAddr target) {
         return std::prev(dmem_map.upper_bound(target));
     }
 
-    FMemHandle FindFmemArea(PAddr target) {
+    PhysHandle FindFmemArea(PAddr target) {
         return std::prev(fmem_map.upper_bound(target));
     }
 
-    template <typename Handle>
-    Handle MergeAdjacent(auto& handle_map, Handle iter) {
-        const auto next_vma = std::next(iter);
-        if (next_vma != handle_map.end() && iter->second.CanMergeWith(next_vma->second)) {
-            iter->second.size += next_vma->second.size;
-            handle_map.erase(next_vma);
-        }
-
-        if (iter != handle_map.begin()) {
-            auto prev_vma = std::prev(iter);
-            if (prev_vma->second.CanMergeWith(iter->second)) {
-                prev_vma->second.size += iter->second.size;
-                handle_map.erase(iter);
-                iter = prev_vma;
-            }
-        }
-
-        return iter;
-    }
-
     bool HasPhysicalBacking(VirtualMemoryArea vma) {
         return vma.type == VMAType::Direct || vma.type == VMAType::Flexible ||
                vma.type == VMAType::Pooled;
@@ -335,17 +299,17 @@ private:
 
     VAddr SearchFree(VAddr virtual_addr, u64 size, u32 alignment);
 
+    VMAHandle MergeAdjacent(VMAMap& map, VMAHandle iter);
+
+    PhysHandle MergeAdjacent(PhysMap& map, PhysHandle iter);
+
     VMAHandle CarveVMA(VAddr virtual_addr, u64 size);
 
-    DMemHandle CarveDmemArea(PAddr addr, u64 size);
-
-    FMemHandle CarveFmemArea(PAddr addr, u64 size);
+    PhysHandle CarvePhysArea(PhysMap& map, PAddr addr, u64 size);
 
     VMAHandle Split(VMAHandle vma_handle, u64 offset_in_vma);
 
-    DMemHandle Split(DMemHandle dmem_handle, u64 offset_in_area);
-
-    FMemHandle Split(FMemHandle fmem_handle, u64 offset_in_area);
+    PhysHandle Split(PhysMap& map, PhysHandle dmem_handle, u64 offset_in_area);
 
     u64 UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma_base, u64 size);
 
@@ -353,14 +317,15 @@ private:
 
 private:
     AddressSpace impl;
-    DMemMap dmem_map;
-    FMemMap fmem_map;
+    PhysMap dmem_map;
+    PhysMap fmem_map;
     VMAMap vma_map;
-    std::mutex mutex;
+    Common::SharedFirstMutex mutex{};
     u64 total_direct_size{};
     u64 total_flexible_size{};
     u64 flexible_usage{};
     u64 pool_budget{};
+    s32 sdk_version{};
     Vulkan::Rasterizer* rasterizer{};
 
     struct PrtArea {

From 0d5c5f81a60701f15af32953b8aedeb201bc3f05 Mon Sep 17 00:00:00 2001
From: TheTurtle <geoster3d@gmail.com>
Date: Wed, 21 Jan 2026 22:49:35 +0200
Subject: [PATCH 02/10] video_core: Small readback optimization (#3941)

* pm4_cmds: Handle nop packet overflow

* liverpool: Detect DispatchDirect patches and promote to DispatchIndirect

* clang..

* log removed

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
---
 src/video_core/amdgpu/liverpool.cpp | 27 +++++++++++++++++++++++++--
 src/video_core/amdgpu/pm4_cmds.h    |  2 +-
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp
index 3f307c51b..32ea1e8ed 100644
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@@ -830,7 +830,14 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
     FIBER_ENTER(acb_task_name[vqid]);
     auto& queue = asc_queues[{vqid}];
 
+    struct IndirectPatch {
+        const PM4Header* header;
+        VAddr indirect_addr;
+    };
+    boost::container::small_vector<IndirectPatch, 4> indirect_patches;
+
     auto base_addr = reinterpret_cast<VAddr>(acb.data());
+    size_t acb_size = acb.size_bytes();
     while (!acb.empty()) {
         ProcessCommands();
 
@@ -919,8 +926,18 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
                         dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
                        (dma_data->dst_sel == DmaDataDst::Memory ||
                         dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
-                rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->SrcAddress<VAddr>(),
-                                       dma_data->NumBytes(), false, false);
+                const u32 num_bytes = dma_data->NumBytes();
+                const VAddr src_addr = dma_data->SrcAddress<VAddr>();
+                const VAddr dst_addr = dma_data->DstAddress<VAddr>();
+                const PM4Header* header =
+                    reinterpret_cast<const PM4Header*>(dst_addr - sizeof(PM4Header));
+                if (dst_addr >= base_addr && dst_addr < base_addr + acb_size &&
+                    num_bytes == sizeof(PM4CmdDispatchIndirect::GroupDimensions) &&
+                    header->type == 3 && header->type3.opcode == PM4ItOpcode::DispatchDirect) {
+                    indirect_patches.emplace_back(header, src_addr);
+                } else {
+                    rasterizer->CopyBuffer(dst_addr, src_addr, num_bytes, false, false);
+                }
             } else {
                 UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
                                 u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
@@ -964,6 +981,12 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
         }
         case PM4ItOpcode::DispatchDirect: {
             const auto* dispatch_direct = reinterpret_cast<const PM4CmdDispatchDirect*>(header);
+            if (auto it = std::ranges::find(indirect_patches, header, &IndirectPatch::header);
+                it != indirect_patches.end()) {
+                const auto size = sizeof(PM4CmdDispatchIndirect::GroupDimensions);
+                rasterizer->DispatchIndirect(it->indirect_addr, 0, size);
+                break;
+            }
             auto& cs_program = GetCsRegs();
             cs_program.dim_x = dispatch_direct->dim_x;
             cs_program.dim_y = dispatch_direct->dim_y;
diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h
index eb48f3568..46ecb09d6 100644
--- a/src/video_core/amdgpu/pm4_cmds.h
+++ b/src/video_core/amdgpu/pm4_cmds.h
@@ -50,7 +50,7 @@ union PM4Type3Header {
     }
 
     u32 NumWords() const {
-        return count + 1;
+        return (count + 1) & 0x3fff;
     }
 
     u32 raw;

From 62813c0106f4602a1b1fac9e40e827e344434931 Mon Sep 17 00:00:00 2001
From: georgemoralis <giorgosmrls@gmail.com>
Date: Thu, 22 Jan 2026 17:03:22 +0200
Subject: [PATCH 03/10] Unified UserId (#3949)

* Added OrbisUserServiceUserId in a generic way to all classes that use it

* clang
---
 .../libraries/app_content/app_content.cpp     |  4 +-
 src/core/libraries/audio3d/audio3d.cpp        | 10 ++---
 src/core/libraries/audio3d/audio3d.h          |  9 ++--
 .../libraries/companion/companion_httpd.cpp   |  4 +-
 .../game_live_streaming/gamelivestreaming.h   |  7 ++--
 src/core/libraries/np/np_auth.h               |  7 ++--
 src/core/libraries/np/np_manager.h            |  4 +-
 src/core/libraries/np/np_trophy.cpp           |  3 +-
 src/core/libraries/np/np_trophy.h             |  6 ++-
 src/core/libraries/pad/pad.cpp                | 10 +++--
 src/core/libraries/pad/pad.h                  | 12 ++++--
 src/core/libraries/remote_play/remoteplay.cpp |  3 +-
 src/core/libraries/remote_play/remoteplay.h   |  6 ++-
 src/core/libraries/save_data/save_backup.cpp  |  2 +-
 src/core/libraries/save_data/save_backup.h    |  9 ++--
 .../libraries/save_data/save_instance.cpp     | 10 ++---
 src/core/libraries/save_data/save_instance.h  | 17 ++++----
 src/core/libraries/save_data/save_memory.cpp  |  8 ++--
 src/core/libraries/save_data/save_memory.h    | 14 +++----
 src/core/libraries/save_data/savedata.cpp     | 42 ++++++++++---------
 src/core/libraries/save_data/savedata.h       | 18 ++++----
 src/core/libraries/share_play/shareplay.h     |  7 ++--
 src/core/libraries/system/systemservice.h     |  7 ++--
 src/core/libraries/videoout/video_out.cpp     |  4 +-
 src/core/libraries/videoout/video_out.h       |  9 ++--
 25 files changed, 124 insertions(+), 108 deletions(-)

diff --git a/src/core/libraries/app_content/app_content.cpp b/src/core/libraries/app_content/app_content.cpp
index 1523c2703..a5952c7ea 100644
--- a/src/core/libraries/app_content/app_content.cpp
+++ b/src/core/libraries/app_content/app_content.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include <cmath>
@@ -345,7 +345,7 @@ int PS4_SYSV_ABI sceAppContentInitialize(const OrbisAppContentInitParam* initPar
     if (addcont_count > 0) {
         SystemService::OrbisSystemServiceEvent event{};
         event.event_type = SystemService::OrbisSystemServiceEventType::EntitlementUpdate;
-        event.service_entitlement_update.user_id = 0;
+        event.service_entitlement_update.userId = 0;
         event.service_entitlement_update.np_service_label = 0;
         SystemService::PushSystemServiceEvent(event);
     }
diff --git a/src/core/libraries/audio3d/audio3d.cpp b/src/core/libraries/audio3d/audio3d.cpp
index 2ddbcd890..3f5fdcf78 100644
--- a/src/core/libraries/audio3d/audio3d.cpp
+++ b/src/core/libraries/audio3d/audio3d.cpp
@@ -29,10 +29,10 @@ s32 PS4_SYSV_ABI sceAudio3dAudioOutClose(const s32 handle) {
     return AudioOut::sceAudioOutClose(handle);
 }
 
-s32 PS4_SYSV_ABI
-sceAudio3dAudioOutOpen(const OrbisAudio3dPortId port_id, const OrbisUserServiceUserId user_id,
-                       s32 type, const s32 index, const u32 len, const u32 freq,
-                       const AudioOut::OrbisAudioOutParamExtendedInformation param) {
+s32 PS4_SYSV_ABI sceAudio3dAudioOutOpen(
+    const OrbisAudio3dPortId port_id, const Libraries::UserService::OrbisUserServiceUserId user_id,
+    s32 type, const s32 index, const u32 len, const u32 freq,
+    const AudioOut::OrbisAudioOutParamExtendedInformation param) {
     LOG_INFO(Lib_Audio3d,
              "called, port_id = {}, user_id = {}, type = {}, index = {}, len = {}, freq = {}",
              port_id, user_id, type, index, len, freq);
@@ -422,7 +422,7 @@ s32 PS4_SYSV_ABI sceAudio3dPortGetStatus() {
     return ORBIS_OK;
 }
 
-s32 PS4_SYSV_ABI sceAudio3dPortOpen(const OrbisUserServiceUserId user_id,
+s32 PS4_SYSV_ABI sceAudio3dPortOpen(const Libraries::UserService::OrbisUserServiceUserId user_id,
                                     const OrbisAudio3dOpenParameters* parameters,
                                     OrbisAudio3dPortId* port_id) {
     LOG_INFO(Lib_Audio3d, "called, user_id = {}, parameters = {}, id = {}", user_id,
diff --git a/src/core/libraries/audio3d/audio3d.h b/src/core/libraries/audio3d/audio3d.h
index 1057c1f31..ae20e39a8 100644
--- a/src/core/libraries/audio3d/audio3d.h
+++ b/src/core/libraries/audio3d/audio3d.h
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
@@ -15,8 +15,6 @@ class SymbolsResolver;
 
 namespace Libraries::Audio3d {
 
-using OrbisUserServiceUserId = s32;
-
 enum class OrbisAudio3dRate : u32 {
     ORBIS_AUDIO3D_RATE_48000 = 0,
 };
@@ -91,7 +89,8 @@ struct Audio3dState {
 };
 
 s32 PS4_SYSV_ABI sceAudio3dAudioOutClose(s32 handle);
-s32 PS4_SYSV_ABI sceAudio3dAudioOutOpen(OrbisAudio3dPortId port_id, OrbisUserServiceUserId user_id,
+s32 PS4_SYSV_ABI sceAudio3dAudioOutOpen(OrbisAudio3dPortId port_id,
+                                        Libraries::UserService::OrbisUserServiceUserId user_id,
                                         s32 type, s32 index, u32 len, u32 freq,
                                         AudioOut::OrbisAudioOutParamExtendedInformation param);
 s32 PS4_SYSV_ABI sceAudio3dAudioOutOutput(s32 handle, void* ptr);
@@ -127,7 +126,7 @@ s32 PS4_SYSV_ABI sceAudio3dPortGetQueueLevel(OrbisAudio3dPortId port_id, u32* qu
                                              u32* queue_available);
 s32 PS4_SYSV_ABI sceAudio3dPortGetState();
 s32 PS4_SYSV_ABI sceAudio3dPortGetStatus();
-s32 PS4_SYSV_ABI sceAudio3dPortOpen(OrbisUserServiceUserId user_id,
+s32 PS4_SYSV_ABI sceAudio3dPortOpen(Libraries::UserService::OrbisUserServiceUserId user_id,
                                     const OrbisAudio3dOpenParameters* parameters,
                                     OrbisAudio3dPortId* port_id);
 s32 PS4_SYSV_ABI sceAudio3dPortPush(OrbisAudio3dPortId port_id, OrbisAudio3dBlocking blocking);
diff --git a/src/core/libraries/companion/companion_httpd.cpp b/src/core/libraries/companion/companion_httpd.cpp
index a8756c4cd..6ffc8c052 100644
--- a/src/core/libraries/companion/companion_httpd.cpp
+++ b/src/core/libraries/companion/companion_httpd.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #include "common/logging/log.h"
@@ -16,7 +16,7 @@ s32 PS4_SYSV_ABI sceCompanionHttpdAddHeader(const char* key, const char* value,
 }
 
 s32 PS4_SYSV_ABI
-sceCompanionHttpdGet2ndScreenStatus(Libraries::UserService::OrbisUserServiceUserId) {
+sceCompanionHttpdGet2ndScreenStatus(Libraries::UserService::OrbisUserServiceUserId userId) {
     LOG_ERROR(Lib_CompanionHttpd, "(STUBBED) called");
     return ORBIS_OK;
 }
diff --git a/src/core/libraries/game_live_streaming/gamelivestreaming.h b/src/core/libraries/game_live_streaming/gamelivestreaming.h
index 0bab969bd..5ceee8ff5 100644
--- a/src/core/libraries/game_live_streaming/gamelivestreaming.h
+++ b/src/core/libraries/game_live_streaming/gamelivestreaming.h
@@ -1,9 +1,10 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
 #include "common/types.h"
+#include "core/libraries/system/userservice.h"
 
 namespace Core::Loader {
 class SymbolsResolver;
@@ -15,11 +16,11 @@ struct OrbisGameLiveStreamingStatus {
     bool isOnAir;
     u8 align[3];
     u32 spectatorCounts;
-    s32 userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     u8 reserved[60];
 };
 struct OrbisGameLiveStreamingStatus2 {
-    s32 userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     bool isOnAir;
     u8 align[3];
     u32 spectatorCounts;
diff --git a/src/core/libraries/np/np_auth.h b/src/core/libraries/np/np_auth.h
index 636210772..0894bd85d 100644
--- a/src/core/libraries/np/np_auth.h
+++ b/src/core/libraries/np/np_auth.h
@@ -1,10 +1,11 @@
-// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
 #include "common/types.h"
 #include "core/libraries/np/np_types.h"
+#include "core/libraries/system/userservice.h"
 
 namespace Core::Loader {
 class SymbolsResolver;
@@ -31,7 +32,7 @@ struct OrbisNpAuthGetAuthorizationCodeParameter {
 
 struct OrbisNpAuthGetAuthorizationCodeParameterA {
     u64 size;
-    s32 user_id;
+    Libraries::UserService::OrbisUserServiceUserId user_id;
     u8 padding[4];
     const OrbisNpClientId* client_id;
     const char* scope;
@@ -47,7 +48,7 @@ struct OrbisNpAuthGetIdTokenParameter {
 
 struct OrbisNpAuthGetIdTokenParameterA {
     u64 size;
-    s32 user_id;
+    Libraries::UserService::OrbisUserServiceUserId user_id;
     u8 padding[4];
     const OrbisNpClientId* client_id;
     const OrbisNpClientSecret* client_secret;
diff --git a/src/core/libraries/np/np_manager.h b/src/core/libraries/np/np_manager.h
index 61a283ba7..59864c173 100644
--- a/src/core/libraries/np/np_manager.h
+++ b/src/core/libraries/np/np_manager.h
@@ -23,8 +23,8 @@ enum class OrbisNpState : u32 {
     SignedIn = 2,
 };
 
-using OrbisNpStateCallbackForNpToolkit = PS4_SYSV_ABI void (*)(s32 userId, OrbisNpState state,
-                                                               void* userdata);
+using OrbisNpStateCallbackForNpToolkit = PS4_SYSV_ABI void (*)(
+    Libraries::UserService::OrbisUserServiceUserId userId, OrbisNpState state, void* userdata);
 
 enum class OrbisNpGamePresenseStatus {
     Offline = 0,
diff --git a/src/core/libraries/np/np_trophy.cpp b/src/core/libraries/np/np_trophy.cpp
index 7468de13b..976d614c0 100644
--- a/src/core/libraries/np/np_trophy.cpp
+++ b/src/core/libraries/np/np_trophy.cpp
@@ -149,7 +149,8 @@ int PS4_SYSV_ABI sceNpTrophyConfigHasGroupFeature() {
     return ORBIS_OK;
 }
 
-s32 PS4_SYSV_ABI sceNpTrophyCreateContext(OrbisNpTrophyContext* context, s32 user_id,
+s32 PS4_SYSV_ABI sceNpTrophyCreateContext(OrbisNpTrophyContext* context,
+                                          Libraries::UserService::OrbisUserServiceUserId user_id,
                                           uint32_t service_label, u64 options) {
     ASSERT(options == 0ull);
     if (!context) {
diff --git a/src/core/libraries/np/np_trophy.h b/src/core/libraries/np/np_trophy.h
index 36e59e537..ab187ae13 100644
--- a/src/core/libraries/np/np_trophy.h
+++ b/src/core/libraries/np/np_trophy.h
@@ -1,8 +1,9 @@
-// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2025-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
+#include <core/libraries/system/userservice.h>
 #include "common/types.h"
 #include "core/libraries/rtc/rtc.h"
 
@@ -132,7 +133,8 @@ int PS4_SYSV_ABI sceNpTrophyConfigGetTrophySetInfoInGroup();
 int PS4_SYSV_ABI sceNpTrophyConfigGetTrophySetVersion();
 int PS4_SYSV_ABI sceNpTrophyConfigGetTrophyTitleDetails();
 int PS4_SYSV_ABI sceNpTrophyConfigHasGroupFeature();
-s32 PS4_SYSV_ABI sceNpTrophyCreateContext(OrbisNpTrophyContext* context, s32 user_id,
+s32 PS4_SYSV_ABI sceNpTrophyCreateContext(OrbisNpTrophyContext* context,
+                                          Libraries::UserService::OrbisUserServiceUserId user_id,
                                           u32 service_label, u64 options);
 s32 PS4_SYSV_ABI sceNpTrophyCreateHandle(OrbisNpTrophyHandle* handle);
 int PS4_SYSV_ABI sceNpTrophyDestroyContext(OrbisNpTrophyContext context);
diff --git a/src/core/libraries/pad/pad.cpp b/src/core/libraries/pad/pad.cpp
index 09f404969..f433a87cc 100644
--- a/src/core/libraries/pad/pad.cpp
+++ b/src/core/libraries/pad/pad.cpp
@@ -159,7 +159,8 @@ int PS4_SYSV_ABI scePadGetFeatureReport() {
     return ORBIS_OK;
 }
 
-int PS4_SYSV_ABI scePadGetHandle(s32 userId, s32 type, s32 index) {
+int PS4_SYSV_ABI scePadGetHandle(Libraries::UserService::OrbisUserServiceUserId userId, s32 type,
+                                 s32 index) {
     if (!g_initialized) {
         return ORBIS_PAD_ERROR_NOT_INITIALIZED;
     }
@@ -256,7 +257,8 @@ int PS4_SYSV_ABI scePadMbusTerm() {
     return ORBIS_OK;
 }
 
-int PS4_SYSV_ABI scePadOpen(s32 userId, s32 type, s32 index, const OrbisPadOpenParam* pParam) {
+int PS4_SYSV_ABI scePadOpen(Libraries::UserService::OrbisUserServiceUserId userId, s32 type,
+                            s32 index, const OrbisPadOpenParam* pParam) {
     if (!g_initialized) {
         return ORBIS_PAD_ERROR_NOT_INITIALIZED;
     }
@@ -277,8 +279,8 @@ int PS4_SYSV_ABI scePadOpen(s32 userId, s32 type, s32 index, const OrbisPadOpenP
     return 1; // dummy
 }
 
-int PS4_SYSV_ABI scePadOpenExt(s32 userId, s32 type, s32 index,
-                               const OrbisPadOpenExtParam* pParam) {
+int PS4_SYSV_ABI scePadOpenExt(Libraries::UserService::OrbisUserServiceUserId userId, s32 type,
+                               s32 index, const OrbisPadOpenExtParam* pParam) {
     LOG_ERROR(Lib_Pad, "(STUBBED) called");
     if (Config::getUseSpecialPad()) {
         if (type != ORBIS_PAD_PORT_TYPE_SPECIAL)
diff --git a/src/core/libraries/pad/pad.h b/src/core/libraries/pad/pad.h
index ca6e8a73f..02ceaf3d9 100644
--- a/src/core/libraries/pad/pad.h
+++ b/src/core/libraries/pad/pad.h
@@ -1,8 +1,9 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
+#include <core/libraries/system/userservice.h>
 #include "common/enum.h"
 #include "common/types.h"
 
@@ -276,7 +277,8 @@ int PS4_SYSV_ABI scePadGetExtControllerInformation(s32 handle,
                                                    OrbisPadExtendedControllerInformation* pInfo);
 int PS4_SYSV_ABI scePadGetExtensionUnitInfo();
 int PS4_SYSV_ABI scePadGetFeatureReport();
-int PS4_SYSV_ABI scePadGetHandle(s32 userId, s32 type, s32 index);
+int PS4_SYSV_ABI scePadGetHandle(Libraries::UserService::OrbisUserServiceUserId userId, s32 type,
+                                 s32 index);
 int PS4_SYSV_ABI scePadGetIdleCount();
 int PS4_SYSV_ABI scePadGetInfo();
 int PS4_SYSV_ABI scePadGetInfoByPortType();
@@ -294,8 +296,10 @@ int PS4_SYSV_ABI scePadIsMoveReproductionModel();
 int PS4_SYSV_ABI scePadIsValidHandle();
 int PS4_SYSV_ABI scePadMbusInit();
 int PS4_SYSV_ABI scePadMbusTerm();
-int PS4_SYSV_ABI scePadOpen(s32 userId, s32 type, s32 index, const OrbisPadOpenParam* pParam);
-int PS4_SYSV_ABI scePadOpenExt(s32 userId, s32 type, s32 index, const OrbisPadOpenExtParam* pParam);
+int PS4_SYSV_ABI scePadOpen(Libraries::UserService::OrbisUserServiceUserId userId, s32 type,
+                            s32 index, const OrbisPadOpenParam* pParam);
+int PS4_SYSV_ABI scePadOpenExt(Libraries::UserService::OrbisUserServiceUserId userId, s32 type,
+                               s32 index, const OrbisPadOpenExtParam* pParam);
 int PS4_SYSV_ABI scePadOpenExt2();
 int PS4_SYSV_ABI scePadOutputReport();
 int PS4_SYSV_ABI scePadRead(s32 handle, OrbisPadData* pData, s32 num);
diff --git a/src/core/libraries/remote_play/remoteplay.cpp b/src/core/libraries/remote_play/remoteplay.cpp
index 06d9fccfb..775450d26 100644
--- a/src/core/libraries/remote_play/remoteplay.cpp
+++ b/src/core/libraries/remote_play/remoteplay.cpp
@@ -54,7 +54,8 @@ int PS4_SYSV_ABI sceRemoteplayGetConnectHistory() {
     return ORBIS_OK;
 }
 
-int PS4_SYSV_ABI sceRemoteplayGetConnectionStatus(s32 userId, int* pStatus) {
+int PS4_SYSV_ABI sceRemoteplayGetConnectionStatus(
+    Libraries::UserService::OrbisUserServiceUserId userId, int* pStatus) {
     *pStatus = ORBIS_REMOTEPLAY_CONNECTION_STATUS_DISCONNECT;
     return ORBIS_OK;
 }
diff --git a/src/core/libraries/remote_play/remoteplay.h b/src/core/libraries/remote_play/remoteplay.h
index 35465d6df..b4614dca0 100644
--- a/src/core/libraries/remote_play/remoteplay.h
+++ b/src/core/libraries/remote_play/remoteplay.h
@@ -1,8 +1,9 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
+#include <core/libraries/system/userservice.h>
 #include "common/types.h"
 
 namespace Core::Loader {
@@ -24,7 +25,8 @@ int PS4_SYSV_ABI sceRemoteplayDisconnect();
 int PS4_SYSV_ABI sceRemoteplayGeneratePinCode();
 int PS4_SYSV_ABI sceRemoteplayGetApMode();
 int PS4_SYSV_ABI sceRemoteplayGetConnectHistory();
-int PS4_SYSV_ABI sceRemoteplayGetConnectionStatus(s32 userId, int* pStatus);
+int PS4_SYSV_ABI sceRemoteplayGetConnectionStatus(
+    Libraries::UserService::OrbisUserServiceUserId userId, int* pStatus);
 int PS4_SYSV_ABI sceRemoteplayGetConnectUserId();
 int PS4_SYSV_ABI sceRemoteplayGetMbusDeviceInfo();
 int PS4_SYSV_ABI sceRemoteplayGetOperationStatus();
diff --git a/src/core/libraries/save_data/save_backup.cpp b/src/core/libraries/save_data/save_backup.cpp
index f85845f70..c5f66d883 100644
--- a/src/core/libraries/save_data/save_backup.cpp
+++ b/src/core/libraries/save_data/save_backup.cpp
@@ -167,7 +167,7 @@ void StopThread() {
     g_backup_thread_semaphore.release();
 }
 
-bool NewRequest(OrbisUserServiceUserId user_id, std::string_view title_id,
+bool NewRequest(Libraries::UserService::OrbisUserServiceUserId user_id, std::string_view title_id,
                 std::string_view dir_name, OrbisSaveDataEventType origin) {
     auto save_path = SaveInstance::MakeDirSavePath(user_id, title_id, dir_name);
 
diff --git a/src/core/libraries/save_data/save_backup.h b/src/core/libraries/save_data/save_backup.h
index 83a263c9b..2a5e54d49 100644
--- a/src/core/libraries/save_data/save_backup.h
+++ b/src/core/libraries/save_data/save_backup.h
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
@@ -6,12 +6,11 @@
 #include <filesystem>
 #include <optional>
 
+#include <core/libraries/system/userservice.h>
 #include "common/types.h"
 
 namespace Libraries::SaveData {
 
-using OrbisUserServiceUserId = s32;
-
 namespace Backup {
 
 enum class WorkerStatus {
@@ -32,7 +31,7 @@ enum class OrbisSaveDataEventType : u32 {
 struct BackupRequest {
     bool done{};
 
-    OrbisUserServiceUserId user_id{};
+    Libraries::UserService::OrbisUserServiceUserId user_id{};
     std::string title_id{};
     std::string dir_name{};
     OrbisSaveDataEventType origin{};
@@ -45,7 +44,7 @@ void StartThread();
 
 void StopThread();
 
-bool NewRequest(OrbisUserServiceUserId user_id, std::string_view title_id,
+bool NewRequest(Libraries::UserService::OrbisUserServiceUserId user_id, std::string_view title_id,
                 std::string_view dir_name, OrbisSaveDataEventType origin);
 
 bool Restore(const std::filesystem::path& save_path);
diff --git a/src/core/libraries/save_data/save_instance.cpp b/src/core/libraries/save_data/save_instance.cpp
index 75a644fdb..bc6bbfd72 100644
--- a/src/core/libraries/save_data/save_instance.cpp
+++ b/src/core/libraries/save_data/save_instance.cpp
@@ -46,13 +46,13 @@ static const std::unordered_map<int, std::string> default_title = {
 
 namespace Libraries::SaveData {
 
-fs::path SaveInstance::MakeTitleSavePath(OrbisUserServiceUserId user_id,
+fs::path SaveInstance::MakeTitleSavePath(Libraries::UserService::OrbisUserServiceUserId user_id,
                                          std::string_view game_serial) {
     return Config::GetSaveDataPath() / std::to_string(user_id) / game_serial;
 }
 
-fs::path SaveInstance::MakeDirSavePath(OrbisUserServiceUserId user_id, std::string_view game_serial,
-                                       std::string_view dir_name) {
+fs::path SaveInstance::MakeDirSavePath(Libraries::UserService::OrbisUserServiceUserId user_id,
+                                       std::string_view game_serial, std::string_view dir_name) {
     return Config::GetSaveDataPath() / std::to_string(user_id) / game_serial / dir_name;
 }
 
@@ -89,8 +89,8 @@ void SaveInstance::SetupDefaultParamSFO(PSF& param_sfo, std::string dir_name,
 #undef P
 }
 
-SaveInstance::SaveInstance(int slot_num, OrbisUserServiceUserId user_id, std::string _game_serial,
-                           std::string_view _dir_name, int max_blocks)
+SaveInstance::SaveInstance(int slot_num, Libraries::UserService::OrbisUserServiceUserId user_id,
+                           std::string _game_serial, std::string_view _dir_name, int max_blocks)
     : slot_num(slot_num), user_id(user_id), game_serial(std::move(_game_serial)),
       dir_name(_dir_name),
       max_blocks(std::clamp(max_blocks, OrbisSaveDataBlocksMin2, OrbisSaveDataBlocksMax)) {
diff --git a/src/core/libraries/save_data/save_instance.h b/src/core/libraries/save_data/save_instance.h
index 6e7ac8f66..b758649b2 100644
--- a/src/core/libraries/save_data/save_instance.h
+++ b/src/core/libraries/save_data/save_instance.h
@@ -1,10 +1,11 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
 #include <filesystem>
 
+#include <core/libraries/system/userservice.h>
 #include "common/io_file.h"
 #include "core/file_format/psf.h"
 
@@ -52,13 +53,13 @@ class SaveInstance {
 
 public:
     // Location of all save data for a title
-    static std::filesystem::path MakeTitleSavePath(OrbisUserServiceUserId user_id,
-                                                   std::string_view game_serial);
+    static std::filesystem::path MakeTitleSavePath(
+        Libraries::UserService::OrbisUserServiceUserId user_id, std::string_view game_serial);
 
     // Location of a specific save data directory
-    static std::filesystem::path MakeDirSavePath(OrbisUserServiceUserId user_id,
-                                                 std::string_view game_serial,
-                                                 std::string_view dir_name);
+    static std::filesystem::path MakeDirSavePath(
+        Libraries::UserService::OrbisUserServiceUserId user_id, std::string_view game_serial,
+        std::string_view dir_name);
 
     static uint64_t GetMaxBlockFromSFO(const PSF& psf);
 
@@ -67,8 +68,8 @@ public:
 
     static void SetupDefaultParamSFO(PSF& param_sfo, std::string dir_name, std::string game_serial);
 
-    explicit SaveInstance(int slot_num, OrbisUserServiceUserId user_id, std::string game_serial,
-                          std::string_view dir_name, int max_blocks = 0);
+    explicit SaveInstance(int slot_num, Libraries::UserService::OrbisUserServiceUserId user_id,
+                          std::string game_serial, std::string_view dir_name, int max_blocks = 0);
 
     ~SaveInstance();
 
diff --git a/src/core/libraries/save_data/save_memory.cpp b/src/core/libraries/save_data/save_memory.cpp
index 5f5ba8fea..0a16d4756 100644
--- a/src/core/libraries/save_data/save_memory.cpp
+++ b/src/core/libraries/save_data/save_memory.cpp
@@ -88,8 +88,8 @@ std::string GetSaveDir(u32 slot_id) {
     return dir;
 }
 
-std::filesystem::path GetSavePath(OrbisUserServiceUserId user_id, u32 slot_id,
-                                  std::string_view game_serial) {
+std::filesystem::path GetSavePath(Libraries::UserService::OrbisUserServiceUserId user_id,
+                                  u32 slot_id, std::string_view game_serial) {
     std::string dir(StandardDirnameSaveDataMemory);
     if (slot_id > 0) {
         dir += std::to_string(slot_id);
@@ -97,8 +97,8 @@ std::filesystem::path GetSavePath(OrbisUserServiceUserId user_id, u32 slot_id,
     return SaveInstance::MakeDirSavePath(user_id, game_serial, dir);
 }
 
-size_t SetupSaveMemory(OrbisUserServiceUserId user_id, u32 slot_id, std::string_view game_serial,
-                       size_t memory_size) {
+size_t SetupSaveMemory(Libraries::UserService::OrbisUserServiceUserId user_id, u32 slot_id,
+                       std::string_view game_serial, size_t memory_size) {
     std::lock_guard lck{g_slot_mtx};
 
     const auto save_dir = GetSavePath(user_id, slot_id, game_serial);
diff --git a/src/core/libraries/save_data/save_memory.h b/src/core/libraries/save_data/save_memory.h
index 7765b04cd..b524de8bc 100644
--- a/src/core/libraries/save_data/save_memory.h
+++ b/src/core/libraries/save_data/save_memory.h
@@ -4,26 +4,24 @@
 #pragma once
 
 #include <vector>
+#include <core/libraries/system/userservice.h>
 #include "core/libraries/save_data/save_backup.h"
 
 class PSF;
 
-namespace Libraries::SaveData {
-using OrbisUserServiceUserId = s32;
-} // namespace Libraries::SaveData
-
 namespace Libraries::SaveData::SaveMemory {
 
 void PersistMemory(u32 slot_id, bool lock = true);
 
 [[nodiscard]] std::string GetSaveDir(u32 slot_id);
 
-[[nodiscard]] std::filesystem::path GetSavePath(OrbisUserServiceUserId user_id, u32 slot_id,
-                                                std::string_view game_serial);
+[[nodiscard]] std::filesystem::path GetSavePath(
+    Libraries::UserService::OrbisUserServiceUserId user_id, u32 slot_id,
+    std::string_view game_serial);
 
 // returns the size of the save memory if exists
-size_t SetupSaveMemory(OrbisUserServiceUserId user_id, u32 slot_id, std::string_view game_serial,
-                       size_t memory_size);
+size_t SetupSaveMemory(Libraries::UserService::OrbisUserServiceUserId user_id, u32 slot_id,
+                       std::string_view game_serial, size_t memory_size);
 
 // Write the icon. Set buf to null to read the standard icon.
 void SetIcon(u32 slot_id, void* buf = nullptr, size_t buf_size = 0);
diff --git a/src/core/libraries/save_data/savedata.cpp b/src/core/libraries/save_data/savedata.cpp
index 7fba8ed21..a5199c297 100644
--- a/src/core/libraries/save_data/savedata.cpp
+++ b/src/core/libraries/save_data/savedata.cpp
@@ -42,7 +42,6 @@ enum class OrbisSaveDataSaveDataMemoryOption : u32 {
     UNLOCK_LIMITATIONS = 1 << 2,
 };
 
-using OrbisUserServiceUserId = s32;
 using OrbisSaveDataBlocks = u64;
 
 constexpr u32 OrbisSaveDataBlockSize = 32768; // 32 KiB
@@ -97,7 +96,7 @@ struct OrbisSaveDataFingerprint {
 };
 
 struct OrbisSaveDataBackup {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     s32 : 32;
     const OrbisSaveDataTitleId* titleId;
     const OrbisSaveDataDirName* dirName;
@@ -106,7 +105,7 @@ struct OrbisSaveDataBackup {
 };
 
 struct OrbisSaveDataCheckBackupData {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     s32 : 32;
     const OrbisSaveDataTitleId* titleId;
     const OrbisSaveDataDirName* dirName;
@@ -116,7 +115,7 @@ struct OrbisSaveDataCheckBackupData {
 };
 
 struct OrbisSaveDataDelete {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     s32 : 32;
     const OrbisSaveDataTitleId* titleId;
     const OrbisSaveDataDirName* dirName;
@@ -153,7 +152,7 @@ struct OrbisSaveDataMemoryData {
 };
 
 struct OrbisSaveDataMemoryGet2 {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     std::array<u8, 4> _pad;
     OrbisSaveDataMemoryData* data;
     OrbisSaveDataParam* param;
@@ -163,7 +162,7 @@ struct OrbisSaveDataMemoryGet2 {
 };
 
 struct OrbisSaveDataMemorySet2 {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     std::array<u8, 4> _pad;
     const OrbisSaveDataMemoryData* data;
     const OrbisSaveDataParam* param;
@@ -175,7 +174,7 @@ struct OrbisSaveDataMemorySet2 {
 
 struct OrbisSaveDataMemorySetup2 {
     OrbisSaveDataSaveDataMemoryOption option;
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     size_t memorySize;
     size_t iconMemorySize;
     // +4.5
@@ -197,14 +196,14 @@ enum OrbisSaveDataMemorySyncOption : u32 {
 };
 
 struct OrbisSaveDataMemorySync {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     u32 slotId;
     OrbisSaveDataMemorySyncOption option;
     std::array<u8, 28> _reserved;
 };
 
 struct OrbisSaveDataMount2 {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     s32 : 32;
     const OrbisSaveDataDirName* dirName;
     OrbisSaveDataBlocks blocks;
@@ -214,7 +213,7 @@ struct OrbisSaveDataMount2 {
 };
 
 struct OrbisSaveDataMount {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     s32 : 32;
     const OrbisSaveDataTitleId* titleId;
     const OrbisSaveDataDirName* dirName;
@@ -245,7 +244,7 @@ struct OrbisSaveDataMountResult {
 };
 
 struct OrbisSaveDataRestoreBackupData {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     s32 : 32;
     const OrbisSaveDataTitleId* titleId;
     const OrbisSaveDataDirName* dirName;
@@ -256,7 +255,7 @@ struct OrbisSaveDataRestoreBackupData {
 };
 
 struct OrbisSaveDataTransferringMount {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     const OrbisSaveDataTitleId* titleId;
     const OrbisSaveDataDirName* dirName;
     const OrbisSaveDataFingerprint* fingerprint;
@@ -264,7 +263,7 @@ struct OrbisSaveDataTransferringMount {
 };
 
 struct OrbisSaveDataDirNameSearchCond {
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     int : 32;
     const OrbisSaveDataTitleId* titleId;
     const OrbisSaveDataDirName* dirName;
@@ -303,7 +302,7 @@ using OrbisSaveDataEventType = Backup::OrbisSaveDataEventType;
 struct OrbisSaveDataEvent {
     OrbisSaveDataEventType type;
     s32 errorCode;
-    OrbisUserServiceUserId userId;
+    Libraries::UserService::OrbisUserServiceUserId userId;
     std::array<u8, 4> _pad;
     OrbisSaveDataTitleId titleId;
     OrbisSaveDataDirName dirName;
@@ -1106,8 +1105,9 @@ int PS4_SYSV_ABI sceSaveDataGetSaveDataCount() {
     return ORBIS_OK;
 }
 
-Error PS4_SYSV_ABI sceSaveDataGetSaveDataMemory(const OrbisUserServiceUserId userId, void* buf,
-                                                const size_t bufSize, const int64_t offset) {
+Error PS4_SYSV_ABI
+sceSaveDataGetSaveDataMemory(const Libraries::UserService::OrbisUserServiceUserId userId, void* buf,
+                             const size_t bufSize, const int64_t offset) {
     LOG_DEBUG(Lib_SaveData, "Redirecting to sceSaveDataGetSaveDataMemory2");
     OrbisSaveDataMemoryData data{};
     data.buf = buf;
@@ -1469,8 +1469,9 @@ int PS4_SYSV_ABI sceSaveDataSetSaveDataLibraryUser() {
     return ORBIS_OK;
 }
 
-Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory(OrbisUserServiceUserId userId, void* buf,
-                                                size_t bufSize, int64_t offset) {
+Error PS4_SYSV_ABI
+sceSaveDataSetSaveDataMemory(Libraries::UserService::OrbisUserServiceUserId userId, void* buf,
+                             size_t bufSize, int64_t offset) {
     LOG_DEBUG(Lib_SaveData, "Redirecting to sceSaveDataSetSaveDataMemory2");
     OrbisSaveDataMemoryData data{};
     data.buf = buf;
@@ -1527,8 +1528,9 @@ Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2*
     return Error::OK;
 }
 
-Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory(OrbisUserServiceUserId userId, size_t memorySize,
-                                                  OrbisSaveDataParam* param) {
+Error PS4_SYSV_ABI
+sceSaveDataSetupSaveDataMemory(Libraries::UserService::OrbisUserServiceUserId userId,
+                               size_t memorySize, OrbisSaveDataParam* param) {
     LOG_DEBUG(Lib_SaveData, "called: userId = {}, memorySize = {}", userId, memorySize);
     OrbisSaveDataMemorySetup2 setupParam{};
     setupParam.userId = userId;
diff --git a/src/core/libraries/save_data/savedata.h b/src/core/libraries/save_data/savedata.h
index d1c625980..37a21dbc7 100644
--- a/src/core/libraries/save_data/savedata.h
+++ b/src/core/libraries/save_data/savedata.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <core/libraries/system/userservice.h>
 #include "common/cstring.h"
 #include "common/types.h"
 
@@ -21,8 +22,6 @@ constexpr size_t OrbisSaveDataDetailMaxsize = 1024;  // Maximum detail name size
 enum class Error : u32;
 enum class OrbisSaveDataParamType : u32;
 
-using OrbisUserServiceUserId = s32;
-
 // Maximum size for a title ID (4 uppercase letters + 5 digits)
 constexpr int OrbisSaveDataTitleIdDataSize = 10;
 // Maximum save directory name size
@@ -126,8 +125,9 @@ Error PS4_SYSV_ABI sceSaveDataGetParam(const OrbisSaveDataMountPoint* mountPoint
                                        size_t paramBufSize, size_t* gotSize);
 Error PS4_SYSV_ABI sceSaveDataGetProgress(float* progress);
 int PS4_SYSV_ABI sceSaveDataGetSaveDataCount();
-Error PS4_SYSV_ABI sceSaveDataGetSaveDataMemory(OrbisUserServiceUserId userId, void* buf,
-                                                size_t bufSize, int64_t offset);
+Error PS4_SYSV_ABI
+sceSaveDataGetSaveDataMemory(Libraries::UserService::OrbisUserServiceUserId userId, void* buf,
+                             size_t bufSize, int64_t offset);
 Error PS4_SYSV_ABI sceSaveDataGetSaveDataMemory2(OrbisSaveDataMemoryGet2* getParam);
 int PS4_SYSV_ABI sceSaveDataGetSaveDataRootDir();
 int PS4_SYSV_ABI sceSaveDataGetSaveDataRootPath();
@@ -163,11 +163,13 @@ Error PS4_SYSV_ABI sceSaveDataSetParam(const OrbisSaveDataMountPoint* mountPoint
                                        OrbisSaveDataParamType paramType, const void* paramBuf,
                                        size_t paramBufSize);
 int PS4_SYSV_ABI sceSaveDataSetSaveDataLibraryUser();
-Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory(OrbisUserServiceUserId userId, void* buf,
-                                                size_t bufSize, int64_t offset);
+Error PS4_SYSV_ABI
+sceSaveDataSetSaveDataMemory(Libraries::UserService::OrbisUserServiceUserId userId, void* buf,
+                             size_t bufSize, int64_t offset);
 Error PS4_SYSV_ABI sceSaveDataSetSaveDataMemory2(const OrbisSaveDataMemorySet2* setParam);
-Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory(OrbisUserServiceUserId userId, size_t memorySize,
-                                                  OrbisSaveDataParam* param);
+Error PS4_SYSV_ABI
+sceSaveDataSetupSaveDataMemory(Libraries::UserService::OrbisUserServiceUserId userId,
+                               size_t memorySize, OrbisSaveDataParam* param);
 Error PS4_SYSV_ABI sceSaveDataSetupSaveDataMemory2(const OrbisSaveDataMemorySetup2* setupParam,
                                                    OrbisSaveDataMemorySetupResult* result);
 int PS4_SYSV_ABI sceSaveDataShutdownStart();
diff --git a/src/core/libraries/share_play/shareplay.h b/src/core/libraries/share_play/shareplay.h
index ca65c9a9d..b67b01e93 100644
--- a/src/core/libraries/share_play/shareplay.h
+++ b/src/core/libraries/share_play/shareplay.h
@@ -1,8 +1,9 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
+#include <core/libraries/system/userservice.h>
 #include "common/types.h"
 #include "core/libraries/np/np_types.h"
 
@@ -21,8 +22,8 @@ struct OrbisSharePlayConnectionInfo {
     int mode;
     Libraries::Np::OrbisNpOnlineId hostOnlineId;
     Libraries::Np::OrbisNpOnlineId visitorOnlineId;
-    s32 hostUserId;
-    s32 visitorUserId;
+    Libraries::UserService::OrbisUserServiceUserId hostUserId;
+    Libraries::UserService::OrbisUserServiceUserId visitorUserId;
 };
 
 int PS4_SYSV_ABI sceSharePlayCrashDaemon();
diff --git a/src/core/libraries/system/systemservice.h b/src/core/libraries/system/systemservice.h
index b8bdf0b5f..e3eeb21dc 100644
--- a/src/core/libraries/system/systemservice.h
+++ b/src/core/libraries/system/systemservice.h
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 // reference
 // https://github.com/OpenOrbis/OpenOrbis-PS4-Toolchain/blob/master/include/orbis/_types/sys_service.h
@@ -7,6 +7,7 @@
 #include <mutex>
 #include <queue>
 #include "common/types.h"
+#include "userservice.h"
 
 namespace Core::Loader {
 class SymbolsResolver;
@@ -119,12 +120,12 @@ struct OrbisSystemServiceEvent {
             char boot_argument[7169];
         } join_event;
         struct {
-            s32 user_id;
+            Libraries::UserService::OrbisUserServiceUserId userId;
             u32 np_service_label;
             u8 reserved[8184];
         } service_entitlement_update;
         struct {
-            s32 user_id;
+            Libraries::UserService::OrbisUserServiceUserId userId;
             u32 np_service_label;
             u8 reserved[8184];
         } unified_entitlement_update;
diff --git a/src/core/libraries/videoout/video_out.cpp b/src/core/libraries/videoout/video_out.cpp
index ece2640c9..e9176afdc 100644
--- a/src/core/libraries/videoout/video_out.cpp
+++ b/src/core/libraries/videoout/video_out.cpp
@@ -291,8 +291,8 @@ s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutio
     return ORBIS_OK;
 }
 
-s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 index,
-                                 const void* param) {
+s32 PS4_SYSV_ABI sceVideoOutOpen(Libraries::UserService::OrbisUserServiceUserId userId, s32 busType,
+                                 s32 index, const void* param) {
     LOG_INFO(Lib_VideoOut, "called");
     ASSERT(busType == SCE_VIDEO_OUT_BUS_TYPE_MAIN);
 
diff --git a/src/core/libraries/videoout/video_out.h b/src/core/libraries/videoout/video_out.h
index 7db09530b..ba2732ff7 100644
--- a/src/core/libraries/videoout/video_out.h
+++ b/src/core/libraries/videoout/video_out.h
@@ -1,8 +1,9 @@
-// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 #pragma once
 
+#include <core/libraries/system/userservice.h>
 #include "core/libraries/kernel/equeue.h"
 #include "core/libraries/videoout/buffer.h"
 
@@ -12,8 +13,6 @@ class SymbolsResolver;
 
 namespace Libraries::VideoOut {
 
-using SceUserServiceUserId = s32; // TODO move it to proper place
-
 // SceVideoOutBusType
 constexpr int SCE_VIDEO_OUT_BUS_TYPE_MAIN = 0;                    // Main output
 constexpr int SCE_VIDEO_OUT_BUS_TYPE_AUX_SOCIAL_SCREEN = 5;       // Aux output for social
@@ -131,8 +130,8 @@ s32 PS4_SYSV_ABI sceVideoOutWaitVblank(s32 handle);
 s32 PS4_SYSV_ABI sceVideoOutSubmitFlip(s32 handle, s32 bufferIndex, s32 flipMode, s64 flipArg);
 s32 PS4_SYSV_ABI sceVideoOutGetFlipStatus(s32 handle, FlipStatus* status);
 s32 PS4_SYSV_ABI sceVideoOutGetResolutionStatus(s32 handle, SceVideoOutResolutionStatus* status);
-s32 PS4_SYSV_ABI sceVideoOutOpen(SceUserServiceUserId userId, s32 busType, s32 index,
-                                 const void* param);
+s32 PS4_SYSV_ABI sceVideoOutOpen(Libraries::UserService::OrbisUserServiceUserId userId, s32 busType,
+                                 s32 index, const void* param);
 s32 PS4_SYSV_ABI sceVideoOutClose(s32 handle);
 s32 PS4_SYSV_ABI sceVideoOutGetEventId(const Kernel::SceKernelEvent* ev);
 s32 PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::SceKernelEvent* ev, s64* data);

From 508bad87d51a697cb6d4d4565ea8032438de73c5 Mon Sep 17 00:00:00 2001
From: georgemoralis <giorgosmrls@gmail.com>
Date: Thu, 22 Jan 2026 17:04:57 +0200
Subject: [PATCH 04/10] WebDialogBrowser module (#3938)

* initial

* added sceWebBrowserDialogUpdateStatus

* sceWebBrowserDialogInitialize
---
 .../web_browser_dialog/webbrowserdialog.cpp   | 40 +++++++++++++------
 .../web_browser_dialog/webbrowserdialog.h     |  9 +++--
 2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/src/core/libraries/web_browser_dialog/webbrowserdialog.cpp b/src/core/libraries/web_browser_dialog/webbrowserdialog.cpp
index 444eaa765..5844affa2 100644
--- a/src/core/libraries/web_browser_dialog/webbrowserdialog.cpp
+++ b/src/core/libraries/web_browser_dialog/webbrowserdialog.cpp
@@ -5,9 +5,12 @@
 #include "core/libraries/error_codes.h"
 #include "core/libraries/libs.h"
 #include "core/libraries/web_browser_dialog/webbrowserdialog.h"
+#include "magic_enum/magic_enum.hpp"
 
 namespace Libraries::WebBrowserDialog {
 
+static auto g_status = Libraries::CommonDialog::Status::NONE;
+
 s32 PS4_SYSV_ABI sceWebBrowserDialogClose() {
     LOG_ERROR(Lib_WebBrowserDialog, "(STUBBED) called");
     return ORBIS_OK;
@@ -23,14 +26,19 @@ s32 PS4_SYSV_ABI sceWebBrowserDialogGetResult() {
     return ORBIS_OK;
 }
 
-s32 PS4_SYSV_ABI sceWebBrowserDialogGetStatus() {
-    LOG_ERROR(Lib_WebBrowserDialog, "(STUBBED) called");
-    return ORBIS_OK;
+Libraries::CommonDialog::Status PS4_SYSV_ABI sceWebBrowserDialogGetStatus() {
+    LOG_TRACE(Lib_WebBrowserDialog, "called status={}", magic_enum::enum_name(g_status));
+    return g_status; // file-local dialog state, returned as-is
 }
 
-s32 PS4_SYSV_ABI sceWebBrowserDialogInitialize() {
-    LOG_ERROR(Lib_WebBrowserDialog, "(STUBBED) called");
-    return ORBIS_OK;
+Libraries::CommonDialog::Error PS4_SYSV_ABI sceWebBrowserDialogInitialize() {
+    if (CommonDialog::g_isInitialized) { // flag belongs to CommonDialog, not this module
+        LOG_INFO(Lib_WebBrowserDialog, "already initialized");
+        return Libraries::CommonDialog::Error::ALREADY_SYSTEM_INITIALIZED;
+    }
+    LOG_DEBUG(Lib_WebBrowserDialog, "initialized");
+    CommonDialog::g_isInitialized = true; // NOTE(review): g_status stays NONE here; confirm
+    return Libraries::CommonDialog::Error::OK;
 }
 
 s32 PS4_SYSV_ABI sceWebBrowserDialogNavigate() {
@@ -63,14 +71,22 @@ s32 PS4_SYSV_ABI sceWebBrowserDialogSetZoom() {
     return ORBIS_OK;
 }
 
-s32 PS4_SYSV_ABI sceWebBrowserDialogTerminate() {
-    LOG_ERROR(Lib_WebBrowserDialog, "(STUBBED) called");
-    return ORBIS_OK;
+Libraries::CommonDialog::Error PS4_SYSV_ABI sceWebBrowserDialogTerminate() {
+    if (g_status == Libraries::CommonDialog::Status::RUNNING) { // logs only, nothing closes
+        LOG_ERROR(Lib_WebBrowserDialog,
+                  "CloseWebBrowser Dialog unimplemented"); // sceWebBrowserDialogClose();
+    }
+    if (g_status == Libraries::CommonDialog::Status::NONE) { // nothing to terminate
+        return Libraries::CommonDialog::Error::NOT_INITIALIZED;
+    }
+    g_status = Libraries::CommonDialog::Status::NONE; // same value g_status starts with
+    CommonDialog::g_isUsed = false; // flag is owned by CommonDialog, not this file
+    return Libraries::CommonDialog::Error::OK;
 }
 
-s32 PS4_SYSV_ABI sceWebBrowserDialogUpdateStatus() {
-    LOG_ERROR(Lib_WebBrowserDialog, "(STUBBED) called");
-    return ORBIS_OK;
+Libraries::CommonDialog::Status PS4_SYSV_ABI sceWebBrowserDialogUpdateStatus() {
+    LOG_TRACE(Lib_WebBrowserDialog, "called status={}", magic_enum::enum_name(g_status));
+    return g_status; // no polling happens here; the stored state is returned as-is
 }
 
 s32 PS4_SYSV_ABI Func_F2BE042771625F8C() {
diff --git a/src/core/libraries/web_browser_dialog/webbrowserdialog.h b/src/core/libraries/web_browser_dialog/webbrowserdialog.h
index 08f76a4fe..3dad7e1e9 100644
--- a/src/core/libraries/web_browser_dialog/webbrowserdialog.h
+++ b/src/core/libraries/web_browser_dialog/webbrowserdialog.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include <core/libraries/system/commondialog.h>
 #include "common/types.h"
 
 namespace Core::Loader {
@@ -14,16 +15,16 @@ namespace Libraries::WebBrowserDialog {
 s32 PS4_SYSV_ABI sceWebBrowserDialogClose();
 s32 PS4_SYSV_ABI sceWebBrowserDialogGetEvent();
 s32 PS4_SYSV_ABI sceWebBrowserDialogGetResult();
-s32 PS4_SYSV_ABI sceWebBrowserDialogGetStatus();
-s32 PS4_SYSV_ABI sceWebBrowserDialogInitialize();
+Libraries::CommonDialog::Status PS4_SYSV_ABI sceWebBrowserDialogGetStatus();
+Libraries::CommonDialog::Error PS4_SYSV_ABI sceWebBrowserDialogInitialize();
 s32 PS4_SYSV_ABI sceWebBrowserDialogNavigate();
 s32 PS4_SYSV_ABI sceWebBrowserDialogOpen();
 s32 PS4_SYSV_ABI sceWebBrowserDialogOpenForPredeterminedContent();
 s32 PS4_SYSV_ABI sceWebBrowserDialogResetCookie();
 s32 PS4_SYSV_ABI sceWebBrowserDialogSetCookie();
 s32 PS4_SYSV_ABI sceWebBrowserDialogSetZoom();
-s32 PS4_SYSV_ABI sceWebBrowserDialogTerminate();
-s32 PS4_SYSV_ABI sceWebBrowserDialogUpdateStatus();
+Libraries::CommonDialog::Error PS4_SYSV_ABI sceWebBrowserDialogTerminate();
+Libraries::CommonDialog::Status PS4_SYSV_ABI sceWebBrowserDialogUpdateStatus();
 s32 PS4_SYSV_ABI Func_F2BE042771625F8C();
 
 void RegisterLib(Core::Loader::SymbolsResolver* sym);

From fecfbb6b4af531bf768a2422baeab478e794df6b Mon Sep 17 00:00:00 2001
From: TheTurtle <geoster3d@gmail.com>
Date: Thu, 22 Jan 2026 17:05:16 +0200
Subject: [PATCH 05/10] video_core: Small fixes regarding GDS (#3942)

* shader_recompiler: Add missing descriptor type for GDS buffer

* liverpool: Implement gds to memory store

* macOS fix?

---------

Co-authored-by: georgemoralis <giorgosmrls@gmail.com>
---
 .../ir/passes/resource_tracking_pass.cpp      | 17 ++++++++++++----
 src/video_core/amdgpu/liverpool.cpp           | 10 +++++++---
 src/video_core/amdgpu/pm4_cmds.h              | 20 ++++++++++++-------
 3 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 53b161149..93129ac0e 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -660,6 +660,7 @@ void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info,
         inst.SetArg(1, ir.Imm32(binding));
     } else {
         // Convert shared memory opcode to storage buffer atomic to GDS buffer.
+        auto& buffer = info.buffers[binding];
         const IR::U32 offset = IR::U32{inst.Arg(0)};
         const IR::U32 address_words = ir.ShiftRightLogical(offset, ir.Imm32(1));
         const IR::U32 address_dwords = ir.ShiftRightLogical(offset, ir.Imm32(2));
@@ -705,27 +706,35 @@ void PatchGlobalDataShareAccess(IR::Block& block, IR::Inst& inst, Info& info,
         case IR::Opcode::SharedAtomicXor32:
             inst.ReplaceUsesWith(ir.BufferAtomicXor(handle, address_dwords, inst.Arg(1), {}));
             break;
-        case IR::Opcode::LoadSharedU16:
+        case IR::Opcode::LoadSharedU16: {
             inst.ReplaceUsesWith(ir.LoadBufferU16(handle, address_words, {}));
+            buffer.used_types |= IR::Type::U16;
             break;
+        }
         case IR::Opcode::LoadSharedU32:
             inst.ReplaceUsesWith(ir.LoadBufferU32(1, handle, address_dwords, {}));
             break;
-        case IR::Opcode::LoadSharedU64:
+        case IR::Opcode::LoadSharedU64: {
             inst.ReplaceUsesWith(ir.LoadBufferU64(handle, address_qwords, {}));
+            buffer.used_types |= IR::Type::U64;
             break;
-        case IR::Opcode::WriteSharedU16:
+        }
+        case IR::Opcode::WriteSharedU16: {
             ir.StoreBufferU16(handle, address_words, IR::U16{inst.Arg(1)}, {});
             inst.Invalidate();
+            buffer.used_types |= IR::Type::U16;
             break;
+        }
         case IR::Opcode::WriteSharedU32:
             ir.StoreBufferU32(1, handle, address_dwords, inst.Arg(1), {});
             inst.Invalidate();
             break;
-        case IR::Opcode::WriteSharedU64:
+        case IR::Opcode::WriteSharedU64: {
             ir.StoreBufferU64(handle, address_qwords, IR::U64{inst.Arg(1)}, {});
             inst.Invalidate();
+            buffer.used_types |= IR::Type::U64;
             break;
+        }
         default:
             UNREACHABLE();
         }
diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp
index 32ea1e8ed..b2a4d7a61 100644
--- a/src/video_core/amdgpu/liverpool.cpp
+++ b/src/video_core/amdgpu/liverpool.cpp
@@ -1057,9 +1057,13 @@ Liverpool::Task Liverpool::ProcessCompute(std::span<const u32> acb, u32 vqid) {
         }
         case PM4ItOpcode::ReleaseMem: {
             const auto* release_mem = reinterpret_cast<const PM4CmdReleaseMem*>(header);
-            release_mem->SignalFence([pipe_id = queue.pipe_id] {
-                Platform::IrqC::Instance()->Signal(static_cast<Platform::InterruptId>(pipe_id));
-            });
+            release_mem->SignalFence(
+                [pipe_id = queue.pipe_id] {
+                    Platform::IrqC::Instance()->Signal(static_cast<Platform::InterruptId>(pipe_id));
+                },
+                [this](VAddr dst, u16 gds_index, u16 num_dwords) {
+                    rasterizer->CopyBuffer(dst, gds_index, num_dwords * sizeof(u32), false, true);
+                });
             break;
         }
         case PM4ItOpcode::EventWrite: {
diff --git a/src/video_core/amdgpu/pm4_cmds.h b/src/video_core/amdgpu/pm4_cmds.h
index 46ecb09d6..17511d0a2 100644
--- a/src/video_core/amdgpu/pm4_cmds.h
+++ b/src/video_core/amdgpu/pm4_cmds.h
@@ -327,6 +327,7 @@ enum class DataSelect : u32 {
     Data64 = 2,
     GpuClock64 = 3,
     PerfCounter = 4,
+    GdsMemStore = 5,
 };
 
 enum class InterruptSelect : u32 {
@@ -920,8 +921,9 @@ struct PM4CmdReleaseMem {
     u32 data_hi;
 
     template <typename T>
-    T* Address() const {
-        return reinterpret_cast<T*>(address_lo | u64(address_hi) << 32);
+    T Address() const { // T is the whole result type (u32*, VAddr), not a pointee
+        u64 full_address = address_lo | (u64(address_hi) << 32); // hi word in bits 63..32
+        return std::bit_cast<T>(full_address); // needs sizeof(T) == sizeof(u64)
     }
 
     u32 DataDWord() const {
@@ -932,22 +934,26 @@ struct PM4CmdReleaseMem {
         return data_lo | u64(data_hi) << 32;
     }
 
-    void SignalFence(auto&& signal_irq) const {
+    void SignalFence(auto&& signal_irq, auto&& gds_to_mem) const {
         switch (data_sel.Value()) {
         case DataSelect::Data32Low: {
-            *Address<u32>() = DataDWord();
+            *Address<u32*>() = DataDWord();
             break;
         }
         case DataSelect::Data64: {
-            *Address<u64>() = DataQWord();
+            *Address<u64*>() = DataQWord();
             break;
         }
         case DataSelect::GpuClock64: {
-            *Address<u64>() = GetGpuClock64();
+            *Address<u64*>() = GetGpuClock64();
             break;
         }
         case DataSelect::PerfCounter: {
-            *Address<u64>() = GetGpuPerfCounter();
+            *Address<u64*>() = GetGpuPerfCounter();
+            break;
+        }
+        case DataSelect::GdsMemStore: {
+            gds_to_mem(Address<VAddr>(), gds_index, num_dw);
             break;
         }
         default: {

From 46a7c4e1f55b819c4ddec2a853f3f89a83198d2f Mon Sep 17 00:00:00 2001
From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com>
Date: Fri, 23 Jan 2026 16:17:57 -0600
Subject: [PATCH 06/10] Core: Miscellaneous memory fixes and slight
 optimizations (#3946)

* Optimizations

Microsoft allows you to coalesce multiple free placeholders in one VirtualFreeEx call, so we can perform the VirtualFreeEx after coalescing with neighboring regions to eliminate a VirtualFreeEx call in some situations.

* Remove unnecessary VirtualProtect call

As far as I can tell, this call wastes a bunch of time, and is completely unnecessary.
With our current codebase, simply supplying prot to MapViewOfFile3 works properly.

* Properly handle file mmaps with offsets

Pretty easy fix to perform while I'm here, so I might as well include it.

* Oops

Leftover stuff from local things + clang

* Disable tracy memory tracking

Tracy's memory tracking is built around a typical malloc/free API, so each individual alloc must correspond to a free.
Moving these to address space would fix issues on Windows, but Linux/Mac would have the same issues with our current code.
Disabling VMA merging is technically a fix, but since that's hardware-accurate behavior, I'd rather not disable it.

I'm sure there's a simple solution I'm missing, but unless other devs have a better idea of how this should be handled, the best I can do is disable it so we can keep using Tracy to trace performance.

* Update address_space.cpp

* Debug logging

Should give a decent idea of how nasty these AddressSpace calls are in games that lost perf.

* test removing thread safety

Just for testing, will revert afterwards.

* Check name before merging

Fixes a regression in Apex Legends

* Revert "test removing thread safety"

This reverts commit ab897f4b1ce7e56e8600ed72c9de5f2762e8693b.

* Move mutex locks before IsValidMapping calls

These aren't thread safe, this fixes a rare race condition that I ran into with Apex Legends.

* Revert "Debug logging"

This reverts commit eb2b12a46c6d8d49d7fd93284c6975651caaa34a.

* Proper VMA splitting in ProtectBytes, SetDirectMemoryType, and NameVirtualRange

Also slight optimization by eliminating AddressSpace protect calls when requested prot matches the previous prot.
Fixes a regression in God of War: Ragnarok

* Clang

* Fixes to SetDirectMemoryType logic

Fixes some regressions in Marvel's Spider-Man that occurred with my previous commits to this PR.

* Fix Genshin Impact again

* Assert on out-of-bounds protect calls

Our page tracking code is prone to causing this.

* test mutex again

This time, remember all mutex stuff

* Revert hack

I'll work on a better way to deal with mutexes in a bit, first I'm pushing up some extra fixes

* Proper logic for checked ReleaseDirectMemory, added bounds checks

Should help some games.

* Better logging for ReleaseDirectMemory errors.

* Only perform region coalescing after all unmap operations.

A small optimization for unmapping multiple regions. Since Microsoft lets you coalesce multiple placeholders at once, we can save doing any VirtualFreeEx calls for coalescing until after we unmap everything in the requested range.

* Separate VMA creation logic into a separate method, update MapFile to use it

MapFile is technically another "emulation" of MapMemory, both should follow similar logic.
To avoid duplicating code, move shared logic to a different function that both MapMemory and MapFile can call.

This fixes memory asserts in a couple of online-only apps I have.

* Clang

* Fix TryWriteBacking

This fixes a lot of regressions that got misattributed

Co-Authored-By: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com>

* Fix again

Fixes device lost crashes with some games after my last commit.

* Oops

* Mutex cleanup

Avoided changing anything in MapMemory, UnmapMemory, PoolCommit, or PoolDecommit since those all need a little extra granularity to prevent GPU deadlocking.

Everything else now uses standard library locks to make things a little simpler.

* Swap MapMemory and PoolCommit to use scoped lock

GPU maps are safe, so this is fine. Unmaps are the primary issue.

---------

Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com>
---
 src/core/address_space.cpp           |  91 +++---
 src/core/libraries/kernel/memory.cpp |  19 +-
 src/core/memory.cpp                  | 420 ++++++++++++++-------------
 src/core/memory.h                    |  14 +-
 4 files changed, 296 insertions(+), 248 deletions(-)

diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp
index 422c67e17..965dfdc31 100644
--- a/src/core/address_space.cpp
+++ b/src/core/address_space.cpp
@@ -237,23 +237,26 @@ struct AddressSpace::Impl {
         void* ptr = nullptr;
         if (phys_addr != -1) {
             HANDLE backing = fd != -1 ? reinterpret_cast<HANDLE>(fd) : backing_handle;
-            if (fd && prot == PAGE_READONLY) {
+            if (fd != -1 && prot == PAGE_READONLY) {
                 DWORD resultvar;
                 ptr = VirtualAlloc2(process, reinterpret_cast<PVOID>(virtual_addr), size,
                                     MEM_RESERVE | MEM_COMMIT | MEM_REPLACE_PLACEHOLDER,
                                     PAGE_READWRITE, nullptr, 0);
-                bool ret = ReadFile(backing, ptr, size, &resultvar, NULL);
+
+                // phys_addr serves as an offset for file mmaps.
+                // Create an OVERLAPPED with the offset, then supply that to ReadFile
+                OVERLAPPED param{};
+                // Offset is the least-significant 32 bits, OffsetHigh is the most-significant.
+                param.Offset = phys_addr & 0xffffffffull;
+                param.OffsetHigh = (phys_addr & 0xffffffff00000000ull) >> 32;
+                bool ret = ReadFile(backing, ptr, size, &resultvar, &param);
                 ASSERT_MSG(ret, "ReadFile failed. {}", Common::GetLastErrorMsg());
                 ret = VirtualProtect(ptr, size, prot, &resultvar);
                 ASSERT_MSG(ret, "VirtualProtect failed. {}", Common::GetLastErrorMsg());
             } else {
                 ptr = MapViewOfFile3(backing, process, reinterpret_cast<PVOID>(virtual_addr),
-                                     phys_addr, size, MEM_REPLACE_PLACEHOLDER,
-                                     PAGE_EXECUTE_READWRITE, nullptr, 0);
+                                     phys_addr, size, MEM_REPLACE_PLACEHOLDER, prot, nullptr, 0);
                 ASSERT_MSG(ptr, "MapViewOfFile3 failed. {}", Common::GetLastErrorMsg());
-                DWORD resultvar;
-                bool ret = VirtualProtect(ptr, size, prot, &resultvar);
-                ASSERT_MSG(ret, "VirtualProtect failed. {}", Common::GetLastErrorMsg());
             }
         } else {
             ptr =
@@ -268,9 +271,11 @@ struct AddressSpace::Impl {
         VAddr virtual_addr = region->base;
         PAddr phys_base = region->phys_base;
         u64 size = region->size;
+        ULONG prot = region->prot;
+        s32 fd = region->fd;
 
         bool ret = false;
-        if (phys_base != -1) {
+        if ((fd != -1 && prot != PAGE_READONLY) || (fd == -1 && phys_base != -1)) {
             ret = UnmapViewOfFile2(process, reinterpret_cast<PVOID>(virtual_addr),
                                    MEM_PRESERVE_PLACEHOLDER);
         } else {
@@ -368,13 +373,17 @@ struct AddressSpace::Impl {
     }
 
     void* Map(VAddr virtual_addr, PAddr phys_addr, u64 size, ULONG prot, s32 fd = -1) {
-        // Split surrounding regions to create a placeholder
-        SplitRegion(virtual_addr, size);
-
-        // Get the region this range covers
+        // Get a pointer to the region containing virtual_addr
         auto it = std::prev(regions.upper_bound(virtual_addr));
-        auto& [base, region] = *it;
 
+        // If needed, split surrounding regions to create a placeholder
+        if (it->first != virtual_addr || it->second.size != size) {
+            SplitRegion(virtual_addr, size);
+            it = std::prev(regions.upper_bound(virtual_addr));
+        }
+
+        // Get the address and region for this range.
+        auto& [base, region] = *it;
         ASSERT_MSG(!region.is_mapped, "Cannot overwrite mapped region");
 
         // Now we have a region matching the requested region, perform the actual mapping.
@@ -390,31 +399,42 @@ struct AddressSpace::Impl {
         auto it = std::prev(regions.upper_bound(virtual_addr));
         ASSERT_MSG(!it->second.is_mapped, "Cannot coalesce mapped regions");
 
-        // Check if a placeholder exists right before us.
+        // Check if there are free placeholders before this area.
+        bool can_coalesce = false;
         auto it_prev = it != regions.begin() ? std::prev(it) : regions.end();
-        if (it_prev != regions.end() && !it_prev->second.is_mapped) {
-            const u64 total_size = it_prev->second.size + it->second.size;
-            if (!VirtualFreeEx(process, LPVOID(it_prev->first), total_size,
-                               MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS)) {
-                UNREACHABLE_MSG("Region coalescing failed: {}", Common::GetLastErrorMsg());
-            }
-
-            it_prev->second.size = total_size;
+        while (it_prev != regions.end() && !it_prev->second.is_mapped) {
+            // If there is an earlier region, move our iterator to that and increase size.
+            it_prev->second.size = it_prev->second.size + it->second.size;
             regions.erase(it);
             it = it_prev;
+
+            // Mark this region as coalesce-able.
+            can_coalesce = true;
+
+            // Get the next previous region.
+            it_prev = it != regions.begin() ? std::prev(it) : regions.end();
         }
 
-        // Check if a placeholder exists right after us.
+        // Check if there are free placeholders after this area.
         auto it_next = std::next(it);
-        if (it_next != regions.end() && !it_next->second.is_mapped) {
-            const u64 total_size = it->second.size + it_next->second.size;
-            if (!VirtualFreeEx(process, LPVOID(it->first), total_size,
+        while (it_next != regions.end() && !it_next->second.is_mapped) {
+            // If there is a later region, increase our current region's size
+            it->second.size = it->second.size + it_next->second.size;
+            regions.erase(it_next);
+
+            // Mark this region as coalesce-able.
+            can_coalesce = true;
+
+            // Get the next region
+            it_next = std::next(it);
+        }
+
+        // If there are placeholders to coalesce, then coalesce them.
+        if (can_coalesce) {
+            if (!VirtualFreeEx(process, LPVOID(it->first), it->second.size,
                                MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS)) {
                 UNREACHABLE_MSG("Region coalescing failed: {}", Common::GetLastErrorMsg());
             }
-
-            it->second.size = total_size;
-            regions.erase(it_next);
         }
     }
 
@@ -423,7 +443,7 @@ struct AddressSpace::Impl {
         u64 remaining_size = size;
         VAddr current_addr = virtual_addr;
         while (remaining_size > 0) {
-            // Get the region containing our current address.
+            // Get a pointer to the region containing virtual_addr
             auto it = std::prev(regions.upper_bound(current_addr));
 
             // If necessary, split regions to ensure a valid unmap.
@@ -432,10 +452,10 @@ struct AddressSpace::Impl {
             u64 size_to_unmap = std::min<u64>(it->second.size - base_offset, remaining_size);
             if (current_addr != it->second.base || size_to_unmap != it->second.size) {
                 SplitRegion(current_addr, size_to_unmap);
+                it = std::prev(regions.upper_bound(current_addr));
             }
 
-            // Repair the region pointer, as SplitRegion modifies the regions map.
-            it = std::prev(regions.upper_bound(current_addr));
+            // Get the address and region corresponding to this range.
             auto& [base, region] = *it;
 
             // Unmap the region if it was previously mapped
@@ -449,13 +469,13 @@ struct AddressSpace::Impl {
             region.phys_base = -1;
             region.prot = PAGE_NOACCESS;
 
-            // Coalesce any free space
-            CoalesceFreeRegions(current_addr);
-
             // Update loop variables
             remaining_size -= size_to_unmap;
             current_addr += size_to_unmap;
         }
+
+        // Coalesce any free space produced from these unmaps.
+        CoalesceFreeRegions(virtual_addr);
     }
 
     void Protect(VAddr virtual_addr, u64 size, bool read, bool write, bool execute) {
@@ -497,6 +517,7 @@ struct AddressSpace::Impl {
 
         const VAddr virtual_end = virtual_addr + size;
         auto it = --regions.upper_bound(virtual_addr);
+        ASSERT_MSG(it != regions.end(), "addr {:#x} out of bounds", virtual_addr);
         for (; it->first < virtual_end; it++) {
             if (!it->second.is_mapped) {
                 continue;
diff --git a/src/core/libraries/kernel/memory.cpp b/src/core/libraries/kernel/memory.cpp
index 3aec8193a..378064e44 100644
--- a/src/core/libraries/kernel/memory.cpp
+++ b/src/core/libraries/kernel/memory.cpp
@@ -89,22 +89,31 @@ s32 PS4_SYSV_ABI sceKernelAllocateMainDirectMemory(u64 len, u64 alignment, s32 m
 }
 
 s32 PS4_SYSV_ABI sceKernelCheckedReleaseDirectMemory(u64 start, u64 len) {
+    LOG_INFO(Kernel_Vmm, "called start = {:#x}, len = {:#x}", start, len);
+    if (!Common::Is16KBAligned(start) || !Common::Is16KBAligned(len)) {
+        LOG_ERROR(Kernel_Vmm, "Misaligned start or length, start = {:#x}, length = {:#x}", start,
+                  len);
+        return ORBIS_KERNEL_ERROR_EINVAL;
+    }
     if (len == 0) {
         return ORBIS_OK;
     }
-    LOG_INFO(Kernel_Vmm, "called start = {:#x}, len = {:#x}", start, len);
     auto* memory = Core::Memory::Instance();
-    memory->Free(start, len);
-    return ORBIS_OK;
+    return memory->Free(start, len, true);
 }
 
 s32 PS4_SYSV_ABI sceKernelReleaseDirectMemory(u64 start, u64 len) {
+    LOG_INFO(Kernel_Vmm, "called start = {:#x}, len = {:#x}", start, len);
+    if (!Common::Is16KBAligned(start) || !Common::Is16KBAligned(len)) {
+        LOG_ERROR(Kernel_Vmm, "Misaligned start or length, start = {:#x}, length = {:#x}", start,
+                  len);
+        return ORBIS_KERNEL_ERROR_EINVAL;
+    }
     if (len == 0) {
         return ORBIS_OK;
     }
-    LOG_INFO(Kernel_Vmm, "called start = {:#x}, len = {:#x}", start, len);
     auto* memory = Core::Memory::Instance();
-    memory->Free(start, len);
+    memory->Free(start, len, false);
     return ORBIS_OK;
 }
 
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 4567475cd..0726e8711 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -117,9 +117,9 @@ void MemoryManager::SetPrtArea(u32 id, VAddr address, u64 size) {
 }
 
 void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) {
+    std::shared_lock lk{mutex};
     ASSERT_MSG(IsValidMapping(virtual_addr), "Attempted to access invalid address {:#x}",
                virtual_addr);
-    mutex.lock_shared();
 
     auto vma = FindVMA(virtual_addr);
     while (size) {
@@ -134,46 +134,49 @@ void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) {
         dest += copy_size;
         ++vma;
     }
-
-    mutex.unlock_shared();
 }
 
 bool MemoryManager::TryWriteBacking(void* address, const void* data, u64 size) {
     const VAddr virtual_addr = std::bit_cast<VAddr>(address);
+    std::shared_lock lk{mutex};
     ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}",
                virtual_addr);
-    mutex.lock_shared();
 
     std::vector<VirtualMemoryArea> vmas_to_write;
     auto current_vma = FindVMA(virtual_addr);
-    while (virtual_addr + size < current_vma->second.base + current_vma->second.size) {
+    while (current_vma->second.Overlaps(virtual_addr, size)) {
         if (!HasPhysicalBacking(current_vma->second)) {
-            mutex.unlock_shared();
-            return false;
+            break;
         }
         vmas_to_write.emplace_back(current_vma->second);
         current_vma++;
     }
 
+    if (vmas_to_write.empty()) {
+        return false;
+    }
+
     for (auto& vma : vmas_to_write) {
         auto start_in_vma = std::max<VAddr>(virtual_addr, vma.base) - vma.base;
-        for (auto& phys_area : vma.phys_areas) {
+        auto phys_handle = std::prev(vma.phys_areas.upper_bound(start_in_vma));
+        for (; phys_handle != vma.phys_areas.end(); phys_handle++) {
             if (!size) {
                 break;
             }
-            u8* backing = impl.BackingBase() + phys_area.second.base + start_in_vma;
-            u64 copy_size = std::min<u64>(size, phys_area.second.size);
+            const u64 start_in_dma =
+                std::max<u64>(start_in_vma, phys_handle->first) - phys_handle->first;
+            u8* backing = impl.BackingBase() + phys_handle->second.base + start_in_dma;
+            u64 copy_size = std::min<u64>(size, phys_handle->second.size - start_in_dma);
             memcpy(backing, data, copy_size);
             size -= copy_size;
         }
     }
 
-    mutex.unlock_shared();
     return true;
 }
 
 PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, u64 size, u64 alignment) {
-    mutex.lock();
+    std::scoped_lock lk{mutex};
     alignment = alignment > 0 ? alignment : 64_KB;
 
     auto dmem_area = FindDmemArea(search_start);
@@ -199,7 +202,6 @@ PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, u64 size,
     if (dmem_area == dmem_map.end()) {
         // There are no suitable mappings in this range
         LOG_ERROR(Kernel_Vmm, "Unable to find free direct memory area: size = {:#x}", size);
-        mutex.unlock();
         return -1;
     }
 
@@ -211,13 +213,12 @@ PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, u64 size,
     // Track how much dmem was allocated for pools.
     pool_budget += size;
 
-    mutex.unlock();
     return mapping_start;
 }
 
 PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, u64 size, u64 alignment,
                               s32 memory_type) {
-    mutex.lock();
+    std::scoped_lock lk{mutex};
     alignment = alignment > 0 ? alignment : 16_KB;
 
     auto dmem_area = FindDmemArea(search_start);
@@ -242,7 +243,6 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, u64 size, u6
     if (dmem_area == dmem_map.end()) {
         // There are no suitable mappings in this range
         LOG_ERROR(Kernel_Vmm, "Unable to find free direct memory area: size = {:#x}", size);
-        mutex.unlock();
         return -1;
     }
 
@@ -252,12 +252,52 @@ PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, u64 size, u6
     area.dma_type = PhysicalMemoryType::Allocated;
     MergeAdjacent(dmem_map, dmem_area);
 
-    mutex.unlock();
     return mapping_start;
 }
 
-void MemoryManager::Free(PAddr phys_addr, u64 size) {
-    mutex.lock();
+s32 MemoryManager::Free(PAddr phys_addr, u64 size, bool is_checked) {
+    // Basic bounds checking
+    if (phys_addr > total_direct_size || (is_checked && phys_addr + size > total_direct_size)) {
+        LOG_ERROR(Kernel_Vmm, "phys_addr {:#x}, size {:#x} goes outside dmem map", phys_addr, size);
+        if (is_checked) {
+            return ORBIS_KERNEL_ERROR_ENOENT;
+        }
+        return ORBIS_OK;
+    }
+
+    // Lock mutex
+    std::scoped_lock lk{mutex};
+
+    // If this is a checked free, then all direct memory in range must be allocated.
+    std::vector<std::pair<PAddr, u64>> free_list;
+    u64 remaining_size = size;
+    auto phys_handle = FindDmemArea(phys_addr);
+    for (; phys_handle != dmem_map.end(); phys_handle++) {
+        if (remaining_size == 0) {
+            // Done searching
+            break;
+        }
+        auto& dmem_area = phys_handle->second;
+        if (dmem_area.dma_type == PhysicalMemoryType::Free) {
+            if (is_checked) {
+                // Checked frees will error if anything in the area isn't allocated.
+                // Unchecked frees will just ignore free areas.
+                LOG_ERROR(Kernel_Vmm, "Attempting to release a free dmem area");
+                return ORBIS_KERNEL_ERROR_ENOENT;
+            }
+            continue;
+        }
+
+        // Store physical address and size to release
+        const PAddr current_phys_addr = std::max<PAddr>(phys_addr, phys_handle->first);
+        const u64 start_in_dma = current_phys_addr - phys_handle->first;
+        const u64 size_in_dma =
+            std::min<u64>(remaining_size, phys_handle->second.size - start_in_dma);
+        free_list.emplace_back(current_phys_addr, size_in_dma);
+
+        // Track remaining size to free
+        remaining_size -= size_in_dma;
+    }
 
     // Release any dmem mappings that reference this physical block.
     std::vector<std::pair<VAddr, u64>> remove_list;
@@ -284,36 +324,24 @@ void MemoryManager::Free(PAddr phys_addr, u64 size) {
     }
 
     // Unmap all dmem areas within this area.
-    auto phys_addr_to_search = phys_addr;
-    auto remaining_size = size;
-    auto dmem_area = FindDmemArea(phys_addr);
-    while (dmem_area != dmem_map.end() && remaining_size > 0) {
+    for (auto& [phys_addr, size] : free_list) {
         // Carve a free dmem area in place of this one.
-        const auto start_phys_addr = std::max<PAddr>(phys_addr, dmem_area->second.base);
-        const auto offset_in_dma = start_phys_addr - dmem_area->second.base;
-        const auto size_in_dma =
-            std::min<u64>(dmem_area->second.size - offset_in_dma, remaining_size);
-        const auto dmem_handle = CarvePhysArea(dmem_map, start_phys_addr, size_in_dma);
+        const auto dmem_handle = CarvePhysArea(dmem_map, phys_addr, size);
         auto& new_dmem_area = dmem_handle->second;
         new_dmem_area.dma_type = PhysicalMemoryType::Free;
         new_dmem_area.memory_type = 0;
 
         // Merge the new dmem_area with dmem_map
         MergeAdjacent(dmem_map, dmem_handle);
-
-        // Get the next relevant dmem area.
-        phys_addr_to_search = phys_addr + size_in_dma;
-        remaining_size -= size_in_dma;
-        dmem_area = FindDmemArea(phys_addr_to_search);
     }
 
-    mutex.unlock();
+    return ORBIS_OK;
 }
 
 s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32 mtype) {
+    std::scoped_lock lk{mutex};
     ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}",
                virtual_addr);
-    mutex.lock();
 
     // Input addresses to PoolCommit are treated as fixed, and have a constant alignment.
     const u64 alignment = 64_KB;
@@ -323,7 +351,6 @@ s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32
     if (vma.type != VMAType::PoolReserved) {
         // If we're attempting to commit non-pooled memory, return EINVAL
         LOG_ERROR(Kernel_Vmm, "Attempting to commit non-pooled memory at {:#x}", mapped_addr);
-        mutex.unlock();
         return ORBIS_KERNEL_ERROR_EINVAL;
     }
 
@@ -332,14 +359,12 @@ s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32
         LOG_ERROR(Kernel_Vmm,
                   "Pooled region {:#x} to {:#x} is not large enough to commit from {:#x} to {:#x}",
                   vma.base, vma.base + vma.size, mapped_addr, mapped_addr + size);
-        mutex.unlock();
         return ORBIS_KERNEL_ERROR_EINVAL;
     }
 
     if (pool_budget <= size) {
         // If there isn't enough pooled memory to perform the mapping, return ENOMEM
         LOG_ERROR(Kernel_Vmm, "Not enough pooled memory to perform mapping");
-        mutex.unlock();
         return ORBIS_KERNEL_ERROR_ENOMEM;
     } else {
         // Track how much pooled memory this commit will take
@@ -386,7 +411,8 @@ s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32
 
         // Perform an address space mapping for each physical area
         void* out_addr = impl.Map(current_addr, size_to_map, new_dmem_area.base);
-        TRACK_ALLOC(out_addr, size_to_map, "VMEM");
+        // Tracy memory tracking breaks from merging memory areas. Disabled for now.
+        // TRACK_ALLOC(out_addr, size_to_map, "VMEM");
 
         handle = MergeAdjacent(dmem_map, new_dmem_handle);
         current_addr += size_to_map;
@@ -398,7 +424,6 @@ s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32
     // Merge this VMA with similar nearby areas
     MergeAdjacent(vma_map, new_vma_handle);
 
-    mutex.unlock();
     if (IsValidGpuMapping(mapped_addr, size)) {
         rasterizer->MapMemory(mapped_addr, size);
     }
@@ -406,54 +431,9 @@ s32 MemoryManager::PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32
     return ORBIS_OK;
 }
 
-s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot,
-                             MemoryMapFlags flags, VMAType type, std::string_view name,
-                             bool validate_dmem, PAddr phys_addr, u64 alignment) {
-    // Certain games perform flexible mappings on loop to determine
-    // the available flexible memory size. Questionable but we need to handle this.
-    if (type == VMAType::Flexible && flexible_usage + size > total_flexible_size) {
-        LOG_ERROR(Kernel_Vmm,
-                  "Out of flexible memory, available flexible memory = {:#x}"
-                  " requested size = {:#x}",
-                  total_flexible_size - flexible_usage, size);
-        return ORBIS_KERNEL_ERROR_EINVAL;
-    }
-
-    mutex.lock();
-
-    PhysHandle dmem_area;
-    // Validate the requested physical address range
-    if (phys_addr != -1) {
-        if (total_direct_size < phys_addr + size) {
-            LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at physical address {:#x}", size,
-                      phys_addr);
-            mutex.unlock();
-            return ORBIS_KERNEL_ERROR_ENOMEM;
-        }
-
-        // Validate direct memory areas involved in this call.
-        auto dmem_area = FindDmemArea(phys_addr);
-        while (dmem_area != dmem_map.end() && dmem_area->second.base < phys_addr + size) {
-            // If any requested dmem area is not allocated, return an error.
-            if (dmem_area->second.dma_type != PhysicalMemoryType::Allocated &&
-                dmem_area->second.dma_type != PhysicalMemoryType::Mapped) {
-                LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at physical address {:#x}", size,
-                          phys_addr);
-                mutex.unlock();
-                return ORBIS_KERNEL_ERROR_ENOMEM;
-            }
-
-            // If we need to perform extra validation, then check for Mapped dmem areas too.
-            if (validate_dmem && dmem_area->second.dma_type == PhysicalMemoryType::Mapped) {
-                LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at physical address {:#x}", size,
-                          phys_addr);
-                mutex.unlock();
-                return ORBIS_KERNEL_ERROR_EBUSY;
-            }
-
-            dmem_area++;
-        }
-    }
+std::pair<s32, MemoryManager::VMAHandle> MemoryManager::CreateArea(
+    VAddr virtual_addr, u64 size, MemoryProt prot, MemoryMapFlags flags, VMAType type,
+    std::string_view name, u64 alignment) {
 
     // Limit the minimum address to SystemManagedVirtualBase to prevent hardware-specific issues.
     VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
@@ -483,8 +463,7 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
         auto remaining_size = vma.base + vma.size - mapped_addr;
         if (!vma.IsFree() || remaining_size < size) {
             LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at address {:#x}", size, mapped_addr);
-            mutex.unlock();
-            return ORBIS_KERNEL_ERROR_ENOMEM;
+            return {ORBIS_KERNEL_ERROR_ENOMEM, vma_map.end()};
         }
     } else {
         // When MemoryMapFlags::Fixed is not specified, and mapped_addr is 0,
@@ -494,8 +473,7 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
         mapped_addr = SearchFree(mapped_addr, size, alignment);
         if (mapped_addr == -1) {
             // No suitable memory areas to map to
-            mutex.unlock();
-            return ORBIS_KERNEL_ERROR_ENOMEM;
+            return {ORBIS_KERNEL_ERROR_ENOMEM, vma_map.end()};
         }
     }
 
@@ -513,6 +491,64 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
     new_vma.name = name;
     new_vma.type = type;
     new_vma.phys_areas.clear();
+    return {ORBIS_OK, new_vma_handle};
+}
+
+s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot,
+                             MemoryMapFlags flags, VMAType type, std::string_view name,
+                             bool validate_dmem, PAddr phys_addr, u64 alignment) {
+    // Certain games perform flexible mappings on loop to determine
+    // the available flexible memory size. Questionable but we need to handle this.
+    if (type == VMAType::Flexible && flexible_usage + size > total_flexible_size) {
+        LOG_ERROR(Kernel_Vmm,
+                  "Out of flexible memory, available flexible memory = {:#x}"
+                  " requested size = {:#x}",
+                  total_flexible_size - flexible_usage, size);
+        return ORBIS_KERNEL_ERROR_EINVAL;
+    }
+
+    std::scoped_lock lk{mutex};
+
+    PhysHandle dmem_area;
+    // Validate the requested physical address range
+    if (phys_addr != -1) {
+        if (total_direct_size < phys_addr + size) {
+            LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at physical address {:#x}", size,
+                      phys_addr);
+            return ORBIS_KERNEL_ERROR_ENOMEM;
+        }
+
+        // Validate direct memory areas involved in this call.
+        auto dmem_area = FindDmemArea(phys_addr);
+        while (dmem_area != dmem_map.end() && dmem_area->second.base < phys_addr + size) {
+            // If any requested dmem area is not allocated, return an error.
+            if (dmem_area->second.dma_type != PhysicalMemoryType::Allocated &&
+                dmem_area->second.dma_type != PhysicalMemoryType::Mapped) {
+                LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at physical address {:#x}", size,
+                          phys_addr);
+                return ORBIS_KERNEL_ERROR_ENOMEM;
+            }
+
+            // If we need to perform extra validation, then check for Mapped dmem areas too.
+            if (validate_dmem && dmem_area->second.dma_type == PhysicalMemoryType::Mapped) {
+                LOG_ERROR(Kernel_Vmm, "Unable to map {:#x} bytes at physical address {:#x}", size,
+                          phys_addr);
+                return ORBIS_KERNEL_ERROR_EBUSY;
+            }
+
+            dmem_area++;
+        }
+    }
+
+    auto [result, new_vma_handle] =
+        CreateArea(virtual_addr, size, prot, flags, type, name, alignment);
+    if (result != ORBIS_OK) {
+        return result;
+    }
+
+    auto& new_vma = new_vma_handle->second;
+    auto mapped_addr = new_vma.base;
+    bool is_exec = True(prot & MemoryProt::CpuExec);
 
     // If type is Flexible, we need to track how much flexible memory is used here.
     // We also need to determine a reasonable physical base to perform this mapping at.
@@ -542,7 +578,8 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
 
             // Perform an address space mapping for each physical area
             void* out_addr = impl.Map(current_addr, size_to_map, new_fmem_area.base, is_exec);
-            TRACK_ALLOC(out_addr, size_to_map, "VMEM");
+            // Tracy memory tracking breaks from merging memory areas. Disabled for now.
+            // TRACK_ALLOC(out_addr, size_to_map, "VMEM");
 
             handle = MergeAdjacent(fmem_map, new_fmem_handle);
             current_addr += size_to_map;
@@ -594,60 +631,32 @@ s32 MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, u64 size, Memo
         // Flexible address space mappings were performed while finding direct memory areas.
         if (type != VMAType::Flexible) {
             impl.Map(mapped_addr, size, phys_addr, is_exec);
+            // Tracy memory tracking breaks from merging memory areas. Disabled for now.
+            // TRACK_ALLOC(mapped_addr, size, "VMEM");
         }
-        TRACK_ALLOC(*out_addr, size, "VMEM");
-
-        mutex.unlock();
 
         // If this is not a reservation, then map to GPU and address space
         if (IsValidGpuMapping(mapped_addr, size)) {
             rasterizer->MapMemory(mapped_addr, size);
         }
-    } else {
-        mutex.unlock();
     }
-
     return ORBIS_OK;
 }
 
 s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, MemoryProt prot,
                            MemoryMapFlags flags, s32 fd, s64 phys_addr) {
-    VAddr mapped_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
-    ASSERT_MSG(IsValidMapping(mapped_addr, size), "Attempted to access invalid address {:#x}",
-               mapped_addr);
-
-    mutex.lock();
-
-    // Find first free area to map the file.
-    if (False(flags & MemoryMapFlags::Fixed)) {
-        mapped_addr = SearchFree(mapped_addr, size, 1);
-        if (mapped_addr == -1) {
-            // No suitable memory areas to map to
-            mutex.unlock();
-            return ORBIS_KERNEL_ERROR_ENOMEM;
-        }
-    }
-
-    if (True(flags & MemoryMapFlags::Fixed)) {
-        const auto& vma = FindVMA(mapped_addr)->second;
-        const u64 remaining_size = vma.base + vma.size - virtual_addr;
-        ASSERT_MSG(!vma.IsMapped() && remaining_size >= size,
-                   "Memory region {:#x} to {:#x} isn't free enough to map region {:#x} to {:#x}",
-                   vma.base, vma.base + vma.size, virtual_addr, virtual_addr + size);
-    }
-
+    std::scoped_lock lk{mutex};
     // Get the file to map
+
     auto* h = Common::Singleton<Core::FileSys::HandleTable>::Instance();
     auto file = h->GetFile(fd);
     if (file == nullptr) {
         LOG_WARNING(Kernel_Vmm, "Invalid file for mmap, fd {}", fd);
-        mutex.unlock();
         return ORBIS_KERNEL_ERROR_EBADF;
     }
 
     if (file->type != Core::FileSys::FileType::Regular) {
         LOG_WARNING(Kernel_Vmm, "Unsupported file type for mmap, fd {}", fd);
-        mutex.unlock();
         return ORBIS_KERNEL_ERROR_EBADF;
     }
 
@@ -665,35 +674,36 @@ s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, Memory
         prot &= ~MemoryProt::CpuWrite;
     }
 
-    impl.MapFile(mapped_addr, size, phys_addr, std::bit_cast<u32>(prot), handle);
-
     if (prot >= MemoryProt::GpuRead) {
         // On real hardware, GPU file mmaps cause a full system crash due to an internal error.
         ASSERT_MSG(false, "Files cannot be mapped to GPU memory");
     }
+
     if (True(prot & MemoryProt::CpuExec)) {
         // On real hardware, execute permissions are silently removed.
         prot &= ~MemoryProt::CpuExec;
     }
 
-    // Add virtual memory area
-    auto& new_vma = CarveVMA(mapped_addr, size)->second;
-    new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
-    new_vma.prot = prot;
-    new_vma.name = "File";
-    new_vma.fd = fd;
-    new_vma.type = VMAType::File;
+    auto [result, new_vma_handle] =
+        CreateArea(virtual_addr, size, prot, flags, VMAType::File, "anon", 0);
+    if (result != ORBIS_OK) {
+        return result;
+    }
 
-    mutex.unlock();
+    auto& new_vma = new_vma_handle->second;
+    auto mapped_addr = new_vma.base;
+    bool is_exec = True(prot & MemoryProt::CpuExec);
+
+    impl.MapFile(mapped_addr, size, phys_addr, std::bit_cast<u32>(prot), handle);
 
     *out_addr = std::bit_cast<void*>(mapped_addr);
     return ORBIS_OK;
 }
 
 s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
+    mutex.lock();
     ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}",
                virtual_addr);
-    mutex.lock();
 
     // Do an initial search to ensure this decommit is valid.
     auto it = FindVMA(virtual_addr);
@@ -768,7 +778,8 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
 
     // Unmap from address space
     impl.Unmap(virtual_addr, size, true);
-    TRACK_FREE(virtual_addr, "VMEM");
+    // Tracy memory tracking breaks from merging memory areas. Disabled for now.
+    // TRACK_FREE(virtual_addr, "VMEM");
 
     mutex.unlock();
     return ORBIS_OK;
@@ -857,7 +868,8 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
     if (vma_type != VMAType::Reserved && vma_type != VMAType::PoolReserved) {
         // Unmap the memory region.
         impl.Unmap(virtual_addr, size_in_vma, has_backing);
-        TRACK_FREE(virtual_addr, "VMEM");
+        // Tracy memory tracking breaks from merging memory areas. Disabled for now.
+        // TRACK_FREE(virtual_addr, "VMEM");
 
         // If this mapping has GPU access, unmap from GPU.
         if (IsValidGpuMapping(virtual_addr, size)) {
@@ -884,14 +896,13 @@ s32 MemoryManager::UnmapMemoryImpl(VAddr virtual_addr, u64 size) {
 }
 
 s32 MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* prot) {
+    std::shared_lock lk{mutex};
     ASSERT_MSG(IsValidMapping(addr), "Attempted to access invalid address {:#x}", addr);
-    mutex.lock_shared();
 
     const auto it = FindVMA(addr);
     const auto& vma = it->second;
     if (vma.IsFree()) {
         LOG_ERROR(Kernel_Vmm, "Address {:#x} is not mapped", addr);
-        mutex.unlock_shared();
         return ORBIS_KERNEL_ERROR_EACCES;
     }
 
@@ -905,7 +916,6 @@ s32 MemoryManager::QueryProtection(VAddr addr, void** start, void** end, u32* pr
         *prot = static_cast<u32>(vma.prot);
     }
 
-    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
@@ -913,6 +923,8 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 siz
                                 MemoryProt prot) {
     const auto start_in_vma = addr - vma_base.base;
     const auto adjusted_size = std::min<u64>(vma_base.size - start_in_vma, size);
+    const MemoryProt old_prot = vma_base.prot;
+    const MemoryProt new_prot = prot;
 
     if (vma_base.type == VMAType::Free || vma_base.type == VMAType::PoolReserved) {
         // On PS4, protecting freed memory does nothing.
@@ -953,8 +965,11 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 siz
         prot &= ~MemoryProt::CpuExec;
     }
 
-    // Change protection
-    vma_base.prot = prot;
+    // Split VMAs and apply protection change.
+    const auto new_it = CarveVMA(addr, adjusted_size);
+    auto& new_vma = new_it->second;
+    new_vma.prot = prot;
+    MergeAdjacent(vma_map, new_it);
 
     if (vma_base.type == VMAType::Reserved) {
         // On PS4, protections change vma_map, but don't apply.
@@ -962,7 +977,10 @@ s64 MemoryManager::ProtectBytes(VAddr addr, VirtualMemoryArea& vma_base, u64 siz
         return adjusted_size;
     }
 
-    impl.Protect(addr, size, perms);
+    // Perform address-space memory protections if needed.
+    if (new_prot != old_prot) {
+        impl.Protect(addr, adjusted_size, perms);
+    }
 
     return adjusted_size;
 }
@@ -974,6 +992,7 @@ s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) {
     }
 
     // Ensure the range to modify is valid
+    std::scoped_lock lk{mutex};
     ASSERT_MSG(IsValidMapping(addr, size), "Attempted to access invalid address {:#x}", addr);
 
     // Appropriately restrict flags.
@@ -981,7 +1000,6 @@ s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) {
         MemoryProt::CpuReadWrite | MemoryProt::CpuExec | MemoryProt::GpuReadWrite;
     MemoryProt valid_flags = prot & flag_mask;
 
-    mutex.lock();
     // Protect all VMAs between addr and addr + size.
     s64 protected_bytes = 0;
     while (protected_bytes < size) {
@@ -994,13 +1012,11 @@ s32 MemoryManager::Protect(VAddr addr, u64 size, MemoryProt prot) {
         auto result = ProtectBytes(addr + protected_bytes, vma_base, size - protected_bytes, prot);
         if (result < 0) {
             // ProtectBytes returned an error, return it
-            mutex.unlock();
             return result;
         }
         protected_bytes += result;
     }
 
-    mutex.unlock();
     return ORBIS_OK;
 }
 
@@ -1014,7 +1030,7 @@ s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags,
         return ORBIS_KERNEL_ERROR_EACCES;
     }
 
-    mutex.lock_shared();
+    std::shared_lock lk{mutex};
     auto it = FindVMA(query_addr);
 
     while (it != vma_map.end() && it->second.type == VMAType::Free && flags == 1) {
@@ -1022,7 +1038,6 @@ s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags,
     }
     if (it == vma_map.end() || it->second.type == VMAType::Free) {
         LOG_WARNING(Kernel_Vmm, "VirtualQuery on free memory region");
-        mutex.unlock_shared();
         return ORBIS_KERNEL_ERROR_EACCES;
     }
 
@@ -1050,7 +1065,6 @@ s32 MemoryManager::VirtualQuery(VAddr addr, s32 flags,
 
     strncpy(info->name, vma.name.data(), ::Libraries::Kernel::ORBIS_KERNEL_MAXIMUM_NAME_LENGTH);
 
-    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
@@ -1061,7 +1075,7 @@ s32 MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next,
         return ORBIS_KERNEL_ERROR_EACCES;
     }
 
-    mutex.lock_shared();
+    std::shared_lock lk{mutex};
     auto dmem_area = FindDmemArea(addr);
     while (dmem_area != dmem_map.end() && dmem_area->second.dma_type == PhysicalMemoryType::Free &&
            find_next) {
@@ -1070,7 +1084,6 @@ s32 MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next,
 
     if (dmem_area == dmem_map.end() || dmem_area->second.dma_type == PhysicalMemoryType::Free) {
         LOG_WARNING(Kernel_Vmm, "Unable to find allocated direct memory region to query!");
-        mutex.unlock_shared();
         return ORBIS_KERNEL_ERROR_EACCES;
     }
 
@@ -1086,13 +1099,12 @@ s32 MemoryManager::DirectMemoryQuery(PAddr addr, bool find_next,
         dmem_area++;
     }
 
-    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
 s32 MemoryManager::DirectQueryAvailable(PAddr search_start, PAddr search_end, u64 alignment,
                                         PAddr* phys_addr_out, u64* size_out) {
-    mutex.lock_shared();
+    std::shared_lock lk{mutex};
 
     auto dmem_area = FindDmemArea(search_start);
     PAddr paddr{};
@@ -1132,91 +1144,90 @@ s32 MemoryManager::DirectQueryAvailable(PAddr search_start, PAddr search_end, u6
         dmem_area++;
     }
 
-    mutex.unlock_shared();
     *phys_addr_out = paddr;
     *size_out = max_size;
     return ORBIS_OK;
 }
 
 s32 MemoryManager::SetDirectMemoryType(VAddr addr, u64 size, s32 memory_type) {
-    mutex.lock();
+    std::scoped_lock lk{mutex};
 
     ASSERT_MSG(IsValidMapping(addr, size), "Attempted to access invalid address {:#x}", addr);
 
     // Search through all VMAs covered by the provided range.
-    // We aren't modifying these VMAs, so it's safe to iterate through them.
+    // VMAs get carved and merged below, so the handle is re-resolved by address each pass.
     VAddr current_addr = addr;
-    auto remaining_size = size;
+    u64 remaining_size = size;
     auto vma_handle = FindVMA(addr);
-    while (vma_handle != vma_map.end() && vma_handle->second.base < addr + size) {
+    while (vma_handle != vma_map.end() && remaining_size > 0) {
+        // Calculate position in vma
+        const VAddr start_in_vma = current_addr - vma_handle->second.base;
+        const u64 size_in_vma =
+            std::min<u64>(remaining_size, vma_handle->second.size - start_in_vma);
+
         // Direct and Pooled mappings are the only ones with a memory type.
         if (vma_handle->second.type == VMAType::Direct ||
             vma_handle->second.type == VMAType::Pooled) {
-            // Calculate position in vma
-            const auto start_in_vma = current_addr - vma_handle->second.base;
-            const auto size_in_vma = vma_handle->second.size - start_in_vma;
-            const auto base_phys_addr = vma_handle->second.phys_areas.begin()->second.base;
-            auto size_to_modify = std::min<u64>(remaining_size, size_in_vma);
-            for (auto& phys_handle : vma_handle->second.phys_areas) {
-                if (size_to_modify == 0) {
-                    break;
-                }
+            // Split area to modify into a new VMA.
+            vma_handle = CarveVMA(current_addr, size_in_vma);
+            auto phys_handle = vma_handle->second.phys_areas.begin();
+            while (phys_handle != vma_handle->second.phys_areas.end()) {
+                // Update internal physical areas
+                phys_handle->second.memory_type = memory_type;
 
-                const auto current_phys_addr =
-                    std::max<PAddr>(base_phys_addr, phys_handle.second.base);
-                if (current_phys_addr >= phys_handle.second.base + phys_handle.second.size) {
-                    continue;
-                }
-                const auto start_in_dma = current_phys_addr - phys_handle.second.base;
-                const auto size_in_dma = phys_handle.second.size - start_in_dma;
-
-                phys_handle.second.memory_type = memory_type;
-
-                auto dmem_handle = CarvePhysArea(dmem_map, current_phys_addr, size_in_dma);
+                // Carve a new dmem area in dmem_map, update memory type there
+                auto dmem_handle =
+                    CarvePhysArea(dmem_map, phys_handle->second.base, phys_handle->second.size);
                 auto& dmem_area = dmem_handle->second;
                 dmem_area.memory_type = memory_type;
-                size_to_modify -= dmem_area.size;
-                MergeAdjacent(dmem_map, dmem_handle);
+
+                // Increment phys_handle
+                phys_handle++;
             }
+
+            // Check if VMA can be merged with adjacent areas after physical area modifications.
+            vma_handle = MergeAdjacent(vma_map, vma_handle);
         }
-        remaining_size -= vma_handle->second.size;
-        vma_handle++;
+        current_addr += size_in_vma;
+        remaining_size -= size_in_vma;
+        vma_handle = FindVMA(current_addr);
     }
 
-    mutex.unlock();
     return ORBIS_OK;
 }
 
 void MemoryManager::NameVirtualRange(VAddr virtual_addr, u64 size, std::string_view name) {
-    mutex.lock();
+    std::scoped_lock lk{mutex};
 
     // Sizes are aligned up to the nearest 16_KB
-    auto aligned_size = Common::AlignUp(size, 16_KB);
+    u64 aligned_size = Common::AlignUp(size, 16_KB);
     // Addresses are aligned down to the nearest 16_KB
-    auto aligned_addr = Common::AlignDown(virtual_addr, 16_KB);
+    VAddr aligned_addr = Common::AlignDown(virtual_addr, 16_KB);
 
     ASSERT_MSG(IsValidMapping(aligned_addr, aligned_size),
                "Attempted to access invalid address {:#x}", aligned_addr);
     auto it = FindVMA(aligned_addr);
-    s64 remaining_size = aligned_size;
-    auto current_addr = aligned_addr;
-    while (remaining_size > 0) {
+    u64 remaining_size = aligned_size;
+    VAddr current_addr = aligned_addr;
+    while (remaining_size > 0 && it != vma_map.end()) {
+        const u64 start_in_vma = current_addr - it->second.base;
+        const u64 size_in_vma = std::min<u64>(remaining_size, it->second.size - start_in_vma);
         // Nothing needs to be done to free VMAs
         if (!it->second.IsFree()) {
-            if (remaining_size < it->second.size) {
-                // We should split VMAs here, but this could cause trouble for Windows.
-                // Instead log a warning and name the whole VMA.
-                LOG_WARNING(Kernel_Vmm, "Trying to partially name a range");
+            if (size_in_vma < it->second.size) {
+                // Only part of this VMA is in range: split it so the rest keeps its name.
+                it = CarveVMA(current_addr, size_in_vma);
             }
-            auto& vma = it->second;
-            vma.name = name;
+            it->second.name = name;
         }
-        remaining_size -= it->second.size;
-        current_addr += it->second.size;
-        it = FindVMA(current_addr);
+        // Merge with adjacent VMAs where possible now that the name may match.
+        it = MergeAdjacent(vma_map, it);
+        remaining_size -= size_in_vma;
+        current_addr += size_in_vma;
+        // A merge can leave `it` on a VMA that extends past current_addr, so stepping the
+        // iterator could skip part of the range; look the next VMA up by address instead.
+        it = FindVMA(current_addr);
     }
-
-    mutex.unlock();
 }
 
 s32 MemoryManager::GetDirectMemoryType(PAddr addr, s32* directMemoryTypeOut,
@@ -1226,24 +1237,22 @@ s32 MemoryManager::GetDirectMemoryType(PAddr addr, s32* directMemoryTypeOut,
         return ORBIS_KERNEL_ERROR_ENOENT;
     }
 
-    mutex.lock_shared();
+    std::shared_lock lk{mutex};
     const auto& dmem_area = FindDmemArea(addr)->second;
     if (dmem_area.dma_type == PhysicalMemoryType::Free) {
         LOG_ERROR(Kernel_Vmm, "Unable to find allocated direct memory region to check type!");
-        mutex.unlock_shared();
         return ORBIS_KERNEL_ERROR_ENOENT;
     }
 
     *directMemoryStartOut = reinterpret_cast<void*>(dmem_area.base);
     *directMemoryEndOut = reinterpret_cast<void*>(dmem_area.GetEnd());
     *directMemoryTypeOut = dmem_area.memory_type;
-    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
 s32 MemoryManager::IsStack(VAddr addr, void** start, void** end) {
+    std::shared_lock lk{mutex};
     ASSERT_MSG(IsValidMapping(addr), "Attempted to access invalid address {:#x}", addr);
-    mutex.lock_shared();
     const auto& vma = FindVMA(addr)->second;
     if (vma.IsFree()) {
-        mutex.unlock_shared();
+        // `lk` releases the shared lock on return; a manual unlock here would release it twice.
@@ -1264,13 +1273,11 @@ s32 MemoryManager::IsStack(VAddr addr, void** start, void** end) {
     if (end != nullptr) {
         *end = reinterpret_cast<void*>(stack_end);
     }
-
-    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
 s32 MemoryManager::GetMemoryPoolStats(::Libraries::Kernel::OrbisKernelMemoryPoolBlockStats* stats) {
-    mutex.lock_shared();
+    std::shared_lock lk{mutex};
 
     // Run through dmem_map, determine how much physical memory is currently committed
     constexpr u64 block_size = 64_KB;
@@ -1290,7 +1297,6 @@ s32 MemoryManager::GetMemoryPoolStats(::Libraries::Kernel::OrbisKernelMemoryPool
     stats->allocated_cached_blocks = 0;
     stats->available_cached_blocks = 0;
 
-    mutex.unlock_shared();
     return ORBIS_OK;
 }
 
diff --git a/src/core/memory.h b/src/core/memory.h
index 0664ed592..92a1016bf 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -114,6 +114,10 @@ struct VirtualMemoryArea {
         return addr >= base && (addr + size) <= (base + this->size);
     }
 
+    bool Overlaps(VAddr addr, u64 size) const {
+        return addr <= (base + this->size) && (addr + size) >= base;
+    }
+
     bool IsFree() const noexcept {
         return type == VMAType::Free;
     }
@@ -140,6 +144,9 @@ struct VirtualMemoryArea {
         if (prot != next.prot || type != next.type) {
             return false;
         }
+        if (name.compare(next.name) != 0) {
+            return false;
+        }
 
         return true;
     }
@@ -237,7 +244,7 @@ public:
 
     PAddr Allocate(PAddr search_start, PAddr search_end, u64 size, u64 alignment, s32 memory_type);
 
-    void Free(PAddr phys_addr, u64 size);
+    s32 Free(PAddr phys_addr, u64 size, bool is_checked);
 
     s32 PoolCommit(VAddr virtual_addr, u64 size, MemoryProt prot, s32 mtype);
 
@@ -297,6 +304,11 @@ private:
                vma.type == VMAType::Pooled;
     }
 
+    std::pair<s32, MemoryManager::VMAHandle> CreateArea(VAddr virtual_addr, u64 size,
+                                                        MemoryProt prot, MemoryMapFlags flags,
+                                                        VMAType type, std::string_view name,
+                                                        u64 alignment);
+
     VAddr SearchFree(VAddr virtual_addr, u64 size, u32 alignment);
 
     VMAHandle MergeAdjacent(VMAMap& map, VMAHandle iter);

From c8b45e5ebc5b257e68d2413fd59276269564714e Mon Sep 17 00:00:00 2001
From: Stephen Miller <56742918+StevenMiller123@users.noreply.github.com>
Date: Sat, 24 Jan 2026 00:05:56 -0600
Subject: [PATCH 07/10] Core: More memory hotfixes (#3954)

* Update memory.cpp

* Fix CoalesceFreeRegions to account for address space gaps

Fixes a regression in Saints Row games.
---
 src/core/address_space.cpp | 10 ++++++----
 src/core/memory.cpp        | 13 ++-----------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/src/core/address_space.cpp b/src/core/address_space.cpp
index 965dfdc31..f4a6b640e 100644
--- a/src/core/address_space.cpp
+++ b/src/core/address_space.cpp
@@ -399,10 +399,11 @@ struct AddressSpace::Impl {
         auto it = std::prev(regions.upper_bound(virtual_addr));
         ASSERT_MSG(!it->second.is_mapped, "Cannot coalesce mapped regions");
 
-        // Check if there are free placeholders before this area.
+        // Check if there are adjacent free placeholders before this area.
         bool can_coalesce = false;
         auto it_prev = it != regions.begin() ? std::prev(it) : regions.end();
-        while (it_prev != regions.end() && !it_prev->second.is_mapped) {
+        while (it_prev != regions.end() && !it_prev->second.is_mapped &&
+               it_prev->first + it_prev->second.size == it->first) {
             // If there is an earlier region, move our iterator to that and increase size.
             it_prev->second.size = it_prev->second.size + it->second.size;
             regions.erase(it);
@@ -415,9 +416,10 @@ struct AddressSpace::Impl {
             it_prev = it != regions.begin() ? std::prev(it) : regions.end();
         }
 
-        // Check if there are free placeholders after this area.
+        // Check if there are adjacent free placeholders after this area.
         auto it_next = std::next(it);
-        while (it_next != regions.end() && !it_next->second.is_mapped) {
+        while (it_next != regions.end() && !it_next->second.is_mapped &&
+               it->first + it->second.size == it_next->first) {
             // If there is a later region, increase our current region's size
             it->second.size = it->second.size + it_next->second.size;
             regions.erase(it_next);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 0726e8711..32518907a 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -117,7 +117,6 @@ void MemoryManager::SetPrtArea(u32 id, VAddr address, u64 size) {
 }
 
 void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) {
-    std::shared_lock lk{mutex};
     ASSERT_MSG(IsValidMapping(virtual_addr), "Attempted to access invalid address {:#x}",
                virtual_addr);
 
@@ -138,7 +137,6 @@ void MemoryManager::CopySparseMemory(VAddr virtual_addr, u8* dest, u64 size) {
 
 bool MemoryManager::TryWriteBacking(void* address, const void* data, u64 size) {
     const VAddr virtual_addr = std::bit_cast<VAddr>(address);
-    std::shared_lock lk{mutex};
     ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}",
                virtual_addr);
 
@@ -701,7 +699,7 @@ s32 MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, u64 size, Memory
 }
 
 s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
-    mutex.lock();
+    std::scoped_lock lk{mutex};
     ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}",
                virtual_addr);
 
@@ -710,7 +708,6 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
     while (it != vma_map.end() && it->second.base + it->second.size <= virtual_addr + size) {
         if (it->second.type != VMAType::PoolReserved && it->second.type != VMAType::Pooled) {
             LOG_ERROR(Kernel_Vmm, "Attempting to decommit non-pooled memory!");
-            mutex.unlock();
             return ORBIS_KERNEL_ERROR_EINVAL;
         }
         it++;
@@ -728,9 +725,7 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
         if (vma_base.type == VMAType::Pooled) {
             // We always map PoolCommitted memory to GPU, so unmap when decomitting.
             if (IsValidGpuMapping(current_addr, size_in_vma)) {
-                mutex.unlock();
                 rasterizer->UnmapMemory(current_addr, size_in_vma);
-                mutex.lock();
             }
 
             // Track how much pooled memory is decommitted
@@ -781,7 +776,6 @@ s32 MemoryManager::PoolDecommit(VAddr virtual_addr, u64 size) {
     // Tracy memory tracking breaks from merging memory areas. Disabled for now.
     // TRACK_FREE(virtual_addr, "VMEM");
 
-    mutex.unlock();
     return ORBIS_OK;
 }
 
@@ -789,13 +783,12 @@ s32 MemoryManager::UnmapMemory(VAddr virtual_addr, u64 size) {
     if (size == 0) {
         return ORBIS_OK;
     }
-    mutex.lock();
+    std::scoped_lock lk{mutex};
     virtual_addr = Common::AlignDown(virtual_addr, 16_KB);
     size = Common::AlignUp(size, 16_KB);
     ASSERT_MSG(IsValidMapping(virtual_addr, size), "Attempted to access invalid address {:#x}",
                virtual_addr);
     u64 bytes_unmapped = UnmapMemoryImpl(virtual_addr, size);
-    mutex.unlock();
     return bytes_unmapped;
 }
 
@@ -873,9 +866,7 @@ u64 MemoryManager::UnmapBytesFromEntry(VAddr virtual_addr, VirtualMemoryArea vma
 
         // If this mapping has GPU access, unmap from GPU.
         if (IsValidGpuMapping(virtual_addr, size)) {
-            mutex.unlock();
             rasterizer->UnmapMemory(virtual_addr, size);
-            mutex.lock();
         }
     }
     return size_in_vma;

From fa497f6bfdce7dad711de0c46cca81222abd2ea5 Mon Sep 17 00:00:00 2001
From: georgemoralis <giorgosmrls@gmail.com>
Date: Sat, 24 Jan 2026 14:57:24 +0200
Subject: [PATCH 08/10] added new cli parser using CLI11 (#3950)

* added new cli parser using CLI11

* pff

* fixed repo

* fix game autodetection

* clear unnecessary comments

* added a check

* fixed?

* parse extras

* one more try

* readded -g

* fixed ignore_game_patches flag

* some rewrite improvements
---
 .gitmodules              |   4 +
 CMakeLists.txt           |   2 +-
 externals/CMakeLists.txt |   8 +-
 externals/ext-CLI11      |   1 +
 src/main.cpp             | 378 +++++++++++++++------------------------
 5 files changed, 158 insertions(+), 235 deletions(-)
 create mode 160000 externals/ext-CLI11

diff --git a/.gitmodules b/.gitmodules
index c0ba5e79d..82c40f4f9 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -123,3 +123,7 @@
 [submodule "externals/aacdec/fdk-aac"]
 	path = externals/aacdec/fdk-aac
 	url = https://android.googlesource.com/platform/external/aac
+[submodule "externals/ext-CLI11"]
+	path = externals/ext-CLI11
+	url = https://github.com/shadexternals/ext-CLI11.git
+	branch = main
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 929e0ebc7..5fe8ecb10 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1097,7 +1097,7 @@ create_target_directory_groups(shadps4)
 
 target_link_libraries(shadps4 PRIVATE magic_enum::magic_enum fmt::fmt toml11::toml11 tsl::robin_map xbyak::xbyak Tracy::TracyClient RenderDoc::API FFmpeg::ffmpeg Dear_ImGui gcn half::half ZLIB::ZLIB PNG::PNG)
 target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAllocator LibAtrac9 sirit Vulkan::Headers xxHash::xxhash Zydis::Zydis glslang::glslang SDL3::SDL3 SDL3_mixer::SDL3_mixer pugixml::pugixml)
-target_link_libraries(shadps4 PRIVATE stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json miniz fdk-aac)
+target_link_libraries(shadps4 PRIVATE stb::headers libusb::usb lfreist-hwinfo::hwinfo nlohmann_json::nlohmann_json miniz fdk-aac CLI11::CLI11)
 
 target_compile_definitions(shadps4 PRIVATE IMGUI_USER_CONFIG="imgui/imgui_config.h")
 target_compile_definitions(Dear_ImGui PRIVATE IMGUI_USER_CONFIG="${PROJECT_SOURCE_DIR}/src/imgui/imgui_config.h")
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 8e96f9bec..f20310a91 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
+# SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project
 # SPDX-License-Identifier: GPL-2.0-or-later
 
 set(BUILD_SHARED_LIBS OFF)
@@ -268,3 +268,9 @@ add_subdirectory(json)
 
 # miniz
 add_subdirectory(miniz)
+
+# cli11
+set(CLI11_BUILD_TESTS OFF CACHE BOOL "" FORCE)
+set(CLI11_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
+
+add_subdirectory(ext-CLI11)
\ No newline at end of file
diff --git a/externals/ext-CLI11 b/externals/ext-CLI11
new file mode 160000
index 000000000..1cce14833
--- /dev/null
+++ b/externals/ext-CLI11
@@ -0,0 +1 @@
+Subproject commit 1cce1483345e60997b87720948c37d6a34db2658
diff --git a/src/main.cpp b/src/main.cpp
index b09ea7f4d..9b263e250 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,16 +1,17 @@
 // SPDX-FileCopyrightText: Copyright 2025-2026 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <filesystem>
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <vector>
+#include <CLI/CLI.hpp>
 #include <SDL3/SDL_messagebox.h>
-#include "functional"
-#include "iostream"
-#include "string"
-#include "system_error"
-#include "unordered_map"
 
 #include <core/emulator_state.h>
-#include <fmt/core.h>
 #include "common/config.h"
+#include "common/key_manager.h"
 #include "common/logging/backend.h"
 #include "common/memory_patcher.h"
 #include "common/path_util.h"
@@ -22,265 +23,176 @@
 #ifdef _WIN32
 #include <windows.h>
 #endif
-#include <common/key_manager.h>
 
 int main(int argc, char* argv[]) {
 #ifdef _WIN32
     SetConsoleOutputCP(CP_UTF8);
 #endif
+
     IPC::Instance().Init();
-    // Init emulator state
-    std::shared_ptr<EmulatorState> m_emu_state = std::make_shared<EmulatorState>();
-    EmulatorState::SetInstance(m_emu_state);
-    // Load configurations
+
+    auto emu_state = std::make_shared<EmulatorState>();
+    EmulatorState::SetInstance(emu_state);
+
     const auto user_dir = Common::FS::GetUserPath(Common::FS::PathType::UserDir);
     Config::load(user_dir / "config.toml");
-    // temp copy the trophy key from old config to key manager if exists
+
+    // ---- Trophy key migration ----
     auto key_manager = KeyManager::GetInstance();
-    if (key_manager->GetAllKeys().TrophyKeySet.ReleaseTrophyKey.empty()) {
-        if (!Config::getTrophyKey().empty()) {
-
-            key_manager->SetAllKeys(
-                {.TrophyKeySet = {.ReleaseTrophyKey =
-                                      KeyManager::HexStringToBytes(Config::getTrophyKey())}});
-            key_manager->SaveToFile();
-        }
+    if (key_manager->GetAllKeys().TrophyKeySet.ReleaseTrophyKey.empty() &&
+        !Config::getTrophyKey().empty()) {
+        key_manager->SetAllKeys({.TrophyKeySet = {.ReleaseTrophyKey = KeyManager::HexStringToBytes(
+                                                      Config::getTrophyKey())}});
+        key_manager->SaveToFile();
     }
-    bool has_game_argument = false;
-    std::string game_path;
-    std::vector<std::string> game_args{};
-    std::optional<std::filesystem::path> game_folder;
 
-    bool waitForDebugger = false;
+    CLI::App app{"shadPS4 Emulator CLI"};
+
+    // ---- CLI state ----
+    std::optional<std::string> gamePath;
+    std::vector<std::string> gameArgs;
+    std::optional<std::filesystem::path> overrideRoot;
     std::optional<int> waitPid;
+    bool waitForDebugger = false;
 
-    // Map of argument strings to lambda functions
-    std::unordered_map<std::string, std::function<void(int&)>> arg_map = {
-        {"-h",
-         [&](int&) {
-             std::cout
-                 << "Usage: shadps4 [options] <elf or eboot.bin path>\n"
-                    "Options:\n"
-                    "  -g, --game <path|ID>          Specify game path to launch\n"
-                    " -- ...                         Parameters passed to the game ELF. "
-                    "Needs to be at the end of the line, and everything after \"--\" is a "
-                    "game argument.\n"
-                    "  -p, --patch <patch_file>      Apply specified patch file\n"
-                    "  -i, --ignore-game-patch       Disable automatic loading of game patch\n"
-                    "  -f, --fullscreen <true|false> Specify window initial fullscreen "
-                    "state. Does not overwrite the config file.\n"
-                    "  --add-game-folder <folder>    Adds a new game folder to the config.\n"
-                    "  --set-addon-folder <folder>   Sets the addon folder to the config.\n"
-                    "  --log-append                  Append log output to file instead of "
-                    "overwriting it.\n"
-                    "  --override-root <folder>      Override the game root folder. Default is the "
-                    "parent of game path\n"
-                    "  --wait-for-debugger           Wait for debugger to attach\n"
-                    "  --wait-for-pid <pid>          Wait for process with specified PID to stop\n"
-                    "  --config-clean                Run the emulator with the default config "
-                    "values, ignores the config file(s) entirely.\n"
-                    "  --config-global               Run the emulator with the base config file "
-                    "only, ignores game specific configs.\n"
-                    "  --show-fps                    Enable FPS counter display at startup\n"
-                    "  -h, --help                    Display this help message\n";
-             exit(0);
-         }},
-        {"--help", [&](int& i) { arg_map["-h"](i); }},
+    std::optional<std::string> fullscreenStr;
+    bool ignoreGamePatch = false;
+    bool showFps = false;
+    bool configClean = false;
+    bool configGlobal = false;
+    bool logAppend = false;
 
-        {"-g",
-         [&](int& i) {
-             if (i + 1 < argc) {
-                 game_path = argv[++i];
-                 has_game_argument = true;
-             } else {
-                 std::cerr << "Error: Missing argument for -g/--game\n";
-                 exit(1);
-             }
-         }},
-        {"--game", [&](int& i) { arg_map["-g"](i); }},
+    std::optional<std::filesystem::path> addGameFolder;
+    std::optional<std::filesystem::path> setAddonFolder;
+    std::optional<std::string> patchFile;
 
-        {"-p",
-         [&](int& i) {
-             if (i + 1 < argc) {
-                 MemoryPatcher::patch_file = argv[++i];
-             } else {
-                 std::cerr << "Error: Missing argument for -p/--patch\n";
-                 exit(1);
-             }
-         }},
-        {"--patch", [&](int& i) { arg_map["-p"](i); }},
+    // ---- Options ----
+    app.add_option("-g,--game", gamePath, "Game path or ID");
+    app.add_option("-p,--patch", patchFile, "Patch file to apply");
+    app.add_flag("-i,--ignore-game-patch", ignoreGamePatch,
+                 "Disable automatic loading of game patches");
 
-        {"-i", [&](int&) { Core::FileSys::MntPoints::ignore_game_patches = true; }},
-        {"--ignore-game-patch", [&](int& i) { arg_map["-i"](i); }},
-        {"-f",
-         [&](int& i) {
-             if (++i >= argc) {
-                 std::cerr << "Error: Missing argument for -f/--fullscreen\n";
-                 exit(1);
-             }
-             std::string f_param(argv[i]);
-             bool is_fullscreen;
-             if (f_param == "true") {
-                 is_fullscreen = true;
-             } else if (f_param == "false") {
-                 is_fullscreen = false;
-             } else {
-                 std::cerr
-                     << "Error: Invalid argument for -f/--fullscreen. Use 'true' or 'false'.\n";
-                 exit(1);
-             }
-             // Set fullscreen mode without saving it to config file
-             Config::setIsFullscreen(is_fullscreen);
-         }},
-        {"--fullscreen", [&](int& i) { arg_map["-f"](i); }},
-        {"--add-game-folder",
-         [&](int& i) {
-             if (++i >= argc) {
-                 std::cerr << "Error: Missing argument for --add-game-folder\n";
-                 exit(1);
-             }
-             std::string config_dir(argv[i]);
-             std::filesystem::path config_path = std::filesystem::path(config_dir);
-             std::error_code discard;
-             if (!std::filesystem::exists(config_path, discard)) {
-                 std::cerr << "Error: File does not exist: " << config_path << "\n";
-                 exit(1);
-             }
+    // Fullscreen: taken as a string here and validated (true|false) once parsing succeeds
+    app.add_option("-f,--fullscreen", fullscreenStr, "Fullscreen mode (true|false)");
 
-             Config::addGameInstallDir(config_path);
-             Config::save(Common::FS::GetUserPath(Common::FS::PathType::UserDir) / "config.toml");
-             std::cout << "Game folder successfully saved.\n";
-             exit(0);
-         }},
-        {"--set-addon-folder",
-         [&](int& i) {
-             if (++i >= argc) {
-                 std::cerr << "Error: Missing argument for --add-addon-folder\n";
-                 exit(1);
-             }
-             std::string config_dir(argv[i]);
-             std::filesystem::path config_path = std::filesystem::path(config_dir);
-             std::error_code discard;
-             if (!std::filesystem::exists(config_path, discard)) {
-                 std::cerr << "Error: File does not exist: " << config_path << "\n";
-                 exit(1);
-             }
+    app.add_option("--override-root", overrideRoot)->check(CLI::ExistingDirectory);
 
-             Config::setAddonInstallDir(config_path);
-             Config::save(Common::FS::GetUserPath(Common::FS::PathType::UserDir) / "config.toml");
-             std::cout << "Addon folder successfully saved.\n";
-             exit(0);
-         }},
-        {"--log-append", [&](int& i) { Common::Log::SetAppend(); }},
-        {"--config-clean", [&](int& i) { Config::setConfigMode(Config::ConfigMode::Clean); }},
-        {"--config-global", [&](int& i) { Config::setConfigMode(Config::ConfigMode::Global); }},
-        {"--override-root",
-         [&](int& i) {
-             if (++i >= argc) {
-                 std::cerr << "Error: Missing argument for --override-root\n";
-                 exit(1);
-             }
-             std::string folder_str{argv[i]};
-             std::filesystem::path folder{folder_str};
-             if (!std::filesystem::exists(folder) || !std::filesystem::is_directory(folder)) {
-                 std::cerr << "Error: Folder does not exist: " << folder_str << "\n";
-                 exit(1);
-             }
-             game_folder = folder;
-         }},
-        {"--wait-for-debugger", [&](int& i) { waitForDebugger = true; }},
-        {"--wait-for-pid",
-         [&](int& i) {
-             if (++i >= argc) {
-                 std::cerr << "Error: Missing argument for --wait-for-pid\n";
-                 exit(1);
-             }
-             waitPid = std::stoi(argv[i]);
-         }},
-        {"--show-fps", [&](int& i) { Config::setShowFpsCounter(true); }}};
+    app.add_flag("--wait-for-debugger", waitForDebugger);
+    app.add_option("--wait-for-pid", waitPid);
 
+    app.add_flag("--show-fps", showFps);
+    app.add_flag("--config-clean", configClean);
+    app.add_flag("--config-global", configGlobal);
+    app.add_flag("--log-append", logAppend);
+
+    app.add_option("--add-game-folder", addGameFolder)->check(CLI::ExistingDirectory);
+    app.add_option("--set-addon-folder", setAddonFolder)->check(CLI::ExistingDirectory);
+
+    // ---- Capture leftover args (e.g. a bare game path, or anything after `--`) ----
+    app.allow_extras();
+    app.parse_complete_callback([&]() {
+        const auto& extras = app.remaining();
+        if (!extras.empty()) {
+            gameArgs = extras;
+        }
+    });
+
+    // ---- No-args behavior ----
     if (argc == 1) {
-        if (!SDL_ShowSimpleMessageBox(
-                SDL_MESSAGEBOX_INFORMATION, "shadPS4",
-                "This is a CLI application. Please use the QTLauncher for a GUI: "
-                "https://github.com/shadps4-emu/shadps4-qtlauncher/releases",
-                nullptr))
-            std::cerr << "Could not display SDL message box! Error: " << SDL_GetError() << "\n";
-        int dummy = 0; // one does not simply pass 0 directly
-        arg_map.at("-h")(dummy);
+        SDL_ShowSimpleMessageBox(SDL_MESSAGEBOX_INFORMATION, "shadPS4",
+                                 "This is a CLI application. Please use the QTLauncher for a GUI:\n"
+                                 "https://github.com/shadps4-emu/shadps4-qtlauncher/releases",
+                                 nullptr);
+        std::cout << app.help();
         return -1;
     }
 
-    // Parse command-line arguments using the map
-    for (int i = 1; i < argc; ++i) {
-        std::string cur_arg = argv[i];
-        auto it = arg_map.find(cur_arg);
-        if (it != arg_map.end()) {
-            it->second(i); // Call the associated lambda function
-        } else if (i == argc - 1 && !has_game_argument) {
-            // Assume the last argument is the game file if not specified via -g/--game
-            game_path = argv[i];
-            has_game_argument = true;
-        } else if (std::string(argv[i]) == "--") {
-            if (i + 1 == argc) {
-                std::cerr << "Warning: -- is set, but no game arguments are added!\n";
-                break;
-            }
-            for (int j = i + 1; j < argc; j++) {
-                game_args.push_back(argv[j]);
-            }
-            break;
-        } else if (i + 1 < argc && std::string(argv[i + 1]) == "--") {
-            if (!has_game_argument) {
-                game_path = argv[i];
-                has_game_argument = true;
-            }
+    try {
+        app.parse(argc, argv);
+    } catch (const CLI::ParseError& e) {
+        return app.exit(e);
+    }
+
+    // ---- Utility commands ----
+    if (addGameFolder) {
+        Config::addGameInstallDir(*addGameFolder);
+        Config::save(user_dir / "config.toml");
+        std::cout << "Game folder successfully saved.\n";
+        return 0;
+    }
+
+    if (setAddonFolder) {
+        Config::setAddonInstallDir(*setAddonFolder);
+        Config::save(user_dir / "config.toml");
+        std::cout << "Addon folder successfully saved.\n";
+        return 0;
+    }
+
+    if (!gamePath.has_value()) {
+        if (!gameArgs.empty()) {
+            gamePath = gameArgs.front();
+            gameArgs.erase(gameArgs.begin());
         } else {
-            std::cerr << "Unknown argument: " << cur_arg << ", see --help for info.\n";
-        }
-    }
-
-    // If no game directory is set and no command line argument, prompt for it
-    if (Config::getGameInstallDirs().empty()) {
-        std::cerr << "Warning: No game folder set, please set it by calling shadps4"
-                     " with the --add-game-folder <folder_name> argument\n";
-    }
-
-    if (!has_game_argument) {
-        std::cerr << "Error: Please provide a game path or ID.\n";
-        exit(1);
-    }
-
-    // Check if the game path or ID exists
-    std::filesystem::path eboot_path(game_path);
-
-    // Check if the provided path is a valid file
-    if (!std::filesystem::exists(eboot_path)) {
-        // If not a file, treat it as a game ID and search in install directories recursively
-        bool game_found = false;
-        const int max_depth = 5;
-        for (const auto& install_dir : Config::getGameInstallDirs()) {
-            if (auto found_path = Common::FS::FindGameByID(install_dir, game_path, max_depth)) {
-                eboot_path = *found_path;
-                game_found = true;
-                break;
-            }
-        }
-        if (!game_found) {
-            std::cerr << "Error: Game ID or file path not found: " << game_path << std::endl;
+            std::cerr << "Error: Please provide a game path or ID.\n";
             return 1;
         }
     }
 
-    if (waitPid.has_value()) {
-        Core::Debugger::WaitForPid(waitPid.value());
+    // ---- Apply flags ----
+    if (patchFile)
+        MemoryPatcher::patch_file = *patchFile;
+
+    if (ignoreGamePatch)
+        Core::FileSys::MntPoints::ignore_game_patches = true;
+
+    if (fullscreenStr) {
+        if (*fullscreenStr == "true") {
+            Config::setIsFullscreen(true);
+        } else if (*fullscreenStr == "false") {
+            Config::setIsFullscreen(false);
+        } else {
+            std::cerr << "Error: Invalid argument for --fullscreen (use true|false)\n";
+            return 1;
+        }
     }
 
-    // Run the emulator with the resolved eboot path
-    Core::Emulator* emulator = Common::Singleton<Core::Emulator>::Instance();
+    if (showFps)
+        Config::setShowFpsCounter(true);
+
+    if (configClean)
+        Config::setConfigMode(Config::ConfigMode::Clean);
+
+    if (configGlobal)
+        Config::setConfigMode(Config::ConfigMode::Global);
+
+    if (logAppend)
+        Common::Log::SetAppend();
+
+    // ---- Resolve game path or ID ----
+    std::filesystem::path ebootPath(*gamePath);
+    if (!std::filesystem::exists(ebootPath)) {
+        bool found = false;
+        constexpr int maxDepth = 5;
+        for (const auto& installDir : Config::getGameInstallDirs()) {
+            if (auto foundPath = Common::FS::FindGameByID(installDir, *gamePath, maxDepth)) {
+                ebootPath = *foundPath;
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            std::cerr << "Error: Game ID or file path not found: " << *gamePath << "\n";
+            return 1;
+        }
+    }
+
+    if (waitPid)
+        Core::Debugger::WaitForPid(*waitPid);
+
+    auto* emulator = Common::Singleton<Core::Emulator>::Instance();
     emulator->executableName = argv[0];
     emulator->waitForDebuggerBeforeRun = waitForDebugger;
-    emulator->Run(eboot_path, game_args, game_folder);
+    emulator->Run(ebootPath, gameArgs, overrideRoot);
 
     return 0;
 }

From 1e99c4b5066f9737612d89b3a5c1df1f59fc79f2 Mon Sep 17 00:00:00 2001
From: psucien <168137814+psucien@users.noreply.github.com>
Date: Mon, 26 Jan 2026 21:17:51 +0100
Subject: [PATCH 09/10] shader_recompiler: VS clip distance emulation for
 NVIDIA GPUs (#3958)

---
 CMakeLists.txt                                |  1 +
 externals/CMakeLists.txt                      |  1 +
 .../backend/spirv/spirv_emit_context.cpp      | 51 +++++++++++++++----
 src/shader_recompiler/ir/attribute.cpp        |  2 +-
 .../inject_clip_distance_attributes.cpp       | 41 +++++++++++++++
 src/shader_recompiler/ir/passes/ir_passes.h   |  3 +-
 src/shader_recompiler/profile.h               |  2 +-
 src/shader_recompiler/recompiler.cpp          | 17 ++++---
 src/shader_recompiler/runtime_info.h          |  7 ++-
 .../renderer_vulkan/vk_pipeline_cache.cpp     | 15 +++++-
 10 files changed, 116 insertions(+), 24 deletions(-)
 create mode 100644 src/shader_recompiler/ir/passes/inject_clip_distance_attributes.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5fe8ecb10..8c81c7550 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -916,6 +916,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/profile.h
                       src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
                       src/shader_recompiler/ir/passes/hull_shader_transform.cpp
                       src/shader_recompiler/ir/passes/identity_removal_pass.cpp
+                      src/shader_recompiler/ir/passes/inject_clip_distance_attributes.cpp
                       src/shader_recompiler/ir/passes/ir_passes.h
                       src/shader_recompiler/ir/passes/lower_buffer_format_to_raw.cpp
                       src/shader_recompiler/ir/passes/lower_fp64_to_fp32.cpp
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index f20310a91..e243f63db 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -204,6 +204,7 @@ add_subdirectory(tracy)
 
 # pugixml
 if (NOT TARGET pugixml::pugixml)
+    option(PUGIXML_NO_EXCEPTIONS "" ON)
     add_subdirectory(pugixml)
 endif()
 
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index cc6d19075..4600d30af 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -364,7 +364,7 @@ void EmitContext::DefineInputs() {
         }
         break;
     }
-    case LogicalStage::Fragment:
+    case LogicalStage::Fragment: {
         if (info.loads.GetAny(IR::Attribute::FragCoord)) {
             frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
         }
@@ -418,7 +418,13 @@ void EmitContext::DefineInputs() {
                                                     spv::StorageClass::Input);
             }
         }
-        for (s32 i = 0; i < runtime_info.fs_info.num_inputs; i++) {
+
+        const bool has_clip_distance_inputs = runtime_info.fs_info.clip_distance_emulation;
+        // Clip distances attribute vector is the last in inputs array
+        const auto num_inputs =
+            runtime_info.fs_info.num_inputs - (has_clip_distance_inputs ? 1 : 0);
+
+        for (s32 i = 0; i < num_inputs; i++) {
             const auto& input = runtime_info.fs_info.inputs[i];
             if (input.IsDefault()) {
                 continue;
@@ -428,12 +434,13 @@ void EmitContext::DefineInputs() {
             const auto [primary, auxiliary] = info.fs_interpolation[i];
             const Id type = F32[num_components];
             const Id attr_id = [&] {
+                const auto bind_location = input.param_index + (has_clip_distance_inputs ? 1 : 0);
                 if (primary == Qualifier::PerVertex &&
                     profile.supports_fragment_shader_barycentric) {
-                    return Name(DefineInput(TypeArray(type, ConstU32(3U)), input.param_index),
+                    return Name(DefineInput(TypeArray(type, ConstU32(3U)), bind_location),
                                 fmt::format("fs_in_attr{}_p", i));
                 }
-                return Name(DefineInput(type, input.param_index), fmt::format("fs_in_attr{}", i));
+                return Name(DefineInput(type, bind_location), fmt::format("fs_in_attr{}", i));
             }();
             if (primary == Qualifier::PerVertex) {
                 Decorate(attr_id, profile.supports_amd_shader_explicit_vertex_parameter
@@ -450,7 +457,15 @@ void EmitContext::DefineInputs() {
             input_params[i] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id, num_components,
                                                false, false, primary == Qualifier::PerVertex);
         }
+
+        if (has_clip_distance_inputs) {
+            const auto type = F32[MaxEmulatedClipDistances];
+            const auto attr_id = Name(DefineInput(type, 0), fmt::format("cldist_attr{}", 0));
+            input_params[num_inputs] = GetAttributeInfo(AmdGpu::NumberFormat::Float, attr_id,
+                                                        MaxEmulatedClipDistances, false);
+        }
         break;
+    }
     case LogicalStage::Compute:
         if (info.loads.GetAny(IR::Attribute::WorkgroupIndex) ||
             info.loads.GetAny(IR::Attribute::WorkgroupId)) {
@@ -546,11 +561,16 @@ void EmitContext::DefineVertexBlock() {
     const std::array<Id, 8> zero{f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value,
                                  f32_zero_value, f32_zero_value, f32_zero_value, f32_zero_value};
     output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
-    if (info.stores.GetAny(IR::Attribute::ClipDistance)) {
-        const Id type{TypeArray(F32[1], ConstU32(8U))};
-        const Id initializer{ConstantComposite(type, zero)};
-        clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output,
-                                        initializer);
+    const bool needs_clip_distance_emulation = l_stage == LogicalStage::Vertex &&
+                                               stage == Stage::Vertex &&
+                                               profile.needs_clip_distance_emulation;
+    if (!needs_clip_distance_emulation) {
+        if (info.stores.GetAny(IR::Attribute::ClipDistance)) {
+            const Id type{TypeArray(F32[1], ConstU32(8U))};
+            const Id initializer{ConstantComposite(type, zero)};
+            clip_distances = DefineVariable(type, spv::BuiltIn::ClipDistance,
+                                            spv::StorageClass::Output, initializer);
+        }
     }
     if (info.stores.GetAny(IR::Attribute::CullDistance)) {
         const Id type{TypeArray(F32[1], ConstU32(8U))};
@@ -583,16 +603,27 @@ void EmitContext::DefineOutputs() {
                 Name(output_attr_array, "out_attrs");
             }
         } else {
+            const auto has_clip_distance_outputs = info.stores.GetAny(IR::Attribute::ClipDistance);
+            u32 num_attrs = 0u;
             for (u32 i = 0; i < IR::NumParams; i++) {
                 const IR::Attribute param{IR::Attribute::Param0 + i};
                 if (!info.stores.GetAny(param)) {
                     continue;
                 }
                 const u32 num_components = info.stores.NumComponents(param);
-                const Id id{DefineOutput(F32[num_components], i)};
+                const Id id{
+                    DefineOutput(F32[num_components], i + (has_clip_distance_outputs ? 1 : 0))};
                 Name(id, fmt::format("out_attr{}", i));
                 output_params[i] =
                     GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
+                ++num_attrs;
+            }
+
+            if (has_clip_distance_outputs) {
+                clip_distances = Id{DefineOutput(F32[MaxEmulatedClipDistances], 0)};
+                output_params[num_attrs] = GetAttributeInfo(
+                    AmdGpu::NumberFormat::Float, clip_distances, MaxEmulatedClipDistances, true);
+                Name(clip_distances, fmt::format("cldist_attr{}", 0));
             }
         }
         break;
diff --git a/src/shader_recompiler/ir/attribute.cpp b/src/shader_recompiler/ir/attribute.cpp
index 84a9fafeb..e74b62817 100644
--- a/src/shader_recompiler/ir/attribute.cpp
+++ b/src/shader_recompiler/ir/attribute.cpp
@@ -101,7 +101,7 @@ std::string NameOf(Attribute attribute) {
     case Attribute::Param31:
         return "Param31";
     case Attribute::ClipDistance:
-        return "ClipDistanace";
+        return "ClipDistance";
     case Attribute::CullDistance:
         return "CullDistance";
     case Attribute::RenderTargetIndex:
diff --git a/src/shader_recompiler/ir/passes/inject_clip_distance_attributes.cpp b/src/shader_recompiler/ir/passes/inject_clip_distance_attributes.cpp
new file mode 100644
index 000000000..cf93142a1
--- /dev/null
+++ b/src/shader_recompiler/ir/passes/inject_clip_distance_attributes.cpp
@@ -0,0 +1,41 @@
+// SPDX-FileCopyrightText: Copyright 2026 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/info.h"
+#include "shader_recompiler/ir/basic_block.h"
+#include "shader_recompiler/ir/ir_emitter.h"
+#include "shader_recompiler/ir/program.h"
+
+namespace Shader {
+
+void InjectClipDistanceAttributes(IR::Program& program, RuntimeInfo& runtime_info) {
+    auto& info = runtime_info.fs_info;
+
+    if (!info.clip_distance_emulation || program.info.l_stage != LogicalStage::Fragment) {
+        return; // nothing to inject: emulation is off or this is not a fragment program
+    }
+
+    auto* first_block = *program.blocks.begin();
+    auto it = std::ranges::find_if(first_block->Instructions(), [](const IR::Inst& inst) {
+        return inst.GetOpcode() == IR::Opcode::Prologue;
+    });
+    ASSERT(it != first_block->end());
+    ++it; // step past the Prologue instruction
+    ASSERT(it != first_block->end());
+    ++it; // ...and past the one instruction that follows it
+
+    IR::IREmitter ir{*first_block, it};
+
+    // The VS has not been processed yet at this point, so the number of clip distances it exports
+    // is unknown. Assume there are at most MaxEmulatedClipDistances (4) of them, although the
+    // maximum is 8, so that they fit into a single attribute export slot.
+    const auto attrib = IR::Attribute::Param0 + info.num_inputs;
+    for (u32 comp = 0; comp < MaxEmulatedClipDistances; ++comp) {
+        const auto attr_read = ir.GetAttribute(attrib, comp);
+        const auto cond_id = ir.FPLessThan(attr_read, ir.Imm32(0.0f));
+        ir.Discard(cond_id); // discard when this clip distance component is below zero
+    }
+    ++info.num_inputs; // the clip-distance vector read above now counts as one more input
+}
+
+} // namespace Shader
diff --git a/src/shader_recompiler/ir/passes/ir_passes.h b/src/shader_recompiler/ir/passes/ir_passes.h
index 5bf362284..f103b6736 100644
--- a/src/shader_recompiler/ir/passes/ir_passes.h
+++ b/src/shader_recompiler/ir/passes/ir_passes.h
@@ -8,7 +8,8 @@
 
 namespace Shader {
 struct Profile;
-}
+void InjectClipDistanceAttributes(IR::Program& program, RuntimeInfo& runtime_info);
+} // namespace Shader
 
 namespace Shader::Optimization {
 
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 52e37bbf0..038a80733 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -41,7 +41,7 @@ struct Profile {
     bool needs_lds_barriers{};
     bool needs_buffer_offsets{};
     bool needs_unorm_fixup{};
-    bool _pad0{};
+    bool needs_clip_distance_emulation{};
 };
 
 } // namespace Shader
diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp
index 4764ddbec..f4fa45afc 100644
--- a/src/shader_recompiler/recompiler.cpp
+++ b/src/shader_recompiler/recompiler.cpp
@@ -13,17 +13,16 @@ namespace Shader {
 
 IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
     size_t num_syntax_blocks{};
-    for (const auto& node : syntax_list) {
-        if (node.type == IR::AbstractSyntaxNode::Type::Block) {
+    for (const auto& [_, type] : syntax_list) {
+        if (type == IR::AbstractSyntaxNode::Type::Block) {
             ++num_syntax_blocks;
         }
     }
-    IR::BlockList blocks;
+    IR::BlockList blocks{};
     blocks.reserve(num_syntax_blocks);
-    u32 order_index{};
-    for (const auto& node : syntax_list) {
-        if (node.type == IR::AbstractSyntaxNode::Type::Block) {
-            blocks.push_back(node.data.block);
+    for (const auto& [data, type] : syntax_list) {
+        if (type == IR::AbstractSyntaxNode::Type::Block) {
+            blocks.push_back(data.block);
         }
     }
     return blocks;
@@ -60,6 +59,10 @@ IR::Program TranslateProgram(const std::span<const u32>& code, Pools& pools, Inf
     program.blocks = GenerateBlocks(program.syntax_list);
     program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
 
+    // On NVIDIA GPUs HW interpolation of clip distance values seems broken, and we need to emulate
+    // it with expensive discard in PS.
+    Shader::InjectClipDistanceAttributes(program, runtime_info);
+
     // Run optimization passes
     if (!profile.support_float64) {
         Shader::Optimization::LowerFp64ToFp32(program);
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 8620ab970..04e176765 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -34,6 +34,7 @@ enum class LogicalStage : u32 {
 };
 
 constexpr u32 MaxStageTypes = static_cast<u32>(LogicalStage::NumLogicalStages);
+constexpr auto MaxEmulatedClipDistances = 4u;
 
 constexpr Stage StageFromIndex(size_t index) noexcept {
     return static_cast<Stage>(index);
@@ -201,14 +202,16 @@ struct FragmentRuntimeInfo {
     std::array<PsInput, 32> inputs;
     std::array<PsColorBuffer, MaxColorBuffers> color_buffers;
     AmdGpu::ShaderExportFormat z_export_format;
-    u8 mrtz_mask;
-    bool dual_source_blending;
+    u8 mrtz_mask{};
+    bool dual_source_blending{false};
+    bool clip_distance_emulation{false};
 
     bool operator==(const FragmentRuntimeInfo& other) const noexcept {
         return std::ranges::equal(color_buffers, other.color_buffers) &&
                en_flags == other.en_flags && addr_flags == other.addr_flags &&
                num_inputs == other.num_inputs && z_export_format == other.z_export_format &&
                mrtz_mask == other.mrtz_mask && dual_source_blending == other.dual_source_blending &&
+               clip_distance_emulation == other.clip_distance_emulation &&
                std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
                                   other.inputs.begin() + num_inputs);
     }
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a0ea58817..1b0af1d17 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -101,7 +101,7 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
     switch (stage) {
     case Stage::Local: {
         BuildCommon(regs.ls_program);
-        Shader::TessellationDataConstantBuffer tess_constants;
+        Shader::TessellationDataConstantBuffer tess_constants{};
         const auto* hull_info = infos[u32(Shader::LogicalStage::TessellationControl)];
         hull_info->ReadTessConstantBuffer(tess_constants);
         info.ls_info.ls_stride = tess_constants.ls_stride;
@@ -199,6 +199,10 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
         for (u32 i = 0; i < Shader::MaxColorBuffers; i++) {
             info.fs_info.color_buffers[i] = graphics_key.color_buffers[i];
         }
+        info.fs_info.clip_distance_emulation =
+            regs.vs_output_control.clip_distance_enable &&
+            !regs.stage_enable.IsStageEnabled(static_cast<u32>(Stage::Local)) &&
+            profile.needs_clip_distance_emulation;
         break;
     }
     case Stage::Compute: {
@@ -266,6 +270,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
                               instance.GetDriverID() == vk::DriverId::eMoltenvk,
         .needs_buffer_offsets = instance.StorageMinAlignment() > 4,
         .needs_unorm_fixup = instance.GetDriverID() == vk::DriverId::eMoltenvk,
+        .needs_clip_distance_emulation = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
     };
 
     WarmUp();
@@ -460,7 +465,13 @@ bool PipelineCache::RefreshGraphicsStages() {
 
     infos.fill(nullptr);
     modules.fill(nullptr);
-    bind_stage(Stage::Fragment, LogicalStage::Fragment);
+    const auto result = bind_stage(Stage::Fragment, LogicalStage::Fragment);
+    if (!result && regs.vs_output_control.clip_distance_enable &&
+        profile.needs_clip_distance_emulation) {
+        // TODO: need to implement a discard only fallback shader
+        LOG_WARNING(Render_Vulkan,
+                    "Clip distance emulation is ineffective due to absense of fragment shader");
+    }
 
     const auto* fs_info = infos[static_cast<u32>(LogicalStage::Fragment)];
     key.mrt_mask = fs_info ? fs_info->mrt_mask : 0u;

From 514e3634722c35bf4b1ae0044a65d23752c18962 Mon Sep 17 00:00:00 2001
From: Berk <parantezprojects@gmail.com>
Date: Tue, 27 Jan 2026 11:09:32 +0300
Subject: [PATCH 10/10] Add Docker build support and documentation (#3960)

* Docker builder support

* update licenses

* oops I forgot change this description
---
 README.md                                     |  7 +-
 .../.devcontainer/devcontainer.json           | 45 +++++++++
 documents/Docker Builder/.docker/Dockerfile   | 38 ++++++++
 documents/Docker Builder/docker-compose.yml   | 10 ++
 documents/building-docker.md                  | 91 +++++++++++++++++++
 5 files changed, 190 insertions(+), 1 deletion(-)
 create mode 100644 documents/Docker Builder/.devcontainer/devcontainer.json
 create mode 100644 documents/Docker Builder/.docker/Dockerfile
 create mode 100644 documents/Docker Builder/docker-compose.yml
 create mode 100644 documents/building-docker.md

diff --git a/README.md b/README.md
index e43a2408d..0fb5c26ed 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 <!--
-SPDX-FileCopyrightText: 2024 shadPS4 Emulator Project
+SPDX-FileCopyrightText: 2026 shadPS4 Emulator Project
 SPDX-License-Identifier: GPL-2.0-or-later
 -->
 
@@ -58,6 +58,11 @@ This project began for fun. Given our limited free time, it may take some time b
 
 # Building
 
+## Docker
+
+For building shadPS4 in a containerized environment using Docker and VSCode, check the instructions here:  
+[**Docker Build Instructions**](https://github.com/shadps4-emu/shadPS4/blob/main/documents/building-docker.md)
+
 ## Windows
 
 Check the build instructions for [**Windows**](https://github.com/shadps4-emu/shadPS4/blob/main/documents/building-windows.md).
diff --git a/documents/Docker Builder/.devcontainer/devcontainer.json b/documents/Docker Builder/.devcontainer/devcontainer.json
new file mode 100644
index 000000000..32e301bd9
--- /dev/null
+++ b/documents/Docker Builder/.devcontainer/devcontainer.json	
@@ -0,0 +1,45 @@
+// SPDX-FileCopyrightText: 2026 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+{
+    "name": "shadPS4-dev",
+    "dockerComposeFile": [
+        "../docker-compose.yml"
+    ],
+    "containerEnv": {
+        "GITHUB_TOKEN": "${localEnv:GITHUB_TOKEN}",
+        "GITHUB_USER": "${localEnv:GITHUB_USER}"
+    },
+    "service": "shadps4",
+    "workspaceFolder": "/workspaces/shadPS4",
+    "remoteUser": "root",
+    "shutdownAction": "none",
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "llvm-vs-code-extensions.vscode-clangd"
+            ],
+            "settings": {
+                "C_Cpp.intelliSenseEngine": "disabled",
+                "clangd.arguments": [
+                    "--background-index",
+                    "--clang-tidy",
+                    "--completion-style=detailed",
+                    "--header-insertion=never"
+                ]
+            }
+        }
+    },
+    "settings": {
+        "cmake.configureOnOpen": false,
+        "cmake.generator": "Unix Makefiles",
+        "cmake.environment": {
+            "CC": "clang",
+            "CXX": "clang++"
+        },
+        "cmake.configureSettings": {
+            "CMAKE_CXX_STANDARD": "23",
+            "CMAKE_CXX_STANDARD_REQUIRED": "ON"
+        }
+    }
+}
\ No newline at end of file
diff --git a/documents/Docker Builder/.docker/Dockerfile b/documents/Docker Builder/.docker/Dockerfile
new file mode 100644
index 000000000..285144374
--- /dev/null
+++ b/documents/Docker Builder/.docker/Dockerfile	
@@ -0,0 +1,38 @@
+# SPDX-FileCopyrightText: 2026 shadPS4 Emulator Project
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+FROM ubuntu:24.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    clang \
+    git \
+    ca-certificates \
+    wget \
+    libasound2-dev \
+    libpulse-dev \
+    libopenal-dev \
+    libssl-dev \
+    zlib1g-dev \
+    libedit-dev \
+    libudev-dev \
+    libevdev-dev \
+    libsdl2-dev \
+    libjack-dev \
+    libsndio-dev \
+    libxtst-dev \
+    libvulkan-dev \
+    vulkan-validationlayers \
+    libpng-dev \
+    clang-tidy \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN wget -qO - https://apt.kitware.com/keys/kitware-archive-latest.asc | gpg --dearmor -o /usr/share/keyrings/kitware-archive-keyring.gpg \
+    && echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ noble main" > /etc/apt/sources.list.d/kitware.list \
+    && apt-get update \
+    && apt-get install -y cmake \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspaces/shadPS4
\ No newline at end of file
diff --git a/documents/Docker Builder/docker-compose.yml b/documents/Docker Builder/docker-compose.yml
new file mode 100644
index 000000000..39efefa72
--- /dev/null
+++ b/documents/Docker Builder/docker-compose.yml	
@@ -0,0 +1,10 @@
+# SPDX-FileCopyrightText: 2026 shadPS4 Emulator Project
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+services:
+  shadps4:
+    build:
+      context: ./.docker
+    volumes:
+      - ./emu:/workspaces/shadPS4:cached
+    tty: true
diff --git a/documents/building-docker.md b/documents/building-docker.md
new file mode 100644
index 000000000..95be00044
--- /dev/null
+++ b/documents/building-docker.md
@@ -0,0 +1,91 @@
+<!--
+SPDX-FileCopyrightText: 2026 shadPS4 Emulator Project
+SPDX-License-Identifier: GPL-2.0-or-later
+-->
+
+# Building shadPS4 with Docker and VSCode Support
+
+This guide explains how to build **shadPS4** using Docker while keeping full compatibility with **VSCode** development.
+
+---
+
+## Prerequisites
+
+Before starting, ensure you have:
+
+- **Docker Engine** or **Docker Desktop** installed  
+  [Installation Guide](https://docs.docker.com/engine/install/)
+
+- **Git** installed on your system.
+
+---
+
+## Step 1: Prepare the Docker Environment
+
+On your host machine (the container is created in this step):
+
+1. Navigate to the `Docker Builder` folder, which contains `docker-compose.yml`:
+
+```bash
+cd "<path-to-repo>/documents/Docker Builder"
+```
+
+2. Start the Docker container:
+
+```bash
+docker compose up -d
+```
+
+This will spin up a container with all the necessary build dependencies, including Clang, CMake, SDL2, Vulkan, and more.
+
+## Step 2: Clone shadPS4 Source
+
+```bash
+mkdir emu
+cd emu
+git clone --recursive https://github.com/shadps4-emu/shadPS4.git .
+```
+
+You can use the URL of your own fork instead.
+
+If you cloned without `--recursive`, initialize the submodules:
+
+```bash
+git submodule update --init --recursive
+```
+
+## Step 3: Build with CMake
+
+Generate the build directory and configure the project using Clang:
+
+```bash
+cmake -S . -B build/ -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
+```
+
+Then build the project:
+
+```bash
+cmake --build ./build --parallel $(nproc)
+```
+
+* Tip: To enable debug builds, add `-DCMAKE_BUILD_TYPE=Debug` to the CMake configure command.
+
+---
+
+After a successful build, the executable is located at:
+
+```bash
+./build/shadps4
+```
+
+## Step 4: VSCode Integration
+
+1. Open the repository in VSCode.
+2. The CMake Tools extension should automatically detect the build directory inside the container or on your host.
+3. You can configure build options, build, and debug directly from the VSCode interface without extra manual setup.
+
+## Notes
+
+* The Docker environment contains all dependencies, so you don’t need to install anything manually.
+* Using Clang inside Docker ensures consistent builds across Linux and macOS runners.
+* GitHub Actions are recommended for cross-platform builds, including Windows .exe output, which is not trivial to produce locally without Visual Studio or clang-cl.
\ No newline at end of file