From 7c2e252f9a1222ebd7c94a9c4267678fd0f7be94 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 29 Dec 2023 16:19:41 +0100 Subject: [PATCH 1/6] Common/MemArena: Add function for getting page size --- Source/Core/Common/MemArena.h | 10 ++++++++++ Source/Core/Common/MemArenaAndroid.cpp | 5 +++++ Source/Core/Common/MemArenaDarwin.cpp | 7 +++++++ Source/Core/Common/MemArenaUnix.cpp | 5 +++++ Source/Core/Common/MemArenaWin.cpp | 15 +++++++++++++++ 5 files changed, 42 insertions(+) diff --git a/Source/Core/Common/MemArena.h b/Source/Core/Common/MemArena.h index d9d472079cd..021ebe98c05 100644 --- a/Source/Core/Common/MemArena.h +++ b/Source/Core/Common/MemArena.h @@ -115,6 +115,16 @@ public: /// void UnmapFromMemoryRegion(void* view, size_t size); + /// + /// Return the system's page size. + /// + size_t GetPageSize() const; + + /// + /// Return the system's page size or required page alignment, whichever is larger. + /// + size_t GetPageAlignment() const; + private: #ifdef _WIN32 WindowsMemoryRegion* EnsureSplitRegionForMapping(void* address, size_t size); diff --git a/Source/Core/Common/MemArenaAndroid.cpp b/Source/Core/Common/MemArenaAndroid.cpp index bcba64e1e1f..1db5afb91d3 100644 --- a/Source/Core/Common/MemArenaAndroid.cpp +++ b/Source/Core/Common/MemArenaAndroid.cpp @@ -144,6 +144,11 @@ void MemArena::UnmapFromMemoryRegion(void* view, size_t size) NOTICE_LOG_FMT(MEMMAP, "mmap failed"); } +size_t MemArena::GetPageSize() const +{ + return sysconf(_SC_PAGESIZE); +} + LazyMemoryRegion::LazyMemoryRegion() = default; LazyMemoryRegion::~LazyMemoryRegion() diff --git a/Source/Core/Common/MemArenaDarwin.cpp b/Source/Core/Common/MemArenaDarwin.cpp index c528e327990..2316f5dcb56 100644 --- a/Source/Core/Common/MemArenaDarwin.cpp +++ b/Source/Core/Common/MemArenaDarwin.cpp @@ -3,6 +3,8 @@ #include "Common/MemArena.h" +#include + #include "Common/Assert.h" #include "Common/Logging/Log.h" @@ -163,6 +165,11 @@ void MemArena::UnmapFromMemoryRegion(void* view, size_t size) } } +size_t MemArena::GetPageSize() const +{ + return getpagesize(); +} + LazyMemoryRegion::LazyMemoryRegion() = default; LazyMemoryRegion::~LazyMemoryRegion() diff --git a/Source/Core/Common/MemArenaUnix.cpp b/Source/Core/Common/MemArenaUnix.cpp index 83026f76b8b..13398de7613 100644 --- a/Source/Core/Common/MemArenaUnix.cpp +++ b/Source/Core/Common/MemArenaUnix.cpp @@ -110,6 +110,11 @@ void MemArena::UnmapFromMemoryRegion(void* view, size_t size) NOTICE_LOG_FMT(MEMMAP, "mmap failed"); } +size_t MemArena::GetPageSize() const +{ + return sysconf(_SC_PAGESIZE); +} + LazyMemoryRegion::LazyMemoryRegion() = default; LazyMemoryRegion::~LazyMemoryRegion() diff --git a/Source/Core/Common/MemArenaWin.cpp b/Source/Core/Common/MemArenaWin.cpp index 79079797753..51df5445d2b 100644 --- a/Source/Core/Common/MemArenaWin.cpp +++ b/Source/Core/Common/MemArenaWin.cpp @@ -438,6 +438,21 @@ void MemArena::UnmapFromMemoryRegion(void* view, size_t size) UnmapViewOfFile(view); } +size_t MemArena::GetPageSize() const +{ + SYSTEM_INFO si; + GetSystemInfo(&si); + + if (!m_memory_functions.m_address_MapViewOfFile3) + { + // In this case, we can only map pages that are 64K aligned. 
+ // See https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 + return std::max(si.dwPageSize, 64 * 1024); + } + + return si.dwPageSize; +} + LazyMemoryRegion::LazyMemoryRegion() { InitWindowsMemoryFunctions(&m_memory_functions); From aabf1023c0bd21d6a5bc0af07cd67852be81d21b Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 20 Jun 2025 08:56:15 +0200 Subject: [PATCH 2/6] Core: Detect SR updates --- Source/Core/Core/PowerPC/GDBStub.cpp | 2 ++ .../Interpreter_SystemRegisters.cpp | 6 +++-- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 -- .../JitArm64/JitArm64_SystemRegisters.cpp | 26 ------------------- .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 24 ++++++++--------- Source/Core/Core/PowerPC/MMU.cpp | 4 +++ Source/Core/Core/PowerPC/MMU.h | 1 + Source/Core/Core/PowerPC/PowerPC.cpp | 6 ----- Source/Core/Core/PowerPC/PowerPC.h | 2 -- .../DolphinQt/Debugger/RegisterWidget.cpp | 6 ++++- 10 files changed, 28 insertions(+), 51 deletions(-) diff --git a/Source/Core/Core/PowerPC/GDBStub.cpp b/Source/Core/Core/PowerPC/GDBStub.cpp index fb5a9a99c11..da282389f53 100644 --- a/Source/Core/Core/PowerPC/GDBStub.cpp +++ b/Source/Core/Core/PowerPC/GDBStub.cpp @@ -36,6 +36,7 @@ typedef SSIZE_T ssize_t; #include "Core/Host.h" #include "Core/PowerPC/BreakPoints.h" #include "Core/PowerPC/Gekko.h" +#include "Core/PowerPC/MMU.h" #include "Core/PowerPC/PPCCache.h" #include "Core/PowerPC/PowerPC.h" #include "Core/System.h" @@ -648,6 +649,7 @@ static void WriteRegister() else if (id >= 71 && id < 87) { ppc_state.sr[id - 71] = re32hex(bufptr); + system.GetMMU().SRUpdated(); } else if (id >= 88 && id < 104) { diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index e632ab403d2..8f30be84f4f 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -203,7 +203,8 @@ void Interpreter::mtsr(Interpreter& interpreter, UGeckoInstruction inst) const u32 index = inst.SR; const u32 value = ppc_state.gpr[inst.RS]; - ppc_state.SetSR(index, value); + ppc_state.sr[index] = value; + interpreter.m_system.GetMMU().SRUpdated(); } void Interpreter::mtsrin(Interpreter& interpreter, UGeckoInstruction inst) @@ -217,7 +218,8 @@ void Interpreter::mtsrin(Interpreter& interpreter, UGeckoInstruction inst) const u32 index = (ppc_state.gpr[inst.RB] >> 28) & 0xF; const u32 value = ppc_state.gpr[inst.RS]; - ppc_state.SetSR(index, value); + ppc_state.sr[index] = value; + interpreter.m_system.GetMMU().SRUpdated(); } void Interpreter::mftb(Interpreter& interpreter, UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 77ba6d0bb35..d32ed290487 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -122,9 +122,7 @@ public: void mcrf(UGeckoInstruction inst); void mcrxr(UGeckoInstruction inst); void mfsr(UGeckoInstruction inst); - void mtsr(UGeckoInstruction inst); void mfsrin(UGeckoInstruction inst); - void mtsrin(UGeckoInstruction inst); void twx(UGeckoInstruction inst); void mfspr(UGeckoInstruction inst); void mftb(UGeckoInstruction inst); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 54592951e48..21a0003887a 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ 
b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -291,14 +291,6 @@ void JitArm64::mfsr(UGeckoInstruction inst) LDR(IndexType::Unsigned, gpr.R(inst.RD), PPC_REG, PPCSTATE_OFF_SR(inst.SR)); } -void JitArm64::mtsr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF_SR(inst.SR)); -} - void JitArm64::mfsrin(UGeckoInstruction inst) { INSTRUCTION_START @@ -317,24 +309,6 @@ void JitArm64::mfsrin(UGeckoInstruction inst) LDR(RD, addr, ArithOption(EncodeRegTo64(index), true)); } -void JitArm64::mtsrin(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - u32 b = inst.RB, d = inst.RD; - gpr.BindToRegister(d, d == b); - - ARM64Reg RB = gpr.R(b); - ARM64Reg RD = gpr.R(d); - auto index = gpr.GetScopedReg(); - auto addr = gpr.GetScopedReg(); - - UBFM(index, RB, 28, 31); - ADDI2R(EncodeRegTo64(addr), PPC_REG, PPCSTATE_OFF_SR(0), EncodeRegTo64(addr)); - STR(RD, EncodeRegTo64(addr), ArithOption(EncodeRegTo64(index), true)); -} - void JitArm64::twx(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index eb97db9b91e..73ae6cfe651 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -266,18 +266,18 @@ constexpr std::array s_table31{{ {759, &JitArm64::stfXX}, // stfdux {983, &JitArm64::stfXX}, // stfiwx - {19, &JitArm64::mfcr}, // mfcr - {83, &JitArm64::mfmsr}, // mfmsr - {144, &JitArm64::mtcrf}, // mtcrf - {146, &JitArm64::mtmsr}, // mtmsr - {210, &JitArm64::mtsr}, // mtsr - {242, &JitArm64::mtsrin}, // mtsrin - {339, &JitArm64::mfspr}, // mfspr - {467, &JitArm64::mtspr}, // mtspr - {371, &JitArm64::mftb}, // mftb - {512, &JitArm64::mcrxr}, // mcrxr - {595, &JitArm64::mfsr}, // mfsr - {659, &JitArm64::mfsrin}, // mfsrin + {19, &JitArm64::mfcr}, // mfcr + {83, &JitArm64::mfmsr}, // mfmsr + {144, &JitArm64::mtcrf}, // mtcrf + {146, &JitArm64::mtmsr}, // mtmsr + {210, &JitArm64::FallBackToInterpreter}, // mtsr + {242, &JitArm64::FallBackToInterpreter}, // mtsrin + {339, &JitArm64::mfspr}, // mfspr + {467, &JitArm64::mtspr}, // mtspr + {371, &JitArm64::mftb}, // mftb + {512, &JitArm64::mcrxr}, // mcrxr + {595, &JitArm64::mfsr}, // mfsr + {659, &JitArm64::mfsrin}, // mfsrin {4, &JitArm64::twx}, // tw {598, &JitArm64::DoNothing}, // sync diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index ad14ebbc2c7..ec09acb03bb 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1219,6 +1219,10 @@ void MMU::SDRUpdated() m_ppc_state.pagetable_hashmask = ((htabmask << 10) | 0x3ff); } +void MMU::SRUpdated() +{ +} + enum class TLBLookupResult { Found, diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index 018657cd5f4..aebcc24aa98 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -237,6 +237,7 @@ public: // TLB functions void SDRUpdated(); + void SRUpdated(); void InvalidateTLBEntry(u32 address); void DBATUpdated(); void IBATUpdated(); diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 59f1e3d1bf2..20ded835348 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -672,12 +672,6 @@ void PowerPCManager::MSRUpdated() m_system.GetJitInterface().UpdateMembase(); } -void 
PowerPCState::SetSR(u32 index, u32 value) -{ - DEBUG_LOG_FMT(POWERPC, "{:08x}: MMU: Segment register {} set to {:08x}", pc, index, value); - sr[index] = value; -} - // FPSCR update functions void PowerPCState::UpdateFPRFDouble(double dvalue) diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index ea972590cb3..8232196af14 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -193,8 +193,6 @@ struct PowerPCState cr.SetField(1, (fpscr.FX << 3) | (fpscr.FEX << 2) | (fpscr.VX << 1) | fpscr.OX); } - void SetSR(u32 index, u32 value); - void SetCarry(u32 ca) { xer_ca = ca; } u32 GetCarry() const { return xer_ca; } diff --git a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp index 8bb9d882916..35253b49cdb 100644 --- a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp +++ b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp @@ -15,6 +15,7 @@ #include "Core/Core.h" #include "Core/Debugger/CodeTrace.h" #include "Core/HW/ProcessorInterface.h" +#include "Core/PowerPC/MMU.h" #include "Core/PowerPC/PowerPC.h" #include "Core/System.h" #include "DolphinQt/Host.h" @@ -405,7 +406,10 @@ void RegisterWidget::PopulateTable() AddRegister( i, 7, RegisterType::sr, "SR" + std::to_string(i), [this, i] { return m_system.GetPPCState().sr[i]; }, - [this, i](u64 value) { m_system.GetPPCState().sr[i] = value; }); + [this, i](u64 value) { + m_system.GetPPCState().sr[i] = value; + m_system.GetMMU().SRUpdated(); + }); } // Special registers From 7f49fe350c817828af3b5d9e69e3e7673fab1dd8 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 20 Jun 2025 09:17:21 +0200 Subject: [PATCH 3/6] Core: Create fastmem mappings for page address translation Previously we've only been setting up fastmem mappings for block address translation, but now we also do it for page address translation. This increases performance when games access memory using page tables, but decreases performance when games set up page tables. The tlbie instruction is used as an indication that the mappings need to be updated. There are some accuracy downsides: * The TLB is now effectively infinitely large, which matters if games don't use tlbie when modifying page tables. * The R and C bits for page table entries get set pessimistically rather than when the page is actually accessed. No games are known to be broken by these inaccuracies, but unfortunately the second inaccuracy causes a large performance regression in Rogue Squadron 3. You still get the old, more accurate behavior if Enable Write-Back Cache is on. 
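To illustrate the mapping pass this introduces: a valid PTE only records its
hash-derived position in the page table plus its VSID and API, so before a
host mapping can be created, the logical page it maps has to be reconstructed.
The standalone sketch below shows that reconstruction with the usual Gekko
(PowerPC OEA) field widths; the helper names and the check in main() are
illustrative only, not Dolphin code.

    // Effective address layout assumed here: [ SR:4 | page_index:16 | offset:12 ],
    // with API being the top 6 bits of the 16-bit page index.
    #include <cassert>
    #include <cstdint>

    static uint32_t PrimaryHash(uint32_t vsid, uint32_t page_index)
    {
      // The secondary (H=1) hash is the one's complement of this value.
      return (vsid & 0x7FFFF) ^ page_index;
    }

    static uint32_t RecoverLogicalPage(uint32_t pteg_index, uint32_t vsid,
                                       uint32_t api, bool h, uint32_t sr)
    {
      // Undo the hash. Only the bits kept by the hash mask are trustworthy...
      uint32_t page_index = pteg_index ^ vsid;
      if (h)
        page_index = ~page_index;
      page_index &= 0xFFFF;
      // ...so overwrite the upper 6 bits with the API stored in the PTE itself.
      page_index = (page_index & 0x03FF) | (api << 10);
      return (sr << 28) | (page_index << 12);
    }

    int main()
    {
      const uint32_t vsid = 0x12345, sr = 8, page_index = 0xBEEF;
      const uint32_t hashmask = 0x3FF;  // smallest possible page table
      const uint32_t pteg_index = PrimaryHash(vsid, page_index) & hashmask;
      const uint32_t api = page_index >> 10;
      assert(RecoverLogicalPage(pteg_index, vsid, api, false, sr) ==
             ((sr << 28) | (page_index << 12)));
      return 0;
    }
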
--- Source/Core/Core/HW/Memmap.cpp | 85 +++++++++++++--- Source/Core/Core/HW/Memmap.h | 9 +- Source/Core/Core/PowerPC/MMU.cpp | 141 ++++++++++++++++++++++++++- Source/Core/Core/PowerPC/MMU.h | 11 +++ Source/Core/Core/PowerPC/PowerPC.cpp | 9 +- Source/Core/Core/State.cpp | 2 +- 6 files changed, 240 insertions(+), 17 deletions(-) diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp index 675e9420096..05a0efa2aaa 100644 --- a/Source/Core/Core/HW/Memmap.cpp +++ b/Source/Core/Core/HW/Memmap.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -41,7 +42,8 @@ namespace Memory { -MemoryManager::MemoryManager(Core::System& system) : m_system(system) +MemoryManager::MemoryManager(Core::System& system) + : m_page_size(m_arena.GetPageSize()), m_system(system) { } @@ -233,13 +235,19 @@ bool MemoryManager::InitFastmemArena() return true; } -void MemoryManager::UpdateLogicalMemory(const PowerPC::BatTable& dbat_table) +void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) { - for (auto& entry : m_logical_mapped_entries) + for (auto& entry : m_dbat_mapped_entries) { m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); } - m_logical_mapped_entries.clear(); + m_dbat_mapped_entries.clear(); + + for (auto& entry : m_page_table_mapped_entries) + { + m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); + } + m_page_table_mapped_entries.clear(); m_logical_page_mappings.fill(nullptr); @@ -288,13 +296,12 @@ void MemoryManager::UpdateLogicalMemory(const PowerPC::BatTable& dbat_table) void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base); if (!mapped_pointer) { - PanicAlertFmt( - "Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} " - "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.", - intersection_start, mapped_size, logical_address); - exit(0); + PanicAlertFmt("Memory::UpdateDBATMappings(): Failed to map memory region at 0x{:08X} " + "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.", + intersection_start, mapped_size, logical_address); + continue; } - m_logical_mapped_entries.push_back({mapped_pointer, mapped_size}); + m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size}); } m_logical_page_mappings[i] = @@ -305,6 +312,54 @@ void MemoryManager::UpdateLogicalMemory(const PowerPC::BatTable& dbat_table) } } +void MemoryManager::UpdatePageTableMappings(const std::map& page_mappings) +{ + if (m_page_size > PowerPC::HW_PAGE_SIZE) + return; + + for (auto& entry : m_page_table_mapped_entries) + { + m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); + } + m_page_table_mapped_entries.clear(); + + for (const auto [logical_address, translated_address] : page_mappings) + { + constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE; + for (const auto& physical_region : m_physical_regions) + { + if (!physical_region.active) + continue; + + u32 mapping_address = physical_region.physical_address; + u32 mapping_end = mapping_address + physical_region.size; + u32 intersection_start = std::max(mapping_address, translated_address); + u32 intersection_end = std::min(mapping_end, translated_address + logical_size); + if (intersection_start < intersection_end) + { + // Found an overlapping region; map it. 
+ if (m_is_fastmem_arena_initialized) + { + u32 position = physical_region.shm_position + intersection_start - mapping_address; + u8* base = m_logical_base + logical_address + intersection_start - translated_address; + u32 mapped_size = intersection_end - intersection_start; + + void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base); + if (!mapped_pointer) + { + PanicAlertFmt( + "Memory::UpdatePageTableMappings(): Failed to map memory region at 0x{:08X} " + "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.", + intersection_start, mapped_size, logical_address); + continue; + } + m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size}); + } + } + } + } +} + void MemoryManager::DoState(PointerWrap& p) { const u32 current_ram_size = GetRamSize(); @@ -386,11 +441,17 @@ void MemoryManager::ShutdownFastmemArena() m_arena.UnmapFromMemoryRegion(base, region.size); } - for (auto& entry : m_logical_mapped_entries) + for (auto& entry : m_dbat_mapped_entries) { m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); } - m_logical_mapped_entries.clear(); + m_dbat_mapped_entries.clear(); + + for (auto& entry : m_page_table_mapped_entries) + { + m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); + } + m_page_table_mapped_entries.clear(); m_arena.ReleaseMemoryRegion(); diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h index e0708605dbf..a0950ab7248 100644 --- a/Source/Core/Core/HW/Memmap.h +++ b/Source/Core/Core/HW/Memmap.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include #include #include @@ -99,7 +100,8 @@ public: void ShutdownFastmemArena(); void DoState(PointerWrap& p); - void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table); + void UpdateDBATMappings(const PowerPC::BatTable& dbat_table); + void UpdatePageTableMappings(const std::map& page_mappings); void Clear(); @@ -207,6 +209,8 @@ private: // The MemArena class Common::MemArena m_arena; + const size_t m_page_size; + // Dolphin allocates memory to represent four regions: // - 32MB RAM (actually 24MB on hardware), available on GameCube and Wii // - 64MB "EXRAM", RAM only available on Wii @@ -247,7 +251,8 @@ private: // TODO: Do we want to handle the mirrors of the GC RAM? std::array m_physical_regions{}; - std::vector m_logical_mapped_entries; + std::vector m_dbat_mapped_entries; + std::vector m_page_table_mapped_entries; std::array m_physical_page_mappings{}; std::array m_logical_page_mappings{}; diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index ec09acb03bb..a0198dfa6d5 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -33,6 +33,7 @@ #include "Common/Align.h" #include "Common/Assert.h" #include "Common/BitUtils.h" +#include "Common/ChunkFile.h" #include "Common/CommonTypes.h" #include "Common/Logging/Log.h" @@ -58,6 +59,22 @@ MMU::MMU(Core::System& system, Memory::MemoryManager& memory, PowerPC::PowerPCMa MMU::~MMU() = default; +void MMU::Reset() +{ + m_page_mappings.clear(); +#ifndef _ARCH_32 + m_memory.UpdatePageTableMappings(m_page_mappings); +#endif +} + +void MMU::DoState(PointerWrap& p) +{ + // Instead of storing m_page_mappings in savestates, we *could* recalculate it based on memory + // here in DoState, but this could lead to us getting a more up-to-date set of page mappings + // than we had when the savestate was created, which could be a problem for TAS determinism. 
+ p.Do(m_page_mappings); +} + // Overloaded byteswap functions, for use within the templated functions below. [[maybe_unused]] static u8 bswap(u8 val) { @@ -1217,10 +1234,13 @@ void MMU::SDRUpdated() m_ppc_state.pagetable_base = htaborg << 16; m_ppc_state.pagetable_hashmask = ((htabmask << 10) | 0x3ff); + + PageTableUpdated(); } void MMU::SRUpdated() { + PageTableUpdated(); } enum class TLBLookupResult @@ -1310,6 +1330,124 @@ void MMU::InvalidateTLBEntry(u32 address) m_ppc_state.tlb[PowerPC::DATA_TLB_INDEX][entry_index].Invalidate(); m_ppc_state.tlb[PowerPC::INST_TLB_INDEX][entry_index].Invalidate(); + + PageTableUpdated(); +} + +void MMU::PageTableUpdated() +{ +#ifndef _ARCH_32 + m_page_mappings.clear(); + + if (m_ppc_state.m_enable_dcache) + { + // Because fastmem isn't in use when accurate dcache emulation is enabled, setting up mappings + // would be a waste of time. Skipping setting up mappings also comes with the bonus of skipping + // the inaccurate behavior of setting the R and C bits of PTE2 as soon as a page is mapped. + return; + } + + const u32 page_table_mask = m_ppc_state.pagetable_hashmask; + const u32 page_table_base = m_ppc_state.pagetable_base; + const u32 page_table_end = (page_table_base | (page_table_mask << 6)) + (1 << 6); + const u32 page_table_size = page_table_end - page_table_base; + + u8* page_table_view = m_system.GetMemory().GetPointerForRange(page_table_base, page_table_size); + if (!page_table_view) + { + WARN_LOG_FMT(POWERPC, "Failed to read page table at {:#010x}-{:#010x}", page_table_base, + page_table_end); + m_memory.UpdatePageTableMappings(m_page_mappings); + return; + } + + const auto read_page_table = [&](u32 H) { + for (u32 i = 0; i <= page_table_mask; ++i) + { + for (u32 j = 0; j < 8; ++j) + { + const u32 pte_addr = (page_table_base | ((i & page_table_mask) << 6)) + j * 8; + + UPTE_Lo pte1(Common::swap32(page_table_view + pte_addr - page_table_base)); + UPTE_Hi pte2(Common::swap32(page_table_view + pte_addr - page_table_base + 4)); + + if (!pte1.V) + continue; + + if (pte1.H != H) + continue; + + // There are quirks related to uncached memory that can't be correctly emulated by fast + // accesses, so we don't map uncached memory. (However, no software at all is known to + // trigger these quirks through page address translation, only through block address + // translation.) + const bool wi = (pte2.WIMG & 0b1100) != 0; + if (wi) + continue; + + // Due to hash masking, the upper bits of page_index_from_hash might not match the actual + // page index. But these bits fully overlap with the API (abbreviated page index), so we can + // overwrite these bits with the API from pte1 and thereby get the correct page index. + // + // In other words: logical_address.API must be written to after logical_address.page_index! + u32 page_index_from_hash = i ^ pte1.VSID; + if (pte1.H) + page_index_from_hash = ~page_index_from_hash; + EffectiveAddress logical_address; + logical_address.offset = 0; + logical_address.page_index = page_index_from_hash; + logical_address.API = pte1.API; + + for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k) + { + const auto sr = UReg_SR{m_ppc_state.sr[k]}; + if (sr.VSID != pte1.VSID || sr.T != 0) + continue; + + logical_address.SR = k; + + // Block address translation takes priority over page address translation. 
+ if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] & + PowerPC::BAT_MAPPED_BIT) + { + continue; + } + + // Fast accesses don't support memchecks, so force slow accesses by removing fastmem + // mappings for all overlapping virtual pages. + constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE; + if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex, logical_size)) + continue; + + const u32 physical_address = pte2.RPN << 12; + + // Important: This doesn't overwrite anything already present in m_page_mappings. + m_page_mappings.emplace(logical_address.Hex, physical_address); + + // HACK: We set R and C, which indicate whether a page have been read from and written to + // respectively, when a page is mapped rather than when it's actually accessed. The latter + // is probably possible using some fault handling logic, but for now it seems like more + // work than it's worth. + if (!pte2.R || !pte2.C) + { + pte2.R = 1; + pte2.C = 1; + + const u32 pte2_swapped = Common::swap32(pte2.Hex); + std::memcpy(page_table_view + pte_addr - page_table_base + 4, &pte2_swapped, + sizeof(pte2_swapped)); + } + } + } + } + }; + + // We need to read all H=0 PTEs first, because H=0 takes priority over H=1. + read_page_table(0); + read_page_table(1); + + m_memory.UpdatePageTableMappings(m_page_mappings); +#endif } // Page Address Translation @@ -1537,7 +1675,8 @@ void MMU::DBATUpdated() } #ifndef _ARCH_32 - m_memory.UpdateLogicalMemory(m_dbat_table); + m_memory.UpdateDBATMappings(m_dbat_table); + m_memory.UpdatePageTableMappings(m_page_mappings); #endif // IsOptimizable*Address and dcbz depends on the BAT mapping, so we need a flush here. diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index aebcc24aa98..c801df7a158 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -12,6 +13,8 @@ #include "Common/CommonTypes.h" #include "Common/TypeUtils.h" +class PointerWrap; + namespace Core { class CPUThreadGuard; @@ -117,6 +120,9 @@ public: MMU& operator=(MMU&& other) = delete; ~MMU(); + void Reset(); + void DoState(PointerWrap& p); + // Routines for debugger UI, cheats, etc. to access emulated memory from the // perspective of the CPU. Not for use by core emulation routines. // Use "Host" prefix. 
@@ -239,6 +245,7 @@ public: void SDRUpdated(); void SRUpdated(); void InvalidateTLBEntry(u32 address); + void PageTableUpdated(); void DBATUpdated(); void IBATUpdated(); @@ -318,6 +325,10 @@ private: PowerPC::PowerPCManager& m_power_pc; PowerPC::PowerPCState& m_ppc_state; + // STATE_TO_SAVE + std::map m_page_mappings; + // END STATE_TO_SAVE + BatTable m_ibat_table; BatTable m_dbat_table; }; diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 20ded835348..7df4066427d 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -105,6 +105,9 @@ void PowerPCManager::DoState(PointerWrap& p) m_ppc_state.iCache.DoState(memory, p); m_ppc_state.dCache.DoState(memory, p); + auto& mmu = m_system.GetMMU(); + mmu.DoState(p); + if (p.IsReadMode()) { if (!m_ppc_state.m_enable_dcache) @@ -116,7 +119,6 @@ void PowerPCManager::DoState(PointerWrap& p) RoundingModeUpdated(m_ppc_state); RecalculateAllFeatureFlags(m_ppc_state); - auto& mmu = m_system.GetMMU(); mmu.IBATUpdated(); mmu.DBATUpdated(); } @@ -253,6 +255,10 @@ void PowerPCManager::RefreshConfig() { INFO_LOG_FMT(POWERPC, "Flushing data cache"); m_ppc_state.dCache.FlushAll(m_system.GetMemory()); + + // No page table mappings are created when accurate dcache emulation is enabled. + // If there are any that can be created, let's create them now. + m_system.GetMMU().PageTableUpdated(); } } @@ -282,6 +288,7 @@ void PowerPCManager::Reset() ResetRegisters(); m_ppc_state.iCache.Reset(m_system.GetJitInterface()); m_ppc_state.dCache.Reset(); + m_system.GetMMU().Reset(); } void PowerPCManager::ScheduleInvalidateCacheThreadSafe(u32 address) diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index ffd811bd6c7..d2e2c699ec2 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -95,7 +95,7 @@ static size_t s_state_writes_in_queue; static std::condition_variable s_state_write_queue_is_empty; // Don't forget to increase this after doing changes on the savestate system -constexpr u32 STATE_VERSION = 175; // Last changed in PR 13751 +constexpr u32 STATE_VERSION = 176; // Last changed in PR 13768 // Increase this if the StateExtendedHeader definition changes constexpr u32 EXTENDED_HEADER_VERSION = 1; // Last changed in PR 12217 From f33257f20d66ac55d1f37035c8c629ee64efa5d1 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 20 Jun 2025 09:16:55 +0200 Subject: [PATCH 4/6] Core: Postpone page table updates when DR isn't set Page table mappings are only used when DR is set, so if page tables are updated when DR isn't set, we can wait with updating page table mappings until DR gets set. This lets us batch page table updates in the Disney Trio of Destruction, improving performance when the games are loading data. It doesn't help much for GameCube games, because those run tlbie with DR set. The PowerPCState struct has had its members slightly reordered. I had to put pagetable_update_pending less than 4 KiB from the start so AArch64's LDRB (immediate) can access it, and I also took the opportunity to move some other members around to cut down on padding. 
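Reduced to its essentials, the batching works out to one flag and two call
sites. The sketch below (made-up names, not the real PowerPCState) shows the
pattern and why the flag has to sit within the first 4 KiB of the struct:

    #include <cstddef>
    #include <cstdio>

    struct CpuState
    {
      bool msr_dr = false;                    // data address translation enabled?
      bool pagetable_update_pending = false;  // deferred page table rescan?
    };

    // AArch64 LDRB (unsigned immediate) only reaches offsets 0..4095 bytes,
    // hence the requirement that the flag live in the first 4 KiB.
    static_assert(offsetof(CpuState, pagetable_update_pending) < 0x1000);

    static void RescanPageTable(CpuState& s)
    {
      s.pagetable_update_pending = false;
      std::puts("rescanning page table");  // stand-in for MMU::PageTableUpdated()
    }

    static void OnPageTableWrite(CpuState& s)  // tlbie, mtsr, mtsrin, ...
    {
      if (s.msr_dr)
        RescanPageTable(s);                 // mappings are live, update now
      else
        s.pagetable_update_pending = true;  // batch until DR is set again
    }

    static void OnMsrWrite(CpuState& s, bool new_dr)
    {
      s.msr_dr = new_dr;
      if (s.msr_dr && s.pagetable_update_pending)
        RescanPageTable(s);  // one rescan covers all the batched changes
    }

    int main()
    {
      CpuState s;
      OnMsrWrite(s, false);
      OnPageTableWrite(s);  // deferred
      OnPageTableWrite(s);  // still deferred, no extra work
      OnMsrWrite(s, true);  // the single rescan happens here
      return 0;
    }
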
--- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 23 +++++++++++++- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 13 +++++--- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 31 ++++++++++++++++++- Source/Core/Core/PowerPC/MMU.cpp | 16 ++++++++-- Source/Core/Core/PowerPC/MMU.h | 1 + Source/Core/Core/PowerPC/PowerPC.cpp | 5 +++ Source/Core/Core/PowerPC/PowerPC.h | 24 ++++++++------ 7 files changed, 94 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index 377b4388fb4..117c90a2449 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -509,6 +509,8 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg) { ASSERT(!msr.IsSimpleReg(scratch_reg)); + constexpr u32 dr_bit = 1 << UReg_MSR{}.DR.StartBit(); + // Update mem_ptr auto& memory = m_system.GetMemory(); if (msr.IsImm()) @@ -520,7 +522,7 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg) { MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase())); MOV(64, R(scratch_reg), ImmPtr(memory.GetPhysicalBase())); - TEST(32, msr, Imm32(1 << (31 - 27))); + TEST(32, msr, Imm32(dr_bit)); CMOVcc(64, RMEM, R(scratch_reg), CC_Z); } MOV(64, PPCSTATE(mem_ptr), R(RMEM)); @@ -544,6 +546,25 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg) OR(32, R(scratch_reg), Imm32(other_feature_flags)); MOV(32, PPCSTATE(feature_flags), R(scratch_reg)); } + + // Call PageTableUpdatedFromJit if needed + if (!msr.IsImm() || UReg_MSR(msr.Imm32()).DR) + { + gpr.Flush(); + fpr.Flush(); + FixupBranch dr_unset; + if (!msr.IsImm()) + { + TEST(32, msr, Imm32(dr_bit)); + dr_unset = J_CC(CC_Z); + } + CMP(8, PPCSTATE(pagetable_update_pending), Imm8(0)); + FixupBranch update_not_pending = J_CC(CC_E); + ABI_CallFunctionP(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU()); + SetJumpTarget(update_not_pending); + if (!msr.IsImm()) + SetJumpTarget(dr_unset); + } } void Jit64::WriteExit(u32 destination, bool bl, u32 after) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 63313583594..c485e395175 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -436,11 +436,14 @@ void Jit64::mtmsr(UGeckoInstruction inst) FALLBACK_IF(jo.fp_exceptions); { - RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read); - RegCache::Realize(Rs); - MOV(32, PPCSTATE(msr), Rs); - - MSRUpdated(Rs, RSCRATCH2); + OpArg Rs_op_arg; + { + RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read); + RegCache::Realize(Rs); + MOV(32, PPCSTATE(msr), Rs); + Rs_op_arg = Rs; + } + MSRUpdated(Rs_op_arg, RSCRATCH2); } gpr.Flush(); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 1eda45c58e3..56688c8bb7d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -448,10 +448,27 @@ void JitArm64::MSRUpdated(u32 msr) MOVI2R(WA, feature_flags); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags)); } + + // Call PageTableUpdatedFromJit if needed + if (UReg_MSR(msr).DR) + { + gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + + auto WA = gpr.GetScopedReg(); + + static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending)); + FixupBranch update_not_pending = CBZ(WA); + 
ABI_CallFunction(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU()); + SetJumpTarget(update_not_pending); + } } void JitArm64::MSRUpdated(ARM64Reg msr) { + constexpr LogicalImm dr_bit(1ULL << UReg_MSR{}.DR.StartBit(), GPRSize::B32); + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); @@ -459,7 +476,7 @@ void JitArm64::MSRUpdated(ARM64Reg msr) auto& memory = m_system.GetMemory(); MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()); MOVP2R(XA, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()); - TST(msr, LogicalImm(1 << (31 - 27), GPRSize::B32)); + TST(msr, dr_bit); CSEL(MEM_REG, MEM_REG, XA, CCFlags::CC_NEQ); STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr)); @@ -473,6 +490,18 @@ void JitArm64::MSRUpdated(ARM64Reg msr) if (other_feature_flags != 0) ORR(WA, WA, LogicalImm(other_feature_flags, GPRSize::B32)); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags)); + + // Call PageTableUpdatedFromJit if needed + MOV(WA, msr); + gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + FixupBranch dr_unset = TBZ(WA, dr_bit); + static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending)); + FixupBranch update_not_pending = CBZ(WA); + ABI_CallFunction(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU()); + SetJumpTarget(update_not_pending); + SetJumpTarget(dr_unset); } void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return, diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index a0198dfa6d5..02871e65e12 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1240,7 +1240,10 @@ void MMU::SDRUpdated() void MMU::SRUpdated() { - PageTableUpdated(); + if (m_ppc_state.msr.DR) + PageTableUpdated(); + else + m_ppc_state.pagetable_update_pending = true; } enum class TLBLookupResult @@ -1331,11 +1334,15 @@ void MMU::InvalidateTLBEntry(u32 address) m_ppc_state.tlb[PowerPC::DATA_TLB_INDEX][entry_index].Invalidate(); m_ppc_state.tlb[PowerPC::INST_TLB_INDEX][entry_index].Invalidate(); - PageTableUpdated(); + if (m_ppc_state.msr.DR) + PageTableUpdated(); + else + m_ppc_state.pagetable_update_pending = true; } void MMU::PageTableUpdated() { + m_ppc_state.pagetable_update_pending = false; #ifndef _ARCH_32 m_page_mappings.clear(); @@ -1450,6 +1457,11 @@ void MMU::PageTableUpdated() #endif } +void MMU::PageTableUpdatedFromJit(MMU* mmu) +{ + mmu->PageTableUpdated(); +} + // Page Address Translation template MMU::TranslateAddressResult MMU::TranslatePageAddress(const EffectiveAddress address, bool* wi) diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index c801df7a158..49d63e6c243 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -246,6 +246,7 @@ public: void SRUpdated(); void InvalidateTLBEntry(u32 address); void PageTableUpdated(); + static void PageTableUpdatedFromJit(MMU* mmu); void DBATUpdated(); void IBATUpdated(); diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 7df4066427d..e361f45e4e1 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -97,6 +97,7 @@ void PowerPCManager::DoState(PointerWrap& p) p.DoArray(m_ppc_state.tlb); p.Do(m_ppc_state.pagetable_base); p.Do(m_ppc_state.pagetable_hashmask); + 
p.Do(m_ppc_state.pagetable_update_pending); p.Do(m_ppc_state.reserve); p.Do(m_ppc_state.reserve_address); @@ -283,6 +284,7 @@ void PowerPCManager::Reset() { m_ppc_state.pagetable_base = 0; m_ppc_state.pagetable_hashmask = 0; + m_ppc_state.pagetable_update_pending = false; m_ppc_state.tlb = {}; ResetRegisters(); @@ -676,6 +678,9 @@ void PowerPCManager::MSRUpdated() m_ppc_state.feature_flags = static_cast( (m_ppc_state.feature_flags & FEATURE_FLAG_PERFMON) | ((m_ppc_state.msr.Hex >> 4) & 0x3)); + if (m_ppc_state.msr.DR && m_ppc_state.pagetable_update_pending) + m_system.GetMMU().PageTableUpdated(); + m_system.GetJitInterface().UpdateMembase(); } diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 8232196af14..375604c9af3 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -122,6 +122,9 @@ struct PowerPCState u32 pc = 0; // program counter u32 npc = 0; + // Storage for the stack pointer of the BLR optimization. + u8* stored_stack_pointer = nullptr; + // gather pipe pointer for JIT access u8* gather_pipe_ptr = nullptr; u8* gather_pipe_base_ptr = nullptr; @@ -157,6 +160,14 @@ struct PowerPCState // lscbx u16 xer_stringctrl = 0; + // Reservation monitor for lwarx and its friend stwcxd. These two don't really need to be + // this early in the struct, but due to how the padding works out, they fit nicely here. + u32 reserve_address; + bool reserve; + + bool pagetable_update_pending = false; + bool m_enable_dcache = false; + #ifdef _M_X86_64 // This member exists only for the purpose of an assertion that its offset <= 0x100. std::tuple<> above_fits_in_first_0x100; @@ -171,22 +182,15 @@ struct PowerPCState // JitArm64 needs 64-bit alignment for SPR_TL. alignas(8) u32 spr[1024]{}; - // Storage for the stack pointer of the BLR optimization. - u8* stored_stack_pointer = nullptr; u8* mem_ptr = nullptr; - std::array, NUM_TLBS> tlb; - u32 pagetable_base = 0; u32 pagetable_hashmask = 0; - InstructionCache iCache; - bool m_enable_dcache = false; - Cache dCache; + std::array, NUM_TLBS> tlb; - // Reservation monitor for lwarx and its friend stwcxd. - bool reserve; - u32 reserve_address; + InstructionCache iCache; + Cache dCache; void UpdateCR1() { From a1c789bdfd2d0680e45ac58a3d7bb95cf37564e1 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 20 Jun 2025 16:51:30 +0200 Subject: [PATCH 5/6] Core: Update page table mappings incrementally Removing and readding every page table mapping every time something changes in the page table is very slow. Instead, let's generate a diff and ask Memmap to update only the diff. 
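The idea, stripped of the Dolphin types involved: keep a shadow copy of the
guest page table, compare it against guest memory one 64-byte PTEG at a time,
and only re-derive mappings for the 8-byte PTEs that actually changed. A
minimal sketch with illustrative names:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Returns the byte offsets of every PTE that changed since the last call,
    // and refreshes the shadow copy as it goes.
    static std::vector<std::size_t> DiffPageTable(std::vector<uint8_t>& shadow,
                                                  const std::vector<uint8_t>& current)
    {
      std::vector<std::size_t> changed_ptes;
      if (shadow.size() != current.size())
        shadow.assign(current.size(), 0);  // size change forces a full reparse

      for (std::size_t pteg = 0; pteg < current.size(); pteg += 64)
      {
        // Fast path: almost every PTEG is untouched between two updates.
        if (std::memcmp(&shadow[pteg], &current[pteg], 64) == 0)
          continue;

        for (std::size_t pte = pteg; pte < pteg + 64; pte += 8)
        {
          if (std::memcmp(&shadow[pte], &current[pte], 8) == 0)
            continue;
          changed_ptes.push_back(pte);  // remove the old / add the new mapping here
          std::memcpy(&shadow[pte], &current[pte], 8);
        }
      }
      return changed_ptes;
    }

    int main()
    {
      std::vector<uint8_t> shadow(4096, 0), current(4096, 0);
      current[72] = 0x80;  // pretend the guest rewrote one PTE in the second PTEG
      return DiffPageTable(shadow, current).size() == 1 ? 0 : 1;
    }

On top of this, the real implementation below also has to track priorities
between primary and secondary PTEs and re-check entries that were shadowed by
a removed mapping, which is where most of the extra code in MMU.cpp comes from.
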
--- Source/Core/Core/HW/Memmap.cpp | 46 +- Source/Core/Core/HW/Memmap.h | 6 +- .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 12 +- Source/Core/Core/PowerPC/MMU.cpp | 429 ++++++++++++++---- Source/Core/Core/PowerPC/MMU.h | 41 +- Source/Core/Core/PowerPC/PowerPC.cpp | 11 +- Source/Core/Core/PowerPC/PowerPC.h | 2 +- 7 files changed, 441 insertions(+), 106 deletions(-) diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp index 05a0efa2aaa..f972460d327 100644 --- a/Source/Core/Core/HW/Memmap.cpp +++ b/Source/Core/Core/HW/Memmap.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -243,11 +244,7 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) } m_dbat_mapped_entries.clear(); - for (auto& entry : m_page_table_mapped_entries) - { - m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); - } - m_page_table_mapped_entries.clear(); + RemoveAllPageTableMappings(); m_logical_page_mappings.fill(nullptr); @@ -301,7 +298,7 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) intersection_start, mapped_size, logical_address); continue; } - m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size}); + m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address}); } m_logical_page_mappings[i] = @@ -312,18 +309,12 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) } } -void MemoryManager::UpdatePageTableMappings(const std::map& page_mappings) +void MemoryManager::AddPageTableMappings(const std::map& mappings) { if (m_page_size > PowerPC::HW_PAGE_SIZE) return; - for (auto& entry : m_page_table_mapped_entries) - { - m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); - } - m_page_table_mapped_entries.clear(); - - for (const auto [logical_address, translated_address] : page_mappings) + for (const auto [logical_address, translated_address] : mappings) { constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE; for (const auto& physical_region : m_physical_regions) @@ -353,13 +344,38 @@ void MemoryManager::UpdatePageTableMappings(const std::map& page_mappi intersection_start, mapped_size, logical_address); continue; } - m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size}); + m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address}); } } } } } +void MemoryManager::RemovePageTableMappings(const std::set& mappings) +{ + if (m_page_size > PowerPC::HW_PAGE_SIZE) + return; + + if (mappings.empty()) + return; + + std::erase_if(m_page_table_mapped_entries, [this, &mappings](const LogicalMemoryView& entry) { + const bool remove = mappings.contains(entry.logical_address); + if (remove) + m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); + return remove; + }); +} + +void MemoryManager::RemoveAllPageTableMappings() +{ + for (auto& entry : m_page_table_mapped_entries) + { + m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); + } + m_page_table_mapped_entries.clear(); +} + void MemoryManager::DoState(PointerWrap& p) { const u32 current_ram_size = GetRamSize(); diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h index a0950ab7248..894caf7e0b0 100644 --- a/Source/Core/Core/HW/Memmap.h +++ b/Source/Core/Core/HW/Memmap.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ struct LogicalMemoryView { void* mapped_pointer; u32 mapped_size; + u32 logical_address; }; class MemoryManager @@ -101,7 
+103,9 @@ public: void DoState(PointerWrap& p); void UpdateDBATMappings(const PowerPC::BatTable& dbat_table); - void UpdatePageTableMappings(const std::map& page_mappings); + void AddPageTableMappings(const std::map& mappings); + void RemovePageTableMappings(const std::set& mappings); + void RemoveAllPageTableMappings(); void Clear(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 990b2dcee79..92bbc2365d2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -33,19 +33,19 @@ constexpr Arm64Gen::ARM64Reg DISPATCHER_PC = Arm64Gen::ARM64Reg::W26; PowerPC::PowerPCState, elem); \ _Pragma("GCC diagnostic pop") \ }()) +#else +#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem)) +#endif #define PPCSTATE_OFF_ARRAY(elem, i) \ (PPCSTATE_OFF(elem[0]) + sizeof(PowerPC::PowerPCState::elem[0]) * (i)) -#else -#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem)) -#define PPCSTATE_OFF_ARRAY(elem, i) \ - (offsetof(PowerPC::PowerPCState, elem[0]) + sizeof(PowerPC::PowerPCState::elem[0]) * (i)) -#endif +#define PPCSTATE_OFF_STD_ARRAY(elem, i) \ + (PPCSTATE_OFF(elem) + sizeof(PowerPC::PowerPCState::elem[0]) * (i)) #define PPCSTATE_OFF_GPR(i) PPCSTATE_OFF_ARRAY(gpr, i) #define PPCSTATE_OFF_CR(i) PPCSTATE_OFF_ARRAY(cr.fields, i) -#define PPCSTATE_OFF_SR(i) PPCSTATE_OFF_ARRAY(sr, i) +#define PPCSTATE_OFF_SR(i) PPCSTATE_OFF_STD_ARRAY(sr, i) #define PPCSTATE_OFF_SPR(i) PPCSTATE_OFF_ARRAY(spr, i) static_assert(std::is_same_v); diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 02871e65e12..92cb7c5cd33 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -25,10 +25,17 @@ #include "Core/PowerPC/MMU.h" +#include #include #include #include +#include #include +#include + +#ifdef _M_X86_64 +#include +#endif #include "Common/Align.h" #include "Common/Assert.h" @@ -61,18 +68,37 @@ MMU::~MMU() = default; void MMU::Reset() { + m_page_table.clear(); m_page_mappings.clear(); #ifndef _ARCH_32 - m_memory.UpdatePageTableMappings(m_page_mappings); + m_memory.RemoveAllPageTableMappings(); #endif } -void MMU::DoState(PointerWrap& p) +void MMU::DoState(PointerWrap& p, bool sr_changed) { - // Instead of storing m_page_mappings in savestates, we *could* recalculate it based on memory + // Instead of storing m_page_table in savestates, we *could* refetch it from memory // here in DoState, but this could lead to us getting a more up-to-date set of page mappings // than we had when the savestate was created, which could be a problem for TAS determinism. - p.Do(m_page_mappings); + if (p.IsReadMode()) + { + if (sr_changed) + { + // Non-incremental update of page table mappings. + p.Do(m_page_table); + SRUpdated(); + } + else + { + // Incremental update of page table mappings. + p.Do(m_temp_page_table); + PageTableUpdated(m_temp_page_table); + } + } + else + { + p.Do(m_page_table); + } } // Overloaded byteswap functions, for use within the templated functions below. @@ -1240,10 +1266,12 @@ void MMU::SDRUpdated() void MMU::SRUpdated() { - if (m_ppc_state.msr.DR) - PageTableUpdated(); - else - m_ppc_state.pagetable_update_pending = true; +#ifndef _ARCH_32 + // Our incremental handling of page table updates can't handle SR changing, so throw away all + // existing mappings and then reparse the whole page table. 
+ m_memory.RemoveAllPageTableMappings(); + ReloadPageTable(); +#endif } enum class TLBLookupResult @@ -1343,9 +1371,13 @@ void MMU::InvalidateTLBEntry(u32 address) void MMU::PageTableUpdated() { m_ppc_state.pagetable_update_pending = false; -#ifndef _ARCH_32 - m_page_mappings.clear(); +#ifdef _ARCH_32 + // If a savestate is brought from a 64-bit system to a 32-bit system, clear m_page_table. + // Not doing this means a stale m_page_table would stick around, which could be a problem + // if the savestate is then brought to a 64-bit system again. + m_page_table.clear(); +#else if (m_ppc_state.m_enable_dcache) { // Because fastmem isn't in use when accurate dcache emulation is enabled, setting up mappings @@ -1354,9 +1386,8 @@ void MMU::PageTableUpdated() return; } - const u32 page_table_mask = m_ppc_state.pagetable_hashmask; const u32 page_table_base = m_ppc_state.pagetable_base; - const u32 page_table_end = (page_table_base | (page_table_mask << 6)) + (1 << 6); + const u32 page_table_end = (page_table_base | (m_ppc_state.pagetable_hashmask << 6)) + (1 << 6); const u32 page_table_size = page_table_end - page_table_base; u8* page_table_view = m_system.GetMemory().GetPointerForRange(page_table_base, page_table_size); @@ -1364,98 +1395,334 @@ void MMU::PageTableUpdated() { WARN_LOG_FMT(POWERPC, "Failed to read page table at {:#010x}-{:#010x}", page_table_base, page_table_end); - m_memory.UpdatePageTableMappings(m_page_mappings); + + // Remove host mappings, because we no longer know if they're up to date. + m_memory.RemoveAllPageTableMappings(); + + // Because we removed host mappings, incremental updates won't work correctly. + // Start over from scratch. + m_page_table.clear(); + m_page_mappings.clear(); return; } - const auto read_page_table = [&](u32 H) { - for (u32 i = 0; i <= page_table_mask; ++i) + PageTableUpdated(std::span(page_table_view, page_table_size)); +#endif +} + +#ifndef _ARCH_32 +void MMU::ReloadPageTable() +{ + m_page_mappings.clear(); + + m_temp_page_table.clear(); + std::swap(m_page_table, m_temp_page_table); + PageTableUpdated(m_temp_page_table); +} + +void MMU::PageTableUpdated(std::span page_table) +{ + // PowerPC's priority order for PTEs that have the same logical adress is as follows: + // + // * Primary PTEs (H=0) take priority over secondary PTEs (H=1). + // * If two PTEs have equal H values, they must be in the same PTEG due to how the hash + // incorporates the logical address and H. The PTE located first in the PTEG takes priority. + + m_removed_mappings.clear(); + m_added_mappings.clear(); + + if (m_page_table.size() != page_table.size()) + { + m_page_table.clear(); + m_page_table.resize(page_table.size()); + } + + u8* old_page_table = m_page_table.data(); + u8* new_page_table = page_table.data(); + + constexpr auto compare_64_bytes = [](const u8* a, const u8* b) -> bool { +#ifdef _M_X86_64 + // MSVC (x64) doesn't want to optimize the memcmp call. This has a large performance impact + // in GameCube games that use page tables, so let's use our own vectorized version instead. 
+ const __m128i a1 = _mm_load_si128(reinterpret_cast(a)); + const __m128i b1 = _mm_load_si128(reinterpret_cast(b)); + const __m128i cmp1 = _mm_cmpeq_epi8(a1, b1); + const __m128i a2 = _mm_load_si128(reinterpret_cast(a + 0x10)); + const __m128i b2 = _mm_load_si128(reinterpret_cast(b + 0x10)); + const __m128i cmp2 = _mm_cmpeq_epi8(a2, b2); + const __m128i cmp12 = _mm_and_si128(cmp1, cmp2); + const __m128i a3 = _mm_load_si128(reinterpret_cast(a + 0x20)); + const __m128i b3 = _mm_load_si128(reinterpret_cast(b + 0x20)); + const __m128i cmp3 = _mm_cmpeq_epi8(a3, b3); + const __m128i a4 = _mm_load_si128(reinterpret_cast(a + 0x30)); + const __m128i b4 = _mm_load_si128(reinterpret_cast(b + 0x30)); + const __m128i cmp4 = _mm_cmpeq_epi8(a4, b4); + const __m128i cmp34 = _mm_and_si128(cmp3, cmp4); + const __m128i cmp1234 = _mm_and_si128(cmp12, cmp34); + return _mm_movemask_epi8(cmp1234) == 0xFFFF; +#else + return std::memcmp(std::assume_aligned<64>(a), std::assume_aligned<64>(b), 64) == 0; +#endif + }; + + constexpr auto get_page_index = [](UPTE_Lo pte1, u32 hash) { + u32 page_index_from_hash = hash ^ pte1.VSID; + if (pte1.H) + page_index_from_hash = ~page_index_from_hash; + + // Due to hash masking, the upper bits of page_index_from_hash might not match the actual + // page index. But these bits fully overlap with the API (abbreviated page index), so we can + // overwrite these bits with the API from pte1 and thereby get the correct page index. + // + // In other words: logical_address.API must be written to after logical_address.page_index! + + EffectiveAddress logical_address; + logical_address.offset = 0; + logical_address.page_index = page_index_from_hash; + logical_address.API = pte1.API; + return logical_address; + }; + + const auto fixup_shadowed_mappings = [this, &get_page_index, old_page_table, new_page_table]( + UPTE_Lo pte1, u32 page_table_offset, bool* run_pass_2) { + DEBUG_ASSERT(pte1.V == 1); + + bool switched_to_secondary = false; + + while (true) { - for (u32 j = 0; j < 8; ++j) + const u32 big_endian_pte1 = Common::swap32(pte1.Hex); + const u32 pteg_end = (page_table_offset + 64) % 64; + for (u32 i = page_table_offset; i < pteg_end; i += 8) { - const u32 pte_addr = (page_table_base | ((i & page_table_mask) << 6)) + j * 8; + if (std::memcmp(new_page_table + i, &big_endian_pte1, sizeof(big_endian_pte1)) == 0) + { + // We've found a PTE that has V set and has the same logical address as the passed-in PTE. + // The found PTE was previously skipped over because the passed-in PTE had priority, but + // the passed-in PTE is being changed, so now we need to re-check the found PTE. This will + // happen naturally later in the loop that's calling this function, but only if the 8-byte + // memcmp reports that the PTE has changed. Therefore, if the PTE currently compares + // equal, change an unused bit in the PTE. + if (std::memcmp(old_page_table + i, new_page_table + i, 8) == 0) + { + UPTE_Hi pte2(Common::swap32(old_page_table + i + 4)); + pte2.reserved_1 = pte2.reserved_1 ^ 1; + const u32 big_endian_pte2 = Common::swap32(pte2.Hex); + std::memcpy(old_page_table + i + 4, &big_endian_pte2, sizeof(big_endian_pte2)); - UPTE_Lo pte1(Common::swap32(page_table_view + pte_addr - page_table_base)); - UPTE_Hi pte2(Common::swap32(page_table_view + pte_addr - page_table_base + 4)); + if (switched_to_secondary) + *run_pass_2 = true; + } + // This PTE has priority over any later PTEs we might find, so no need to keep scanning. 
+ return; + } + } - if (!pte1.V) - continue; + if (pte1.H == 1) + { + // We've scanned the secondary PTEG. Nothing left to do. + return; + } + else + { + // We've scanned the primary PTEG. Now let's scan the secondary PTEG. + const EffectiveAddress ea = get_page_index(pte1, page_table_offset / 64); + const u32 hash = ~(pte1.VSID ^ ea.page_index); + pte1.H = 1; + page_table_offset = + (((hash & m_ppc_state.pagetable_hashmask) << 6) | m_ppc_state.pagetable_base) - + m_ppc_state.pagetable_base; + switched_to_secondary = true; + } + } + }; - if (pte1.H != H) - continue; + const auto try_add_mapping = [this, &get_page_index, page_table](UPTE_Lo pte1, UPTE_Hi pte2, + u32 page_table_offset) { + EffectiveAddress logical_address = get_page_index(pte1, page_table_offset / 64); + for (u32 i = 0; i < std::size(m_ppc_state.sr); ++i) + { + const auto sr = UReg_SR{m_ppc_state.sr[i]}; + if (sr.VSID != pte1.VSID || sr.T != 0) + continue; + + logical_address.SR = i; + + bool host_mapping = true; + + const bool wi = (pte2.WIMG & 0b1100) != 0; + if (wi) + { // There are quirks related to uncached memory that can't be correctly emulated by fast // accesses, so we don't map uncached memory. (However, no software at all is known to // trigger these quirks through page address translation, only through block address // translation.) - const bool wi = (pte2.WIMG & 0b1100) != 0; - if (wi) - continue; + host_mapping = false; + } + else if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] & + PowerPC::BAT_MAPPED_BIT) + { + // Block address translation takes priority over page address translation. + host_mapping = false; + } + else if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex, + PowerPC::HW_PAGE_SIZE)) + { + // Fast accesses don't support memchecks, so force slow accesses by removing fastmem + // mappings for all overlapping virtual pages. + host_mapping = false; + } - // Due to hash masking, the upper bits of page_index_from_hash might not match the actual - // page index. But these bits fully overlap with the API (abbreviated page index), so we can - // overwrite these bits with the API from pte1 and thereby get the correct page index. - // - // In other words: logical_address.API must be written to after logical_address.page_index! - u32 page_index_from_hash = i ^ pte1.VSID; - if (pte1.H) - page_index_from_hash = ~page_index_from_hash; - EffectiveAddress logical_address; - logical_address.offset = 0; - logical_address.page_index = page_index_from_hash; - logical_address.API = pte1.API; + const u32 priority = (page_table_offset % 64 / 8) | (pte1.H << 3); + const PageMapping page_mapping(pte2.RPN, host_mapping, priority); - for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k) + const auto it = m_page_mappings.find(logical_address.Hex); + if (it == m_page_mappings.end()) [[likely]] + { + // There's no existing mapping for this logical address. Add a new mapping. + m_page_mappings.emplace(logical_address.Hex, page_mapping); + } + else + { + if (it->second.priority < priority) { - const auto sr = UReg_SR{m_ppc_state.sr[k]}; - if (sr.VSID != pte1.VSID || sr.T != 0) - continue; + // An existing mapping has priority. + continue; + } + else + { + // The new mapping has priority over an existing mapping. Replace the existing mapping. 
+ if (it->second.host_mapping) + m_removed_mappings.emplace(it->first); + it->second.Hex = page_mapping.Hex; + } + } - logical_address.SR = k; + if (host_mapping) + { + const u32 physical_address = pte2.RPN << 12; + m_added_mappings.emplace(logical_address.Hex, physical_address); - // Block address translation takes priority over page address translation. - if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] & - PowerPC::BAT_MAPPED_BIT) - { - continue; - } + // HACK: We set R and C, which indicate whether a page have been read from and written to + // respectively, when a page is mapped rather than when it's actually accessed. The latter + // is probably possible using some fault handling logic, but for now it seems like more + // work than it's worth. + if (!pte2.R || !pte2.C) + { + pte2.R = 1; + pte2.C = 1; - // Fast accesses don't support memchecks, so force slow accesses by removing fastmem - // mappings for all overlapping virtual pages. - constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE; - if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex, logical_size)) - continue; - - const u32 physical_address = pte2.RPN << 12; - - // Important: This doesn't overwrite anything already present in m_page_mappings. - m_page_mappings.emplace(logical_address.Hex, physical_address); - - // HACK: We set R and C, which indicate whether a page have been read from and written to - // respectively, when a page is mapped rather than when it's actually accessed. The latter - // is probably possible using some fault handling logic, but for now it seems like more - // work than it's worth. - if (!pte2.R || !pte2.C) - { - pte2.R = 1; - pte2.C = 1; - - const u32 pte2_swapped = Common::swap32(pte2.Hex); - std::memcpy(page_table_view + pte_addr - page_table_base + 4, &pte2_swapped, - sizeof(pte2_swapped)); - } + const u32 pte2_swapped = Common::swap32(pte2.Hex); + std::memcpy(page_table.data() + page_table_offset + 4, &pte2_swapped, + sizeof(pte2_swapped)); } } } }; - // We need to read all H=0 PTEs first, because H=0 takes priority over H=1. - read_page_table(0); - read_page_table(1); + bool run_pass_2 = false; - m_memory.UpdatePageTableMappings(m_page_mappings); -#endif + // Pass 1: Remove old mappings and add new primary (H=0) mappings. + for (u32 i = 0; i < page_table.size(); i += 64) + { + if (compare_64_bytes(old_page_table + i, new_page_table + i)) [[likely]] + continue; + + for (u32 j = 0; j < 64; j += 8) + { + if (std::memcmp(old_page_table + i + j, new_page_table + i + j, 8) == 0) [[likely]] + continue; + + // Remove old mappings. + UPTE_Lo old_pte1(Common::swap32(old_page_table + i + j)); + if (old_pte1.V) + { + const u32 priority = (j / 8) | (old_pte1.H << 3); + EffectiveAddress logical_address = get_page_index(old_pte1, i / 64); + + for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k) + { + const auto sr = UReg_SR{m_ppc_state.sr[k]}; + if (sr.VSID != old_pte1.VSID || sr.T != 0) + continue; + + logical_address.SR = k; + + const auto it = m_page_mappings.find(logical_address.Hex); + if (it != m_page_mappings.end() && priority == it->second.priority) + { + if (it->second.host_mapping) + m_removed_mappings.emplace(logical_address.Hex); + m_page_mappings.erase(it); + + // It's unlikely but theoretically possible that this was shadowing another PTE that's + // using the same logical address but has a lower priority. If this happens, we must + // make sure that we don't skip over that other PTE because of the 8-byte memcmp. 
+ fixup_shadowed_mappings(old_pte1, i + j, &run_pass_2); + } + } + } + + // Add new primary (H=0) mappings. + UPTE_Lo new_pte1(Common::swap32(new_page_table + i + j)); + UPTE_Hi new_pte2(Common::swap32(new_page_table + i + j + 4)); + if (new_pte1.V) + { + if (new_pte1.H) + { + run_pass_2 = true; + continue; + } + + try_add_mapping(new_pte1, new_pte2, i + j); + } + + // Update our copy of the page table. + std::memcpy(old_page_table + i + j, new_page_table + i + j, 8); + } + } + + // Pass 2: Add new secondary (H=1) mappings. This is a separate pass because before we can process + // whether a mapping should be added, we first need to check all PTEs that have equal or higher + // priority to see if their mappings should be removed. For adding primary mappings, this ordering + // comes naturally from doing a linear scan of the page table from start to finish. But for adding + // secondary mappings, the primary PTEG that has priority over a given secondary PTEG is in the + // other half of the page table, so we need more than one pass through the page table. But most of + // the time, there are no secondary mappings, letting us skip the second pass. + if (run_pass_2) [[unlikely]] + { + for (u32 i = 0; i < page_table.size(); i += 64) + { + if (compare_64_bytes(old_page_table + i, new_page_table + i)) [[likely]] + continue; + + for (u32 j = 0; j < 64; j += 8) + { + if (std::memcmp(old_page_table + i + j, new_page_table + i + j, 8) == 0) [[likely]] + continue; + + UPTE_Lo new_pte1(Common::swap32(new_page_table + i + j)); + UPTE_Hi new_pte2(Common::swap32(new_page_table + i + j + 4)); + + // We don't need to check new_pte1.V and new_pte1.H. If the memcmp above returned nonzero, + // pass 1 must have skipped running memcpy, which only happens if V and H are both set. + DEBUG_ASSERT(new_pte1.V == 1); + DEBUG_ASSERT(new_pte1.H == 1); + try_add_mapping(new_pte1, new_pte2, i + j); + + std::memcpy(old_page_table + i + j, new_page_table + i + j, 8); + } + } + } + + if (!m_removed_mappings.empty()) + m_memory.RemovePageTableMappings(m_removed_mappings); + + if (!m_added_mappings.empty()) + m_memory.AddPageTableMappings(m_added_mappings); } +#endif void MMU::PageTableUpdatedFromJit(MMU* mmu) { @@ -1688,7 +1955,11 @@ void MMU::DBATUpdated() #ifndef _ARCH_32 m_memory.UpdateDBATMappings(m_dbat_table); - m_memory.UpdatePageTableMappings(m_page_mappings); + + // Calling UpdateDBATMappings removes all fastmem page table mappings, + // so we have to recreate them. + if (!m_page_table.empty()) + ReloadPageTable(); #endif // IsOptimizable*Address and dcbz depends on the BAT mapping, so we need a flush here. diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index 49d63e6c243..da79a7a267e 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -7,7 +7,10 @@ #include #include #include +#include +#include #include +#include #include "Common/BitField.h" #include "Common/CommonTypes.h" @@ -121,7 +124,7 @@ public: ~MMU(); void Reset(); - void DoState(PointerWrap& p); + void DoState(PointerWrap& p, bool sr_changed); // Routines for debugger UI, cheats, etc. to access emulated memory from the // perspective of the CPU. Not for use by core emulation routines. @@ -299,6 +302,26 @@ private: explicit EffectiveAddress(u32 address) : Hex{address} {} }; + union PageMapping + { + // A small priority number wins over a larger priority number. + BitField<0, 11, u32> priority; + // Whether we're allowed to create a host mapping for this mapping. 
+ BitField<11, 1, u32> host_mapping; + // The physical address of the page. + BitField<12, 20, u32> RPN; + + u32 Hex = 0; + + PageMapping() = default; + PageMapping(u32 RPN_, bool host_mapping_, u32 priority_) + { + RPN = RPN_; + host_mapping = host_mapping_; + priority = priority_; + } + }; + template TranslateAddressResult TranslateAddress(u32 address); @@ -310,6 +333,11 @@ private: void Memcheck(u32 address, u64 var, bool write, size_t size); +#ifndef _ARCH_32 + void ReloadPageTable(); + void PageTableUpdated(std::span page_table); +#endif + void UpdateBATs(BatTable& bat_table, u32 base_spr); void UpdateFakeMMUBat(BatTable& bat_table, u32 start_addr); @@ -327,9 +355,18 @@ private: PowerPC::PowerPCState& m_ppc_state; // STATE_TO_SAVE - std::map m_page_mappings; + std::vector m_page_table; // END STATE_TO_SAVE + // This keeps track of all valid page table mappings in m_page_table. + // The key is the logical address. + std::map m_page_mappings; + + // These are kept around just for their memory allocations. They are always cleared before use. + std::vector m_temp_page_table; + std::set m_removed_mappings; + std::map m_added_mappings; + BatTable m_ibat_table; BatTable m_dbat_table; }; diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index e361f45e4e1..d5199a396e4 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -4,6 +4,7 @@ #include "Core/PowerPC/PowerPC.h" #include +#include #include #include #include @@ -80,6 +81,8 @@ void PowerPCManager::DoState(PointerWrap& p) // *((u64 *)&TL(m_ppc_state)) = SystemTimers::GetFakeTimeBase(); //works since we are little // endian and TL comes first :) + const std::array old_sr = m_ppc_state.sr; + p.DoArray(m_ppc_state.gpr); p.Do(m_ppc_state.pc); p.Do(m_ppc_state.npc); @@ -107,10 +110,10 @@ void PowerPCManager::DoState(PointerWrap& p) m_ppc_state.dCache.DoState(memory, p); auto& mmu = m_system.GetMMU(); - mmu.DoState(p); - if (p.IsReadMode()) { + mmu.DoState(p, old_sr != m_ppc_state.sr); + if (!m_ppc_state.m_enable_dcache) { INFO_LOG_FMT(POWERPC, "Flushing data cache"); @@ -123,6 +126,10 @@ void PowerPCManager::DoState(PointerWrap& p) mmu.IBATUpdated(); mmu.DBATUpdated(); } + else + { + mmu.DoState(p, false); + } // SystemTimers::DecrementerSet(); // SystemTimers::TimeBaseSet(); diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 375604c9af3..d9fd96050dd 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -175,7 +175,7 @@ struct PowerPCState alignas(16) PairedSingle ps[32]; #endif - u32 sr[16]{}; // Segment registers. + std::array sr{}; // Segment registers. // special purpose registers - controls quantizers, DMA, and lots of other misc extensions. // also for power management, but we don't care about that. From 5bfd69039f61fd5c15acfcda03f9da4ebe71d64d Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 2 Aug 2025 20:52:48 +0200 Subject: [PATCH 6/6] Core: Don't create page table mappings before R/C bits are set This gets rid of the hack of setting the R and C bits pessimistically, reversing the performance regression in Rogue Squadron 3. 
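To illustrate the intended flow (a minimal sketch, not code from this series:
only MapInMemoryRegion's new "writeable" parameter and ChangeMappingProtection
introduced below are real, the helper names and parameters here are made up):

  #include <cstddef>

  #include "Common/MemArena.h"

  // Map a guest page into the logical fastmem region only once its R bit is
  // set, and keep the host mapping read-only until its C bit is also set.
  void MapPageOnceReferenced(Common::MemArena& arena, s64 shm_offset, size_t page_size,
                             void* host_base, bool r_bit, bool c_bit)
  {
    // No host mapping at all until the guest has actually referenced the page.
    if (!r_bit)
      return;

    // Read-only unless the page has already been written to; a write to a
    // read-only page falls back to the slow path, which sets C and upgrades
    // the mapping.
    arena.MapInMemoryRegion(shm_offset, page_size, host_base, /*writeable=*/c_bit);
  }

  // For a page that is currently mapped read-only, once its C bit gets set.
  void UpgradeToWriteable(Common::MemArena& arena, void* view, size_t page_size)
  {
    arena.ChangeMappingProtection(view, page_size, /*writeable=*/true);
  }

In the change itself, PageTableUpdated() only emits a mapping once pte2.R is
set, picking read-only or read/write based on pte2.C, and
TranslatePageAddress() creates or upgrades the mapping at the point where it
sets those bits.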
--- Source/Core/Common/MemArena.h | 14 ++++- Source/Core/Common/MemArenaAndroid.cpp | 20 +++++- Source/Core/Common/MemArenaDarwin.cpp | 22 ++++++- Source/Core/Common/MemArenaUnix.cpp | 20 +++++- Source/Core/Common/MemArenaWin.cpp | 38 ++++++++++-- Source/Core/Core/HW/Memmap.cpp | 84 ++++++++++++++++---------- Source/Core/Core/HW/Memmap.h | 9 ++- Source/Core/Core/PowerPC/MMU.cpp | 70 ++++++++++++--------- Source/Core/Core/PowerPC/MMU.h | 5 +- 9 files changed, 202 insertions(+), 80 deletions(-) diff --git a/Source/Core/Common/MemArena.h b/Source/Core/Common/MemArena.h index 021ebe98c05..11db86a634d 100644 --- a/Source/Core/Common/MemArena.h +++ b/Source/Core/Common/MemArena.h @@ -102,10 +102,22 @@ public: /// from. /// @param size Size of the region to map. /// @param base Address within the memory region from ReserveMemoryRegion() where to map it. + /// @param writeable Whether the region should be both readable and writeable, or just readable. /// /// @return The address we actually ended up mapping, which should be the given 'base'. /// - void* MapInMemoryRegion(s64 offset, size_t size, void* base); + void* MapInMemoryRegion(s64 offset, size_t size, void* base, bool writeable); + + /// + /// Changes whether a section mapped by MapInMemoryRegion is writeable. + /// + /// @param view The address returned by MapInMemoryRegion. + /// @param size The size passed to MapInMemoryRegion. + /// @param writeable Whether the region should be both readable and writeable, or just readable. + /// + /// @return Whether the operation succeeded. + /// + bool ChangeMappingProtection(void* view, size_t size, bool writeable); /// /// Unmap a memory region previously mapped with MapInMemoryRegion(). diff --git a/Source/Core/Common/MemArenaAndroid.cpp b/Source/Core/Common/MemArenaAndroid.cpp index 1db5afb91d3..81c27b7677c 100644 --- a/Source/Core/Common/MemArenaAndroid.cpp +++ b/Source/Core/Common/MemArenaAndroid.cpp @@ -123,9 +123,13 @@ void MemArena::ReleaseMemoryRegion() } } -void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) +void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base, bool writeable) { - void* retval = mmap(base, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, m_shm_fd, offset); + int prot = PROT_READ; + if (writeable) + prot |= PROT_WRITE; + + void* retval = mmap(base, size, prot, MAP_SHARED | MAP_FIXED, m_shm_fd, offset); if (retval == MAP_FAILED) { NOTICE_LOG_FMT(MEMMAP, "mmap failed"); @@ -137,6 +141,18 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) } } +bool MemArena::ChangeMappingProtection(void* view, size_t size, bool writeable) +{ + int prot = PROT_READ; + if (writeable) + prot |= PROT_WRITE; + + int retval = mprotect(view, size, prot); + if (retval != 0) + NOTICE_LOG_FMT(MEMMAP, "mprotect failed"); + return retval == 0; +} + void MemArena::UnmapFromMemoryRegion(void* view, size_t size) { void* retval = mmap(view, size, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); diff --git a/Source/Core/Common/MemArenaDarwin.cpp b/Source/Core/Common/MemArenaDarwin.cpp index 2316f5dcb56..9414535be58 100644 --- a/Source/Core/Common/MemArenaDarwin.cpp +++ b/Source/Core/Common/MemArenaDarwin.cpp @@ -123,7 +123,7 @@ void MemArena::ReleaseMemoryRegion() m_region_size = 0; } -void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) +void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base, bool writeable) { if (m_shm_address == 0) { @@ -132,11 +132,13 @@ void* MemArena::MapInMemoryRegion(s64 offset, 
size_t size, void* base) } vm_address_t address = reinterpret_cast(base); - constexpr vm_prot_t prot = VM_PROT_READ | VM_PROT_WRITE; + vm_prot_t prot = VM_PROT_READ; + if (writeable) + prot |= VM_PROT_WRITE; kern_return_t retval = vm_map(mach_task_self(), &address, size, 0, VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, m_shm_entry, - offset, false, prot, prot, VM_INHERIT_DEFAULT); + offset, false, prot, VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_DEFAULT); if (retval != KERN_SUCCESS) { ERROR_LOG_FMT(MEMMAP, "MapInMemoryRegion failed: vm_map returned {0:#x}", retval); @@ -146,6 +148,20 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) return reinterpret_cast(address); } +bool MemArena::ChangeMappingProtection(void* view, size_t size, bool writeable) +{ + vm_address_t address = reinterpret_cast(view); + vm_prot_t prot = VM_PROT_READ; + if (writeable) + prot |= VM_PROT_WRITE; + + kern_return_t retval = vm_protect(mach_task_self(), address, size, false, prot); + if (retval != KERN_SUCCESS) + ERROR_LOG_FMT(MEMMAP, "ChangeMappingProtection failed: vm_protect returned {0:#x}", retval); + + return retval == KERN_SUCCESS; +} + void MemArena::UnmapFromMemoryRegion(void* view, size_t size) { vm_address_t address = reinterpret_cast(view); diff --git a/Source/Core/Common/MemArenaUnix.cpp b/Source/Core/Common/MemArenaUnix.cpp index 13398de7613..cfd6daf324b 100644 --- a/Source/Core/Common/MemArenaUnix.cpp +++ b/Source/Core/Common/MemArenaUnix.cpp @@ -89,9 +89,13 @@ void MemArena::ReleaseMemoryRegion() } } -void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) +void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base, bool writeable) { - void* retval = mmap(base, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, m_shm_fd, offset); + int prot = PROT_READ; + if (writeable) + prot |= PROT_WRITE; + + void* retval = mmap(base, size, prot, MAP_SHARED | MAP_FIXED, m_shm_fd, offset); if (retval == MAP_FAILED) { NOTICE_LOG_FMT(MEMMAP, "mmap failed"); @@ -103,6 +107,18 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) } } +bool MemArena::ChangeMappingProtection(void* view, size_t size, bool writeable) +{ + int prot = PROT_READ; + if (writeable) + prot |= PROT_WRITE; + + int retval = mprotect(view, size, prot); + if (retval != 0) + NOTICE_LOG_FMT(MEMMAP, "mprotect failed"); + return retval == 0; +} + void MemArena::UnmapFromMemoryRegion(void* view, size_t size) { void* retval = mmap(view, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); diff --git a/Source/Core/Common/MemArenaWin.cpp b/Source/Core/Common/MemArenaWin.cpp index 51df5445d2b..adbfab3d82c 100644 --- a/Source/Core/Common/MemArenaWin.cpp +++ b/Source/Core/Common/MemArenaWin.cpp @@ -318,8 +318,10 @@ WindowsMemoryRegion* MemArena::EnsureSplitRegionForMapping(void* start_address, } } -void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) +void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base, bool writeable) { + void* result; + if (m_memory_functions.m_api_ms_win_core_memory_l1_1_6_handle.IsOpen()) { WindowsMemoryRegion* const region = EnsureSplitRegionForMapping(base, size); @@ -329,10 +331,10 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) return nullptr; } - void* rv = static_cast(m_memory_functions.m_address_MapViewOfFile3)( + result = static_cast(m_memory_functions.m_address_MapViewOfFile3)( m_memory_handle, nullptr, base, offset, size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0); - if (rv) + if 
(result) { region->m_is_mapped = true; } @@ -342,11 +344,37 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) // revert the split, if any JoinRegionsAfterUnmap(base, size); + + return nullptr; } - return rv; + } + else + { + result = + MapViewOfFileEx(m_memory_handle, FILE_MAP_ALL_ACCESS, 0, (DWORD)((u64)offset), size, base); + + if (!result) + return nullptr; } - return MapViewOfFileEx(m_memory_handle, FILE_MAP_ALL_ACCESS, 0, (DWORD)((u64)offset), size, base); + if (!writeable) + { + // If we want to use PAGE_READONLY for now while still being able to switch to PAGE_READWRITE + // later, we have to call MapViewOfFile with PAGE_READWRITE and then switch to PAGE_READONLY. + ChangeMappingProtection(base, size, writeable); + } + + return result; +} + +bool MemArena::ChangeMappingProtection(void* view, size_t size, bool writeable) +{ + DWORD old_protect; + const int retval = + VirtualProtect(view, size, writeable ? PAGE_READWRITE : PAGE_READONLY, &old_protect); + if (retval == 0) + PanicAlertFmt("VirtualProtect failed: {}", GetLastErrorString()); + return retval != 0; } bool MemArena::JoinRegionsAfterUnmap(void* start_address, size_t size) diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp index f972460d327..8297d437507 100644 --- a/Source/Core/Core/HW/Memmap.cpp +++ b/Source/Core/Core/HW/Memmap.cpp @@ -220,7 +220,7 @@ bool MemoryManager::InitFastmemArena() continue; u8* base = m_physical_base + region.physical_address; - u8* view = (u8*)m_arena.MapInMemoryRegion(region.shm_position, region.size, base); + u8* view = (u8*)m_arena.MapInMemoryRegion(region.shm_position, region.size, base, true); if (base != view) { @@ -238,7 +238,7 @@ bool MemoryManager::InitFastmemArena() void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) { - for (auto& entry : m_dbat_mapped_entries) + for (const auto& [logical_address, entry] : m_dbat_mapped_entries) { m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); } @@ -290,7 +290,7 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) u8* base = m_logical_base + logical_address + intersection_start - translated_address; u32 mapped_size = intersection_end - intersection_start; - void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base); + void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base, true); if (!mapped_pointer) { PanicAlertFmt("Memory::UpdateDBATMappings(): Failed to map memory region at 0x{:08X} " @@ -298,7 +298,8 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) intersection_start, mapped_size, logical_address); continue; } - m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address}); + m_dbat_mapped_entries.emplace(logical_address, + LogicalMemoryView{mapped_pointer, mapped_size}); } m_logical_page_mappings[i] = @@ -309,42 +310,58 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) } } -void MemoryManager::AddPageTableMappings(const std::map& mappings) +void MemoryManager::AddPageTableMapping(u32 logical_address, u32 translated_address, bool writeable) { if (m_page_size > PowerPC::HW_PAGE_SIZE) return; - for (const auto [logical_address, translated_address] : mappings) + constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE; + for (const auto& physical_region : m_physical_regions) { - constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE; - for (const auto& physical_region : m_physical_regions) + if (!physical_region.active) + 
continue; + + u32 mapping_address = physical_region.physical_address; + u32 mapping_end = mapping_address + physical_region.size; + u32 intersection_start = std::max(mapping_address, translated_address); + u32 intersection_end = std::min(mapping_end, translated_address + logical_size); + if (intersection_start < intersection_end) { - if (!physical_region.active) - continue; - - u32 mapping_address = physical_region.physical_address; - u32 mapping_end = mapping_address + physical_region.size; - u32 intersection_start = std::max(mapping_address, translated_address); - u32 intersection_end = std::min(mapping_end, translated_address + logical_size); - if (intersection_start < intersection_end) + // Found an overlapping region; map it. + if (m_is_fastmem_arena_initialized) { - // Found an overlapping region; map it. - if (m_is_fastmem_arena_initialized) - { - u32 position = physical_region.shm_position + intersection_start - mapping_address; - u8* base = m_logical_base + logical_address + intersection_start - translated_address; - u32 mapped_size = intersection_end - intersection_start; + u32 position = physical_region.shm_position + intersection_start - mapping_address; + u8* base = m_logical_base + logical_address + intersection_start - translated_address; + u32 mapped_size = intersection_end - intersection_start; - void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base); + const auto it = m_page_table_mapped_entries.find(logical_address); + if (it != m_page_table_mapped_entries.end()) + { + // Update the protection of an existing mapping. + if (it->second.mapped_pointer == base && it->second.mapped_size == mapped_size) + { + if (!m_arena.ChangeMappingProtection(base, mapped_size, writeable)) + { + PanicAlertFmt( + "Memory::AddPageTableMapping(): Failed to change protection for memory " + "region at 0x{:08X} (size 0x{:08X}, logical fastmem region at 0x{:08X}).", + intersection_start, mapped_size, logical_address); + } + } + } + else + { + // Create a new mapping. 
+ void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base, writeable); if (!mapped_pointer) { - PanicAlertFmt( - "Memory::UpdatePageTableMappings(): Failed to map memory region at 0x{:08X} " - "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.", - intersection_start, mapped_size, logical_address); + PanicAlertFmt("Memory::AddPageTableMapping(): Failed to map memory region at 0x{:08X} " + "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.", + intersection_start, mapped_size, logical_address); continue; } - m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address}); + m_page_table_mapped_entries.emplace(logical_address, + LogicalMemoryView{mapped_pointer, mapped_size}); } } } @@ -359,8 +376,9 @@ void MemoryManager::RemovePageTableMappings(const std::set& mappings) if (mappings.empty()) return; - std::erase_if(m_page_table_mapped_entries, [this, &mappings](const LogicalMemoryView& entry) { - const bool remove = mappings.contains(entry.logical_address); + std::erase_if(m_page_table_mapped_entries, [this, &mappings](const auto& pair) { + const auto& [logical_address, entry] = pair; + const bool remove = mappings.contains(logical_address); if (remove) m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); return remove; @@ -369,7 +387,7 @@ void MemoryManager::RemovePageTableMappings(const std::set& mappings) void MemoryManager::RemoveAllPageTableMappings() { - for (auto& entry : m_page_table_mapped_entries) + for (const auto& [logical_address, entry] : m_page_table_mapped_entries) { m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); } @@ -457,13 +475,13 @@ void MemoryManager::ShutdownFastmemArena() m_arena.UnmapFromMemoryRegion(base, region.size); } - for (auto& entry : m_dbat_mapped_entries) + for (const auto& [logical_address, entry] : m_dbat_mapped_entries) { m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); } m_dbat_mapped_entries.clear(); - for (auto& entry : m_page_table_mapped_entries) + for (const auto& [logical_address, entry] : m_page_table_mapped_entries) { m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); } diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h index 894caf7e0b0..605262819ac 100644 --- a/Source/Core/Core/HW/Memmap.h +++ b/Source/Core/Core/HW/Memmap.h @@ -9,7 +9,6 @@ #include #include #include -#include #include "Common/CommonTypes.h" #include "Common/MathUtil.h" @@ -56,7 +55,6 @@ struct LogicalMemoryView { void* mapped_pointer; u32 mapped_size; - u32 logical_address; }; class MemoryManager @@ -103,7 +101,7 @@ public: void DoState(PointerWrap& p); void UpdateDBATMappings(const PowerPC::BatTable& dbat_table); - void AddPageTableMappings(const std::map& mappings); + void AddPageTableMapping(u32 logical_address, u32 translated_address, bool writeable); void RemovePageTableMappings(const std::set& mappings); void RemoveAllPageTableMappings(); @@ -255,8 +253,9 @@ private: // TODO: Do we want to handle the mirrors of the GC RAM? 
std::array m_physical_regions{}; - std::vector m_dbat_mapped_entries; - std::vector m_page_table_mapped_entries; + // The key is the logical address + std::map m_dbat_mapped_entries; + std::map m_page_table_mapped_entries; std::array m_physical_page_mappings{}; std::array m_logical_page_mappings{}; diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 92cb7c5cd33..bb0466f103b 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1380,9 +1380,11 @@ void MMU::PageTableUpdated() #else if (m_ppc_state.m_enable_dcache) { - // Because fastmem isn't in use when accurate dcache emulation is enabled, setting up mappings - // would be a waste of time. Skipping setting up mappings also comes with the bonus of skipping - // the inaccurate behavior of setting the R and C bits of PTE2 as soon as a page is mapped. + // Because fastmem isn't in use when accurate dcache emulation is enabled, + // keeping track of page table updates would be a waste of time. + m_memory.RemoveAllPageTableMappings(); + m_page_table.clear(); + m_page_mappings.clear(); return; } @@ -1420,7 +1422,7 @@ void MMU::ReloadPageTable() PageTableUpdated(m_temp_page_table); } -void MMU::PageTableUpdated(std::span page_table) +void MMU::PageTableUpdated(std::span page_table) { // PowerPC's priority order for PTEs that have the same logical adress is as follows: // @@ -1429,7 +1431,8 @@ void MMU::PageTableUpdated(std::span page_table) // incorporates the logical address and H. The PTE located first in the PTEG takes priority. m_removed_mappings.clear(); - m_added_mappings.clear(); + m_added_readonly_mappings.clear(); + m_added_readwrite_mappings.clear(); if (m_page_table.size() != page_table.size()) { @@ -1438,7 +1441,7 @@ void MMU::PageTableUpdated(std::span page_table) } u8* old_page_table = m_page_table.data(); - u8* new_page_table = page_table.data(); + const u8* new_page_table = page_table.data(); constexpr auto compare_64_bytes = [](const u8* a, const u8* b) -> bool { #ifdef _M_X86_64 @@ -1537,8 +1540,8 @@ void MMU::PageTableUpdated(std::span page_table) } }; - const auto try_add_mapping = [this, &get_page_index, page_table](UPTE_Lo pte1, UPTE_Hi pte2, - u32 page_table_offset) { + const auto try_add_mapping = [this, &get_page_index](UPTE_Lo pte1, UPTE_Hi pte2, + u32 page_table_offset) { EffectiveAddress logical_address = get_page_index(pte1, page_table_offset / 64); for (u32 i = 0; i < std::size(m_ppc_state.sr); ++i) @@ -1599,24 +1602,13 @@ void MMU::PageTableUpdated(std::span page_table) } } - if (host_mapping) + // If the R bit isn't set yet, the actual host mapping will be created once + // TranslatePageAddress sets the R bit. + if (host_mapping && pte2.R) { const u32 physical_address = pte2.RPN << 12; - m_added_mappings.emplace(logical_address.Hex, physical_address); - - // HACK: We set R and C, which indicate whether a page have been read from and written to - // respectively, when a page is mapped rather than when it's actually accessed. The latter - // is probably possible using some fault handling logic, but for now it seems like more - // work than it's worth. - if (!pte2.R || !pte2.C) - { - pte2.R = 1; - pte2.C = 1; - - const u32 pte2_swapped = Common::swap32(pte2.Hex); - std::memcpy(page_table.data() + page_table_offset + 4, &pte2_swapped, - sizeof(pte2_swapped)); - } + (pte2.C ? 
m_added_readwrite_mappings : m_added_readonly_mappings) + .emplace(logical_address.Hex, physical_address); } } }; @@ -1719,8 +1711,11 @@ void MMU::PageTableUpdated(std::span page_table) if (!m_removed_mappings.empty()) m_memory.RemovePageTableMappings(m_removed_mappings); - if (!m_added_mappings.empty()) - m_memory.AddPageTableMappings(m_added_mappings); + for (const auto& [logical_address, physical_address] : m_added_readonly_mappings) + m_memory.AddPageTableMapping(logical_address, physical_address, false); + + for (const auto& [logical_address, physical_address] : m_added_readwrite_mappings) + m_memory.AddPageTableMapping(logical_address, physical_address, true); } #endif @@ -1791,6 +1786,7 @@ MMU::TranslateAddressResult MMU::TranslatePageAddress(const EffectiveAddress add if (pte1.Hex == pteg) { UPTE_Hi pte2(ReadFromHardware(pteg_addr + 4)); + const UPTE_Hi old_pte2 = pte2; // set the access bits switch (flag) @@ -1810,9 +1806,29 @@ MMU::TranslateAddressResult MMU::TranslatePageAddress(const EffectiveAddress add break; } - if (!IsNoExceptionFlag(flag)) + if (!IsNoExceptionFlag(flag) && pte2.Hex != old_pte2.Hex) { m_memory.Write_U32(pte2.Hex, pteg_addr + 4); + + const u32 page_logical_address = address.Hex & ~HW_PAGE_MASK; + const auto it = m_page_mappings.find(page_logical_address); + if (it != m_page_mappings.end()) + { + const u32 priority = (pteg_addr % 64 / 8) | (pte1.H << 3); + if (it->second.Hex == PageMapping(pte2.RPN, true, priority).Hex) + { + const u32 swapped_pte1 = Common::swap32(reinterpret_cast(&pte1)); + std::memcpy(m_page_table.data() + pteg_addr - m_ppc_state.pagetable_base, + &swapped_pte1, sizeof(swapped_pte1)); + + const u32 swapped_pte2 = Common::swap32(reinterpret_cast(&pte2)); + std::memcpy(m_page_table.data() + pteg_addr + 4 - m_ppc_state.pagetable_base, + &swapped_pte2, sizeof(swapped_pte2)); + + const u32 page_translated_address = pte2.RPN << 12; + m_memory.AddPageTableMapping(page_logical_address, page_translated_address, pte2.C); + } + } } // We already updated the TLB entry if this was caused by a C bit. diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index da79a7a267e..5a47f77a95a 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -335,7 +335,7 @@ private: #ifndef _ARCH_32 void ReloadPageTable(); - void PageTableUpdated(std::span page_table); + void PageTableUpdated(std::span page_table); #endif void UpdateBATs(BatTable& bat_table, u32 base_spr); @@ -365,7 +365,8 @@ private: // These are kept around just for their memory allocations. They are always cleared before use. std::vector m_temp_page_table; std::set m_removed_mappings; - std::map m_added_mappings; + std::map m_added_readonly_mappings; + std::map m_added_readwrite_mappings; BatTable m_ibat_table; BatTable m_dbat_table;
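For reference, the priority value stored in PageMapping and compared in
PageTableUpdated() and TranslatePageAddress() encodes the PowerPC lookup order
described at the top of PageTableUpdated(). A minimal sketch of the encoding
(the helper is illustrative, the patch computes the value inline):

  #include <cstdint>

  // Lower values win: a PTE in the primary PTEG (H=0) always beats one in the
  // secondary PTEG (H=1), and within a PTEG the PTE that comes first wins.
  constexpr std::uint32_t MakePriority(std::uint32_t byte_offset_in_pteg, std::uint32_t h_bit)
  {
    // Each PTE is 8 bytes, so byte_offset_in_pteg is 0..56 in steps of 8.
    // The slot index occupies bits 0-2 and the H bit lands in bit 3.
    return (byte_offset_in_pteg / 8) | (h_bit << 3);
  }

  // The last slot of the primary PTEG still outranks the first slot of the
  // secondary PTEG.
  static_assert(MakePriority(56, 0) < MakePriority(0, 1));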