From 271e8b8adce75db9ebd5c23bbec57c186d5b7956 Mon Sep 17 00:00:00 2001 From: Fabio Arnold Date: Sat, 21 Mar 2026 11:32:50 +0100 Subject: [PATCH 1/8] CMake: add option for sanitizers --- CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index a00879fe..d0e99edf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,6 +76,14 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO ON) +option(ENABLE_SANITIZERS "Enable AddressSanitizer and UndefinedBehaviorSanitizer" OFF) +if (ENABLE_SANITIZERS) + add_compile_options(-fsanitize=address,undefined -fno-sanitize=vptr -fno-omit-frame-pointer) + add_link_options(-fsanitize=address,undefined -fno-sanitize=vptr) + # metal-cpp headers are incompatible with ASan/UBSan (Protocol ambiguity in objc/runtime.h) + add_compile_options($<$:-fno-sanitize=address,undefined>) +endif() + if (MSVC) set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT CemuBin) # floating point model: precise, fiber safe optimizations From ff8d78d2ebd8687ebec5ec1977364b13cee8fb6f Mon Sep 17 00:00:00 2001 From: Fabio Arnold Date: Sat, 21 Mar 2026 11:41:56 +0100 Subject: [PATCH 2/8] Core: remove obsolete `#pragma pack(1)` prevent misaligned pointer use by aligning tightly packed structs to 4 --- src/Cafe/OS/libs/coreinit/coreinit_Thread.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Cafe/OS/libs/coreinit/coreinit_Thread.h b/src/Cafe/OS/libs/coreinit/coreinit_Thread.h index a0517d9a..f369f800 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_Thread.h +++ b/src/Cafe/OS/libs/coreinit/coreinit_Thread.h @@ -88,8 +88,6 @@ typedef struct static_assert(sizeof(crt_t) == 0x1D8, ""); -#pragma pack(1) - namespace coreinit { @@ -275,6 +273,8 @@ namespace coreinit /* +0x04 */ MEMPTR prev; }; + static_assert(sizeof(OSFastMutexLink) == 0x8); + struct OSFastMutex { /* +0x00 
*/ uint32be magic; @@ -620,8 +620,6 @@ namespace coreinit void __OSDeleteAllActivePPCThreads(); } -#pragma pack() - // deprecated / clean up required extern MPTR activeThread[256]; extern sint32 activeThreadCount; From 8e2cf6c7b2a913b4d8dc8605df791c31467f241b Mon Sep 17 00:00:00 2001 From: Fabio Arnold Date: Wed, 18 Mar 2026 22:29:09 +0100 Subject: [PATCH 3/8] PPC: use unsigned int where wrapping is defined wrapping of signed integers is undefined behavior --- src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp index 2fe07509..5b3919d0 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp @@ -123,7 +123,7 @@ static void PPCInterpreter_ADDI(PPCInterpreter_t* hCPU, uint32 opcode) sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - hCPU->gpr[rD] = (rA ? (int)hCPU->gpr[rA] : 0) + (int)imm; + hCPU->gpr[rD] = (rA ? 
hCPU->gpr[rA] : 0u) + imm; PPCInterpreter_nextInstruction(hCPU); } From 245f7ed2703f133cd6c1865eab5a6ee00afcdc18 Mon Sep 17 00:00:00 2001 From: Fabio Arnold Date: Wed, 18 Mar 2026 22:46:03 +0100 Subject: [PATCH 4/8] PPC: fix undefined sint underflow --- src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index e76a53fa..186caf49 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -1190,7 +1190,7 @@ bool PPCRecompilerImlGen_SRAWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // calculate CA first ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regTmp, regS, 31); // signMask = input >> 31 (arithmetic shift) ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regTmp, regTmp, regS); // testValue = input & signMask & ((1<<SH)-1) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, ((1 << SH) - 1)); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, (sint32)((1u << SH) - 1u)); ppcImlGenContext->emitInst().make_compare_s32(regTmp, 0, regCarry, IMLCondition::NEQ); // ca = (testValue != 0) // do the actual shift ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, (sint32)SH); From fbff89d03b117cfa745f2875d1e9ed6bf1fb8a44 Mon Sep 17 00:00:00 2001 From: Fabio Arnold Date: Wed, 18 Mar 2026 23:05:17 +0100 Subject: [PATCH 5/8] PPC: fix UB sint overflow --- src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp index 5b3919d0..4b53ecfd 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp +++ 
b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp @@ -33,7 +33,7 @@ static bool checkAdditionOverflow(uint32 x, uint32 y, uint32 r) static void PPCInterpreter_ADD(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); - hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB]; + hCPU->gpr[rD] = hCPU->gpr[rA] + hCPU->gpr[rB]; if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); From bc21013d217d7c11903a5651667603efa941e7b1 Mon Sep 17 00:00:00 2001 From: Fabio Arnold Date: Wed, 18 Mar 2026 23:06:39 +0100 Subject: [PATCH 6/8] PPC: sign extend sint32 operands instead of zero extending --- src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp index 4b53ecfd..8b3c358c 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp @@ -435,7 +435,7 @@ static void PPCInterpreter_MULHWU_(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_MULLW(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); - sint64 result = (sint64)hCPU->gpr[rA] * (sint64)hCPU->gpr[rB]; + sint64 result = (sint64)(sint32)hCPU->gpr[rA] * (sint64)(sint32)hCPU->gpr[rB]; hCPU->gpr[rD] = (uint32)result; if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); From d4857e874966999e15c23ff87af7c1391a39bd38 Mon Sep 17 00:00:00 2001 From: Fabio Arnold Date: Wed, 18 Mar 2026 22:56:44 +0100 Subject: [PATCH 7/8] Latte: use 32-bit version of __builtin_clz __builtin_clzl is the 64-bit version. For example, for _Mask 32 it calculated 31 - 58 = -27, which cast to uint32 gives 4294967269. --- src/Cafe/HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Cafe/HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp b/src/Cafe/HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp index 26da5ec1..38d5abdd 
100644 --- a/src/Cafe/HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp +++ b/src/Cafe/HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp @@ -12,7 +12,7 @@ namespace LatteAddrLib { if (!_Mask) return 0; - *_Index = 31 - __builtin_clzl(_Mask); + *_Index = 31 - __builtin_clz(_Mask); return 1; } #endif From f26e716686d19ec098e5427c74d7d66b06f32ba3 Mon Sep 17 00:00:00 2001 From: Fabio Arnold Date: Sun, 29 Mar 2026 15:20:57 +0200 Subject: [PATCH 8/8] LatteAddrLib: fix misaligned pointer UB in optimizedDecodeLoop_tm04 - bpp=32: blockOutput is texelBaseType* (uint32_t, 4-byte aligned). Casting to uint64* and dereferencing is UB when the address is not 8-byte aligned (e.g. 0x...c is 4-byte aligned but not 8-byte). - bpp=8: blockOutput is texelBaseType* (uint8_t, 1-byte aligned). Casting to uint64* is UB for any address not divisible by 8. --- .../HW/Latte/LatteAddrLib/AddrLibFastDecode.h | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h b/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h index b54d6038..fa735226 100644 --- a/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h +++ b/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h @@ -149,21 +149,27 @@ void optimizedDecodeLoop_tm04_numSamples1_8x8_optimizedRowCopy(LatteTextureLoade // bpp = 32 if (texelBaseTypeCount == 1) { - uint64* blockOutput64 = (uint64*)blockOutput; - uint64* blockData64 = (uint64*)blockData; if (isEncodeDirection) { - blockData64[0] = blockOutput64[0]; - blockData64[1] = blockOutput64[1]; - blockData64[4] = blockOutput64[2]; - blockData64[5] = blockOutput64[3]; + blockData[0] = blockOutput[0]; + blockData[1] = blockOutput[1]; + blockData[2] = blockOutput[2]; + blockData[3] = blockOutput[3]; + blockData[8] = blockOutput[4]; + blockData[9] = blockOutput[5]; + blockData[10] = blockOutput[6]; + blockData[11] = blockOutput[7]; } else { - blockOutput64[0] = blockData64[0]; - blockOutput64[1] = blockData64[1]; - blockOutput64[2] 
= blockData64[4]; - blockOutput64[3] = blockData64[5]; + blockOutput[0] = blockData[0]; + blockOutput[1] = blockData[1]; + blockOutput[2] = blockData[2]; + blockOutput[3] = blockData[3]; + blockOutput[4] = blockData[8]; + blockOutput[5] = blockData[9]; + blockOutput[6] = blockData[10]; + blockOutput[7] = blockData[11]; } blockOutput += 8; } @@ -175,12 +181,10 @@ void optimizedDecodeLoop_tm04_numSamples1_8x8_optimizedRowCopy(LatteTextureLoade // bpp = 8 if (texelBaseTypeCount == 1) { - uint64* blockOutput64 = (uint64*)blockOutput; - uint64* blockData64 = (uint64*)blockData; if (isEncodeDirection) - blockData64[0] = blockOutput64[0]; + memcpy(blockData, blockOutput, 8); else - blockOutput64[0] = blockData64[0]; + memcpy(blockOutput, blockData, 8); blockOutput += 8; } else