diff --git a/CMakeLists.txt b/CMakeLists.txt index a00879fe..d0e99edf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,6 +76,14 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON) set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO ON) +option(ENABLE_SANITIZERS "Enable AddressSanitizer and UndefinedBehaviorSanitizer" OFF) +if (ENABLE_SANITIZERS) + add_compile_options(-fsanitize=address,undefined -fno-sanitize=vptr -fno-omit-frame-pointer) + add_link_options(-fsanitize=address,undefined -fno-sanitize=vptr) + # metal-cpp headers are incompatible with ASan/UBSan (Protocol ambiguity in objc/runtime.h) + add_compile_options($<$:-fno-sanitize=address,undefined>) +endif() + if (MSVC) set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT CemuBin) # floating point model: precise, fiber safe optimizations diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp index 2fe07509..8b3c358c 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp @@ -33,7 +33,7 @@ static bool checkAdditionOverflow(uint32 x, uint32 y, uint32 r) static void PPCInterpreter_ADD(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); - hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB]; + hCPU->gpr[rD] = hCPU->gpr[rA] + hCPU->gpr[rB]; if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -123,7 +123,7 @@ static void PPCInterpreter_ADDI(PPCInterpreter_t* hCPU, uint32 opcode) sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - hCPU->gpr[rD] = (rA ? (int)hCPU->gpr[rA] : 0) + (int)imm; + hCPU->gpr[rD] = (rA ? 
hCPU->gpr[rA] : 0u) + imm; PPCInterpreter_nextInstruction(hCPU); } @@ -435,7 +435,7 @@ static void PPCInterpreter_MULHWU_(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_MULLW(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); - sint64 result = (sint64)hCPU->gpr[rA] * (sint64)hCPU->gpr[rB]; + sint64 result = (sint64)(sint32)hCPU->gpr[rA] * (sint64)(sint32)hCPU->gpr[rB]; hCPU->gpr[rD] = (uint32)result; if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index e76a53fa..186caf49 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -1190,7 +1190,7 @@ bool PPCRecompilerImlGen_SRAWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco // calculate CA first ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regTmp, regS, 31); // signMask = input >> 31 (arithmetic shift) ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regTmp, regTmp, regS); // testValue = input & signMask & ((1<<SH)-1) - ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, ((1 << SH) - 1)); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, (sint32)((1u << SH) - 1u)); ppcImlGenContext->emitInst().make_compare_s32(regTmp, 0, regCarry, IMLCondition::NEQ); // ca = (testValue != 0) // do the actual shift ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, (sint32)SH); diff --git a/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h b/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h index b54d6038..fa735226 100644 --- a/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h +++ b/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h @@ -149,21 +149,27 @@ void optimizedDecodeLoop_tm04_numSamples1_8x8_optimizedRowCopy(LatteTextureLoade // bpp = 32 if (texelBaseTypeCount == 1) { - uint64* blockOutput64 = 
(uint64*)blockOutput; - uint64* blockData64 = (uint64*)blockData; if (isEncodeDirection) { - blockData64[0] = blockOutput64[0]; - blockData64[1] = blockOutput64[1]; - blockData64[4] = blockOutput64[2]; - blockData64[5] = blockOutput64[3]; + blockData[0] = blockOutput[0]; + blockData[1] = blockOutput[1]; + blockData[2] = blockOutput[2]; + blockData[3] = blockOutput[3]; + blockData[8] = blockOutput[4]; + blockData[9] = blockOutput[5]; + blockData[10] = blockOutput[6]; + blockData[11] = blockOutput[7]; } else { - blockOutput64[0] = blockData64[0]; - blockOutput64[1] = blockData64[1]; - blockOutput64[2] = blockData64[4]; - blockOutput64[3] = blockData64[5]; + blockOutput[0] = blockData[0]; + blockOutput[1] = blockData[1]; + blockOutput[2] = blockData[2]; + blockOutput[3] = blockData[3]; + blockOutput[4] = blockData[8]; + blockOutput[5] = blockData[9]; + blockOutput[6] = blockData[10]; + blockOutput[7] = blockData[11]; } blockOutput += 8; } @@ -175,12 +181,10 @@ void optimizedDecodeLoop_tm04_numSamples1_8x8_optimizedRowCopy(LatteTextureLoade // bpp = 8 if (texelBaseTypeCount == 1) { - uint64* blockOutput64 = (uint64*)blockOutput; - uint64* blockData64 = (uint64*)blockData; if (isEncodeDirection) - blockData64[0] = blockOutput64[0]; + memcpy(blockData, blockOutput, 8); else - blockOutput64[0] = blockData64[0]; + memcpy(blockOutput, blockData, 8); blockOutput += 8; } else diff --git a/src/Cafe/HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp b/src/Cafe/HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp index 26da5ec1..38d5abdd 100644 --- a/src/Cafe/HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp +++ b/src/Cafe/HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp @@ -12,7 +12,7 @@ namespace LatteAddrLib { if (!_Mask) return 0; - *_Index = 31 - __builtin_clzl(_Mask); + *_Index = 31 - __builtin_clz(_Mask); return 1; } #endif diff --git a/src/Cafe/OS/libs/coreinit/coreinit_Thread.h b/src/Cafe/OS/libs/coreinit/coreinit_Thread.h index a0517d9a..f369f800 100644 --- 
a/src/Cafe/OS/libs/coreinit/coreinit_Thread.h +++ b/src/Cafe/OS/libs/coreinit/coreinit_Thread.h @@ -88,8 +88,6 @@ typedef struct static_assert(sizeof(crt_t) == 0x1D8, ""); -#pragma pack(1) - namespace coreinit { @@ -275,6 +273,8 @@ namespace coreinit /* +0x04 */ MEMPTR prev; }; + static_assert(sizeof(OSFastMutexLink) == 0x8); + struct OSFastMutex { /* +0x00 */ uint32be magic; @@ -620,8 +620,6 @@ namespace coreinit void __OSDeleteAllActivePPCThreads(); } -#pragma pack() - // deprecated / clean up required extern MPTR activeThread[256]; extern sint32 activeThreadCount;