This commit is contained in:
Fabio Arnold 2026-04-16 20:34:39 +02:00 committed by GitHub
commit 2a438af76b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 33 additions and 23 deletions

View File

@ -76,6 +76,14 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON)
# Turn on interprocedural optimization (IPO/LTO) by default for the Release and RelWithDebInfo configurations
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO ON)
# ENABLE_SANITIZERS: opt-in build with AddressSanitizer and UndefinedBehaviorSanitizer.
# Example: cmake -DENABLE_SANITIZERS=ON ...
# The flags below use the GCC/Clang spelling, so configuration stops early on any other
# toolchain instead of handing the compiler and linker options they do not understand.
option(ENABLE_SANITIZERS "Enable AddressSanitizer and UndefinedBehaviorSanitizer" OFF)
if (ENABLE_SANITIZERS)
    if (NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
        message(FATAL_ERROR "ENABLE_SANITIZERS requires a GCC- or Clang-compatible compiler (detected: ${CMAKE_CXX_COMPILER_ID})")
    endif()
    # Instrument every compiled source; the vptr check is switched off and frame pointers are kept
    add_compile_options(-fsanitize=address,undefined -fno-sanitize=vptr -fno-omit-frame-pointer)
    # The link step needs the same sanitizer selection so the matching runtimes are linked in
    add_link_options(-fsanitize=address,undefined -fno-sanitize=vptr)
    # metal-cpp headers are incompatible with ASan/UBSan (Protocol ambiguity in objc/runtime.h),
    # so Objective-C++ sources get the instrumentation turned back off (later flag overrides earlier)
    add_compile_options($<$<COMPILE_LANGUAGE:OBJCXX>:-fno-sanitize=address,undefined>)
endif()
if (MSVC)
set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT CemuBin)
# floating point model: precise, fiber safe optimizations

View File

@ -33,7 +33,7 @@ static bool checkAdditionOverflow(uint32 x, uint32 y, uint32 r)
static void PPCInterpreter_ADD(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB];
hCPU->gpr[rD] = hCPU->gpr[rA] + hCPU->gpr[rB];
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);
PPCInterpreter_nextInstruction(hCPU);
@ -123,7 +123,7 @@ static void PPCInterpreter_ADDI(PPCInterpreter_t* hCPU, uint32 opcode)
sint32 rD, rA;
uint32 imm;
PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm);
hCPU->gpr[rD] = (rA ? (int)hCPU->gpr[rA] : 0) + (int)imm;
hCPU->gpr[rD] = (rA ? hCPU->gpr[rA] : 0u) + imm;
PPCInterpreter_nextInstruction(hCPU);
}
@ -435,7 +435,7 @@ static void PPCInterpreter_MULHWU_(PPCInterpreter_t* hCPU, uint32 opcode)
static void PPCInterpreter_MULLW(PPCInterpreter_t* hCPU, uint32 opcode)
{
PPC_OPC_TEMPL3_XO();
sint64 result = (sint64)hCPU->gpr[rA] * (sint64)hCPU->gpr[rB];
sint64 result = (sint64)(sint32)hCPU->gpr[rA] * (sint64)(sint32)hCPU->gpr[rB];
hCPU->gpr[rD] = (uint32)result;
if (opHasRC())
ppc_update_cr0(hCPU, hCPU->gpr[rD]);

View File

@ -1190,7 +1190,7 @@ bool PPCRecompilerImlGen_SRAWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco
// calculate CA first
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regTmp, regS, 31); // signMask = input >> 31 (arithmetic shift)
ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regTmp, regTmp, regS); // testValue = input & signMask & ((1<<SH)-1)
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, ((1 << SH) - 1));
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, (sint32)((1u << SH) - 1u));
ppcImlGenContext->emitInst().make_compare_s32(regTmp, 0, regCarry, IMLCondition::NEQ); // ca = (testValue != 0)
// do the actual shift
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, (sint32)SH);

View File

@ -149,21 +149,27 @@ void optimizedDecodeLoop_tm04_numSamples1_8x8_optimizedRowCopy(LatteTextureLoade
// bpp = 32
if (texelBaseTypeCount == 1)
{
uint64* blockOutput64 = (uint64*)blockOutput;
uint64* blockData64 = (uint64*)blockData;
if (isEncodeDirection)
{
blockData64[0] = blockOutput64[0];
blockData64[1] = blockOutput64[1];
blockData64[4] = blockOutput64[2];
blockData64[5] = blockOutput64[3];
blockData[0] = blockOutput[0];
blockData[1] = blockOutput[1];
blockData[2] = blockOutput[2];
blockData[3] = blockOutput[3];
blockData[8] = blockOutput[4];
blockData[9] = blockOutput[5];
blockData[10] = blockOutput[6];
blockData[11] = blockOutput[7];
}
else
{
blockOutput64[0] = blockData64[0];
blockOutput64[1] = blockData64[1];
blockOutput64[2] = blockData64[4];
blockOutput64[3] = blockData64[5];
blockOutput[0] = blockData[0];
blockOutput[1] = blockData[1];
blockOutput[2] = blockData[2];
blockOutput[3] = blockData[3];
blockOutput[4] = blockData[8];
blockOutput[5] = blockData[9];
blockOutput[6] = blockData[10];
blockOutput[7] = blockData[11];
}
blockOutput += 8;
}
@ -175,12 +181,10 @@ void optimizedDecodeLoop_tm04_numSamples1_8x8_optimizedRowCopy(LatteTextureLoade
// bpp = 8
if (texelBaseTypeCount == 1)
{
uint64* blockOutput64 = (uint64*)blockOutput;
uint64* blockData64 = (uint64*)blockData;
if (isEncodeDirection)
blockData64[0] = blockOutput64[0];
memcpy(blockData, blockOutput, 8);
else
blockOutput64[0] = blockData64[0];
memcpy(blockOutput, blockData, 8);
blockOutput += 8;
}
else

View File

@ -12,7 +12,7 @@ namespace LatteAddrLib
{
if (!_Mask)
return 0;
*_Index = 31 - __builtin_clzl(_Mask);
*_Index = 31 - __builtin_clz(_Mask);
return 1;
}
#endif

View File

@ -88,8 +88,6 @@ typedef struct
static_assert(sizeof(crt_t) == 0x1D8, "");
#pragma pack(1)
namespace coreinit
{
@ -275,6 +273,8 @@ namespace coreinit
/* +0x04 */ MEMPTR<struct OSMutex> prev;
};
static_assert(sizeof(OSFastMutexLink) == 0x8);
struct OSFastMutex
{
/* +0x00 */ uint32be magic;
@ -620,8 +620,6 @@ namespace coreinit
void __OSDeleteAllActivePPCThreads();
}
#pragma pack()
// deprecated / clean up required
extern MPTR activeThread[256];
extern sint32 activeThreadCount;