From f26e716686d19ec098e5427c74d7d66b06f32ba3 Mon Sep 17 00:00:00 2001 From: Fabio Arnold Date: Sun, 29 Mar 2026 15:20:57 +0200 Subject: [PATCH] LatteAddrLib: fix misaligned pointer UB in optimizedDecodeLoop_tm04 - bpp=32: blockOutput is texelBaseType* (uint32_t, 4-byte aligned). Casting to uint64* and dereferencing is UB when the address is not 8-byte aligned (e.g. 0x...c is 4-byte aligned but not 8-byte). - bpp=8: blockOutput is texelBaseType* (uint8_t, 1-byte aligned). Casting to uint64* is UB for any address not divisible by 8. --- .../HW/Latte/LatteAddrLib/AddrLibFastDecode.h | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h b/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h index b54d6038..fa735226 100644 --- a/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h +++ b/src/Cafe/HW/Latte/LatteAddrLib/AddrLibFastDecode.h @@ -149,21 +149,27 @@ void optimizedDecodeLoop_tm04_numSamples1_8x8_optimizedRowCopy(LatteTextureLoade // bpp = 32 if (texelBaseTypeCount == 1) { - uint64* blockOutput64 = (uint64*)blockOutput; - uint64* blockData64 = (uint64*)blockData; if (isEncodeDirection) { - blockData64[0] = blockOutput64[0]; - blockData64[1] = blockOutput64[1]; - blockData64[4] = blockOutput64[2]; - blockData64[5] = blockOutput64[3]; + blockData[0] = blockOutput[0]; + blockData[1] = blockOutput[1]; + blockData[2] = blockOutput[2]; + blockData[3] = blockOutput[3]; + blockData[8] = blockOutput[4]; + blockData[9] = blockOutput[5]; + blockData[10] = blockOutput[6]; + blockData[11] = blockOutput[7]; } else { - blockOutput64[0] = blockData64[0]; - blockOutput64[1] = blockData64[1]; - blockOutput64[2] = blockData64[4]; - blockOutput64[3] = blockData64[5]; + blockOutput[0] = blockData[0]; + blockOutput[1] = blockData[1]; + blockOutput[2] = blockData[2]; + blockOutput[3] = blockData[3]; + blockOutput[4] = blockData[8]; + blockOutput[5] = blockData[9]; + blockOutput[6] = blockData[10]; + blockOutput[7] = blockData[11]; } blockOutput += 8; } @@ -175,12 +181,10 @@ void optimizedDecodeLoop_tm04_numSamples1_8x8_optimizedRowCopy(LatteTextureLoade // bpp = 8 if (texelBaseTypeCount == 1) { - uint64* blockOutput64 = (uint64*)blockOutput; - uint64* blockData64 = (uint64*)blockData; if (isEncodeDirection) - blockData64[0] = blockOutput64[0]; + memcpy(blockData, blockOutput, 8); else - blockOutput64[0] = blockData64[0]; + memcpy(blockOutput, blockData, 8); blockOutput += 8; } else