From 09554c43baaad391b8679fe6d55a23babbd2dec8 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 10 Apr 2026 02:50:43 +0300 Subject: [PATCH] rsx: Allow DXT texture decoding to gracefully fall back to unaligned memory addresses --- rpcs3/Emu/RSX/Common/TextureUtils.cpp | 64 ++++++++++++++++++++------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 8cf3466bb9..43068bf723 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -7,6 +7,12 @@ #include "util/asm.hpp" +// Unaligned u128 alias +union x128 +{ + u8 _u8[16]; +}; + namespace utils { template @@ -520,14 +526,14 @@ struct copy_decoded_bc1_block struct copy_decoded_bc2_block { - static void copy_mipmap_level(std::span dst, std::span src, u16 width_in_block, u32 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) + static void copy_mipmap_level(std::span dst, std::span src, u16 width_in_block, u32 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) { u32 src_offset = 0, dst_offset = 0, destinationPitch = dst_pitch_in_block * 4; for (u32 row = 0; row < row_count * depth; row++) { for (u32 col = 0; col < width_in_block; col++) { - const u8* compressedBlock = reinterpret_cast(&src[src_offset + col]); + const u8* compressedBlock = src[src_offset + col]._u8; u8* decompressedBlock = reinterpret_cast(&dst[dst_offset + col * 4]); bcdec_bc2(compressedBlock, decompressedBlock, destinationPitch); } @@ -540,14 +546,14 @@ struct copy_decoded_bc2_block struct copy_decoded_bc3_block { - static void copy_mipmap_level(std::span dst, std::span src, u16 width_in_block, u32 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) + static void copy_mipmap_level(std::span dst, std::span src, u16 width_in_block, u32 row_count, u16 depth, u32 dst_pitch_in_block, u32 src_pitch_in_block) { u32 src_offset = 0, dst_offset = 0, destinationPitch = dst_pitch_in_block * 4; for (u32 row = 0; row < row_count * depth; row++) { for (u32 col = 0; col < width_in_block; col++) { - const u8* compressedBlock = reinterpret_cast(&src[src_offset + col]); + const u8* compressedBlock = src[src_offset + col]._u8; u8* decompressedBlock = reinterpret_cast(&dst[dst_offset + col * 4]); bcdec_bc3(compressedBlock, decompressedBlock, destinationPitch); } @@ -1039,22 +1045,25 @@ namespace rsx // This is only supported using Nvidia OpenGL. // Remove the VTC tiling to support ATI and Vulkan. copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else if (is_3d && !is_po2 && caps.supports_vtc_decoding) + + if (is_3d && !is_po2 && caps.supports_vtc_decoding) { // In this case, hardware expects us to feed it a VTC input, but on PS3 we only have a linear one. // We need to compress the 2D-planar DXT input into a VTC output copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else if (caps.supports_zero_copy) + + if (caps.supports_zero_copy) { result.require_upload = true; result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), 8, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else - { - copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); - } + + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); break; } @@ -1062,7 +1071,7 @@ namespace rsx { if (!caps.supports_dxt) { - copy_decoded_bc2_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_decoded_bc2_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); break; } [[fallthrough]]; @@ -1071,7 +1080,7 @@ namespace rsx { if (!caps.supports_dxt) { - copy_decoded_bc3_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + copy_decoded_bc3_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); break; } @@ -1083,23 +1092,44 @@ namespace rsx // PS3 uses the Nvidia VTC memory layout for compressed 3D textures. // This is only supported using Nvidia OpenGL. // Remove the VTC tiling to support ATI and Vulkan. - copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + if (src_layout.data.is_naturally_aligned()) + { + copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; + } + + copy_unmodified_block_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else if (is_3d && !is_po2 && caps.supports_vtc_decoding) + + if (is_3d && !is_po2 && caps.supports_vtc_decoding) { // In this case, hardware expects us to feed it a VTC input, but on PS3 we only have a linear one. // We need to compress the 2D-planar DXT input into a VTC output - copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + if (src_layout.data.is_naturally_aligned()) + { + copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; + } + + copy_linear_block_to_vtc::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), w, h, depth, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else if (caps.supports_zero_copy) + + if (caps.supports_zero_copy) { result.require_upload = true; result.deferred_cmds = build_transfer_cmds(src_layout.data.data(), 16, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } - else + + if (src_layout.data.is_naturally_aligned()) { copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); + break; } + + copy_unmodified_block::copy_mipmap_level(dst_buffer.as_span(), src_layout.data.as_span(), 1, w, h, depth, 0, get_row_pitch_in_block(w, caps.alignment), src_layout.pitch_in_block); break; }