diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.cpp b/rpcs3/Emu/RSX/Common/TextureUtils.cpp index 43068bf723..3cfc3aad3b 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.cpp +++ b/rpcs3/Emu/RSX/Common/TextureUtils.cpp @@ -234,49 +234,42 @@ struct copy_unmodified_block_swizzled if (std::is_same_v && dst_pitch_in_block == width_in_block && words_per_block == 1 && !border) { rsx::convert_linear_swizzle_3d(src.data(), dst.data(), width_in_block, row_count, depth); + return; + } + + u32 padded_width, padded_height; + if (border) + { + padded_width = rsx::next_pow2(width_in_block + border + border); + padded_height = rsx::next_pow2(row_count + border + border); } else { - u32 padded_width, padded_height; - if (border) - { - padded_width = rsx::next_pow2(width_in_block + border + border); - padded_height = rsx::next_pow2(row_count + border + border); - } - else - { - padded_width = width_in_block; - padded_height = row_count; - } - - const u32 size_in_block = padded_width * padded_height * depth * 2; - rsx::simple_array tmp(size_in_block * words_per_block); - - if (words_per_block == 1) [[likely]] - { - rsx::convert_linear_swizzle_3d(src.data(), tmp.data(), padded_width, padded_height, depth); - } - else - { - switch (words_per_block * sizeof(T)) - { - case 4: - rsx::convert_linear_swizzle_3d(src.data(), tmp.data(), padded_width, padded_height, depth); - break; - case 8: - rsx::convert_linear_swizzle_3d(src.data(), tmp.data(), padded_width, padded_height, depth); - break; - case 16: - rsx::convert_linear_swizzle_3d(src.data(), tmp.data(), padded_width, padded_height, depth); - break; - default: - fmt::throw_exception("Failed to decode swizzled format, words_per_block=%d, src_type_size=%d", words_per_block, sizeof(T)); - } - } - - std::span src_span = tmp; - copy_unmodified_block::copy_mipmap_level(dst, src_span, words_per_block, width_in_block, row_count, depth, border, dst_pitch_in_block, padded_width); + padded_width = width_in_block; + padded_height = row_count; } + + const u32 size_in_block = padded_width * padded_height * depth * 2; + rsx::simple_array tmp(size_in_block * words_per_block); + + switch (const u16 block_size = words_per_block * sizeof(T)) + { + case 1: + rsx::convert_linear_swizzle_3d(src.data(), tmp.data(), padded_width, padded_height, depth); + break; + case 2: + rsx::convert_linear_swizzle_3d(src.data(), tmp.data(), padded_width, padded_height, depth); + break; + case 4: + case 8: + case 16: + // Maximum block size on RSX is 4 bytes. Wider blocks are stored as multiple texels. + rsx::convert_linear_swizzle_3d(src.data(), tmp.data(), padded_width * (block_size / 4), padded_height, depth); + break; + } + + std::span src_span = tmp; + copy_unmodified_block::copy_mipmap_level(dst, src_span, words_per_block, width_in_block, row_count, depth, border, dst_pitch_in_block, padded_width); } }; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 6f566b9a82..729dbf1a5e 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -56,19 +56,12 @@ namespace gl data_length, width, height, depth, 1); break; case 4: + case 8: + case 16: + // RSX quirk - texels wider than 32 bits are treated as multiple texels! gl::get_compute_task>()->run( cmd, dst, dst_offset, src, src_offset, - data_length, width, height, depth, 1); - break; - case 8: - gl::get_compute_task>()->run( - cmd, dst, dst_offset, src, src_offset, - data_length, width, height, depth, 1); - break; - case 16: - gl::get_compute_task>()->run( - cmd, dst, dst_offset, src, src_offset, - data_length, width, height, depth, 1); + data_length, width * (block_size / 4), height, depth, 1); break; default: fmt::throw_exception("Unreachable"); diff --git a/rpcs3/Emu/RSX/VK/VKTexture.cpp b/rpcs3/Emu/RSX/VK/VKTexture.cpp index eaf5c0b710..4893e7ea8f 100644 --- a/rpcs3/Emu/RSX/VK/VKTexture.cpp +++ b/rpcs3/Emu/RSX/VK/VKTexture.cpp @@ -754,21 +754,31 @@ namespace vk if (src != dst) dst->pop_layout(cmd); } - template - cs_deswizzle_base* get_deswizzle_transformation(u32 block_size) + template + cs_deswizzle_base* get_deswizzle_transformation_swapped(bool swap_bytes) + { + if (swap_bytes) [[ likely ]] + { + return vk::get_compute_task>(); + } + + return vk::get_compute_task>(); + } + + template + cs_deswizzle_base* get_deswizzle_transformation(u32 block_size, bool swap_bytes) { switch (block_size) { case 1: - return vk::get_compute_task>(); + return get_deswizzle_transformation_swapped(swap_bytes); case 2: - return vk::get_compute_task>(); + return get_deswizzle_transformation_swapped(swap_bytes); case 4: - return vk::get_compute_task>(); case 8: - return vk::get_compute_task>(); case 16: - return vk::get_compute_task>(); + // Maximum block size on RSX is 4 bytes. Wider blocks are stored as multiple texels. + return get_deswizzle_transformation_swapped(swap_bytes); default: fmt::throw_exception("Unreachable"); } @@ -778,37 +788,22 @@ namespace vk { // NOTE: This has to be done individually for every LOD vk::cs_deswizzle_base* job = nullptr; - const auto block_size = (word_size * word_count); + const u32 block_size = (word_size * word_count); + const u32 scale_x = std::max(block_size / 4u, 1u); // Virtual width multiplier. RSX only does texel sizes upto 32 bits. - if (!swap_bytes) + switch (word_size) { - switch (word_size) - { - case 1: - job = get_deswizzle_transformation(block_size); - break; - case 2: - job = get_deswizzle_transformation(block_size); - break; - case 4: - job = get_deswizzle_transformation(block_size); - break; - default: - fmt::throw_exception("Unimplemented deswizzle for format."); - } - } - else - { - ensure(word_size == 2 || word_size == 4); - - if (word_size == 4) - { - job = get_deswizzle_transformation(block_size); - } - else - { - job = get_deswizzle_transformation(block_size); - } + case 1: + job = get_deswizzle_transformation(block_size, swap_bytes); + break; + case 2: + job = get_deswizzle_transformation(block_size, swap_bytes); + break; + case 4: + job = get_deswizzle_transformation(block_size, swap_bytes); + break; + default: + fmt::throw_exception("Unimplemented deswizzle for format."); } ensure(job); @@ -868,7 +863,7 @@ namespace vk const u32 src_off32 = static_cast(src_offset); job->run(cmd, scratch_buf, buf_off32, scratch_buf, src_off32, data_length, - section.imageExtent.width, section.imageExtent.height, section.imageExtent.depth, packet.second); + section.imageExtent.width * scale_x, section.imageExtent.height, section.imageExtent.depth, packet.second); } ensure(dst_offset <= scratch_buf->size());