rsx: Fix deswizzle of wide texel formats

This commit is contained in:
kd-11 2026-04-24 02:18:08 +03:00 committed by kd-11
parent b3cc01387f
commit 3574677b66
3 changed files with 68 additions and 87 deletions

View File

@ -234,49 +234,42 @@ struct copy_unmodified_block_swizzled
if (std::is_same_v<T, U> && dst_pitch_in_block == width_in_block && words_per_block == 1 && !border)
{
rsx::convert_linear_swizzle_3d<T>(src.data(), dst.data(), width_in_block, row_count, depth);
return;
}
u32 padded_width, padded_height;
if (border)
{
padded_width = rsx::next_pow2(width_in_block + border + border);
padded_height = rsx::next_pow2(row_count + border + border);
}
else
{
u32 padded_width, padded_height;
if (border)
{
padded_width = rsx::next_pow2(width_in_block + border + border);
padded_height = rsx::next_pow2(row_count + border + border);
}
else
{
padded_width = width_in_block;
padded_height = row_count;
}
const u32 size_in_block = padded_width * padded_height * depth * 2;
rsx::simple_array<U, sizeof(u128)> tmp(size_in_block * words_per_block);
if (words_per_block == 1) [[likely]]
{
rsx::convert_linear_swizzle_3d<T>(src.data(), tmp.data(), padded_width, padded_height, depth);
}
else
{
switch (words_per_block * sizeof(T))
{
case 4:
rsx::convert_linear_swizzle_3d<u32>(src.data(), tmp.data(), padded_width, padded_height, depth);
break;
case 8:
rsx::convert_linear_swizzle_3d<u64>(src.data(), tmp.data(), padded_width, padded_height, depth);
break;
case 16:
rsx::convert_linear_swizzle_3d<u128>(src.data(), tmp.data(), padded_width, padded_height, depth);
break;
default:
fmt::throw_exception("Failed to decode swizzled format, words_per_block=%d, src_type_size=%d", words_per_block, sizeof(T));
}
}
std::span<const U> src_span = tmp;
copy_unmodified_block::copy_mipmap_level(dst, src_span, words_per_block, width_in_block, row_count, depth, border, dst_pitch_in_block, padded_width);
padded_width = width_in_block;
padded_height = row_count;
}
const u32 size_in_block = padded_width * padded_height * depth * 2;
rsx::simple_array<U, sizeof(u128)> tmp(size_in_block * words_per_block);
switch (const u16 block_size = words_per_block * sizeof(T))
{
case 1:
rsx::convert_linear_swizzle_3d<u8>(src.data(), tmp.data(), padded_width, padded_height, depth);
break;
case 2:
rsx::convert_linear_swizzle_3d<u16>(src.data(), tmp.data(), padded_width, padded_height, depth);
break;
case 4:
case 8:
case 16:
// Maximum block size on RSX is 4 bytes. Wider blocks are stored as multiple texels.
rsx::convert_linear_swizzle_3d<u32>(src.data(), tmp.data(), padded_width * (block_size / 4), padded_height, depth);
break;
}
std::span<const U> src_span = tmp;
copy_unmodified_block::copy_mipmap_level(dst, src_span, words_per_block, width_in_block, row_count, depth, border, dst_pitch_in_block, padded_width);
}
};

View File

@ -56,19 +56,12 @@ namespace gl
data_length, width, height, depth, 1);
break;
case 4:
case 8:
case 16:
// RSX quirk - texels wider than 32 bits are treated as multiple texels!
gl::get_compute_task<gl::cs_deswizzle_3d<u32, WordType, SwapBytes>>()->run(
cmd, dst, dst_offset, src, src_offset,
data_length, width, height, depth, 1);
break;
case 8:
gl::get_compute_task<gl::cs_deswizzle_3d<u64, WordType, SwapBytes>>()->run(
cmd, dst, dst_offset, src, src_offset,
data_length, width, height, depth, 1);
break;
case 16:
gl::get_compute_task<gl::cs_deswizzle_3d<u128, WordType, SwapBytes>>()->run(
cmd, dst, dst_offset, src, src_offset,
data_length, width, height, depth, 1);
data_length, width * (block_size / 4), height, depth, 1);
break;
default:
fmt::throw_exception("Unreachable");

View File

@ -754,21 +754,31 @@ namespace vk
if (src != dst) dst->pop_layout(cmd);
}
template <typename WordType, bool SwapBytes>
cs_deswizzle_base* get_deswizzle_transformation(u32 block_size)
// Maps the runtime swap_bytes flag onto the compile-time third template
// argument of cs_deswizzle_3d and returns the matching compute task, as
// obtained from vk::get_compute_task, through its cs_deswizzle_base pointer.
//   BaseType  - forwarded as the first template argument of cs_deswizzle_3d.
//   BlockType - forwarded as the second template argument of cs_deswizzle_3d.
template <typename BaseType, typename BlockType>
cs_deswizzle_base* get_deswizzle_transformation_swapped(bool swap_bytes)
{
	// The non-swapping variant is the uncommon request; handle it first and bail out.
	if (!swap_bytes) [[ unlikely ]]
	{
		return vk::get_compute_task<cs_deswizzle_3d<BaseType, BlockType, false>>();
	}

	return vk::get_compute_task<cs_deswizzle_3d<BaseType, BlockType, true>>();
}
template <typename WordType>
cs_deswizzle_base* get_deswizzle_transformation(u32 block_size, bool swap_bytes)
{
switch (block_size)
{
case 1:
return vk::get_compute_task<cs_deswizzle_3d<u8, u8, false>>();
return get_deswizzle_transformation_swapped<u8, u8>(swap_bytes);
case 2:
return vk::get_compute_task<cs_deswizzle_3d<u16, WordType, SwapBytes>>();
return get_deswizzle_transformation_swapped<u16, WordType>(swap_bytes);
case 4:
return vk::get_compute_task<cs_deswizzle_3d<u32, WordType, SwapBytes>>();
case 8:
return vk::get_compute_task<cs_deswizzle_3d<u64, WordType, SwapBytes>>();
case 16:
return vk::get_compute_task<cs_deswizzle_3d<u128, WordType, SwapBytes>>();
// Maximum block size on RSX is 4 bytes. Wider blocks are stored as multiple texels.
return get_deswizzle_transformation_swapped<u32, WordType>(swap_bytes);
default:
fmt::throw_exception("Unreachable");
}
@ -778,37 +788,22 @@ namespace vk
{
// NOTE: This has to be done individually for every LOD
vk::cs_deswizzle_base* job = nullptr;
const auto block_size = (word_size * word_count);
const u32 block_size = (word_size * word_count);
const u32 scale_x = std::max(block_size / 4u, 1u); // Virtual width multiplier. RSX only does texel sizes upto 32 bits.
if (!swap_bytes)
switch (word_size)
{
switch (word_size)
{
case 1:
job = get_deswizzle_transformation<u8, false>(block_size);
break;
case 2:
job = get_deswizzle_transformation<u16, false>(block_size);
break;
case 4:
job = get_deswizzle_transformation<u32, false>(block_size);
break;
default:
fmt::throw_exception("Unimplemented deswizzle for format.");
}
}
else
{
ensure(word_size == 2 || word_size == 4);
if (word_size == 4)
{
job = get_deswizzle_transformation<u32, true>(block_size);
}
else
{
job = get_deswizzle_transformation<u16, true>(block_size);
}
case 1:
job = get_deswizzle_transformation<u8>(block_size, swap_bytes);
break;
case 2:
job = get_deswizzle_transformation<u16>(block_size, swap_bytes);
break;
case 4:
job = get_deswizzle_transformation<u32>(block_size, swap_bytes);
break;
default:
fmt::throw_exception("Unimplemented deswizzle for format.");
}
ensure(job);
@ -868,7 +863,7 @@ namespace vk
const u32 src_off32 = static_cast<u32>(src_offset);
job->run(cmd, scratch_buf, buf_off32, scratch_buf, src_off32, data_length,
section.imageExtent.width, section.imageExtent.height, section.imageExtent.depth, packet.second);
section.imageExtent.width * scale_x, section.imageExtent.height, section.imageExtent.depth, packet.second);
}
ensure(dst_offset <= scratch_buf->size());