gl: Properly initialize memory layout definition structs

- Also refactors the upload_texture function to be a bit more readable
This commit is contained in:
kd-11 2026-03-22 22:06:44 +03:00
parent a2eb4fec53
commit e437cae575
2 changed files with 151 additions and 152 deletions

View File

@ -717,169 +717,168 @@ namespace gl
}
}
}
return;
}
else
std::pair<void*, u32> upload_scratch_mem = {}, compute_scratch_mem = {};
image_memory_requirements mem_info;
pixel_buffer_layout mem_layout;
std::span<std::byte> dst_buffer = staging_buffer;
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
u64 image_linear_size = staging_buffer.size();
const auto min_required_buffer_size = std::max<u64>(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000);
if (driver_caps.ARB_compute_shader_supported)
{
std::pair<void*, u32> upload_scratch_mem = {}, compute_scratch_mem = {};
image_memory_requirements mem_info;
pixel_buffer_layout mem_layout;
std::span<std::byte> dst_buffer = staging_buffer;
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
u64 image_linear_size = staging_buffer.size();
const auto min_required_buffer_size = std::max<u64>(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000);
if (driver_caps.ARB_compute_shader_supported)
if (g_upload_transfer_buffer.size() < static_cast<GLsizeiptr>(min_required_buffer_size))
{
if (g_upload_transfer_buffer.size() < static_cast<GLsizeiptr>(min_required_buffer_size))
{
g_upload_transfer_buffer.remove();
g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size);
}
if (g_compute_decode_buffer.size() < min_required_buffer_size)
{
g_compute_decode_buffer.remove();
g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size);
}
g_upload_transfer_buffer.remove();
g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size);
}
for (const rsx::subresource_layout& layout : input_layouts)
if (g_compute_decode_buffer.size() < min_required_buffer_size)
{
if (driver_caps.ARB_compute_shader_supported)
g_compute_decode_buffer.remove();
g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size);
}
}
for (const rsx::subresource_layout& layout : input_layouts)
{
if (driver_caps.ARB_compute_shader_supported)
{
u64 row_pitch = rsx::align2<u64, u64>(layout.width_in_block * block_size_in_bytes, caps.alignment);
// We're in the "else" branch, so "is_compressed_host_format()" is always false.
// Handle emulated compressed formats with host unpack (R8G8 compressed)
row_pitch = std::max<u64>(row_pitch, dst->pitch());
// FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch.
image_linear_size = row_pitch * layout.height_in_texel * layout.depth;
compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast<u32>(image_linear_size), 256) };
compute_scratch_mem.first = reinterpret_cast<void*>(static_cast<uintptr_t>(compute_scratch_mem.second));
g_upload_transfer_buffer.reserve_storage_on_heap(static_cast<u32>(image_linear_size));
upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast<u32>(image_linear_size), 256);
dst_buffer = { reinterpret_cast<std::byte*>(upload_scratch_mem.first), image_linear_size };
}
rsx::io_buffer io_buf = dst_buffer;
caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024);
auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
// Define upload region
coord3u region;
region.x = 0;
region.y = 0;
region.z = layout.layer;
region.width = layout.width_in_texel;
region.height = layout.height_in_texel;
region.depth = layout.depth;
if (!driver_caps.ARB_compute_shader_supported)
{
unpack_settings.swap_bytes(op.require_swap);
dst->copy_from(staging_buffer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
continue;
}
// 0. Preconf
mem_layout.alignment = static_cast<u8>(caps.alignment);
mem_layout.swap_bytes = op.require_swap;
mem_layout.format = gl_format;
mem_layout.type = gl_type;
mem_layout.block_size = block_size_in_bytes;
// 2. Upload memory to GPU
if (!op.require_deswizzle)
{
g_upload_transfer_buffer.unmap();
g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size);
}
else
{
// 2.1 Copy data to deswizzle buf
if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size)
{
u64 row_pitch = rsx::align2<u64, u64>(layout.width_in_block * block_size_in_bytes, caps.alignment);
// We're in the "else" branch, so "is_compressed_host_format()" is always false.
// Handle emulated compressed formats with host unpack (R8G8 compressed)
row_pitch = std::max<u64>(row_pitch, dst->pitch());
// FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch.
image_linear_size = row_pitch * layout.height_in_texel * layout.depth;
compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast<u32>(image_linear_size), 256) };
compute_scratch_mem.first = reinterpret_cast<void*>(static_cast<uintptr_t>(compute_scratch_mem.second));
g_upload_transfer_buffer.reserve_storage_on_heap(static_cast<u32>(image_linear_size));
upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast<u32>(image_linear_size), 256);
dst_buffer = { reinterpret_cast<std::byte*>(upload_scratch_mem.first), image_linear_size };
g_deswizzle_scratch_buffer.remove();
g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size);
}
rsx::io_buffer io_buf = dst_buffer;
caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024);
auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast<u32>(image_linear_size), 256);
g_upload_transfer_buffer.unmap();
g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast<u32>(image_linear_size));
// Define upload region
coord3u region;
region.x = 0;
region.y = 0;
region.z = layout.layer;
region.width = layout.width_in_texel;
region.height = layout.height_in_texel;
region.depth = layout.depth;
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
const auto block_size = op.element_size * op.block_length;
if (driver_caps.ARB_compute_shader_supported)
if (op.require_swap)
{
// 0. Preconf
mem_layout.alignment = static_cast<u8>(caps.alignment);
mem_layout.swap_bytes = op.require_swap;
mem_layout.format = gl_format;
mem_layout.type = gl_type;
mem_layout.block_size = block_size_in_bytes;
mem_layout.swap_bytes = false;
// 2. Upload memory to GPU
if (!op.require_deswizzle)
switch (op.element_size)
{
g_upload_transfer_buffer.unmap();
g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size);
case 1:
do_deswizzle_transformation<u8, true>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
case 2:
do_deswizzle_transformation<u16, true>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
case 4:
do_deswizzle_transformation<u32, true>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
default:
fmt::throw_exception("Unimplemented element size deswizzle");
}
else
{
// 2.1 Copy data to deswizzle buf
if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size)
{
g_deswizzle_scratch_buffer.remove();
g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size);
}
u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast<u32>(image_linear_size), 256);
g_upload_transfer_buffer.unmap();
g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast<u32>(image_linear_size));
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
const auto block_size = op.element_size * op.block_length;
if (op.require_swap)
{
mem_layout.swap_bytes = false;
switch (op.element_size)
{
case 1:
do_deswizzle_transformation<u8, true>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
case 2:
do_deswizzle_transformation<u16, true>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
case 4:
do_deswizzle_transformation<u32, true>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
default:
fmt::throw_exception("Unimplemented element size deswizzle");
}
}
else
{
switch (op.element_size)
{
case 1:
do_deswizzle_transformation<u8, false>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
case 2:
do_deswizzle_transformation<u16, false>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
case 4:
do_deswizzle_transformation<u32, false>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
default:
fmt::throw_exception("Unimplemented element size deswizzle");
}
}
// Barrier
g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast<u32>(image_linear_size));
}
// 3. Update configuration
mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes;
mem_info.image_size_in_bytes = image_linear_size;
mem_info.memory_required = 0;
// 4. Dispatch compute routines
copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info);
// Barrier
g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast<u32>(image_linear_size));
}
else
{
unpack_settings.swap_bytes(op.require_swap);
dst->copy_from(staging_buffer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
switch (op.element_size)
{
case 1:
do_deswizzle_transformation<u8, false>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
case 2:
do_deswizzle_transformation<u16, false>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
case 4:
do_deswizzle_transformation<u32, false>(cmd, block_size,
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
break;
default:
fmt::throw_exception("Unimplemented element size deswizzle");
}
}
// Barrier
g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast<u32>(image_linear_size));
}
// 3. Update configuration
mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes;
mem_info.image_size_in_bytes = image_linear_size;
mem_info.memory_required = 0;
// 4. Dispatch compute routines
copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info);
// Barrier
g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast<u32>(image_linear_size));
}
}

View File

@ -14,20 +14,20 @@ namespace gl
{
// Describes how a block of pixel data stored in a buffer is laid out.
// Every member carries a default initializer so that a default-constructed
// instance never holds indeterminate values.
struct pixel_buffer_layout
{
	GLenum format = GL_RGBA;         // GL pixel format of the buffered data
	GLenum type = GL_UNSIGNED_BYTE;  // GL component type of the buffered data
	u32 row_length = 0;              // NOTE(review): presumably 0 means "tightly packed rows" - confirm against users
	u8 block_size = 0;               // Size of one format block in bytes
	bool swap_bytes = false;         // True when the data still requires a byte swap
	u8 alignment = 0;                // Row alignment of the source data; NOTE(review): assumed to be in bytes - confirm
	u8 reserved = 0;                 // Unused; zeroed so the whole struct has a deterministic value
};
// Size information describing an image held in a linear buffer.
// All members default to zero so that a default-constructed instance is fully defined.
struct image_memory_requirements
{
	u64 image_size_in_texels = 0;  // Image size expressed in format blocks (byte size / block size)
	u64 image_size_in_bytes = 0;   // Linear size of the image data in bytes
	u64 memory_required = 0;       // NOTE(review): presumably extra scratch memory needed for the transfer - confirm
};
struct clear_cmd_info