mirror of
https://github.com/RPCS3/rpcs3.git
synced 2026-03-26 20:38:33 -06:00
gl: Properly initialize memory layout definition structs
- Also refactors the upload_texture function to be a bit more readable
This commit is contained in:
parent
a2eb4fec53
commit
e437cae575
@ -717,169 +717,168 @@ namespace gl
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
else
|
||||
|
||||
std::pair<void*, u32> upload_scratch_mem = {}, compute_scratch_mem = {};
|
||||
image_memory_requirements mem_info;
|
||||
pixel_buffer_layout mem_layout;
|
||||
|
||||
std::span<std::byte> dst_buffer = staging_buffer;
|
||||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||
u64 image_linear_size = staging_buffer.size();
|
||||
|
||||
const auto min_required_buffer_size = std::max<u64>(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000);
|
||||
|
||||
if (driver_caps.ARB_compute_shader_supported)
|
||||
{
|
||||
std::pair<void*, u32> upload_scratch_mem = {}, compute_scratch_mem = {};
|
||||
image_memory_requirements mem_info;
|
||||
pixel_buffer_layout mem_layout;
|
||||
|
||||
std::span<std::byte> dst_buffer = staging_buffer;
|
||||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||
u64 image_linear_size = staging_buffer.size();
|
||||
|
||||
const auto min_required_buffer_size = std::max<u64>(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000);
|
||||
|
||||
if (driver_caps.ARB_compute_shader_supported)
|
||||
if (g_upload_transfer_buffer.size() < static_cast<GLsizeiptr>(min_required_buffer_size))
|
||||
{
|
||||
if (g_upload_transfer_buffer.size() < static_cast<GLsizeiptr>(min_required_buffer_size))
|
||||
{
|
||||
g_upload_transfer_buffer.remove();
|
||||
g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size);
|
||||
}
|
||||
|
||||
if (g_compute_decode_buffer.size() < min_required_buffer_size)
|
||||
{
|
||||
g_compute_decode_buffer.remove();
|
||||
g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size);
|
||||
}
|
||||
g_upload_transfer_buffer.remove();
|
||||
g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size);
|
||||
}
|
||||
|
||||
for (const rsx::subresource_layout& layout : input_layouts)
|
||||
if (g_compute_decode_buffer.size() < min_required_buffer_size)
|
||||
{
|
||||
if (driver_caps.ARB_compute_shader_supported)
|
||||
g_compute_decode_buffer.remove();
|
||||
g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size);
|
||||
}
|
||||
}
|
||||
|
||||
for (const rsx::subresource_layout& layout : input_layouts)
|
||||
{
|
||||
if (driver_caps.ARB_compute_shader_supported)
|
||||
{
|
||||
u64 row_pitch = rsx::align2<u64, u64>(layout.width_in_block * block_size_in_bytes, caps.alignment);
|
||||
|
||||
// We're in the "else" branch, so "is_compressed_host_format()" is always false.
|
||||
// Handle emulated compressed formats with host unpack (R8G8 compressed)
|
||||
row_pitch = std::max<u64>(row_pitch, dst->pitch());
|
||||
|
||||
// FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch.
|
||||
image_linear_size = row_pitch * layout.height_in_texel * layout.depth;
|
||||
|
||||
compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast<u32>(image_linear_size), 256) };
|
||||
compute_scratch_mem.first = reinterpret_cast<void*>(static_cast<uintptr_t>(compute_scratch_mem.second));
|
||||
|
||||
g_upload_transfer_buffer.reserve_storage_on_heap(static_cast<u32>(image_linear_size));
|
||||
upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast<u32>(image_linear_size), 256);
|
||||
dst_buffer = { reinterpret_cast<std::byte*>(upload_scratch_mem.first), image_linear_size };
|
||||
}
|
||||
|
||||
rsx::io_buffer io_buf = dst_buffer;
|
||||
caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024);
|
||||
auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
|
||||
|
||||
// Define upload region
|
||||
coord3u region;
|
||||
region.x = 0;
|
||||
region.y = 0;
|
||||
region.z = layout.layer;
|
||||
region.width = layout.width_in_texel;
|
||||
region.height = layout.height_in_texel;
|
||||
region.depth = layout.depth;
|
||||
|
||||
if (!driver_caps.ARB_compute_shader_supported)
|
||||
{
|
||||
unpack_settings.swap_bytes(op.require_swap);
|
||||
dst->copy_from(staging_buffer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 0. Preconf
|
||||
mem_layout.alignment = static_cast<u8>(caps.alignment);
|
||||
mem_layout.swap_bytes = op.require_swap;
|
||||
mem_layout.format = gl_format;
|
||||
mem_layout.type = gl_type;
|
||||
mem_layout.block_size = block_size_in_bytes;
|
||||
|
||||
// 2. Upload memory to GPU
|
||||
if (!op.require_deswizzle)
|
||||
{
|
||||
g_upload_transfer_buffer.unmap();
|
||||
g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2.1 Copy data to deswizzle buf
|
||||
if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size)
|
||||
{
|
||||
u64 row_pitch = rsx::align2<u64, u64>(layout.width_in_block * block_size_in_bytes, caps.alignment);
|
||||
|
||||
// We're in the "else" branch, so "is_compressed_host_format()" is always false.
|
||||
// Handle emulated compressed formats with host unpack (R8G8 compressed)
|
||||
row_pitch = std::max<u64>(row_pitch, dst->pitch());
|
||||
|
||||
// FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch.
|
||||
image_linear_size = row_pitch * layout.height_in_texel * layout.depth;
|
||||
|
||||
compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast<u32>(image_linear_size), 256) };
|
||||
compute_scratch_mem.first = reinterpret_cast<void*>(static_cast<uintptr_t>(compute_scratch_mem.second));
|
||||
|
||||
g_upload_transfer_buffer.reserve_storage_on_heap(static_cast<u32>(image_linear_size));
|
||||
upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast<u32>(image_linear_size), 256);
|
||||
dst_buffer = { reinterpret_cast<std::byte*>(upload_scratch_mem.first), image_linear_size };
|
||||
g_deswizzle_scratch_buffer.remove();
|
||||
g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size);
|
||||
}
|
||||
|
||||
rsx::io_buffer io_buf = dst_buffer;
|
||||
caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024);
|
||||
auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps);
|
||||
u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast<u32>(image_linear_size), 256);
|
||||
g_upload_transfer_buffer.unmap();
|
||||
g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast<u32>(image_linear_size));
|
||||
|
||||
// Define upload region
|
||||
coord3u region;
|
||||
region.x = 0;
|
||||
region.y = 0;
|
||||
region.z = layout.layer;
|
||||
region.width = layout.width_in_texel;
|
||||
region.height = layout.height_in_texel;
|
||||
region.depth = layout.depth;
|
||||
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
|
||||
const auto block_size = op.element_size * op.block_length;
|
||||
|
||||
if (driver_caps.ARB_compute_shader_supported)
|
||||
if (op.require_swap)
|
||||
{
|
||||
// 0. Preconf
|
||||
mem_layout.alignment = static_cast<u8>(caps.alignment);
|
||||
mem_layout.swap_bytes = op.require_swap;
|
||||
mem_layout.format = gl_format;
|
||||
mem_layout.type = gl_type;
|
||||
mem_layout.block_size = block_size_in_bytes;
|
||||
mem_layout.swap_bytes = false;
|
||||
|
||||
// 2. Upload memory to GPU
|
||||
if (!op.require_deswizzle)
|
||||
switch (op.element_size)
|
||||
{
|
||||
g_upload_transfer_buffer.unmap();
|
||||
g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size);
|
||||
case 1:
|
||||
do_deswizzle_transformation<u8, true>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
case 2:
|
||||
do_deswizzle_transformation<u16, true>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
case 4:
|
||||
do_deswizzle_transformation<u32, true>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unimplemented element size deswizzle");
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2.1 Copy data to deswizzle buf
|
||||
if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size)
|
||||
{
|
||||
g_deswizzle_scratch_buffer.remove();
|
||||
g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size);
|
||||
}
|
||||
|
||||
u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast<u32>(image_linear_size), 256);
|
||||
g_upload_transfer_buffer.unmap();
|
||||
g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast<u32>(image_linear_size));
|
||||
|
||||
// 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem
|
||||
const auto block_size = op.element_size * op.block_length;
|
||||
|
||||
if (op.require_swap)
|
||||
{
|
||||
mem_layout.swap_bytes = false;
|
||||
|
||||
switch (op.element_size)
|
||||
{
|
||||
case 1:
|
||||
do_deswizzle_transformation<u8, true>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
case 2:
|
||||
do_deswizzle_transformation<u16, true>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
case 4:
|
||||
do_deswizzle_transformation<u32, true>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unimplemented element size deswizzle");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (op.element_size)
|
||||
{
|
||||
case 1:
|
||||
do_deswizzle_transformation<u8, false>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
case 2:
|
||||
do_deswizzle_transformation<u16, false>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
case 4:
|
||||
do_deswizzle_transformation<u32, false>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unimplemented element size deswizzle");
|
||||
}
|
||||
}
|
||||
|
||||
// Barrier
|
||||
g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast<u32>(image_linear_size));
|
||||
}
|
||||
|
||||
// 3. Update configuration
|
||||
mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes;
|
||||
mem_info.image_size_in_bytes = image_linear_size;
|
||||
mem_info.memory_required = 0;
|
||||
|
||||
// 4. Dispatch compute routines
|
||||
copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info);
|
||||
|
||||
// Barrier
|
||||
g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast<u32>(image_linear_size));
|
||||
}
|
||||
else
|
||||
{
|
||||
unpack_settings.swap_bytes(op.require_swap);
|
||||
dst->copy_from(staging_buffer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
|
||||
switch (op.element_size)
|
||||
{
|
||||
case 1:
|
||||
do_deswizzle_transformation<u8, false>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
case 2:
|
||||
do_deswizzle_transformation<u16, false>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
case 4:
|
||||
do_deswizzle_transformation<u32, false>(cmd, block_size,
|
||||
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset,
|
||||
static_cast<u32>(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth);
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unimplemented element size deswizzle");
|
||||
}
|
||||
}
|
||||
|
||||
// Barrier
|
||||
g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast<u32>(image_linear_size));
|
||||
}
|
||||
|
||||
// 3. Update configuration
|
||||
mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes;
|
||||
mem_info.image_size_in_bytes = image_linear_size;
|
||||
mem_info.memory_required = 0;
|
||||
|
||||
// 4. Dispatch compute routines
|
||||
copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info);
|
||||
|
||||
// Barrier
|
||||
g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast<u32>(image_linear_size));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -14,20 +14,20 @@ namespace gl
|
||||
{
|
||||
struct pixel_buffer_layout
|
||||
{
|
||||
GLenum format;
|
||||
GLenum type;
|
||||
u32 row_length;
|
||||
u8 block_size;
|
||||
bool swap_bytes;
|
||||
u8 alignment;
|
||||
GLenum format = GL_RGBA;
|
||||
GLenum type = GL_UNSIGNED_BYTE;
|
||||
u32 row_length = 0;
|
||||
u8 block_size = 0;
|
||||
bool swap_bytes = false;
|
||||
u8 alignment = 0;
|
||||
u8 reserved;
|
||||
};
|
||||
|
||||
struct image_memory_requirements
|
||||
{
|
||||
u64 image_size_in_texels;
|
||||
u64 image_size_in_bytes;
|
||||
u64 memory_required;
|
||||
u64 image_size_in_texels = 0;
|
||||
u64 image_size_in_bytes = 0;
|
||||
u64 memory_required = 0;
|
||||
};
|
||||
|
||||
struct clear_cmd_info
|
||||
|
||||
Loading…
Reference in New Issue
Block a user