From e437cae5755ddbaa76e39d891759593bf68e7aa7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 22 Mar 2026 22:06:44 +0300 Subject: [PATCH] gl: Properly initialize memory layout definition structs - Also refactors the upload_texture function to be a bit more readable --- rpcs3/Emu/RSX/GL/GLTexture.cpp | 285 ++++++++++++++++----------------- rpcs3/Emu/RSX/GL/GLTexture.h | 18 +-- 2 files changed, 151 insertions(+), 152 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 64e451e579..6f566b9a82 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -717,169 +717,168 @@ namespace gl } } } + + return; } - else + + std::pair upload_scratch_mem = {}, compute_scratch_mem = {}; + image_memory_requirements mem_info; + pixel_buffer_layout mem_layout; + + std::span dst_buffer = staging_buffer; + u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); + u64 image_linear_size = staging_buffer.size(); + + const auto min_required_buffer_size = std::max(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000); + + if (driver_caps.ARB_compute_shader_supported) { - std::pair upload_scratch_mem = {}, compute_scratch_mem = {}; - image_memory_requirements mem_info; - pixel_buffer_layout mem_layout; - - std::span dst_buffer = staging_buffer; - u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); - u64 image_linear_size = staging_buffer.size(); - - const auto min_required_buffer_size = std::max(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000); - - if (driver_caps.ARB_compute_shader_supported) + if (g_upload_transfer_buffer.size() < static_cast(min_required_buffer_size)) { - if (g_upload_transfer_buffer.size() < static_cast(min_required_buffer_size)) - { - g_upload_transfer_buffer.remove(); - g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size); - } - - if (g_compute_decode_buffer.size() < min_required_buffer_size) - { - 
g_compute_decode_buffer.remove(); - g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); - } + g_upload_transfer_buffer.remove(); + g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, min_required_buffer_size); } - for (const rsx::subresource_layout& layout : input_layouts) + if (g_compute_decode_buffer.size() < min_required_buffer_size) { - if (driver_caps.ARB_compute_shader_supported) + g_compute_decode_buffer.remove(); + g_compute_decode_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); + } + } + + for (const rsx::subresource_layout& layout : input_layouts) + { + if (driver_caps.ARB_compute_shader_supported) + { + u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); + + // The preceding branch returns early, so (as in the former "else" branch) "is_compressed_host_format()" is always false here. + // Handle emulated compressed formats with host unpack (R8G8 compressed) + row_pitch = std::max(row_pitch, dst->pitch()); + + // FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch. 
+ image_linear_size = row_pitch * layout.height_in_texel * layout.depth; + + compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast(image_linear_size), 256) }; + compute_scratch_mem.first = reinterpret_cast(static_cast(compute_scratch_mem.second)); + + g_upload_transfer_buffer.reserve_storage_on_heap(static_cast(image_linear_size)); + upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast(image_linear_size), 256); + dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; + } + + rsx::io_buffer io_buf = dst_buffer; + caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024); + auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); + + // Define upload region + coord3u region; + region.x = 0; + region.y = 0; + region.z = layout.layer; + region.width = layout.width_in_texel; + region.height = layout.height_in_texel; + region.depth = layout.depth; + + if (!driver_caps.ARB_compute_shader_supported) + { + unpack_settings.swap_bytes(op.require_swap); + dst->copy_from(staging_buffer, static_cast(gl_format), static_cast(gl_type), layout.level, region, unpack_settings); + continue; + } + + // 0. Preconf + mem_layout.alignment = static_cast(caps.alignment); + mem_layout.swap_bytes = op.require_swap; + mem_layout.format = gl_format; + mem_layout.type = gl_type; + mem_layout.block_size = block_size_in_bytes; + + // 2. 
Upload memory to GPU + if (!op.require_deswizzle) + { + g_upload_transfer_buffer.unmap(); + g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size); + } + else + { + // 2.1 Copy data to deswizzle buf + if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size) { - u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); - - // We're in the "else" branch, so "is_compressed_host_format()" is always false. - // Handle emulated compressed formats with host unpack (R8G8 compressed) - row_pitch = std::max(row_pitch, dst->pitch()); - - // FIXME: Double-check this logic; it seems like we should always use texels both here and for row_pitch. - image_linear_size = row_pitch * layout.height_in_texel * layout.depth; - - compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast(image_linear_size), 256) }; - compute_scratch_mem.first = reinterpret_cast(static_cast(compute_scratch_mem.second)); - - g_upload_transfer_buffer.reserve_storage_on_heap(static_cast(image_linear_size)); - upload_scratch_mem = g_upload_transfer_buffer.alloc_from_heap(static_cast(image_linear_size), 256); - dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; + g_deswizzle_scratch_buffer.remove(); + g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); } - rsx::io_buffer io_buf = dst_buffer; - caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 1024); - auto op = upload_texture_subresource(io_buf, layout, format, is_swizzled, caps); + u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast(image_linear_size), 256); + g_upload_transfer_buffer.unmap(); + g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast(image_linear_size)); - // Define upload region - coord3u 
region; - region.x = 0; - region.y = 0; - region.z = layout.layer; - region.width = layout.width_in_texel; - region.height = layout.height_in_texel; - region.depth = layout.depth; + // 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem + const auto block_size = op.element_size * op.block_length; - if (driver_caps.ARB_compute_shader_supported) + if (op.require_swap) { - // 0. Preconf - mem_layout.alignment = static_cast(caps.alignment); - mem_layout.swap_bytes = op.require_swap; - mem_layout.format = gl_format; - mem_layout.type = gl_type; - mem_layout.block_size = block_size_in_bytes; + mem_layout.swap_bytes = false; - // 2. Upload memory to GPU - if (!op.require_deswizzle) + switch (op.element_size) { - g_upload_transfer_buffer.unmap(); - g_upload_transfer_buffer.copy_to(&g_compute_decode_buffer.get(), upload_scratch_mem.second, compute_scratch_mem.second, image_linear_size); + case 1: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 2: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 4: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + default: + fmt::throw_exception("Unimplemented element size deswizzle"); } - else - { - // 2.1 Copy data to deswizzle buf - if (g_deswizzle_scratch_buffer.size() < min_required_buffer_size) - { - 
g_deswizzle_scratch_buffer.remove(); - g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, min_required_buffer_size); - } - - u32 deswizzle_data_offset = g_deswizzle_scratch_buffer.alloc(static_cast(image_linear_size), 256); - g_upload_transfer_buffer.unmap(); - g_upload_transfer_buffer.copy_to(&g_deswizzle_scratch_buffer.get(), upload_scratch_mem.second, deswizzle_data_offset, static_cast(image_linear_size)); - - // 2.2 Apply compute transform to deswizzle input and dump it in compute_scratch_mem - const auto block_size = op.element_size * op.block_length; - - if (op.require_swap) - { - mem_layout.swap_bytes = false; - - switch (op.element_size) - { - case 1: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 2: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 4: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - default: - fmt::throw_exception("Unimplemented element size deswizzle"); - } - } - else - { - switch (op.element_size) - { - case 1: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 2: - do_deswizzle_transformation(cmd, block_size, - 
&g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - case 4: - do_deswizzle_transformation(cmd, block_size, - &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, - static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); - break; - default: - fmt::throw_exception("Unimplemented element size deswizzle"); - } - } - - // Barrier - g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast(image_linear_size)); - } - - // 3. Update configuration - mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes; - mem_info.image_size_in_bytes = image_linear_size; - mem_info.memory_required = 0; - - // 4. Dispatch compute routines - copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info); - - // Barrier - g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast(image_linear_size)); } else { - unpack_settings.swap_bytes(op.require_swap); - dst->copy_from(staging_buffer, static_cast(gl_format), static_cast(gl_type), layout.level, region, unpack_settings); + switch (op.element_size) + { + case 1: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 2: + do_deswizzle_transformation(cmd, block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + case 4: + do_deswizzle_transformation(cmd, 
block_size, + &g_compute_decode_buffer.get(), compute_scratch_mem.second, &g_deswizzle_scratch_buffer.get(), deswizzle_data_offset, + static_cast(image_linear_size), layout.width_in_texel, layout.height_in_texel, layout.depth); + break; + default: + fmt::throw_exception("Unimplemented element size deswizzle"); + } } + + // Barrier + g_deswizzle_scratch_buffer.push_barrier(deswizzle_data_offset, static_cast(image_linear_size)); } + + // 3. Update configuration + mem_info.image_size_in_texels = image_linear_size / block_size_in_bytes; + mem_info.image_size_in_bytes = image_linear_size; + mem_info.memory_required = 0; + + // 4. Dispatch compute routines + copy_buffer_to_image(cmd, mem_layout, &g_compute_decode_buffer.get(), dst, compute_scratch_mem.first, layout.level, region, &mem_info); + + // Barrier + g_compute_decode_buffer.push_barrier(compute_scratch_mem.second, static_cast(image_linear_size)); } } diff --git a/rpcs3/Emu/RSX/GL/GLTexture.h b/rpcs3/Emu/RSX/GL/GLTexture.h index 4a17dae081..6d0bbb18a3 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.h +++ b/rpcs3/Emu/RSX/GL/GLTexture.h @@ -14,20 +14,20 @@ namespace gl { struct pixel_buffer_layout { - GLenum format; - GLenum type; - u32 row_length; - u8 block_size; - bool swap_bytes; - u8 alignment; + GLenum format = GL_RGBA; + GLenum type = GL_UNSIGNED_BYTE; + u32 row_length = 0; + u8 block_size = 0; + bool swap_bytes = false; + u8 alignment = 0; u8 reserved; }; struct image_memory_requirements { - u64 image_size_in_texels; - u64 image_size_in_bytes; - u64 memory_required; + u64 image_size_in_texels = 0; + u64 image_size_in_bytes = 0; + u64 memory_required = 0; }; struct clear_cmd_info