vk: Get basic vertex shaders working without traditional UBOs

This commit is contained in:
kd-11 2025-07-20 00:39:42 +03:00 committed by kd-11
parent a4eccb259a
commit a819b9fc2a
3 changed files with 56 additions and 37 deletions

View File

@ -511,7 +511,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
// VRAM allocation // VRAM allocation
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000, VK_TRUE); m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000, VK_TRUE);
m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer"); m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer");
m_vertex_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer"); m_vertex_env_ring_info.create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer");
m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer"); m_fragment_texture_params_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment texture params buffer");
m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE); m_vertex_layout_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex layout buffer", 0x10000, VK_TRUE);
m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer"); m_fragment_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment constants buffer");
@ -551,7 +551,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
} }
// Initialize optional allocation information with placeholders // Initialize optional allocation information with placeholders
m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, 0, 16 }; m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, 0, VK_WHOLE_SIZE };
m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, 16 }; m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, 0, 16 };
m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, 0, 16 }; m_fragment_env_buffer_info = { m_fragment_env_ring_info.heap->value, 0, 16 };
m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, 0, 16 }; m_fragment_texture_params_buffer_info = { m_fragment_texture_params_ring_info.heap->value, 0, 16 };
@ -1940,9 +1940,8 @@ void VKGSRender::load_program_env()
if (update_vertex_env) if (update_vertex_env)
{ {
// Vertex state // Vertex state. Note, we're now on std430 alignment here, not hardware alignment.
const auto mem = m_vertex_env_ring_info.static_alloc<128>(); const auto [mem, buf] = m_vertex_env_ring_info.alloc_and_map<16, char>(96);
auto buf = static_cast<u8*>(m_vertex_env_ring_info.map(mem, 84));
m_draw_processor.fill_scale_offset_data(buf, false); m_draw_processor.fill_scale_offset_data(buf, false);
m_draw_processor.fill_user_clip_data(buf + 64); m_draw_processor.fill_user_clip_data(buf + 64);
@ -1952,7 +1951,7 @@ void VKGSRender::load_program_env()
*(reinterpret_cast<f32*>(buf + 80)) = ctx->clip_max(); *(reinterpret_cast<f32*>(buf + 80)) = ctx->clip_max();
m_vertex_env_ring_info.unmap(); m_vertex_env_ring_info.unmap();
m_vertex_env_buffer_info = { m_vertex_env_ring_info.heap->value, mem, 128 }; m_vertex_env_dynamic_offset = mem;
} }
if (update_instancing_data) if (update_instancing_data)
@ -2117,8 +2116,9 @@ void VKGSRender::load_program_env()
if (vk::emulate_conditional_rendering()) if (vk::emulate_conditional_rendering())
{ {
auto predicate = m_cond_render_buffer ? m_cond_render_buffer->value : vk::get_scratch_buffer(*m_current_command_buffer, 4)->value; const VkBuffer predicate = m_cond_render_buffer ? m_cond_render_buffer->value : vk::get_scratch_buffer(*m_current_command_buffer, 4)->value;
m_program->bind_uniform({ predicate, 0, 4 }, vk::glsl::binding_set_index_vertex, m_vs_binding_table->cr_pred_buffer_location); const u32 offset = cond_render_ctrl.hw_cond_active ? 0 : 4;
m_program->bind_uniform({ predicate, offset, 4 }, vk::glsl::binding_set_index_vertex, m_vs_binding_table->cr_pred_buffer_location);
} }
if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)
@ -2188,6 +2188,16 @@ void VKGSRender::upload_transform_constants(const rsx::io_buffer& buffer)
void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_info) void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_info)
{ {
struct rsx_prog_push_constants_block_t
{
u32 vertex_base_index;
u32 vertex_index_offset;
u32 draw_id;
u32 layout_ptr_offset;
u32 xform_constants_offset;
u32 vs_context_offset;
};
// Actual allocation must have been done previously // Actual allocation must have been done previously
u32 base_offset; u32 base_offset;
const u32 offset32 = static_cast<u32>(m_vertex_layout_stream_info.offset); const u32 offset32 = static_cast<u32>(m_vertex_layout_stream_info.offset);
@ -2206,29 +2216,24 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_
const u32 vertex_layout_offset = (id * 16) + (base_offset / 8); const u32 vertex_layout_offset = (id * 16) + (base_offset / 8);
const u32 constant_id_offset = static_cast<u32>(m_xform_constants_dynamic_offset) / 16u; const u32 constant_id_offset = static_cast<u32>(m_xform_constants_dynamic_offset) / 16u;
const u32 vertex_context_offset = static_cast<u32>(m_vertex_env_dynamic_offset) / 128u;
u32 push_constants[6]; // Pack
u32 data_length = 20; rsx_prog_push_constants_block_t push_constants;
push_constants.vertex_base_index = vertex_info.vertex_index_base;
push_constants[0] = vertex_info.vertex_index_base; push_constants.vertex_index_offset = vertex_info.vertex_index_offset;
push_constants[1] = vertex_info.vertex_index_offset; push_constants.draw_id = id;
push_constants[2] = id; push_constants.layout_ptr_offset = vertex_layout_offset;
push_constants[3] = vertex_layout_offset; push_constants.xform_constants_offset = constant_id_offset;
push_constants[4] = constant_id_offset; push_constants.vs_context_offset = vertex_context_offset;
if (vk::emulate_conditional_rendering())
{
push_constants[5] = cond_render_ctrl.hw_cond_active ? 1 : 0;
data_length += 4;
}
vkCmdPushConstants( vkCmdPushConstants(
*m_current_command_buffer, *m_current_command_buffer,
m_program->layout(), m_program->layout(),
VK_SHADER_STAGE_VERTEX_BIT, VK_SHADER_STAGE_VERTEX_BIT,
0, 0,
data_length, sizeof(push_constants),
push_constants); &push_constants);
const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset; const usz data_offset = (id * 128) + m_vertex_layout_stream_info.offset;
auto dst = m_vertex_layout_ring_info.map(data_offset, 128); auto dst = m_vertex_layout_ring_info.map(data_offset, 128);
@ -2837,9 +2842,17 @@ void VKGSRender::begin_conditional_rendering(const std::vector<rsx::reports::occ
} }
m_cond_render_buffer = std::make_unique<vk::buffer>( m_cond_render_buffer = std::make_unique<vk::buffer>(
*m_device, 4, *m_device, 8,
memory_props.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, memory_props.device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
usage_flags, 0, VMM_ALLOCATION_POOL_UNDEFINED); usage_flags, 0, VMM_ALLOCATION_POOL_UNDEFINED);
// Fill CR+4 with all-ones. Instead of dynamic flags, just bind the all-ones word to disable CR
vkCmdFillBuffer(*m_current_command_buffer, m_cond_render_buffer->value, 4, 4, 0xFFFFFFFFu);
// Ensure the all-ones mask is written before next VS invocation
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_cond_render_buffer->value, 4, 4,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
} }
VkPipelineStageFlags dst_stage; VkPipelineStageFlags dst_stage;

View File

@ -152,6 +152,7 @@ private:
rsx::simple_array<u8> m_multidraw_parameters_buffer; rsx::simple_array<u8> m_multidraw_parameters_buffer;
u64 m_xform_constants_dynamic_offset = 0; // We manage transform_constants dynamic offset manually to alleviate performance penalty of doing a hot-patch of constants. u64 m_xform_constants_dynamic_offset = 0; // We manage transform_constants dynamic offset manually to alleviate performance penalty of doing a hot-patch of constants.
u64 m_vertex_env_dynamic_offset = 0;
std::array<vk::frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage; std::array<vk::frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
//Temp frame context to use if the real frame queue is overburdened. Only used for storage //Temp frame context to use if the real frame queue is overburdened. Only used for storage

View File

@ -80,18 +80,18 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
glsl::insert_subheader_block(OS); glsl::insert_subheader_block(OS);
OS << OS <<
"layout(std140, set=0, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform VertexContextBuffer\n" "layout(std430, set=0, binding=" << vk_prog->binding_table.context_buffer_location << ") readonly buffer VertexContextBuffer\n"
"{\n" "{\n"
" vertex_context_t vertex_contexts[];\n" " vertex_context_t vertex_contexts[];\n"
"};\n\n" "};\n\n"
"" ""
"#define get_vertex_context() vertex_contexts[vtx_context_id]\n" "#define get_vertex_context() vertex_contexts[vs_context_offset]\n"
"#define get_user_clip_config() get_vertex_context().user_clip_configuration_bits\n\n"; "#define get_user_clip_config() get_vertex_context().user_clip_configuration_bits\n\n";
const vk::glsl::program_input context_input const vk::glsl::program_input context_input
{ {
.domain = glsl::glsl_vertex_program, .domain = glsl::glsl_vertex_program,
.type = vk::glsl::input_type_uniform_buffer, .type = vk::glsl::input_type_storage_buffer,
.set = vk::glsl::binding_set_index_vertex, .set = vk::glsl::binding_set_index_vertex,
.location = vk_prog->binding_table.context_buffer_location, .location = vk_prog->binding_table.context_buffer_location,
.name = "VertexContextBuffer" .name = "VertexContextBuffer"
@ -103,10 +103,8 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
OS << OS <<
"layout(std430, set=0, binding=" << vk_prog->binding_table.cr_pred_buffer_location << ") readonly buffer EXT_Conditional_Rendering\n" "layout(std430, set=0, binding=" << vk_prog->binding_table.cr_pred_buffer_location << ") readonly buffer EXT_Conditional_Rendering\n"
"{\n" "{\n"
" uint cr_predicates[];\n" " uint cr_predicate_value;\n"
"};\n\n" "};\n\n";
""
"#define get_cr_predicate() cr_predicates[cr_predicate_id]\n\n";
const vk::glsl::program_input predicate_input const vk::glsl::program_input predicate_input
{ {
@ -126,16 +124,15 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS)
" uint vertex_index_offset;\n" " uint vertex_index_offset;\n"
" uint draw_id;\n" " uint draw_id;\n"
" uint layout_ptr_offset;\n" " uint layout_ptr_offset;\n"
" uint vtx_constants_offset;\n" " uint xform_constants_offset;\n"
" uint vtx_context_id;\n" " uint vs_context_offset;\n"
" uint cr_predicate_id;\n"
"};\n\n"; "};\n\n";
const vk::glsl::program_input push_constants const vk::glsl::program_input push_constants
{ {
.domain = glsl::glsl_vertex_program, .domain = glsl::glsl_vertex_program,
.type = vk::glsl::input_type_push_constant, .type = vk::glsl::input_type_push_constant,
.bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = 28 }, .bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = 24 },
.set = vk::glsl::binding_set_index_vertex, .set = vk::glsl::binding_set_index_vertex,
.location = umax, .location = umax,
.name = "push_constants_block" .name = "push_constants_block"
@ -344,6 +341,14 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
OS << registers << ";\n"; OS << registers << ";\n";
} }
// Expand indexed uniform structs here. We don't need to commit registers - these are very rarely consumed anyway.
OS <<
"#define scale_offset_mat get_vertex_context().scale_offset_mat\n"
"#define transform_branch_bits get_vertex_context().transform_branch_bits\n"
"#define point_size get_vertex_context().point_size\n"
"#define z_near get_vertex_context().z_near\n"
"#define z_far get_vertex_context().z_far\n\n";
OS << "void vs_main()\n"; OS << "void vs_main()\n";
OS << "{\n"; OS << "{\n";
@ -381,7 +386,7 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS)
if (m_device_props.emulate_conditional_rendering) if (m_device_props.emulate_conditional_rendering)
{ {
OS << " if (conditional_rendering_enabled != 0 && conditional_rendering_predicate == 0)\n"; OS << " if (cr_predicate_value == 0)\n";
OS << " {\n"; OS << " {\n";
OS << " gl_Position = vec4(0., 0., 0., -1.);\n"; OS << " gl_Position = vec4(0., 0., 0., -1.);\n";
OS << " return;\n"; OS << " return;\n";