Add support for LsHsEsGs (tess + geometry) pipelines where tess is onchip and geometry is offchip (#4328)

2026-04-29 23:41:19 -06:00 · 2026-04-28 12:41:09 -07:00 · 2026-04-28 12:41:09 -07:00 · 09c20d4636
commit 09c20d4636
parent cf238acea9
7 changed files with 113 additions and 62 deletions
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@ -373,13 +373,12 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
        break;
    case LogicalStage::TessellationEval: {
        execution_model = spv::ExecutionModel::TessellationEvaluation;
-        const auto& vs_info = ctx.runtime_info.vs_info;
-        ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
-        ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
-        ctx.AddExecutionMode(main,
-                             vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw
-                                 ? spv::ExecutionMode::VertexOrderCcw
-                                 : spv::ExecutionMode::VertexOrderCw);
+        ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.es_vs_info.tess_type));
+        ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.es_vs_info.tess_partitioning));
+        ctx.AddExecutionMode(main, ctx.runtime_info.es_vs_info.tess_topology ==
+                                           AmdGpu::TessellationTopology::TriangleCcw
+                                       ? spv::ExecutionMode::VertexOrderCcw
+                                       : spv::ExecutionMode::VertexOrderCw);
        break;
    }
    case LogicalStage::Fragment:
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@ -520,7 +520,8 @@ void EmitContext::DefineInputs() {
        const u32 num_attrs = Common::AlignUp(runtime_info.hs_info.ls_stride, 16) >> 4;
        if (num_attrs > 0) {
            const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
-            // The input vertex count isn't statically known, so make length 32 (what glslang does)
+            // The input vertex count isn't statically known, so make length 32 (what
+            // glslang does)
            const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
            input_attr_array = DefineInput(patch_array_type, 0);
            Name(input_attr_array, "in_attrs");
@ -531,10 +532,12 @@ void EmitContext::DefineInputs() {
        tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
        primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);

-        const u32 num_attrs = Common::AlignUp(runtime_info.vs_info.hs_output_cp_stride, 16) >> 4;
+        const u32 num_attrs =
+            Common::AlignUp(runtime_info.hs_es_vs_info.hs_output_cp_stride, 16) >> 4;
        if (num_attrs > 0) {
            const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
-            // The input vertex count isn't statically known, so make length 32 (what glslang does)
+            // The input vertex count isn't statically known, so make length 32 (what
+            // glslang does)
            const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
            input_attr_array = DefineInput(patch_array_type, 0);
            Name(input_attr_array, "in_attrs");
@ -643,10 +646,12 @@ void EmitContext::DefineOutputs() {
            Decorate(output_tess_level_inner, spv::Decoration::Patch);
        }

-        const u32 num_attrs = Common::AlignUp(runtime_info.hs_info.hs_output_cp_stride, 16) >> 4;
+        const u32 num_attrs =
+            Common::AlignUp(runtime_info.hs_es_vs_info.hs_output_cp_stride, 16) >> 4;
        if (num_attrs > 0) {
            const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
-            // The input vertex count isn't statically known, so make length 32 (what glslang does)
+            // One array entry per output control point; the length comes from
+            // hs_info.NumOutputControlPoints()
            const Id patch_array_type{TypeArray(
                per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
            output_attr_array = DefineOutput(patch_array_type, 0);
@ -709,7 +714,8 @@ void EmitContext::DefineOutputs() {
            ++num_render_targets;
        }
        // Dual source blending allows at most 2 render targets, one for each source.
-        // Fewer targets are allowed but the missing blending source values will be undefined.
+        // Fewer targets are allowed but the missing blending source values will be
+        // undefined.
        ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets <= 2,
                   "Dual source blending enabled, there must be at most two MRT exports");
        break;
--- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp
+++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp
@ -149,12 +149,7 @@ static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offs
    info.tess_consts_ptr_base = sharp_ptr_base;
    info.tess_consts_dword_offset = sharp_dword_offset;
    info.ReadTessConstantBuffer(tess_constants);
-    if (info.l_stage == LogicalStage::TessellationControl) {
-        runtime_info.hs_info.InitFromTessConstants(tess_constants);
-    } else {
-        runtime_info.vs_info.InitFromTessConstants(tess_constants);
-    }
-
+    runtime_info.InitFromTessConstants(tess_constants);
    return;
 }

@ -470,7 +465,7 @@ void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info)
                        if (off_dw > 0) {
                            addr = ir.IAdd(addr, ir.Imm32(off_dw));
                        }
-                        const u32 stride = runtime_info.hs_info.hs_output_cp_stride;
+                        const u32 stride = runtime_info.hs_es_vs_info.hs_output_cp_stride;
                        // Invocation ID array index is implicit, handled by SPIRV backend
                        const IR::U32 opt_addr = TryOptimizeAddressModulo(addr, stride, ir);
                        const IR::U32 offset = ir.IMod(opt_addr, ir.Imm32(stride));
@ -508,8 +503,9 @@ void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info)
                               region == AttributeRegion::OutputCP,
                           "Unhandled read of patchconst attribute in hull shader");
                const bool is_tcs_output_read = region == AttributeRegion::OutputCP;
-                const u32 stride = is_tcs_output_read ? runtime_info.hs_info.hs_output_cp_stride
-                                                      : runtime_info.hs_info.ls_stride;
+                const u32 stride = is_tcs_output_read
+                                       ? runtime_info.hs_es_vs_info.hs_output_cp_stride
+                                       : runtime_info.hs_info.ls_stride;
                IR::Value attr_read;
                if (num_dwords == 1) {
                    attr_read = ir.BitCast<IR::U32>(
@ -585,7 +581,8 @@ void DomainShaderTransform(const IR::Program& program, const RuntimeInfo& runtim
                const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 {
                    if (region == AttributeRegion::OutputCP) {
                        return ReadTessControlPointAttribute(
-                            addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw, false);
+                            addr, runtime_info.hs_es_vs_info.hs_output_cp_stride, ir, off_dw,
+                            false);
                    } else {
                        ASSERT(region == AttributeRegion::PatchConst);
                        return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@ -40,16 +40,30 @@ constexpr Stage StageFromIndex(size_t index) noexcept {
    return static_cast<Stage>(index);
 }

+/// Field shared by the hull, export and vertex runtime infos. Those structs derive
+/// from this one (directly or through CommonEsVsRuntimeInfo) and RuntimeInfo exposes
+/// it as `hs_es_vs_info`.
+struct CommonHsEsVsRuntimeInfo {
+    u32 hs_output_cp_stride;
+
+    // A defaulted operator<=> has to return `auto` (or a comparison category type).
+    // Declared as `bool` it is defined as deleted and only the implicit operator==
+    // remains usable.
+    auto operator<=>(const CommonHsEsVsRuntimeInfo&) const noexcept = default;
+};
+
+/// Tessellation state shared by the export and vertex runtime infos, on top of the
+/// control-point stride inherited from CommonHsEsVsRuntimeInfo. RuntimeInfo exposes
+/// it as `es_vs_info`.
+struct CommonEsVsRuntimeInfo : protected CommonHsEsVsRuntimeInfo {
+    AmdGpu::TessellationType tess_type;
+    AmdGpu::TessellationTopology tess_topology;
+    AmdGpu::TessellationPartitioning tess_partitioning;
+
+    // `auto`, not `bool`: see CommonHsEsVsRuntimeInfo. The base subobject takes part
+    // in the comparison as well.
+    auto operator<=>(const CommonEsVsRuntimeInfo&) const noexcept = default;
+};
+
 struct LocalRuntimeInfo {
    u32 ls_stride;

    auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
 };

-struct ExportRuntimeInfo {
+/// Export-stage runtime info; the inherited CommonEsVsRuntimeInfo part holds the
+/// tessellation state.
+struct ExportRuntimeInfo : protected CommonEsVsRuntimeInfo {
    u32 vertex_data_size;

    auto operator<=>(const ExportRuntimeInfo&) const noexcept = default;
 };

 enum class Output : u8 {
@ -79,7 +93,7 @@ enum class Output : u8 {
 };
 using OutputMap = std::array<Output, 4>;

-struct VertexRuntimeInfo {
+struct VertexRuntimeInfo : protected CommonEsVsRuntimeInfo {
    u32 num_outputs;
    u32 num_exports;
    std::array<OutputMap, 3> outputs;
@ -88,42 +102,18 @@ struct VertexRuntimeInfo {
    bool clip_disable{};
    u32 step_rate_0;
    u32 step_rate_1;
-    AmdGpu::TessellationType tess_type;
-    AmdGpu::TessellationTopology tess_topology;
-    AmdGpu::TessellationPartitioning tess_partitioning;
-    u32 hs_output_cp_stride{};

-    bool operator==(const VertexRuntimeInfo& other) const noexcept {
-        return num_outputs == other.num_outputs && outputs == other.outputs &&
-               tess_emulated_primitive == other.tess_emulated_primitive &&
-               emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
-               clip_disable == other.clip_disable && tess_type == other.tess_type &&
-               tess_topology == other.tess_topology &&
-               tess_partitioning == other.tess_partitioning &&
-               hs_output_cp_stride == other.hs_output_cp_stride &&
-               step_rate_0 == other.step_rate_0 && step_rate_1 == other.step_rate_1;
-    }
-
-    void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
-        hs_output_cp_stride = tess_constants.hs_cp_stride;
-    }
+    bool operator<=>(const VertexRuntimeInfo& other) const noexcept = default;
 };

-struct HullRuntimeInfo {
+struct HullRuntimeInfo : protected CommonHsEsVsRuntimeInfo {
    u32 num_input_control_points;
    u32 num_threads;
    AmdGpu::TessellationType tess_type;
    bool offchip_lds_enable;
    u32 ls_stride;
-    u32 hs_output_cp_stride;
    u32 hs_output_base;

-    void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
-        ls_stride = tess_constants.ls_stride;
-        hs_output_cp_stride = tess_constants.hs_cp_stride;
-        hs_output_base = tess_constants.hs_output_base;
-    }
-
    bool operator==(const HullRuntimeInfo&) const = default;

    // It might be possible for a non-passthrough TCS to have these conditions, in some dumb
@ -248,6 +238,10 @@ struct RuntimeInfo {
        GeometryRuntimeInfo gs_info;
        FragmentRuntimeInfo fs_info;
        ComputeRuntimeInfo cs_info;
+        // Hs/Es/VsRuntimeInfo inherit from these so we can
+        // access common info with correct offsets
+        CommonHsEsVsRuntimeInfo hs_es_vs_info;
+        CommonEsVsRuntimeInfo es_vs_info;
    };

    void Initialize(Stage stage_) {
@ -275,6 +269,14 @@ struct RuntimeInfo {
            return true;
        }
    }
+
+    /// Copies values read from the tessellation constant buffer into this runtime
+    /// info.
+    /// @param tess_constants Constant-buffer contents to take the values from.
+    void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
+        // The control-point stride is written through the shared view for every stage.
+        hs_es_vs_info.hs_output_cp_stride = tess_constants.hs_cp_stride;
+        if (stage != Stage::Hull) {
+            return;
+        }
+        // The remaining values are only stored for the hull stage.
+        hs_info.hs_output_base = tess_constants.hs_output_base;
+        hs_info.ls_stride = tess_constants.ls_stride;
+    }
 };

 } // namespace Shader
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@ -164,11 +164,7 @@ struct StageSpecialization {
            info->l_stage == LogicalStage::TessellationEval) {
            TessellationDataConstantBuffer tess_constants{};
            info->ReadTessConstantBuffer(tess_constants);
-            if (info->l_stage == LogicalStage::TessellationControl) {
-                runtime_info.hs_info.InitFromTessConstants(tess_constants);
-            } else {
-                runtime_info.vs_info.InitFromTessConstants(tess_constants);
-            }
+            runtime_info.InitFromTessConstants(tess_constants);
        }
    }

--- a/src/video_core/amdgpu/regs_vertex.h
+++ b/src/video_core/amdgpu/regs_vertex.h
@ -87,6 +87,7 @@ union ShaderStageEnable {
        Vs = 0u, // always enabled
        EsGs = 0xB0u,
        LsHs = 0x45u,
+        LsHsEsGs = 0xAD,
    };

    VgtStages raw;
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@ -120,6 +120,11 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
    case Stage::Export: {
        BuildCommon(regs.es_program);
        info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
+        if (l_stage == LogicalStage::TessellationEval) {
+            info.es_vs_info.tess_type = regs.tess_config.type;
+            info.es_vs_info.tess_topology = regs.tess_config.topology;
+            info.es_vs_info.tess_partitioning = regs.tess_config.partitioning;
+        }
        break;
    }
    case Stage::Vertex: {
@ -136,9 +141,9 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
            regs.primitive_type == AmdGpu::PrimitiveType::QuadList;
        info.vs_info.clip_disable = regs.IsClipDisabled();
        if (l_stage == LogicalStage::TessellationEval) {
-            info.vs_info.tess_type = regs.tess_config.type;
-            info.vs_info.tess_topology = regs.tess_config.topology;
-            info.vs_info.tess_partitioning = regs.tess_config.partitioning;
+            info.es_vs_info.tess_type = regs.tess_config.type;
+            info.es_vs_info.tess_topology = regs.tess_config.topology;
+            info.es_vs_info.tess_partitioning = regs.tess_config.partitioning;
        }
        break;
    }
@ -149,7 +154,23 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
        gs_info.output_vertices = regs.vgt_gs_max_vert_out;
        gs_info.num_invocations =
            regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1;
-        gs_info.in_primitive = regs.primitive_type;
+        if (regs.stage_enable.raw == AmdGpu::ShaderStageEnable::LsHsEsGs) {
+            gs_info.in_primitive = [&]() {
+                switch (regs.tess_config.topology) {
+                case AmdGpu::TessellationTopology::Point:
+                    return AmdGpu::PrimitiveType::PointList;
+                case AmdGpu::TessellationTopology::Line:
+                    return AmdGpu::PrimitiveType::LineList;
+                case AmdGpu::TessellationTopology::TriangleCw:
+                case AmdGpu::TessellationTopology::TriangleCcw:
+                    return AmdGpu::PrimitiveType::TriangleList;
+                default:
+                    UNREACHABLE();
+                }
+            }();
+        } else {
+            gs_info.in_primitive = regs.primitive_type;
+        }
        for (u32 stream_id = 0; stream_id < Shader::GsMaxOutputStreams; ++stream_id) {
            gs_info.out_primitive[stream_id] =
                regs.vgt_gs_out_prim_type.GetPrimitiveType(stream_id);
@ -511,9 +532,38 @@ bool PipelineCache::RefreshGraphicsStages() {
            return false;
        }
        break;
-    default:
+    case AmdGpu::ShaderStageEnable::VgtStages::LsHsEsGs:
+        if (!instance.IsTessellationSupported() ||
+            (regs.tess_config.type == AmdGpu::TessellationType::Isoline &&
+             !instance.IsTessellationIsolinesSupported())) {
+            return false;
+        }
+        if (!instance.IsGeometryStageSupported()) {
+            LOG_WARNING(Render_Vulkan, "Geometry shader stage unsupported, skipping");
+            return false;
+        }
+        if (regs.vgt_gs_mode.onchip || regs.vgt_strmout_config.raw) {
+            LOG_WARNING(Render_Vulkan, "Geometry shader features unsupported, skipping");
+            return false;
+        }
+        if (!bind_stage(Stage::Hull, LogicalStage::TessellationControl)) {
+            return false;
+        }
+        if (!bind_stage(Stage::Export, LogicalStage::TessellationEval)) {
+            return false;
+        }
+        if (!bind_stage(Stage::Local, LogicalStage::Vertex)) {
+            return false;
+        }
+        if (!bind_stage(Stage::Geometry, LogicalStage::Geometry)) {
+            return false;
+        }
+        break;
+    case AmdGpu::ShaderStageEnable::VgtStages::Vs:
        bind_stage(Stage::Vertex, LogicalStage::Vertex);
        break;
+    default:
+        UNREACHABLE_MSG("unhandled stage_en: {}", (u32)regs.stage_enable.raw);
    }

    const auto* vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];