Add support for LsHsEsGs (tess + geometry) pipelines where tess is onchip and geometry is offchip (#4328)

This commit is contained in:
baggins183 2026-04-28 12:41:09 -07:00 committed by GitHub
parent cf238acea9
commit 09c20d4636
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 113 additions and 62 deletions

View File

@ -373,13 +373,12 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
break;
case LogicalStage::TessellationEval: {
execution_model = spv::ExecutionModel::TessellationEvaluation;
const auto& vs_info = ctx.runtime_info.vs_info;
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
ctx.AddExecutionMode(main,
vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw
? spv::ExecutionMode::VertexOrderCcw
: spv::ExecutionMode::VertexOrderCw);
ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.es_vs_info.tess_type));
ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.es_vs_info.tess_partitioning));
ctx.AddExecutionMode(main, ctx.runtime_info.es_vs_info.tess_topology ==
AmdGpu::TessellationTopology::TriangleCcw
? spv::ExecutionMode::VertexOrderCcw
: spv::ExecutionMode::VertexOrderCw);
break;
}
case LogicalStage::Fragment:

View File

@ -520,7 +520,8 @@ void EmitContext::DefineInputs() {
const u32 num_attrs = Common::AlignUp(runtime_info.hs_info.ls_stride, 16) >> 4;
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The input vertex count isn't statically known, so make length 32 (what glslang does)
// The input vertex count isn't statically known, so make length 32 (what
// glslang does)
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
input_attr_array = DefineInput(patch_array_type, 0);
Name(input_attr_array, "in_attrs");
@ -531,10 +532,12 @@ void EmitContext::DefineInputs() {
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
const u32 num_attrs = Common::AlignUp(runtime_info.vs_info.hs_output_cp_stride, 16) >> 4;
const u32 num_attrs =
Common::AlignUp(runtime_info.hs_es_vs_info.hs_output_cp_stride, 16) >> 4;
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The input vertex count isn't statically known, so make length 32 (what glslang does)
// The input vertex count isn't statically known, so make length 32 (what
// glslang does)
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
input_attr_array = DefineInput(patch_array_type, 0);
Name(input_attr_array, "in_attrs");
@ -643,10 +646,12 @@ void EmitContext::DefineOutputs() {
Decorate(output_tess_level_inner, spv::Decoration::Patch);
}
const u32 num_attrs = Common::AlignUp(runtime_info.hs_info.hs_output_cp_stride, 16) >> 4;
const u32 num_attrs =
Common::AlignUp(runtime_info.hs_es_vs_info.hs_output_cp_stride, 16) >> 4;
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The input vertex count isn't statically known, so make length 32 (what glslang does)
// The input vertex count isn't statically known, so make length 32 (what
// glslang does)
const Id patch_array_type{TypeArray(
per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
output_attr_array = DefineOutput(patch_array_type, 0);
@ -709,7 +714,8 @@ void EmitContext::DefineOutputs() {
++num_render_targets;
}
// Dual source blending allows at most 2 render targets, one for each source.
// Fewer targets are allowed but the missing blending source values will be undefined.
// Fewer targets are allowed but the missing blending source values will be
// undefined.
ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets <= 2,
"Dual source blending enabled, there must be at most two MRT exports");
break;

View File

@ -149,12 +149,7 @@ static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offs
info.tess_consts_ptr_base = sharp_ptr_base;
info.tess_consts_dword_offset = sharp_dword_offset;
info.ReadTessConstantBuffer(tess_constants);
if (info.l_stage == LogicalStage::TessellationControl) {
runtime_info.hs_info.InitFromTessConstants(tess_constants);
} else {
runtime_info.vs_info.InitFromTessConstants(tess_constants);
}
runtime_info.InitFromTessConstants(tess_constants);
return;
}
@ -470,7 +465,7 @@ void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info)
if (off_dw > 0) {
addr = ir.IAdd(addr, ir.Imm32(off_dw));
}
const u32 stride = runtime_info.hs_info.hs_output_cp_stride;
const u32 stride = runtime_info.hs_es_vs_info.hs_output_cp_stride;
// Invocation ID array index is implicit, handled by SPIRV backend
const IR::U32 opt_addr = TryOptimizeAddressModulo(addr, stride, ir);
const IR::U32 offset = ir.IMod(opt_addr, ir.Imm32(stride));
@ -508,8 +503,9 @@ void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info)
region == AttributeRegion::OutputCP,
"Unhandled read of patchconst attribute in hull shader");
const bool is_tcs_output_read = region == AttributeRegion::OutputCP;
const u32 stride = is_tcs_output_read ? runtime_info.hs_info.hs_output_cp_stride
: runtime_info.hs_info.ls_stride;
const u32 stride = is_tcs_output_read
? runtime_info.hs_es_vs_info.hs_output_cp_stride
: runtime_info.hs_info.ls_stride;
IR::Value attr_read;
if (num_dwords == 1) {
attr_read = ir.BitCast<IR::U32>(
@ -585,7 +581,8 @@ void DomainShaderTransform(const IR::Program& program, const RuntimeInfo& runtim
const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 {
if (region == AttributeRegion::OutputCP) {
return ReadTessControlPointAttribute(
addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw, false);
addr, runtime_info.hs_es_vs_info.hs_output_cp_stride, ir, off_dw,
false);
} else {
ASSERT(region == AttributeRegion::PatchConst);
return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));

View File

@ -40,16 +40,30 @@ constexpr Stage StageFromIndex(size_t index) noexcept {
return static_cast<Stage>(index);
}
/// Runtime state shared by the hull, export and vertex stage infos.
/// HullRuntimeInfo and CommonEsVsRuntimeInfo derive from this struct, and RuntimeInfo
/// also exposes it directly as `hs_es_vs_info`.
struct CommonHsEsVsRuntimeInfo {
    /// Stride of one hull shader output control point, filled from the tessellation
    /// constant buffer's `hs_cp_stride`.
    /// NOTE(review): presumably in bytes (users align it up to 16 and shift right by 4
    /// to count F32[4] attributes) - confirm.
    u32 hs_output_cp_stride;

    // A defaulted three-way comparison has to be declared with `auto` or a comparison
    // category type; declared as `bool` it is defined as deleted. The implicitly
    // declared defaulted operator== is not affected by the return type used here.
    auto operator<=>(const CommonHsEsVsRuntimeInfo&) const noexcept = default;
};
/// Tessellation state shared by the export and vertex stage infos
/// (ExportRuntimeInfo and VertexRuntimeInfo derive from this struct, and RuntimeInfo
/// also exposes it directly as `es_vs_info`).
/// The fields are filled from `regs.tess_config` when the logical stage is
/// TessellationEval and are turned into SPIR-V execution modes for the tessellation
/// evaluation entry point.
struct CommonEsVsRuntimeInfo : protected CommonHsEsVsRuntimeInfo {
    /// Tessellation domain type; emitted as an execution mode.
    AmdGpu::TessellationType tess_type;
    /// Output topology; TriangleCcw selects VertexOrderCcw, anything else VertexOrderCw.
    AmdGpu::TessellationTopology tess_topology;
    /// Partitioning mode; emitted as an execution mode.
    AmdGpu::TessellationPartitioning tess_partitioning;

    // A defaulted three-way comparison has to be declared with `auto` or a comparison
    // category type; declared as `bool` it is defined as deleted. With `auto` the
    // category is deduced from the base class and the members.
    auto operator<=>(const CommonEsVsRuntimeInfo&) const noexcept = default;
};
// Runtime info for the local shader stage.
// NOTE(review): presumably the info used for Stage::Local - confirm against RuntimeInfo.
struct LocalRuntimeInfo {
// NOTE(review): presumably the stride of one vertex written by the local stage,
// mirroring the hull stage's ls_stride - confirm units and who writes it.
u32 ls_stride;
// Defaulted three-way comparison; the comparison category is deduced from ls_stride.
auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
};
struct ExportRuntimeInfo {
struct ExportRuntimeInfo : protected CommonEsVsRuntimeInfo {
u32 vertex_data_size;
auto operator<=>(const ExportRuntimeInfo&) const noexcept = default;
bool operator<=>(const ExportRuntimeInfo&) const noexcept = default;
};
enum class Output : u8 {
@ -79,7 +93,7 @@ enum class Output : u8 {
};
using OutputMap = std::array<Output, 4>;
struct VertexRuntimeInfo {
struct VertexRuntimeInfo : protected CommonEsVsRuntimeInfo {
u32 num_outputs;
u32 num_exports;
std::array<OutputMap, 3> outputs;
@ -88,42 +102,18 @@ struct VertexRuntimeInfo {
bool clip_disable{};
u32 step_rate_0;
u32 step_rate_1;
AmdGpu::TessellationType tess_type;
AmdGpu::TessellationTopology tess_topology;
AmdGpu::TessellationPartitioning tess_partitioning;
u32 hs_output_cp_stride{};
bool operator==(const VertexRuntimeInfo& other) const noexcept {
return num_outputs == other.num_outputs && outputs == other.outputs &&
tess_emulated_primitive == other.tess_emulated_primitive &&
emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
clip_disable == other.clip_disable && tess_type == other.tess_type &&
tess_topology == other.tess_topology &&
tess_partitioning == other.tess_partitioning &&
hs_output_cp_stride == other.hs_output_cp_stride &&
step_rate_0 == other.step_rate_0 && step_rate_1 == other.step_rate_1;
}
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
hs_output_cp_stride = tess_constants.hs_cp_stride;
}
bool operator<=>(const VertexRuntimeInfo& other) const noexcept = default;
};
struct HullRuntimeInfo {
struct HullRuntimeInfo : protected CommonHsEsVsRuntimeInfo {
u32 num_input_control_points;
u32 num_threads;
AmdGpu::TessellationType tess_type;
bool offchip_lds_enable;
u32 ls_stride;
u32 hs_output_cp_stride;
u32 hs_output_base;
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
ls_stride = tess_constants.ls_stride;
hs_output_cp_stride = tess_constants.hs_cp_stride;
hs_output_base = tess_constants.hs_output_base;
}
bool operator==(const HullRuntimeInfo&) const = default;
// It might be possible for a non-passthrough TCS to have these conditions, in some dumb
@ -248,6 +238,10 @@ struct RuntimeInfo {
GeometryRuntimeInfo gs_info;
FragmentRuntimeInfo fs_info;
ComputeRuntimeInfo cs_info;
// Hs/Es/VsRuntimeInfo inherit from these so we can
// access common info with correct offsets
CommonHsEsVsRuntimeInfo hs_es_vs_info;
CommonEsVsRuntimeInfo es_vs_info;
};
void Initialize(Stage stage_) {
@ -275,6 +269,14 @@ struct RuntimeInfo {
return true;
}
}
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
hs_es_vs_info.hs_output_cp_stride = tess_constants.hs_cp_stride;
if (stage == Stage::Hull) {
hs_info.ls_stride = tess_constants.ls_stride;
hs_info.hs_output_base = tess_constants.hs_output_base;
}
}
};
} // namespace Shader

View File

@ -164,11 +164,7 @@ struct StageSpecialization {
info->l_stage == LogicalStage::TessellationEval) {
TessellationDataConstantBuffer tess_constants{};
info->ReadTessConstantBuffer(tess_constants);
if (info->l_stage == LogicalStage::TessellationControl) {
runtime_info.hs_info.InitFromTessConstants(tess_constants);
} else {
runtime_info.vs_info.InitFromTessConstants(tess_constants);
}
runtime_info.InitFromTessConstants(tess_constants);
}
}

View File

@ -87,6 +87,7 @@ union ShaderStageEnable {
Vs = 0u, // always enabled
EsGs = 0xB0u,
LsHs = 0x45u,
LsHsEsGs = 0xAD,
};
VgtStages raw;

View File

@ -120,6 +120,11 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
case Stage::Export: {
BuildCommon(regs.es_program);
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
if (l_stage == LogicalStage::TessellationEval) {
info.es_vs_info.tess_type = regs.tess_config.type;
info.es_vs_info.tess_topology = regs.tess_config.topology;
info.es_vs_info.tess_partitioning = regs.tess_config.partitioning;
}
break;
}
case Stage::Vertex: {
@ -136,9 +141,9 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
regs.primitive_type == AmdGpu::PrimitiveType::QuadList;
info.vs_info.clip_disable = regs.IsClipDisabled();
if (l_stage == LogicalStage::TessellationEval) {
info.vs_info.tess_type = regs.tess_config.type;
info.vs_info.tess_topology = regs.tess_config.topology;
info.vs_info.tess_partitioning = regs.tess_config.partitioning;
info.es_vs_info.tess_type = regs.tess_config.type;
info.es_vs_info.tess_topology = regs.tess_config.topology;
info.es_vs_info.tess_partitioning = regs.tess_config.partitioning;
}
break;
}
@ -149,7 +154,23 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
gs_info.num_invocations =
regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1;
gs_info.in_primitive = regs.primitive_type;
if (regs.stage_enable.raw == AmdGpu::ShaderStageEnable::LsHsEsGs) {
gs_info.in_primitive = [&]() {
switch (regs.tess_config.topology) {
case AmdGpu::TessellationTopology::Point:
return AmdGpu::PrimitiveType::PointList;
case AmdGpu::TessellationTopology::Line:
return AmdGpu::PrimitiveType::LineList;
case AmdGpu::TessellationTopology::TriangleCw:
case AmdGpu::TessellationTopology::TriangleCcw:
return AmdGpu::PrimitiveType::TriangleList;
default:
UNREACHABLE();
}
}();
} else {
gs_info.in_primitive = regs.primitive_type;
}
for (u32 stream_id = 0; stream_id < Shader::GsMaxOutputStreams; ++stream_id) {
gs_info.out_primitive[stream_id] =
regs.vgt_gs_out_prim_type.GetPrimitiveType(stream_id);
@ -511,9 +532,38 @@ bool PipelineCache::RefreshGraphicsStages() {
return false;
}
break;
default:
case AmdGpu::ShaderStageEnable::VgtStages::LsHsEsGs:
if (!instance.IsTessellationSupported() ||
(regs.tess_config.type == AmdGpu::TessellationType::Isoline &&
!instance.IsTessellationIsolinesSupported())) {
return false;
}
if (!instance.IsGeometryStageSupported()) {
LOG_WARNING(Render_Vulkan, "Geometry shader stage unsupported, skipping");
return false;
}
if (regs.vgt_gs_mode.onchip || regs.vgt_strmout_config.raw) {
LOG_WARNING(Render_Vulkan, "Geometry shader features unsupported, skipping");
return false;
}
if (!bind_stage(Stage::Hull, LogicalStage::TessellationControl)) {
return false;
}
if (!bind_stage(Stage::Export, LogicalStage::TessellationEval)) {
return false;
}
if (!bind_stage(Stage::Local, LogicalStage::Vertex)) {
return false;
}
if (!bind_stage(Stage::Geometry, LogicalStage::Geometry)) {
return false;
}
break;
case AmdGpu::ShaderStageEnable::VgtStages::Vs:
bind_stage(Stage::Vertex, LogicalStage::Vertex);
break;
default:
UNREACHABLE_MSG("unhandled stage_en: {}", (u32)regs.stage_enable.raw);
}
const auto* vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];