diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 804c1d076..bf7ba9699 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -373,13 +373,12 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) { break; case LogicalStage::TessellationEval: { execution_model = spv::ExecutionModel::TessellationEvaluation; - const auto& vs_info = ctx.runtime_info.vs_info; - ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type)); - ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning)); - ctx.AddExecutionMode(main, - vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw - ? spv::ExecutionMode::VertexOrderCcw - : spv::ExecutionMode::VertexOrderCw); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.es_vs_info.tess_type)); + ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.es_vs_info.tess_partitioning)); + ctx.AddExecutionMode(main, ctx.runtime_info.es_vs_info.tess_topology == + AmdGpu::TessellationTopology::TriangleCcw + ? 
spv::ExecutionMode::VertexOrderCcw + : spv::ExecutionMode::VertexOrderCw); break; } case LogicalStage::Fragment: diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index c0e469964..9ec342b59 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -520,7 +520,8 @@ void EmitContext::DefineInputs() { const u32 num_attrs = Common::AlignUp(runtime_info.hs_info.ls_stride, 16) >> 4; if (num_attrs > 0) { const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; - // The input vertex count isn't statically known, so make length 32 (what glslang does) + // The input vertex count isn't statically known, so make length 32 (what + // glslang does) const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))}; input_attr_array = DefineInput(patch_array_type, 0); Name(input_attr_array, "in_attrs"); @@ -531,10 +532,12 @@ void EmitContext::DefineInputs() { tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord); primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input); - const u32 num_attrs = Common::AlignUp(runtime_info.vs_info.hs_output_cp_stride, 16) >> 4; + const u32 num_attrs = + Common::AlignUp(runtime_info.hs_es_vs_info.hs_output_cp_stride, 16) >> 4; if (num_attrs > 0) { const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; - // The input vertex count isn't statically known, so make length 32 (what glslang does) + // The input vertex count isn't statically known, so make length 32 (what + // glslang does) const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))}; input_attr_array = DefineInput(patch_array_type, 0); Name(input_attr_array, "in_attrs"); @@ -643,10 +646,12 @@ void EmitContext::DefineOutputs() { Decorate(output_tess_level_inner, spv::Decoration::Patch); } - const u32 num_attrs = 
Common::AlignUp(runtime_info.hs_info.hs_output_cp_stride, 16) >> 4; + const u32 num_attrs = + Common::AlignUp(runtime_info.hs_es_vs_info.hs_output_cp_stride, 16) >> 4; if (num_attrs > 0) { const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))}; - // The input vertex count isn't statically known, so make length 32 (what glslang does) + // The input vertex count isn't statically known, so make length 32 (what + // glslang does) const Id patch_array_type{TypeArray( per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))}; output_attr_array = DefineOutput(patch_array_type, 0); @@ -709,7 +714,8 @@ void EmitContext::DefineOutputs() { ++num_render_targets; } // Dual source blending allows at most 2 render targets, one for each source. - // Fewer targets are allowed but the missing blending source values will be undefined. + // Fewer targets are allowed but the missing blending source values will be + // undefined. ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets <= 2, "Dual source blending enabled, there must be at most two MRT exports"); break; diff --git a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp index d975c47ea..c5be58030 100644 --- a/src/shader_recompiler/ir/passes/hull_shader_transform.cpp +++ b/src/shader_recompiler/ir/passes/hull_shader_transform.cpp @@ -149,12 +149,7 @@ static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offs info.tess_consts_ptr_base = sharp_ptr_base; info.tess_consts_dword_offset = sharp_dword_offset; info.ReadTessConstantBuffer(tess_constants); - if (info.l_stage == LogicalStage::TessellationControl) { - runtime_info.hs_info.InitFromTessConstants(tess_constants); - } else { - runtime_info.vs_info.InitFromTessConstants(tess_constants); - } - + runtime_info.InitFromTessConstants(tess_constants); return; } @@ -470,7 +465,7 @@ void HullShaderTransform(IR::Program& program, const RuntimeInfo& 
runtime_info) if (off_dw > 0) { addr = ir.IAdd(addr, ir.Imm32(off_dw)); } - const u32 stride = runtime_info.hs_info.hs_output_cp_stride; + const u32 stride = runtime_info.hs_es_vs_info.hs_output_cp_stride; // Invocation ID array index is implicit, handled by SPIRV backend const IR::U32 opt_addr = TryOptimizeAddressModulo(addr, stride, ir); const IR::U32 offset = ir.IMod(opt_addr, ir.Imm32(stride)); @@ -508,8 +503,9 @@ void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info) region == AttributeRegion::OutputCP, "Unhandled read of patchconst attribute in hull shader"); const bool is_tcs_output_read = region == AttributeRegion::OutputCP; - const u32 stride = is_tcs_output_read ? runtime_info.hs_info.hs_output_cp_stride - : runtime_info.hs_info.ls_stride; + const u32 stride = is_tcs_output_read + ? runtime_info.hs_es_vs_info.hs_output_cp_stride + : runtime_info.hs_info.ls_stride; IR::Value attr_read; if (num_dwords == 1) { attr_read = ir.BitCast( @@ -585,7 +581,8 @@ void DomainShaderTransform(const IR::Program& program, const RuntimeInfo& runtim const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 { if (region == AttributeRegion::OutputCP) { return ReadTessControlPointAttribute( - addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw, false); + addr, runtime_info.hs_es_vs_info.hs_output_cp_stride, ir, off_dw, + false); } else { ASSERT(region == AttributeRegion::PatchConst); return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw)); diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index b8851c06c..94a690fb3 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -40,16 +40,30 @@ constexpr Stage StageFromIndex(size_t index) noexcept { return static_cast(index); } +struct CommonHsEsVsRuntimeInfo { + u32 hs_output_cp_stride; + + bool operator==(const CommonHsEsVsRuntimeInfo&) const noexcept = default; +}; + +struct CommonEsVsRuntimeInfo : protected 
CommonHsEsVsRuntimeInfo { + AmdGpu::TessellationType tess_type; + AmdGpu::TessellationTopology tess_topology; + AmdGpu::TessellationPartitioning tess_partitioning; + + bool operator==(const CommonEsVsRuntimeInfo&) const noexcept = default; +}; + struct LocalRuntimeInfo { u32 ls_stride; auto operator<=>(const LocalRuntimeInfo&) const noexcept = default; }; -struct ExportRuntimeInfo { +struct ExportRuntimeInfo : protected CommonEsVsRuntimeInfo { u32 vertex_data_size; - auto operator<=>(const ExportRuntimeInfo&) const noexcept = default; + bool operator==(const ExportRuntimeInfo&) const noexcept = default; }; enum class Output : u8 { @@ -79,7 +93,7 @@ enum class Output : u8 { }; using OutputMap = std::array; -struct VertexRuntimeInfo { +struct VertexRuntimeInfo : protected CommonEsVsRuntimeInfo { u32 num_outputs; u32 num_exports; std::array outputs; @@ -88,42 +102,18 @@ struct VertexRuntimeInfo { bool clip_disable{}; u32 step_rate_0; u32 step_rate_1; - AmdGpu::TessellationType tess_type; - AmdGpu::TessellationTopology tess_topology; - AmdGpu::TessellationPartitioning tess_partitioning; - u32 hs_output_cp_stride{}; - bool operator==(const VertexRuntimeInfo& other) const noexcept { - return num_outputs == other.num_outputs && outputs == other.outputs && - tess_emulated_primitive == other.tess_emulated_primitive && - emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one && - clip_disable == other.clip_disable && tess_type == other.tess_type && - tess_topology == other.tess_topology && - tess_partitioning == other.tess_partitioning && - hs_output_cp_stride == other.hs_output_cp_stride && - step_rate_0 == other.step_rate_0 && step_rate_1 == other.step_rate_1; - } - - void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { - hs_output_cp_stride = tess_constants.hs_cp_stride; - } + bool operator==(const VertexRuntimeInfo& other) const noexcept = default; }; -struct HullRuntimeInfo { +struct HullRuntimeInfo : protected 
CommonHsEsVsRuntimeInfo { u32 num_input_control_points; u32 num_threads; AmdGpu::TessellationType tess_type; bool offchip_lds_enable; u32 ls_stride; - u32 hs_output_cp_stride; u32 hs_output_base; - void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { - ls_stride = tess_constants.ls_stride; - hs_output_cp_stride = tess_constants.hs_cp_stride; - hs_output_base = tess_constants.hs_output_base; - } - bool operator==(const HullRuntimeInfo&) const = default; // It might be possible for a non-passthrough TCS to have these conditions, in some dumb @@ -248,6 +238,10 @@ struct RuntimeInfo { GeometryRuntimeInfo gs_info; FragmentRuntimeInfo fs_info; ComputeRuntimeInfo cs_info; + // Hs/Es/VsRuntimeInfo inherit from these so we can + // access common info with correct offsets + CommonHsEsVsRuntimeInfo hs_es_vs_info; + CommonEsVsRuntimeInfo es_vs_info; }; void Initialize(Stage stage_) { @@ -275,6 +269,14 @@ struct RuntimeInfo { return true; } } + + void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) { + hs_es_vs_info.hs_output_cp_stride = tess_constants.hs_cp_stride; + if (stage == Stage::Hull) { + hs_info.ls_stride = tess_constants.ls_stride; + hs_info.hs_output_base = tess_constants.hs_output_base; + } + } }; } // namespace Shader diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h index fa14583af..73228bc1f 100644 --- a/src/shader_recompiler/specialization.h +++ b/src/shader_recompiler/specialization.h @@ -164,11 +164,7 @@ struct StageSpecialization { info->l_stage == LogicalStage::TessellationEval) { TessellationDataConstantBuffer tess_constants{}; info->ReadTessConstantBuffer(tess_constants); - if (info->l_stage == LogicalStage::TessellationControl) { - runtime_info.hs_info.InitFromTessConstants(tess_constants); - } else { - runtime_info.vs_info.InitFromTessConstants(tess_constants); - } + runtime_info.InitFromTessConstants(tess_constants); } } diff --git 
a/src/video_core/amdgpu/regs_vertex.h b/src/video_core/amdgpu/regs_vertex.h index a1a1b7a0f..77fdb426e 100644 --- a/src/video_core/amdgpu/regs_vertex.h +++ b/src/video_core/amdgpu/regs_vertex.h @@ -87,6 +87,7 @@ union ShaderStageEnable { Vs = 0u, // always enabled EsGs = 0xB0u, LsHs = 0x45u, + LsHsEsGs = 0xADu, }; VgtStages raw; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 223d282ea..a10ccd577 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -120,6 +120,11 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS case Stage::Export: { BuildCommon(regs.es_program); info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize; + if (l_stage == LogicalStage::TessellationEval) { + info.es_vs_info.tess_type = regs.tess_config.type; + info.es_vs_info.tess_topology = regs.tess_config.topology; + info.es_vs_info.tess_partitioning = regs.tess_config.partitioning; + } break; } case Stage::Vertex: { @@ -136,9 +141,9 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS regs.primitive_type == AmdGpu::PrimitiveType::QuadList; info.vs_info.clip_disable = regs.IsClipDisabled(); if (l_stage == LogicalStage::TessellationEval) { - info.vs_info.tess_type = regs.tess_config.type; - info.vs_info.tess_topology = regs.tess_config.topology; - info.vs_info.tess_partitioning = regs.tess_config.partitioning; + info.es_vs_info.tess_type = regs.tess_config.type; + info.es_vs_info.tess_topology = regs.tess_config.topology; + info.es_vs_info.tess_partitioning = regs.tess_config.partitioning; } break; } @@ -149,7 +154,23 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS gs_info.output_vertices = regs.vgt_gs_max_vert_out; gs_info.num_invocations = regs.vgt_gs_instance_cnt.IsEnabled() ? 
regs.vgt_gs_instance_cnt.count : 1; - gs_info.in_primitive = regs.primitive_type; + if (regs.stage_enable.raw == AmdGpu::ShaderStageEnable::LsHsEsGs) { + gs_info.in_primitive = [&]() { + switch (regs.tess_config.topology) { + case AmdGpu::TessellationTopology::Point: + return AmdGpu::PrimitiveType::PointList; + case AmdGpu::TessellationTopology::Line: + return AmdGpu::PrimitiveType::LineList; + case AmdGpu::TessellationTopology::TriangleCw: + case AmdGpu::TessellationTopology::TriangleCcw: + return AmdGpu::PrimitiveType::TriangleList; + default: + UNREACHABLE(); + } + }(); + } else { + gs_info.in_primitive = regs.primitive_type; + } for (u32 stream_id = 0; stream_id < Shader::GsMaxOutputStreams; ++stream_id) { gs_info.out_primitive[stream_id] = regs.vgt_gs_out_prim_type.GetPrimitiveType(stream_id); @@ -511,9 +532,38 @@ bool PipelineCache::RefreshGraphicsStages() { return false; } break; - default: + case AmdGpu::ShaderStageEnable::VgtStages::LsHsEsGs: + if (!instance.IsTessellationSupported() || + (regs.tess_config.type == AmdGpu::TessellationType::Isoline && + !instance.IsTessellationIsolinesSupported())) { + return false; + } + if (!instance.IsGeometryStageSupported()) { + LOG_WARNING(Render_Vulkan, "Geometry shader stage unsupported, skipping"); + return false; + } + if (regs.vgt_gs_mode.onchip || regs.vgt_strmout_config.raw) { + LOG_WARNING(Render_Vulkan, "Geometry shader features unsupported, skipping"); + return false; + } + if (!bind_stage(Stage::Hull, LogicalStage::TessellationControl)) { + return false; + } + if (!bind_stage(Stage::Export, LogicalStage::TessellationEval)) { + return false; + } + if (!bind_stage(Stage::Local, LogicalStage::Vertex)) { + return false; + } + if (!bind_stage(Stage::Geometry, LogicalStage::Geometry)) { + return false; + } + break; + case AmdGpu::ShaderStageEnable::VgtStages::Vs: bind_stage(Stage::Vertex, LogicalStage::Vertex); break; + default: + UNREACHABLE_MSG("unhandled stage_en: {}", (u32)regs.stage_enable.raw); } const 
auto* vs_info = infos[static_cast(Shader::LogicalStage::Vertex)];