mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2026-04-29 23:41:19 -06:00
Add support for LsHsEsGs (tess + geometry) pipelines where tess is onchip and geometry is offchip (#4328)
This commit is contained in:
parent
cf238acea9
commit
09c20d4636
@ -373,13 +373,12 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
|
||||
break;
|
||||
case LogicalStage::TessellationEval: {
|
||||
execution_model = spv::ExecutionModel::TessellationEvaluation;
|
||||
const auto& vs_info = ctx.runtime_info.vs_info;
|
||||
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_type));
|
||||
ctx.AddExecutionMode(main, ExecutionMode(vs_info.tess_partitioning));
|
||||
ctx.AddExecutionMode(main,
|
||||
vs_info.tess_topology == AmdGpu::TessellationTopology::TriangleCcw
|
||||
? spv::ExecutionMode::VertexOrderCcw
|
||||
: spv::ExecutionMode::VertexOrderCw);
|
||||
ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.es_vs_info.tess_type));
|
||||
ctx.AddExecutionMode(main, ExecutionMode(ctx.runtime_info.es_vs_info.tess_partitioning));
|
||||
ctx.AddExecutionMode(main, ctx.runtime_info.es_vs_info.tess_topology ==
|
||||
AmdGpu::TessellationTopology::TriangleCcw
|
||||
? spv::ExecutionMode::VertexOrderCcw
|
||||
: spv::ExecutionMode::VertexOrderCw);
|
||||
break;
|
||||
}
|
||||
case LogicalStage::Fragment:
|
||||
|
||||
@ -520,7 +520,8 @@ void EmitContext::DefineInputs() {
|
||||
const u32 num_attrs = Common::AlignUp(runtime_info.hs_info.ls_stride, 16) >> 4;
|
||||
if (num_attrs > 0) {
|
||||
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||
// The input vertex count isn't statically known, so make length 32 (what glslang does)
|
||||
// The input vertex count isn't statically known, so make length 32 (what
|
||||
// glslang does)
|
||||
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
|
||||
input_attr_array = DefineInput(patch_array_type, 0);
|
||||
Name(input_attr_array, "in_attrs");
|
||||
@ -531,10 +532,12 @@ void EmitContext::DefineInputs() {
|
||||
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
|
||||
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
|
||||
|
||||
const u32 num_attrs = Common::AlignUp(runtime_info.vs_info.hs_output_cp_stride, 16) >> 4;
|
||||
const u32 num_attrs =
|
||||
Common::AlignUp(runtime_info.hs_es_vs_info.hs_output_cp_stride, 16) >> 4;
|
||||
if (num_attrs > 0) {
|
||||
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||
// The input vertex count isn't statically known, so make length 32 (what glslang does)
|
||||
// The input vertex count isn't statically known, so make length 32 (what
|
||||
// glslang does)
|
||||
const Id patch_array_type{TypeArray(per_vertex_type, ConstU32(32u))};
|
||||
input_attr_array = DefineInput(patch_array_type, 0);
|
||||
Name(input_attr_array, "in_attrs");
|
||||
@ -643,10 +646,12 @@ void EmitContext::DefineOutputs() {
|
||||
Decorate(output_tess_level_inner, spv::Decoration::Patch);
|
||||
}
|
||||
|
||||
const u32 num_attrs = Common::AlignUp(runtime_info.hs_info.hs_output_cp_stride, 16) >> 4;
|
||||
const u32 num_attrs =
|
||||
Common::AlignUp(runtime_info.hs_es_vs_info.hs_output_cp_stride, 16) >> 4;
|
||||
if (num_attrs > 0) {
|
||||
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
|
||||
// The input vertex count isn't statically known, so make length 32 (what glslang does)
|
||||
// The input vertex count isn't statically known, so make length 32 (what
|
||||
// glslang does)
|
||||
const Id patch_array_type{TypeArray(
|
||||
per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
|
||||
output_attr_array = DefineOutput(patch_array_type, 0);
|
||||
@ -709,7 +714,8 @@ void EmitContext::DefineOutputs() {
|
||||
++num_render_targets;
|
||||
}
|
||||
// Dual source blending allows at most 2 render targets, one for each source.
|
||||
// Fewer targets are allowed but the missing blending source values will be undefined.
|
||||
// Fewer targets are allowed but the missing blending source values will be
|
||||
// undefined.
|
||||
ASSERT_MSG(!runtime_info.fs_info.dual_source_blending || num_render_targets <= 2,
|
||||
"Dual source blending enabled, there must be at most two MRT exports");
|
||||
break;
|
||||
|
||||
@ -149,12 +149,7 @@ static void InitTessConstants(IR::ScalarReg sharp_ptr_base, s32 sharp_dword_offs
|
||||
info.tess_consts_ptr_base = sharp_ptr_base;
|
||||
info.tess_consts_dword_offset = sharp_dword_offset;
|
||||
info.ReadTessConstantBuffer(tess_constants);
|
||||
if (info.l_stage == LogicalStage::TessellationControl) {
|
||||
runtime_info.hs_info.InitFromTessConstants(tess_constants);
|
||||
} else {
|
||||
runtime_info.vs_info.InitFromTessConstants(tess_constants);
|
||||
}
|
||||
|
||||
runtime_info.InitFromTessConstants(tess_constants);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -470,7 +465,7 @@ void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info)
|
||||
if (off_dw > 0) {
|
||||
addr = ir.IAdd(addr, ir.Imm32(off_dw));
|
||||
}
|
||||
const u32 stride = runtime_info.hs_info.hs_output_cp_stride;
|
||||
const u32 stride = runtime_info.hs_es_vs_info.hs_output_cp_stride;
|
||||
// Invocation ID array index is implicit, handled by SPIRV backend
|
||||
const IR::U32 opt_addr = TryOptimizeAddressModulo(addr, stride, ir);
|
||||
const IR::U32 offset = ir.IMod(opt_addr, ir.Imm32(stride));
|
||||
@ -508,8 +503,9 @@ void HullShaderTransform(IR::Program& program, const RuntimeInfo& runtime_info)
|
||||
region == AttributeRegion::OutputCP,
|
||||
"Unhandled read of patchconst attribute in hull shader");
|
||||
const bool is_tcs_output_read = region == AttributeRegion::OutputCP;
|
||||
const u32 stride = is_tcs_output_read ? runtime_info.hs_info.hs_output_cp_stride
|
||||
: runtime_info.hs_info.ls_stride;
|
||||
const u32 stride = is_tcs_output_read
|
||||
? runtime_info.hs_es_vs_info.hs_output_cp_stride
|
||||
: runtime_info.hs_info.ls_stride;
|
||||
IR::Value attr_read;
|
||||
if (num_dwords == 1) {
|
||||
attr_read = ir.BitCast<IR::U32>(
|
||||
@ -585,7 +581,8 @@ void DomainShaderTransform(const IR::Program& program, const RuntimeInfo& runtim
|
||||
const auto GetInput = [&](IR::U32 addr, u32 off_dw) -> IR::F32 {
|
||||
if (region == AttributeRegion::OutputCP) {
|
||||
return ReadTessControlPointAttribute(
|
||||
addr, runtime_info.vs_info.hs_output_cp_stride, ir, off_dw, false);
|
||||
addr, runtime_info.hs_es_vs_info.hs_output_cp_stride, ir, off_dw,
|
||||
false);
|
||||
} else {
|
||||
ASSERT(region == AttributeRegion::PatchConst);
|
||||
return ir.GetPatch(IR::PatchGeneric((addr.U32() >> 2) + off_dw));
|
||||
|
||||
@ -40,16 +40,30 @@ constexpr Stage StageFromIndex(size_t index) noexcept {
|
||||
return static_cast<Stage>(index);
|
||||
}
|
||||
|
||||
struct CommonHsEsVsRuntimeInfo {
|
||||
u32 hs_output_cp_stride;
|
||||
|
||||
bool operator<=>(const CommonHsEsVsRuntimeInfo&) const noexcept = default;
|
||||
};
|
||||
|
||||
struct CommonEsVsRuntimeInfo : protected CommonHsEsVsRuntimeInfo {
|
||||
AmdGpu::TessellationType tess_type;
|
||||
AmdGpu::TessellationTopology tess_topology;
|
||||
AmdGpu::TessellationPartitioning tess_partitioning;
|
||||
|
||||
bool operator<=>(const CommonEsVsRuntimeInfo&) const noexcept = default;
|
||||
};
|
||||
|
||||
struct LocalRuntimeInfo {
|
||||
u32 ls_stride;
|
||||
|
||||
auto operator<=>(const LocalRuntimeInfo&) const noexcept = default;
|
||||
};
|
||||
|
||||
struct ExportRuntimeInfo {
|
||||
struct ExportRuntimeInfo : protected CommonEsVsRuntimeInfo {
|
||||
u32 vertex_data_size;
|
||||
|
||||
auto operator<=>(const ExportRuntimeInfo&) const noexcept = default;
|
||||
bool operator<=>(const ExportRuntimeInfo&) const noexcept = default;
|
||||
};
|
||||
|
||||
enum class Output : u8 {
|
||||
@ -79,7 +93,7 @@ enum class Output : u8 {
|
||||
};
|
||||
using OutputMap = std::array<Output, 4>;
|
||||
|
||||
struct VertexRuntimeInfo {
|
||||
struct VertexRuntimeInfo : protected CommonEsVsRuntimeInfo {
|
||||
u32 num_outputs;
|
||||
u32 num_exports;
|
||||
std::array<OutputMap, 3> outputs;
|
||||
@ -88,42 +102,18 @@ struct VertexRuntimeInfo {
|
||||
bool clip_disable{};
|
||||
u32 step_rate_0;
|
||||
u32 step_rate_1;
|
||||
AmdGpu::TessellationType tess_type;
|
||||
AmdGpu::TessellationTopology tess_topology;
|
||||
AmdGpu::TessellationPartitioning tess_partitioning;
|
||||
u32 hs_output_cp_stride{};
|
||||
|
||||
bool operator==(const VertexRuntimeInfo& other) const noexcept {
|
||||
return num_outputs == other.num_outputs && outputs == other.outputs &&
|
||||
tess_emulated_primitive == other.tess_emulated_primitive &&
|
||||
emulate_depth_negative_one_to_one == other.emulate_depth_negative_one_to_one &&
|
||||
clip_disable == other.clip_disable && tess_type == other.tess_type &&
|
||||
tess_topology == other.tess_topology &&
|
||||
tess_partitioning == other.tess_partitioning &&
|
||||
hs_output_cp_stride == other.hs_output_cp_stride &&
|
||||
step_rate_0 == other.step_rate_0 && step_rate_1 == other.step_rate_1;
|
||||
}
|
||||
|
||||
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
|
||||
hs_output_cp_stride = tess_constants.hs_cp_stride;
|
||||
}
|
||||
bool operator<=>(const VertexRuntimeInfo& other) const noexcept = default;
|
||||
};
|
||||
|
||||
struct HullRuntimeInfo {
|
||||
struct HullRuntimeInfo : protected CommonHsEsVsRuntimeInfo {
|
||||
u32 num_input_control_points;
|
||||
u32 num_threads;
|
||||
AmdGpu::TessellationType tess_type;
|
||||
bool offchip_lds_enable;
|
||||
u32 ls_stride;
|
||||
u32 hs_output_cp_stride;
|
||||
u32 hs_output_base;
|
||||
|
||||
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
|
||||
ls_stride = tess_constants.ls_stride;
|
||||
hs_output_cp_stride = tess_constants.hs_cp_stride;
|
||||
hs_output_base = tess_constants.hs_output_base;
|
||||
}
|
||||
|
||||
bool operator==(const HullRuntimeInfo&) const = default;
|
||||
|
||||
// It might be possible for a non-passthrough TCS to have these conditions, in some dumb
|
||||
@ -248,6 +238,10 @@ struct RuntimeInfo {
|
||||
GeometryRuntimeInfo gs_info;
|
||||
FragmentRuntimeInfo fs_info;
|
||||
ComputeRuntimeInfo cs_info;
|
||||
// Hs/Es/VsRuntimeInfo inherit from these so we can
|
||||
// access common info with correct offsets
|
||||
CommonHsEsVsRuntimeInfo hs_es_vs_info;
|
||||
CommonEsVsRuntimeInfo es_vs_info;
|
||||
};
|
||||
|
||||
void Initialize(Stage stage_) {
|
||||
@ -275,6 +269,14 @@ struct RuntimeInfo {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the fields this runtime info needs out of the tessellation constant buffer.
// The output control point stride is stored for every stage; the LS stride and the
// HS output base are only stored when the current stage is Stage::Hull.
void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
    hs_es_vs_info.hs_output_cp_stride = tess_constants.hs_cp_stride;

    if (stage != Stage::Hull) {
        return;
    }

    hs_info.ls_stride = tess_constants.ls_stride;
    hs_info.hs_output_base = tess_constants.hs_output_base;
}
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
@ -164,11 +164,7 @@ struct StageSpecialization {
|
||||
info->l_stage == LogicalStage::TessellationEval) {
|
||||
TessellationDataConstantBuffer tess_constants{};
|
||||
info->ReadTessConstantBuffer(tess_constants);
|
||||
if (info->l_stage == LogicalStage::TessellationControl) {
|
||||
runtime_info.hs_info.InitFromTessConstants(tess_constants);
|
||||
} else {
|
||||
runtime_info.vs_info.InitFromTessConstants(tess_constants);
|
||||
}
|
||||
runtime_info.InitFromTessConstants(tess_constants);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -87,6 +87,7 @@ union ShaderStageEnable {
|
||||
Vs = 0u, // always enabled
|
||||
EsGs = 0xB0u,
|
||||
LsHs = 0x45u,
|
||||
LsHsEsGs = 0xAD,
|
||||
};
|
||||
|
||||
VgtStages raw;
|
||||
|
||||
@ -120,6 +120,11 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
||||
case Stage::Export: {
|
||||
BuildCommon(regs.es_program);
|
||||
info.es_info.vertex_data_size = regs.vgt_esgs_ring_itemsize;
|
||||
if (l_stage == LogicalStage::TessellationEval) {
|
||||
info.es_vs_info.tess_type = regs.tess_config.type;
|
||||
info.es_vs_info.tess_topology = regs.tess_config.topology;
|
||||
info.es_vs_info.tess_partitioning = regs.tess_config.partitioning;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Stage::Vertex: {
|
||||
@ -136,9 +141,9 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
||||
regs.primitive_type == AmdGpu::PrimitiveType::QuadList;
|
||||
info.vs_info.clip_disable = regs.IsClipDisabled();
|
||||
if (l_stage == LogicalStage::TessellationEval) {
|
||||
info.vs_info.tess_type = regs.tess_config.type;
|
||||
info.vs_info.tess_topology = regs.tess_config.topology;
|
||||
info.vs_info.tess_partitioning = regs.tess_config.partitioning;
|
||||
info.es_vs_info.tess_type = regs.tess_config.type;
|
||||
info.es_vs_info.tess_topology = regs.tess_config.topology;
|
||||
info.es_vs_info.tess_partitioning = regs.tess_config.partitioning;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -149,7 +154,23 @@ const Shader::RuntimeInfo& PipelineCache::BuildRuntimeInfo(Stage stage, LogicalS
|
||||
gs_info.output_vertices = regs.vgt_gs_max_vert_out;
|
||||
gs_info.num_invocations =
|
||||
regs.vgt_gs_instance_cnt.IsEnabled() ? regs.vgt_gs_instance_cnt.count : 1;
|
||||
gs_info.in_primitive = regs.primitive_type;
|
||||
if (regs.stage_enable.raw == AmdGpu::ShaderStageEnable::LsHsEsGs) {
|
||||
gs_info.in_primitive = [&]() {
|
||||
switch (regs.tess_config.topology) {
|
||||
case AmdGpu::TessellationTopology::Point:
|
||||
return AmdGpu::PrimitiveType::PointList;
|
||||
case AmdGpu::TessellationTopology::Line:
|
||||
return AmdGpu::PrimitiveType::LineList;
|
||||
case AmdGpu::TessellationTopology::TriangleCw:
|
||||
case AmdGpu::TessellationTopology::TriangleCcw:
|
||||
return AmdGpu::PrimitiveType::TriangleList;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}();
|
||||
} else {
|
||||
gs_info.in_primitive = regs.primitive_type;
|
||||
}
|
||||
for (u32 stream_id = 0; stream_id < Shader::GsMaxOutputStreams; ++stream_id) {
|
||||
gs_info.out_primitive[stream_id] =
|
||||
regs.vgt_gs_out_prim_type.GetPrimitiveType(stream_id);
|
||||
@ -511,9 +532,38 @@ bool PipelineCache::RefreshGraphicsStages() {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
case AmdGpu::ShaderStageEnable::VgtStages::LsHsEsGs:
|
||||
if (!instance.IsTessellationSupported() ||
|
||||
(regs.tess_config.type == AmdGpu::TessellationType::Isoline &&
|
||||
!instance.IsTessellationIsolinesSupported())) {
|
||||
return false;
|
||||
}
|
||||
if (!instance.IsGeometryStageSupported()) {
|
||||
LOG_WARNING(Render_Vulkan, "Geometry shader stage unsupported, skipping");
|
||||
return false;
|
||||
}
|
||||
if (regs.vgt_gs_mode.onchip || regs.vgt_strmout_config.raw) {
|
||||
LOG_WARNING(Render_Vulkan, "Geometry shader features unsupported, skipping");
|
||||
return false;
|
||||
}
|
||||
if (!bind_stage(Stage::Hull, LogicalStage::TessellationControl)) {
|
||||
return false;
|
||||
}
|
||||
if (!bind_stage(Stage::Export, LogicalStage::TessellationEval)) {
|
||||
return false;
|
||||
}
|
||||
if (!bind_stage(Stage::Local, LogicalStage::Vertex)) {
|
||||
return false;
|
||||
}
|
||||
if (!bind_stage(Stage::Geometry, LogicalStage::Geometry)) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case AmdGpu::ShaderStageEnable::VgtStages::Vs:
|
||||
bind_stage(Stage::Vertex, LogicalStage::Vertex);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("unhandled stage_en: {}", (u32)regs.stage_enable.raw);
|
||||
}
|
||||
|
||||
const auto* vs_info = infos[static_cast<u32>(Shader::LogicalStage::Vertex)];
|
||||
|
||||
Loading…
Reference in New Issue
Block a user