// SPDX-FileCopyrightText: Copyright 2024-2026 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include "common/io_file.h" #include "common/path_util.h" #include "core/emulator_settings.h" #include "core/libraries/kernel/process.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/info.h" #include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/reg.h" #include "shader_recompiler/ir/reinterpret.h" #include "shader_recompiler/profile.h" #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" #include #include namespace Shader::Gcn { static IR::VectorReg IterateBarycentrics(const RuntimeInfo& runtime_info, auto&& set_attribute) { if (runtime_info.stage != Stage::Fragment) { return IR::VectorReg::V0; } u32 dst_vreg{}; if (runtime_info.fs_info.addr_flags.persp_sample_ena) { set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 0); // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothSample, 1); // J } if (runtime_info.fs_info.addr_flags.persp_center_ena) { set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 0); // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmooth, 1); // J } if (runtime_info.fs_info.addr_flags.persp_centroid_ena) { set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 0); // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordSmoothCentroid, 1); // J } if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) { set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 0); // I/W set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 1); // J/W set_attribute(dst_vreg++, IR::Attribute::BaryCoordPullModel, 2); // 1/W } if (runtime_info.fs_info.addr_flags.linear_sample_ena) { set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 0); // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspSample, 1); // J } if (runtime_info.fs_info.addr_flags.linear_center_ena) { set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 0); // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPersp, 1); // J } if (runtime_info.fs_info.addr_flags.linear_centroid_ena) { set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 0); // I set_attribute(dst_vreg++, IR::Attribute::BaryCoordNoPerspCentroid, 1); // J } if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) { ++dst_vreg; } return IR::VectorReg(dst_vreg); } Translator::Translator(Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_) : info{info_}, runtime_info{runtime_info_}, profile{profile_}, next_vgpr_num{runtime_info.num_allocated_vgprs} { IterateBarycentrics(runtime_info, [this](u32 vreg, IR::Attribute attrib, u32) { vgpr_to_interp[vreg] = attrib; }); } void Translator::EmitPrologue(IR::Block* first_block) { ir = IR::IREmitter(*first_block, first_block->begin()); ir.Prologue(); ir.SetExec(ir.Imm1(true)); // Initialize user data. IR::ScalarReg dst_sreg = IR::ScalarReg::S0; for (u32 i = 0; i < runtime_info.num_user_data; i++) { ir.SetScalarReg(dst_sreg, ir.GetUserData(dst_sreg)); ++dst_sreg; } IR::VectorReg dst_vreg = IR::VectorReg::V0; switch (info.l_stage) { case LogicalStage::Vertex: // v0: vertex ID, always present ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId)); if (info.stage == Stage::Local) { // v1: rel patch ID if (runtime_info.num_input_vgprs > 0) { ir.SetVectorReg(dst_vreg++, ir.Imm32(0)); } // v2: unknown if (runtime_info.num_input_vgprs > 1) { ++dst_vreg; } // v3: instance ID, plain if (runtime_info.num_input_vgprs > 2) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } } else { // v1: instance ID, step rate 0 if (runtime_info.num_input_vgprs > 0) { if (runtime_info.vs_info.step_rate_0 != 0) { ir.SetVectorReg(dst_vreg++, ir.IDiv(ir.GetAttributeU32(IR::Attribute::InstanceId), ir.Imm32(runtime_info.vs_info.step_rate_0))); } else { ir.SetVectorReg(dst_vreg++, ir.Imm32(0)); } } // v2: instance ID, step rate 1 if (runtime_info.num_input_vgprs > 1) { if (runtime_info.vs_info.step_rate_1 != 0) { ir.SetVectorReg(dst_vreg++, ir.IDiv(ir.GetAttributeU32(IR::Attribute::InstanceId), ir.Imm32(runtime_info.vs_info.step_rate_1))); } else { ir.SetVectorReg(dst_vreg++, ir.Imm32(0)); } } // v3: instance ID, plain if (runtime_info.num_input_vgprs > 2) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId)); } } break; case LogicalStage::Fragment: dst_vreg = IterateBarycentrics(runtime_info, [this](u32 vreg, IR::Attribute attrib, u32 comp) { if (profile.supports_amd_shader_explicit_vertex_parameter || profile.supports_fragment_shader_barycentric) { ir.SetVectorReg(IR::VectorReg(vreg), ir.GetAttribute(attrib, comp)); } }); if (runtime_info.fs_info.addr_flags.pos_x_float_ena) { if (runtime_info.fs_info.en_flags.pos_x_float_ena) { ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0)); } else { ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f)); } } if (runtime_info.fs_info.addr_flags.pos_y_float_ena) { if (runtime_info.fs_info.en_flags.pos_y_float_ena) { ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 1)); } else { ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f)); } } if (runtime_info.fs_info.addr_flags.pos_z_float_ena) { if (runtime_info.fs_info.en_flags.pos_z_float_ena) { ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 2)); } else { ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f)); } } if (runtime_info.fs_info.addr_flags.pos_w_float_ena) { if (runtime_info.fs_info.en_flags.pos_w_float_ena) { ir.SetVectorReg(dst_vreg++, ir.FPRecip(ir.GetAttribute(IR::Attribute::FragCoord, 3))); } else { ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f)); } } if (runtime_info.fs_info.addr_flags.front_face_ena) { if (runtime_info.fs_info.en_flags.front_face_ena) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::IsFrontFace)); } else { ir.SetVectorReg(dst_vreg++, ir.Imm32(0)); } } if (runtime_info.fs_info.addr_flags.ancillary_ena) { if (runtime_info.fs_info.en_flags.ancillary_ena) { ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::PackedAncillary)); } else { ir.SetVectorReg(dst_vreg++, ir.Imm32(0)); } } break; case LogicalStage::TessellationControl: { ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); // Should be laid out like: // [0:8]: patch id within VGT // [8:12]: output control point id ir.SetVectorReg(IR::VectorReg::V1, ir.GetAttributeU32(IR::Attribute::PackedHullInvocationInfo)); if (runtime_info.hs_info.offchip_lds_enable) { // No off-chip tessellation has been observed yet. If this survives dead code elim, // revisit ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::OffChipLdsBase)); } ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::TessFactorsBufferBase)); break; } case LogicalStage::TessellationEval: ir.SetVectorReg(IR::VectorReg::V0, ir.GetAttribute(IR::Attribute::TessellationEvaluationPointU)); ir.SetVectorReg(IR::VectorReg::V1, ir.GetAttribute(IR::Attribute::TessellationEvaluationPointV)); // V2 is similar to PrimitiveID but not the same. It seems to only be used in // compiler-generated address calculations. Its probably the patch id within the // patches running locally on a given VGT (or CU, whichever is the granularity of LDS // memory) // Set to 0. See explanation in comment describing hull/domain passes ir.SetVectorReg(IR::VectorReg::V2, ir.Imm32(0u)); // V3 is the actual PrimitiveID as intended by the shader author. ir.SetVectorReg(IR::VectorReg::V3, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); break; case LogicalStage::Compute: ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1)); ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2)); if (runtime_info.cs_info.tgid_enable[0]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0)); } if (runtime_info.cs_info.tgid_enable[1]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1)); } if (runtime_info.cs_info.tgid_enable[2]) { ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2)); } break; case LogicalStage::Geometry: switch (runtime_info.gs_info.out_primitive[0]) { case AmdGpu::GsOutputPrimitiveType::TriangleStrip: ir.SetVectorReg(IR::VectorReg::V3, ir.Imm32(2u)); // vertex 2 case AmdGpu::GsOutputPrimitiveType::LineStrip: ir.SetVectorReg(IR::VectorReg::V1, ir.Imm32(1u)); // vertex 1 default: ir.SetVectorReg(IR::VectorReg::V0, ir.Imm32(0u)); // vertex 0 break; } ir.SetVectorReg(IR::VectorReg::V2, ir.GetAttributeU32(IR::Attribute::PrimitiveId)); break; default: UNREACHABLE_MSG("Unknown shader stage"); } // Clear any scratch vgpr mappings for next shader. vgpr_map.clear(); } IR::VectorReg Translator::GetScratchVgpr(u32 offset) { const auto [it, is_new] = vgpr_map.try_emplace(offset); if (is_new) { ASSERT_MSG(next_vgpr_num < 256, "Out of VGPRs"); const auto new_vgpr = static_cast(next_vgpr_num++); it->second = new_vgpr; } return it->second; }; IR::U1 Translator::GetSrc1(const InstOperand& operand) { switch (operand.field) { case OperandField::VccLo: return ir.GetVcc(); case OperandField::ExecLo: return ir.GetExec(); case OperandField::ScalarGPR: return ir.GetThreadBitScalarReg(IR::ScalarReg(operand.code)); case OperandField::ConstZero: return ir.Imm1(false); case OperandField::SignedConstIntNeg: ASSERT_MSG(-s32(operand.code) + SignedConstIntNegMin - 1 == -1, "SignedConstIntNeg must be -1"); return ir.Imm1(true); case OperandField::LiteralConst: ASSERT_MSG(operand.code == 0 || operand.code == std::numeric_limits::max(), "Unsupported literal {:#x}", operand.code); return ir.Imm1(operand.code & 1); default: UNREACHABLE_MSG("Unknown field {}", u32(operand.field)); } } template T Translator::GetSrc(const InstOperand& operand) { constexpr bool is_float = std::is_same_v; const auto get_imm = [&](auto value) -> T { if constexpr (is_float) { return ir.Imm32(std::bit_cast(value)); } else { return ir.Imm32(std::bit_cast(value)); } }; T value{}; switch (operand.field) { case OperandField::ScalarGPR: value = ir.GetScalarReg(IR::ScalarReg(operand.code)); break; case OperandField::VectorGPR: value = ir.GetVectorReg(IR::VectorReg(operand.code)); break; case OperandField::ConstZero: value = get_imm(0U); break; case OperandField::SignedConstIntPos: value = get_imm(operand.code - SignedConstIntPosMin + 1); break; case OperandField::SignedConstIntNeg: value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1); break; case OperandField::LiteralConst: value = get_imm(operand.code); break; case OperandField::ConstFloatPos_1_0: value = get_imm(1.f); break; case OperandField::ConstFloatPos_0_5: value = get_imm(0.5f); break; case OperandField::ConstFloatPos_2_0: value = get_imm(2.0f); break; case OperandField::ConstFloatPos_4_0: value = get_imm(4.0f); break; case OperandField::ConstFloatNeg_0_5: value = get_imm(-0.5f); break; case OperandField::ConstFloatNeg_1_0: value = get_imm(-1.0f); break; case OperandField::ConstFloatNeg_2_0: value = get_imm(-2.0f); break; case OperandField::ConstFloatNeg_4_0: value = get_imm(-4.0f); break; case OperandField::Inv2Pi: value = get_imm(static_cast(1.0f / (2.0f * std::numbers::pi))); break; case OperandField::Sdwa: UNREACHABLE_MSG("unhandled SDWA"); case OperandField::Dpp: UNREACHABLE_MSG("unhandled DPP"); case OperandField::VccLo: if constexpr (is_float) { value = ir.BitCast(ir.GetVccLo()); } else { value = ir.GetVccLo(); } break; case OperandField::VccHi: if constexpr (is_float) { value = ir.BitCast(ir.GetVccHi()); } else { value = ir.GetVccHi(); } break; case OperandField::M0: if constexpr (is_float) { value = ir.BitCast(ir.GetM0()); } else { value = ir.GetM0(); } break; case OperandField::Scc: if constexpr (is_float) { UNREACHABLE(); } else { value = ir.BitCast(ir.GetScc()); } break; default: UNREACHABLE_MSG("unexpected operand: {}", std::to_underlying(operand.field)); } if constexpr (is_float) { if (operand.input_modifier.abs) { value = ir.FPAbs(value); } if (operand.input_modifier.neg) { value = ir.FPNeg(value); } } else { if (operand.input_modifier.abs) { value = ir.BitwiseAnd(value, ir.Imm32(0x7FFFFFFFu)); } if (operand.input_modifier.neg) { value = ir.BitwiseXor(value, ir.Imm32(0x80000000u)); } } return value; } template IR::U32 Translator::GetSrc(const InstOperand&); template IR::F32 Translator::GetSrc(const InstOperand&); template T Translator::GetSrc16(const InstOperand& operand) { constexpr bool is_float = std::is_same_v; const auto get_imm = [&](auto value) -> T { if constexpr (is_float) { return ir.Imm32(std::bit_cast(value)); } else { return ir.Imm32(std::bit_cast(value)); } }; const auto number_format = []() -> AmdGpu::NumberFormat { if constexpr (is_float) { return AmdGpu::NumberFormat::Float; } else { return AmdGpu::NumberFormat::Uint; } }(); const auto bitcast_to_u = [&](auto value) -> IR::U32 { if constexpr (is_float) { return ir.BitCast(value); } else { return value; } }; const auto cast = [&](auto value) -> T { if constexpr (is_float) { return value; } else { return ir.BitFieldExtract(ir.BitCast(value), ir.Imm32(0), ir.Imm32(16), is_signed); } }; const auto op_sel = operand.op_sel.op_sel; T value{}; switch (operand.field) { case OperandField::ScalarGPR: { const auto f = ir.GetScalarReg(IR::ScalarReg(operand.code)); value = cast(IR::F32{ ir.CompositeExtract(ir.Unpack2x16(number_format, bitcast_to_u(f)), op_sel ? 1 : 0)}); break; } case OperandField::VectorGPR: { const auto v = ir.GetVectorReg(IR::VectorReg(operand.code)); value = cast(IR::F32{ ir.CompositeExtract(ir.Unpack2x16(number_format, bitcast_to_u(v)), op_sel ? 1 : 0)}); break; } case OperandField::ConstZero: value = get_imm(0U); break; case OperandField::SignedConstIntPos: value = get_imm(operand.code - SignedConstIntPosMin + 1); break; case OperandField::SignedConstIntNeg: value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1); break; case OperandField::LiteralConst: value = get_imm(operand.code); break; case OperandField::ConstFloatPos_1_0: value = get_imm(1.f); break; case OperandField::ConstFloatPos_0_5: value = get_imm(0.5f); break; case OperandField::ConstFloatPos_2_0: value = get_imm(2.0f); break; case OperandField::ConstFloatPos_4_0: value = get_imm(4.0f); break; case OperandField::ConstFloatNeg_0_5: value = get_imm(-0.5f); break; case OperandField::ConstFloatNeg_1_0: value = get_imm(-1.0f); break; case OperandField::ConstFloatNeg_2_0: value = get_imm(-2.0f); break; case OperandField::ConstFloatNeg_4_0: value = get_imm(-4.0f); break; case OperandField::Inv2Pi: value = get_imm(static_cast(1.0f / (2.0f * std::numbers::pi))); break; case OperandField::Sdwa: LOG_ERROR(Render_Recompiler, "unhandled SDWA"); value = get_imm(0U); break; case OperandField::Dpp: LOG_ERROR(Render_Recompiler, "unhandled DPP"); value = get_imm(0U); break; case OperandField::VccLo: if constexpr (is_float) { value = IR::F32{ ir.CompositeExtract(ir.Unpack2x16(number_format, ir.GetVccLo()), op_sel ? 1 : 0)}; } else { value = cast(IR::F32{ir.CompositeExtract( ir.Unpack2x16(number_format, bitcast_to_u(ir.GetVccLo())), op_sel ? 1 : 0)}); } break; case OperandField::VccHi: UNREACHABLE(); break; case OperandField::M0: UNREACHABLE(); break; case OperandField::Scc: UNREACHABLE(); break; default: UNREACHABLE_MSG("unexpected operand: {}", std::to_underlying(operand.field)); } if constexpr (is_float) { if (operand.input_modifier.abs) { value = ir.FPAbs(value); } if (operand.input_modifier.neg) { value = ir.FPNeg(value); } } else { if (operand.input_modifier.abs) { value = ir.BitwiseAnd(value, ir.Imm32(0x7FFFFFFFu)); } if (operand.input_modifier.neg) { value = ir.BitwiseXor(value, ir.Imm32(0x80000000u)); } } return value; } template IR::U32 Translator::GetSrc16(const InstOperand&); template IR::U32 Translator::GetSrc16(const InstOperand&); template IR::F32 Translator::GetSrc16(const InstOperand&); IR::F32 Translator::GetSrcMix(const InstOperand& operand) { const auto get_imm = [&](auto value) -> IR::F32 { return ir.Imm32(std::bit_cast(value)); }; const auto extract = [&](auto value) -> IR::F32 { const auto getter_u = [&]() { if constexpr (std::same_as) { return ir.GetScalarReg(value); } else { return ir.GetVectorReg(value); } }(); if (!operand.op_sel.op_sel_hi) { if constexpr (std::same_as) { return ir.GetScalarReg(value); } else { return ir.GetVectorReg(value); } } else if (operand.op_sel.op_sel) { return IR::F32{ ir.CompositeExtract(ir.Unpack2x16(AmdGpu::NumberFormat::Float, getter_u), 1)}; } else { return IR::F32{ ir.CompositeExtract(ir.Unpack2x16(AmdGpu::NumberFormat::Float, getter_u), 0)}; } }; IR::F32 value{}; switch (operand.field) { case OperandField::ScalarGPR: value = extract(IR::ScalarReg(operand.code)); break; case OperandField::VectorGPR: value = extract(IR::VectorReg(operand.code)); break; case OperandField::ConstZero: value = get_imm(0U); break; case OperandField::SignedConstIntPos: value = get_imm(operand.code - SignedConstIntPosMin + 1); break; case OperandField::SignedConstIntNeg: value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1); break; case OperandField::LiteralConst: value = get_imm(operand.code); break; case OperandField::ConstFloatPos_1_0: value = get_imm(1.f); break; case OperandField::ConstFloatPos_0_5: value = get_imm(0.5f); break; case OperandField::ConstFloatPos_2_0: value = get_imm(2.0f); break; case OperandField::ConstFloatPos_4_0: value = get_imm(4.0f); break; case OperandField::ConstFloatNeg_0_5: value = get_imm(-0.5f); break; case OperandField::ConstFloatNeg_1_0: value = get_imm(-1.0f); break; case OperandField::ConstFloatNeg_2_0: value = get_imm(-2.0f); break; case OperandField::ConstFloatNeg_4_0: value = get_imm(-4.0f); break; case OperandField::VccLo: { if (!operand.op_sel.op_sel_hi) { value = ir.BitCast(ir.GetVccLo()); } else if (operand.op_sel.op_sel) { value = IR::F32{ ir.CompositeExtract(ir.Unpack2x16(AmdGpu::NumberFormat::Float, ir.GetVccLo()), 1)}; } else { value = IR::F32{ ir.CompositeExtract(ir.Unpack2x16(AmdGpu::NumberFormat::Float, ir.GetVccLo()), 0)}; } break; } case OperandField::VccHi: UNREACHABLE(); break; case OperandField::M0: UNREACHABLE(); break; case OperandField::Scc: UNREACHABLE(); break; case OperandField::Inv2Pi: value = get_imm(static_cast(1.0f / (2.0f * std::numbers::pi))); break; case OperandField::Sdwa: UNREACHABLE_MSG("unhandled SDWA"); break; case OperandField::Dpp: UNREACHABLE_MSG("unhandled DPP"); break; default: UNREACHABLE_MSG("unexpected operand: {}", std::to_underlying(operand.field)); } if (operand.input_modifier.neg_hi) { value = ir.FPAbs(value); } if (operand.input_modifier.neg) { value = ir.FPNeg(value); } return value; } template T Translator::GetSrc64(const InstOperand& operand) { constexpr bool is_float = std::is_same_v; const auto get_imm = [&](auto value) -> T { if constexpr (is_float) { return ir.Imm64(std::bit_cast(value)); } else { return ir.Imm64(std::bit_cast(value)); } }; T value{}; switch (operand.field) { case OperandField::ScalarGPR: { const auto value_lo = ir.GetScalarReg(IR::ScalarReg(operand.code)); const auto value_hi = ir.GetScalarReg(IR::ScalarReg(operand.code + 1)); if constexpr (is_float) { value = ir.PackDouble2x32(ir.CompositeConstruct(value_lo, value_hi)); } else { value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi)); } break; } case OperandField::VectorGPR: { const auto value_lo = ir.GetVectorReg(IR::VectorReg(operand.code)); const auto value_hi = ir.GetVectorReg(IR::VectorReg(operand.code + 1)); if constexpr (is_float) { value = ir.PackDouble2x32(ir.CompositeConstruct(value_lo, value_hi)); } else { value = ir.PackUint2x32(ir.CompositeConstruct(value_lo, value_hi)); } break; } case OperandField::ConstZero: value = get_imm(0ULL); break; case OperandField::SignedConstIntPos: value = get_imm(s64(operand.code) - SignedConstIntPosMin + 1); break; case OperandField::SignedConstIntNeg: value = get_imm(-s64(operand.code) + SignedConstIntNegMin - 1); break; case OperandField::LiteralConst: value = get_imm(u64(operand.code)); break; case OperandField::ConstFloatPos_1_0: value = get_imm(1.0); break; case OperandField::ConstFloatPos_0_5: value = get_imm(0.5); break; case OperandField::ConstFloatPos_2_0: value = get_imm(2.0); break; case OperandField::ConstFloatPos_4_0: value = get_imm(4.0); break; case OperandField::ConstFloatNeg_0_5: value = get_imm(-0.5); break; case OperandField::ConstFloatNeg_1_0: value = get_imm(-1.0); break; case OperandField::ConstFloatNeg_2_0: value = get_imm(-2.0); break; case OperandField::ConstFloatNeg_4_0: value = get_imm(-4.0); break; case OperandField::VccLo: if constexpr (is_float) { value = ir.PackDouble2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi())); } else { value = ir.PackUint2x32(ir.CompositeConstruct(ir.GetVccLo(), ir.GetVccHi())); } break; case OperandField::VccHi: default: UNREACHABLE(); } if constexpr (is_float) { if (operand.input_modifier.abs) { value = ir.FPAbs(value); } if (operand.input_modifier.neg) { value = ir.FPNeg(value); } } else { // GCN VOP3 abs/neg modifier bits operate on the sign bit (bit 63 for // 64-bit values). Unpack, modify the high dword's bit 31, repack. if (operand.input_modifier.abs) { const auto unpacked = ir.UnpackUint2x32(value); const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)}; const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)}; const auto hi_abs = ir.BitwiseAnd(hi, ir.Imm32(0x7FFFFFFFu)); value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_abs)); } if (operand.input_modifier.neg) { const auto unpacked = ir.UnpackUint2x32(value); const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)}; const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)}; const auto hi_neg = ir.BitwiseXor(hi, ir.Imm32(0x80000000u)); value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_neg)); } } return value; } template IR::U64 Translator::GetSrc64(const InstOperand&); template IR::F64 Translator::GetSrc64(const InstOperand&); template pk_type Translator::GetSrcPk(const InstOperand& operand) { constexpr bool is_float = std::is_same_v; const auto get_imm = [&](auto value) -> pk_type { if constexpr (is_float) { auto imm = ir.Imm32(std::bit_cast(value)); return {operand.op_sel.op_sel ? ir.Imm32(0.f) : imm, operand.op_sel.op_sel_hi ? ir.Imm32(0.f) : imm}; } else { auto imm = ir.Imm32(std::bit_cast(value)); return {operand.op_sel.op_sel ? ir.Imm32(0U) : imm, operand.op_sel.op_sel_hi ? ir.Imm32(0U) : imm}; } }; constexpr auto number_format = [&]() { if constexpr (is_float) { return AmdGpu::NumberFormat::Float; } else { return AmdGpu::NumberFormat::Uint; } }(); const auto cast = [&](auto value) -> T { if constexpr (is_float) { return value; } else { return ir.BitFieldExtract(ir.BitCast(value), ir.Imm32(0), ir.Imm32(16), is_signed); } }; const auto extract = [&](auto value) -> pk_type { auto v_unpacked = ir.Unpack2x16(number_format, value); return {cast(IR::F32{ir.CompositeExtract(v_unpacked, operand.op_sel.op_sel)}), cast(IR::F32{ir.CompositeExtract(v_unpacked, operand.op_sel.op_sel_hi)})}; }; pk_type value{}; switch (operand.field) { case OperandField::ScalarGPR: { value = extract(ir.GetScalarReg(IR::ScalarReg(operand.code))); break; } case OperandField::VectorGPR: { value = extract(ir.GetVectorReg(IR::VectorReg(operand.code))); break; } case OperandField::ConstZero: { value = get_imm(0U); break; } case OperandField::SignedConstIntPos: { value = get_imm(operand.code - SignedConstIntPosMin + 1); break; } case OperandField::SignedConstIntNeg: { value = get_imm(-s32(operand.code) + SignedConstIntNegMin - 1); break; } case OperandField::LiteralConst: { value = get_imm(operand.code); break; } case OperandField::ConstFloatPos_1_0: { value = get_imm(1.f); break; } case OperandField::ConstFloatPos_0_5: { value = get_imm(0.5f); break; } case OperandField::ConstFloatPos_2_0: { value = get_imm(2.0f); break; } case OperandField::ConstFloatPos_4_0: { value = get_imm(4.0f); break; } case OperandField::ConstFloatNeg_0_5: { value = get_imm(-0.5f); break; } case OperandField::ConstFloatNeg_1_0: { value = get_imm(-1.0f); break; } case OperandField::ConstFloatNeg_2_0: { value = get_imm(-2.0f); break; } case OperandField::ConstFloatNeg_4_0: { value = get_imm(-4.0f); break; } case OperandField::Inv2Pi: { value = get_imm(1.0f / (2.0f * std::numbers::pi_v)); break; } case OperandField::VccLo: value = extract(ir.GetVccLo()); break; default: UNREACHABLE_MSG("unexpected operand: {}", std::to_underlying(operand.field)); } if constexpr (is_float) { if (operand.input_modifier.neg) { value.first = ir.FPNeg(value.first); } if (operand.input_modifier.neg_hi) { value.second = ir.FPNeg(value.second); } } else { if (operand.input_modifier.neg) { value.first = ir.INeg(value.first); } if (operand.input_modifier.neg_hi) { value.second = ir.INeg(value.second); } } return value; } template pk_type Translator::GetSrcPk(const InstOperand&); template pk_type Translator::GetSrcPk(const InstOperand&); template pk_type Translator::GetSrcPk(const InstOperand&); void Translator::SetDst1(const InstOperand& operand, const IR::U1& value) { switch (operand.field) { case OperandField::VccLo: ir.SetVcc(value); break; case OperandField::ScalarGPR: ir.SetThreadBitScalarReg(IR::ScalarReg(operand.code), value); break; case OperandField::ExecLo: ir.SetExec(value); break; default: UNREACHABLE_MSG("Unknown field {}", u32(operand.field)); } } void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) { IR::U32F32 result = value; if (value.Type() == IR::Type::F32) { if (operand.output_modifier.multiplier != 0.f) { result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier)); } if (operand.output_modifier.clamp) { result = ir.FPSaturate(result); } } switch (operand.field) { case OperandField::ScalarGPR: return ir.SetScalarReg(IR::ScalarReg(operand.code), result); case OperandField::VectorGPR: return ir.SetVectorReg(IR::VectorReg(operand.code), result); case OperandField::VccLo: return ir.SetVccLo(result); case OperandField::VccHi: return ir.SetVccHi(result); case OperandField::M0: return ir.SetM0(result); default: UNREACHABLE(); } } template void Translator::SetDst16(const InstOperand& operand, const IR::U32F32& value) { IR::U32F32 result = value; if (value.Type() == IR::Type::F32) { if (operand.output_modifier.multiplier != 0.f) { result = ir.FPMul(result, ir.Imm32(operand.output_modifier.multiplier)); } if (operand.output_modifier.clamp) { result = ir.FPSaturate(result); } } else { if (operand.output_modifier.clamp) { if constexpr (is_signed) { result = ir.SClamp(result, ir.Imm32(-32768), ir.Imm32(32767)); } else { result = ir.UMin(result, ir.Imm32(0xFFFF)); } } } const auto cast = [&](auto value) -> IR::U32 { if (value.Type() == IR::Type::F32) { return ir.UConvert(32, ir.BitCast(IR::F16{ir.FPConvert(16, value)})); } else if (value.Type() == IR::Type::U32) { return value; } else { UNREACHABLE(); } }; const auto op_sel = operand.op_sel.op_sel; switch (operand.field) { case OperandField::ScalarGPR: { const auto prev_dst = ir.GetScalarReg(IR::ScalarReg(operand.code)); const auto result_16 = cast(result); const auto new_dst = ir.BitFieldInsert(prev_dst, result_16, ir.Imm32(op_sel ? 16 : 0), ir.Imm32(16)); return ir.SetScalarReg(IR::ScalarReg(operand.code), new_dst); } case OperandField::VectorGPR: { const auto prev_dst = ir.GetVectorReg(IR::VectorReg(operand.code)); const auto result_16 = cast(result); const auto new_dst = ir.BitFieldInsert(prev_dst, result_16, ir.Imm32(op_sel ? 16 : 0), ir.Imm32(16)); return ir.SetVectorReg(IR::VectorReg(operand.code), new_dst); } case OperandField::VccLo: UNREACHABLE(); case OperandField::VccHi: UNREACHABLE(); case OperandField::M0: UNREACHABLE(); default: UNREACHABLE(); } } template void Translator::SetDst16(const InstOperand&, const IR::U32F32& value); template void Translator::SetDst16(const InstOperand&, const IR::U32F32& value); void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_raw) { IR::U64F64 value_untyped = value_raw; const bool is_float = value_raw.Type() == IR::Type::F64 || value_raw.Type() == IR::Type::F32; if (is_float) { if (operand.output_modifier.multiplier != 0.f) { value_untyped = ir.FPMul(value_untyped, ir.Imm64(f64(operand.output_modifier.multiplier))); } if (operand.output_modifier.clamp) { value_untyped = ir.FPSaturate(value_untyped); } } const IR::Value unpacked{is_float ? ir.UnpackDouble2x32(IR::F64{value_untyped}) : ir.UnpackUint2x32(IR::U64{value_untyped})}; const IR::U32 lo{ir.CompositeExtract(unpacked, 0U)}; const IR::U32 hi{ir.CompositeExtract(unpacked, 1U)}; switch (operand.field) { case OperandField::ScalarGPR: ir.SetScalarReg(IR::ScalarReg(operand.code + 1), hi); return ir.SetScalarReg(IR::ScalarReg(operand.code), lo); case OperandField::VectorGPR: ir.SetVectorReg(IR::VectorReg(operand.code + 1), hi); return ir.SetVectorReg(IR::VectorReg(operand.code), lo); case OperandField::VccLo: ir.SetVccLo(lo); return ir.SetVccHi(hi); case OperandField::VccHi: UNREACHABLE(); case OperandField::M0: break; default: UNREACHABLE(); } } template void Translator::SetDstPk(const InstOperand& operand, const pk_type& value) { pk_type v = value; if constexpr (std::is_same_v) { if (operand.output_modifier.clamp) { v = {ir.FPSaturate(v.first), ir.FPSaturate(v.second)}; } } else { if (operand.output_modifier.clamp) { if constexpr (is_signed) { auto lower = ir.Imm32(-32768); auto upper = ir.Imm32(32767); v = {ir.SClamp(v.first, lower, upper), ir.SClamp(v.second, lower, upper)}; } else { auto imm = ir.Imm32(0xFFFF); v = {ir.UMin(v.first, imm), ir.UMin(v.second, imm)}; } } } IR::U32 value_raw{}; if constexpr (std::is_same_v) { value_raw = ir.Pack2x16(AmdGpu::NumberFormat::Float, ir.CompositeConstruct(v.first, v.second)); } else { value_raw = ir.Pack2x16(AmdGpu::NumberFormat::Uint, ir.CompositeConstruct(ir.BitCast(v.first), ir.BitCast(v.second))); } SetDst(operand, value_raw); } template void Translator::SetDstPk(const InstOperand& operand, const pk_type& value); template void Translator::SetDstPk(const InstOperand& operand, const pk_type& value); template void Translator::SetDstPk(const InstOperand& operand, const pk_type& value); void Translator::EmitFetch(const GcnInst& inst) { const auto code_sgpr_base = inst.src[0].code; #if 0 // Translate fetch shader inline using regular buffer bindings; useful for debugging. const auto* code = GetFetchShaderCode(info, code_sgpr_base); GcnCodeSlice slice(code, code + std::numeric_limits::max()); GcnDecodeContext decoder; // Decode and save instructions while (!slice.atEnd()) { const auto sub_inst = decoder.decodeInstruction(slice); if (sub_inst.opcode == Opcode::S_SETPC_B64) { // Assume we're swapping back to the main shader. break; } TranslateInstruction(sub_inst); } return; #endif info.has_fetch_shader = true; info.fetch_shader_sgpr_base = code_sgpr_base; const auto fetch_data = ParseFetchShader(info); ASSERT(fetch_data.has_value()); if (EmulatorSettings.IsDumpShaders()) { using namespace Common::FS; const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; if (!std::filesystem::exists(dump_dir)) { std::filesystem::create_directories(dump_dir); } const auto filename = fmt::format("vs_{:#018x}.fetch.bin", info.pgm_hash); const auto file = IOFile{dump_dir / filename, FileAccessMode::Create}; const auto* code = GetFetchShaderCode(info, code_sgpr_base); file.WriteRaw(code, fetch_data->size); } for (const auto& attrib : fetch_data->attributes) { const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic}; IR::VectorReg dst_reg{attrib.dest_vgpr}; // Read the V# of the attribute to figure out component number and type. const auto buffer = attrib.GetSharp(info); const auto values = ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1), ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3)); const auto converted = IR::ApplyReadNumberConversionVec4(ir, values, buffer.GetNumberConversion()); const auto swizzled = ApplySwizzle(ir, converted, buffer.DstSelect()); for (u32 i = 0; i < 4; i++) { ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)}); } } } void Translator::LogMissingOpcode(const GcnInst& inst) { LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})", magic_enum::enum_name(inst.opcode), u32(inst.opcode), magic_enum::enum_name(inst.category)); info.translation_failed = true; } void Translator::Translate(IR::Block* block, u32 start_pc, std::span inst_list) { if (inst_list.empty()) { return; } ir = IR::IREmitter{*block, block->begin()}; pc = start_pc; for (const auto& inst : inst_list) { pc += inst.length; // Special case for emitting fetch shader. if (inst.opcode == Opcode::S_SWAPPC_B64) { ASSERT(info.stage == Stage::Vertex || info.stage == Stage::Export || info.stage == Stage::Local); EmitFetch(inst); continue; } TranslateInstruction(inst); } } void Translator::TranslateInstruction(const GcnInst& inst) { // Emit instructions for each category. switch (inst.category) { case InstCategory::DataShare: EmitDataShare(inst); break; case InstCategory::VectorInterpolation: EmitVectorInterpolation(inst); break; case InstCategory::ScalarMemory: EmitScalarMemory(inst); break; case InstCategory::VectorMemory: EmitVectorMemory(inst); break; case InstCategory::Export: EmitExport(inst); break; case InstCategory::FlowControl: EmitFlowControl(inst); break; case InstCategory::ScalarALU: EmitScalarAlu(inst); break; case InstCategory::VectorALU: EmitVectorAlu(inst); break; case InstCategory::DebugProfile: break; default: UNREACHABLE(); } } } // namespace Shader::Gcn