From c1e496efcde74f4e04c1515b916bbdd54cb2fd03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?=
Date: Tue, 14 Apr 2026 21:41:55 +0200
Subject: [PATCH] SDWA (#4203)

---
 src/shader_recompiler/frontend/decode.cpp     | 77 +++++++++++++++++++
 src/shader_recompiler/frontend/decode.h       |  1 +
 src/shader_recompiler/frontend/instruction.h  | 66 +++++++++++++++++
 src/shader_recompiler/frontend/opcodes.h      |  3 +
 .../frontend/translate/translate.cpp          | 11 ++-
 5 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp
index ae58f080b..d945837bf 100644
--- a/src/shader_recompiler/frontend/decode.cpp
+++ b/src/shader_recompiler/frontend/decode.cpp
@@ -132,6 +132,7 @@ GcnInst GcnDecodeContext::decodeInstruction(GcnCodeSlice& code) {
     // Note: Literal constant decode must be performed after meta info updated.
     if (encodingLen == sizeof(u32)) {
         decodeLiteralConstant(encoding, code);
+        decodeSubDwordAddressing(encoding, code);
     }
 
     repairOperandType();
@@ -431,6 +432,82 @@ void GcnDecodeContext::decodeLiteralConstant(InstEncoding encoding, GcnCodeSlice
     }
 }
 
+/// Decodes the extra SDWA (sub-dword addressing) dword that follows a 32-bit VOP1/VOP2/VOPC
+/// instruction whose src0 slot holds the SDWA marker.
+///
+/// The dword is consumed from `code`, the instruction length grows by sizeof(u32), and the real
+/// src0 operand, the per-source selectors/modifiers and the destination controls are written to
+/// m_instruction. Instructions without the marker are left untouched.
+///
+/// NOTE: the layout structs in instruction.h are named the other way round relative to the ISA
+/// manual: `SdwaVopc` declares dst_sel/dst_u/clamp/omod (the VOP1/VOP2 "SDWA" dword) while
+/// `SdwaVop12` declares sdst/sd (the VOPC "SDWAB" dword). They are selected here by layout, not
+/// by name, so that VOP1/VOP2 keep their vector destination and VOPC gets the scalar one.
+void GcnDecodeContext::decodeSubDwordAddressing(InstEncoding encoding, GcnCodeSlice& code) {
+    // SDWA is only legal in the src0 slot, so no other operand needs to be checked.
+    if (m_instruction.src[0].field != OperandField::Sdwa) {
+        return;
+    }
+
+    m_instruction.src[0].code = code.readu32();
+    m_instruction.length += sizeof(u32);
+
+    // Bits 0-7 and 16-31 are declared identically in both layouts, so one decoder serves both.
+    const auto decode_sources = [this](const auto& sdwa) {
+        auto& src0 = m_instruction.src[0];
+        // s0/s1 == 0 selects a VGPR; otherwise the code is mapped through getOperandField.
+        src0.field = sdwa.s0 == 0 ? OperandField::VectorGPR : getOperandField(sdwa.src0);
+        src0.code = sdwa.src0;
+        src0.sdwa_sel = SdwaSelector(sdwa.src0_sel);
+        src0.input_modifier.neg = sdwa.src0_neg;
+        src0.input_modifier.abs = sdwa.src0_abs;
+        src0.input_modifier.sext = sdwa.src0_sext;
+
+        // src1 keeps the register code that was decoded from the base instruction word.
+        auto& src1 = m_instruction.src[1];
+        src1.field = sdwa.s1 == 0 ? OperandField::VectorGPR : getOperandField(src1.code);
+        src1.sdwa_sel = SdwaSelector(sdwa.src1_sel);
+        src1.input_modifier.neg = sdwa.src1_neg;
+        src1.input_modifier.abs = sdwa.src1_abs;
+        src1.input_modifier.sext = sdwa.src1_sext;
+    };
+
+    if (encoding == InstEncoding::VOP1 || encoding == InstEncoding::VOP2) {
+        // dst_sel/dst_u/clamp/omod layout; dst[0].field stays as the base decoder set it.
+        const SdwaVopc sdwa = *reinterpret_cast<const SdwaVopc*>(&m_instruction.src[0].code);
+        decode_sources(sdwa);
+
+        auto& dst = m_instruction.dst[0];
+        dst.sdwa_sel = SdwaSelector(sdwa.dst_sel);
+        dst.sdwa_dst = SdwaDstUnused(sdwa.dst_u);
+        dst.output_modifier.clamp = sdwa.clamp;
+
+        switch (sdwa.omod) {
+        case 0:
+            dst.output_modifier.multiplier = 0.f;
+            break;
+        case 1:
+            dst.output_modifier.multiplier = 2.0f;
+            break;
+        case 2:
+            dst.output_modifier.multiplier = 4.0f;
+            break;
+        case 3:
+            dst.output_modifier.multiplier = 0.5f;
+            break;
+        }
+    } else if (encoding == InstEncoding::VOPC) {
+        // sdst/sd layout: sd selects an explicit SGPR destination, otherwise VCC is written.
+        const SdwaVop12 sdwa = *reinterpret_cast<const SdwaVop12*>(&m_instruction.src[0].code);
+        decode_sources(sdwa);
+
+        m_instruction.dst[0].field = sdwa.sd ? OperandField::ScalarGPR : OperandField::VccLo;
+        m_instruction.dst[0].code = sdwa.sdst;
+    } else {
+        LOG_WARNING(Render_Recompiler, "illegal instruction: SDWA used outside VOP1/VOP2/VOPC");
+    }
+}
+
 void GcnDecodeContext::decodeInstructionSOP1(u32 hexInstruction) {
     u32 ssrc0 = bit::extract(hexInstruction, 7, 0);
     u32 op = bit::extract(hexInstruction, 15, 8);
diff --git a/src/shader_recompiler/frontend/decode.h b/src/shader_recompiler/frontend/decode.h
index 605a4e229..dfcf851bf 100644
--- a/src/shader_recompiler/frontend/decode.h
+++ b/src/shader_recompiler/frontend/decode.h
@@ -70,6 +70,7 @@ private:
     void decodeInstruction32(InstEncoding encoding, GcnCodeSlice& code);
     void decodeInstruction64(InstEncoding encoding, GcnCodeSlice& code);
     void decodeLiteralConstant(InstEncoding encoding, GcnCodeSlice& code);
+    void decodeSubDwordAddressing(InstEncoding encoding, GcnCodeSlice& code);
 
     // 32 bits encodings
     void decodeInstructionSOP1(uint32_t hexInstruction);
diff --git a/src/shader_recompiler/frontend/instruction.h b/src/shader_recompiler/frontend/instruction.h
index 05d5ce7b7..37b607b70 100644
--- a/src/shader_recompiler/frontend/instruction.h
+++ b/src/shader_recompiler/frontend/instruction.h
@@ -28,6 +28,7 @@ struct InputModifiers {
     bool neg = false;
     bool neg_hi = false;
     bool abs = false;
+    bool sext = false;
 };
 
 /// These are applied before storing an operand register.
@@ -41,6 +42,24 @@ struct OperandSelection {
     bool op_sel_hi = false;
 };
 
+enum class SdwaSelector : u32 { // decoded from the 3-bit dst_sel/src0_sel/src1_sel fields
+    Byte0 = 0,
+    Byte1 = 1,
+    Byte2 = 2,
+    Byte3 = 3,
+    Word0 = 4,
+    Word1 = 5,
+    Dword = 6,
+    Invalid = 7, // also the default of InstOperand::sdwa_sel
+};
+
+enum class SdwaDstUnused : u32 { // decoded from the 2-bit dst_u field
+    Pad = 0,
+    Sext = 1,
+    Preserve = 2,
+    Invalid = 3, // also the default of InstOperand::sdwa_dst
+};
+
 struct InstOperand {
     OperandField field = OperandField::Undefined;
     ScalarType type = ScalarType::Undefined;
@@ -48,6 +67,8 @@ struct InstOperand {
     OutputModifiers output_modifier = {};
     // only valid for packed 16bit operations
     OperandSelection op_sel = {};
+    SdwaDstUnused sdwa_dst = SdwaDstUnused::Invalid; // written for dst[0] by the SDWA decoder
+    SdwaSelector sdwa_sel = SdwaSelector::Invalid;   // written for src[0], src[1] and dst[0]
     u32 code = 0xFFFFFFFF;
 };
 
@@ -219,6 +240,51 @@ union InstControl {
     InstControlEXP exp;
 };
 
+struct SdwaVopc { // NOTE(review): field set equals the ISA's VOP1/VOP2 "SDWA" dword - confirm
+    u32 src0 : 8;      // source 0 register code
+    u32 dst_sel : 3;   // -> SdwaSelector
+    u32 dst_u : 2;     // -> SdwaDstUnused
+    u32 clamp : 1;     // -> OutputModifiers::clamp
+    u32 omod : 2;      // 0..3, turned into an output multiplier by the decoder
+    u32 src0_sel : 3;  // -> SdwaSelector
+    u32 src0_sext : 1; // -> InputModifiers::sext
+    u32 src0_neg : 1;  // -> InputModifiers::neg
+    u32 src0_abs : 1;  // -> InputModifiers::abs
+    u32 : 1;           // unnamed padding bit
+    u32 s0 : 1;        // 0: src0 is a VGPR; 1: src0 is resolved via getOperandField
+
+    u32 src1_sel : 3;  // -> SdwaSelector
+    u32 src1_sext : 1; // -> InputModifiers::sext
+    u32 src1_neg : 1;  // -> InputModifiers::neg
+    u32 src1_abs : 1;  // -> InputModifiers::abs
+    u32 : 1;           // unnamed padding bit
+    u32 s1 : 1;        // 0: src1 is a VGPR; 1: src1 is resolved via getOperandField
+};
+
+struct SdwaVop12 { // NOTE(review): field set equals the ISA's VOPC "SDWAB" dword - confirm
+    u32 src0 : 8;      // source 0 register code
+    u32 sdst : 7;      // scalar destination code, copied to dst[0].code
+    u32 sd : 1;        // 1: dst[0] is a ScalarGPR; 0: dst[0] is VccLo
+    u32 src0_sel : 3;  // -> SdwaSelector
+    u32 src0_sext : 1; // -> InputModifiers::sext
+    u32 src0_neg : 1;  // -> InputModifiers::neg
+    u32 src0_abs : 1;  // -> InputModifiers::abs
+    u32 : 1;           // unnamed padding bit
+    u32 s0 : 1;        // 0: src0 is a VGPR; 1: src0 is resolved via getOperandField
+
+    u32 src1_sel : 3;  // -> SdwaSelector
+    u32 src1_sext : 1; // -> InputModifiers::sext
+    u32 src1_neg : 1;  // -> InputModifiers::neg
+    u32 src1_abs : 1;  // -> InputModifiers::abs
+    u32 : 1;           // unnamed padding bit
+    u32 s1 : 1;        // 0: src1 is a VGPR; 1: src1 is resolved via getOperandField
+};
+
+union Sdwa { // overlays both 32-bit layouts; not referenced by the decoder in this patch
+    SdwaVopc vopc;
+    SdwaVop12 vop12;
+};
+
 struct GcnInst {
     Opcode opcode;
     InstEncoding encoding;
diff --git a/src/shader_recompiler/frontend/opcodes.h b/src/shader_recompiler/frontend/opcodes.h
index 6d6df88a6..4d6a606f9 100644
--- a/src/shader_recompiler/frontend/opcodes.h
+++ b/src/shader_recompiler/frontend/opcodes.h
@@ -2448,6 +2448,9 @@ enum class OperandField : u32 {
     ConstFloatNeg_2_0,
     ConstFloatPos_4_0,
     ConstFloatNeg_4_0,
+    Inv2Pi, // immediate 1 / (2 * pi), materialised in Translator::GetSrc
+    Sdwa,   // src0 marker: an extra SDWA dword follows the instruction
+    Dpp,    // no decoder in this patch; Translator::GetSrc treats it as unreachable
     VccZ = 251,
     ExecZ = 252,
     Scc = 253,
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp 
b/src/shader_recompiler/frontend/translate/translate.cpp index 3a97bcef7..bd73d04be 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -4,6 +4,7 @@ #include "common/io_file.h" #include "common/path_util.h" #include "core/emulator_settings.h" +#include "core/libraries/kernel/process.h" #include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" @@ -15,6 +16,7 @@ #include "shader_recompiler/runtime_info.h" #include "video_core/amdgpu/resource.h" +#include #define MAGIC_ENUM_RANGE_MIN 0 #define MAGIC_ENUM_RANGE_MAX 1515 #include @@ -334,6 +336,13 @@ T Translator::GetSrc(const InstOperand& operand) { case OperandField::ConstFloatNeg_4_0: value = get_imm(-4.0f); break; + case OperandField::Inv2Pi: + value = get_imm(static_cast(1.0f / (2.0f * std::numbers::pi))); + break; + case OperandField::Sdwa: + UNREACHABLE_MSG("unhandled SDWA"); + case OperandField::Dpp: + UNREACHABLE_MSG("unhandled DPP"); case OperandField::VccLo: if constexpr (is_float) { value = ir.BitCast(ir.GetVccLo()); @@ -363,7 +372,7 @@ T Translator::GetSrc(const InstOperand& operand) { } break; default: - UNREACHABLE(); + UNREACHABLE_MSG("unexpected operand: {}", std::to_underlying(operand.field)); } if constexpr (is_float) {