From 07a0475d0f2ed391fce38fa5254dcbdf85eabe4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Miko=C5=82ajczyk?= Date: Wed, 22 Apr 2026 08:35:22 +0200 Subject: [PATCH] VOP decoding for Neo (#4301) --- src/shader_recompiler/frontend/decode.cpp | 61 +++++---- src/shader_recompiler/frontend/instruction.h | 9 +- src/shader_recompiler/frontend/opcodes.h | 128 ++++++++++++++++++- 3 files changed, 166 insertions(+), 32 deletions(-) diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp index d945837bf..806ccce6e 100644 --- a/src/shader_recompiler/frontend/decode.cpp +++ b/src/shader_recompiler/frontend/decode.cpp @@ -239,11 +239,11 @@ uint32_t GcnDecodeContext::mapEncodingOp(InstEncoding encoding, Opcode opcode) { uint32_t op = static_cast(opcode) - static_cast(OpcodeMap::OP_MAP_VOPC); encodingOp = op + static_cast(OpMapVOP3VOPX::VOP3_TO_VOPC); - } else if (opcode >= Opcode::V_CNDMASK_B32 && opcode <= Opcode::V_CVT_PK_I16_I32) { + } else if (opcode >= Opcode::V_CNDMASK_B32 && opcode <= Opcode::V_LDEXP_F16) { uint32_t op = static_cast(opcode) - static_cast(OpcodeMap::OP_MAP_VOP2); encodingOp = op + static_cast(OpMapVOP3VOPX::VOP3_TO_VOP2); - } else if (opcode >= Opcode::V_NOP && opcode <= Opcode::V_MOVRELSD_B32) { + } else if (opcode >= Opcode::V_NOP && opcode <= Opcode::V_SQRT_F16) { uint32_t op = static_cast(opcode) - static_cast(OpcodeMap::OP_MAP_VOP1); encodingOp = op + static_cast(OpMapVOP3VOPX::VOP3_TO_VOP1); @@ -440,6 +440,30 @@ void GcnDecodeContext::decodeSubDwordAddressing(InstEncoding encoding, GcnCodeSl if (encoding == InstEncoding::VOPC) { SdwaVopc sdwa = *reinterpret_cast(&m_instruction.src[0].code); + + m_instruction.src[0].field = + sdwa.s0 == 0 ? OperandField::VectorGPR : getOperandField(sdwa.src0); + m_instruction.src[0].code = sdwa.src0; + m_instruction.src[0].sdwa_sel = SdwaSelector(sdwa.src0_sel); + + m_instruction.src[0].input_modifier.neg = sdwa.src0_neg; + m_instruction.src[0].input_modifier.abs = sdwa.src0_abs; + m_instruction.src[0].input_modifier.sext = sdwa.src0_sext; + + m_instruction.src[1].field = + sdwa.s1 == 0 ? OperandField::VectorGPR : getOperandField(m_instruction.src[1].code); + m_instruction.src[1].sdwa_sel = SdwaSelector(sdwa.src1_sel); + + m_instruction.src[1].input_modifier.neg = sdwa.src1_neg; + m_instruction.src[1].input_modifier.abs = sdwa.src1_abs; + m_instruction.src[1].input_modifier.sext = sdwa.src1_sext; + + m_instruction.dst[0].field = sdwa.sd ? OperandField::ScalarGPR : OperandField::VccLo; + m_instruction.dst[0].code = sdwa.sdst; + + } else if (encoding == InstEncoding::VOP1 || encoding == InstEncoding::VOP2) { + SdwaVop12 sdwa = *reinterpret_cast(&m_instruction.src[0].code); + m_instruction.src[0].field = sdwa.s0 == 0 ? OperandField::VectorGPR : getOperandField(sdwa.src0); m_instruction.src[0].code = sdwa.src0; @@ -475,27 +499,6 @@ void GcnDecodeContext::decodeSubDwordAddressing(InstEncoding encoding, GcnCodeSl m_instruction.dst[0].output_modifier.multiplier = 0.5f; break; } - } else if (encoding == InstEncoding::VOP1 || encoding == InstEncoding::VOP2) { - SdwaVop12 sdwa = *reinterpret_cast(&m_instruction.src[0].code); - m_instruction.src[0].field = - sdwa.s0 == 0 ? OperandField::VectorGPR : getOperandField(sdwa.src0); - m_instruction.src[0].code = sdwa.src0; - m_instruction.src[0].sdwa_sel = SdwaSelector(sdwa.src0_sel); - - m_instruction.src[0].input_modifier.neg = sdwa.src0_neg; - m_instruction.src[0].input_modifier.abs = sdwa.src0_abs; - m_instruction.src[0].input_modifier.sext = sdwa.src0_sext; - - m_instruction.src[1].field = - sdwa.s1 == 0 ? OperandField::VectorGPR : getOperandField(m_instruction.src[1].code); - m_instruction.src[1].sdwa_sel = SdwaSelector(sdwa.src1_sel); - - m_instruction.src[1].input_modifier.neg = sdwa.src1_neg; - m_instruction.src[1].input_modifier.abs = sdwa.src1_abs; - m_instruction.src[1].input_modifier.sext = sdwa.src1_sext; - - m_instruction.dst[0].field = sdwa.sd ? OperandField::ScalarGPR : OperandField::VccLo; - m_instruction.dst[0].code = sdwa.sdst; } else { LOG_WARNING(Render_Recompiler, "illegal instruction: SDWA used outside VOP1/VOP2/VOPC"); } @@ -692,6 +695,8 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) { u32 vdst = bit::extract(hexInstruction, 7, 0); u32 sdst = bit::extract(hexInstruction, 14, 8); // For VOP3B u32 op = bit::extract(hexInstruction, 25, 17); + u32 op_msb = bit::extract(hexInstruction, 16, 16); + op = op + op_msb * (1 << 9); u32 src0 = bit::extract(hexInstruction, 40, 32); u32 src1 = bit::extract(hexInstruction, 49, 41); u32 src2 = bit::extract(hexInstruction, 58, 50); @@ -703,13 +708,13 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) { m_instruction.opcode = static_cast(vopcOp + static_cast(OpcodeMap::OP_MAP_VOPC)); } else if (op >= static_cast(OpcodeVOP3::V_CNDMASK_B32) && - op <= static_cast(OpcodeVOP3::V_CVT_PK_I16_I32)) { + op <= static_cast(OpcodeVOP3::V_LDEXP_F16)) { // Map from VOP3 to VOP2 u32 vop2Op = op - static_cast(OpMapVOP3VOPX::VOP3_TO_VOP2); m_instruction.opcode = static_cast(vop2Op + static_cast(OpcodeMap::OP_MAP_VOP2)); } else if (op >= static_cast(OpcodeVOP3::V_NOP) && - op <= static_cast(OpcodeVOP3::V_MOVRELSD_B32)) { + op <= static_cast(OpcodeVOP3::V_SQRT_F16)) { // Map from VOP3 to VOP1 u32 vop1Op = op - static_cast(OpMapVOP3VOPX::VOP3_TO_VOP1); m_instruction.opcode = @@ -769,8 +774,14 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) { if (control.neg & (1u << i)) { m_instruction.src[i].input_modifier.neg = true; } + + if (control.op_sel & (1u << i)) { + m_instruction.src[i].op_sel.op_sel = true; + } } + m_instruction.dst[0].op_sel.op_sel = control.op_sel & (1u << 3); + // update output modifier auto& outputMod = m_instruction.dst[0].output_modifier; diff --git a/src/shader_recompiler/frontend/instruction.h b/src/shader_recompiler/frontend/instruction.h index 37b607b70..cb02445dc 100644 --- a/src/shader_recompiler/frontend/instruction.h +++ b/src/shader_recompiler/frontend/instruction.h @@ -115,7 +115,8 @@ struct InstControlVOP3 { u64 : 8; u64 abs : 3; u64 clmp : 1; - u64 : 47; + u64 op_sel : 4; + u64 : 43; u64 omod : 2; u64 neg : 3; }; @@ -130,7 +131,7 @@ struct InstControlVOP3P { u64 op_sel_hi_01 : 2; u64 neg : 3; - bool get_op_sel_hi(int idx) { + bool get_op_sel_hi(int idx) const { switch (idx) { case 0: return (op_sel_hi_01 & 1) == 1; @@ -240,7 +241,7 @@ union InstControl { InstControlEXP exp; }; -struct SdwaVopc { +struct SdwaVop12 { u32 src0 : 8; u32 dst_sel : 3; u32 dst_u : 2; @@ -261,7 +262,7 @@ struct SdwaVopc { u32 s1 : 1; }; -struct SdwaVop12 { +struct SdwaVopc { u32 src0 : 8; u32 sdst : 7; u32 sd : 1; diff --git a/src/shader_recompiler/frontend/opcodes.h b/src/shader_recompiler/frontend/opcodes.h index 6aa58d61b..4e538f247 100644 --- a/src/shader_recompiler/frontend/opcodes.h +++ b/src/shader_recompiler/frontend/opcodes.h @@ -259,8 +259,15 @@ enum class OpcodeVOP2 : u32 { V_CVT_PKRTZ_F16_F32 = 47, V_CVT_PK_U16_U32 = 48, V_CVT_PK_I16_I32 = 49, + V_ADD_F16 = 50, + V_SUB_F16 = 51, + V_SUBREV_F16 = 52, + V_MUL_F16 = 53, + V_MAX_F16 = 57, + V_MIN_F16 = 58, + V_LDEXP_F16 = 59, - OP_RANGE_VOP2 = V_CVT_PK_I16_I32 + 1, + OP_RANGE_VOP2 = V_LDEXP_F16 + 1, }; enum class OpcodeVOP3 : u32 { @@ -411,6 +418,12 @@ enum class OpcodeVOP3 : u32 { V_CMP_TRU_I32 = 135, V_CMP_T_I32 = 135, V_CMP_CLASS_F32 = 136, + V_CMP_LT_I16 = 137, + V_CMP_EQ_I16 = 138, + V_CMP_LE_I16 = 139, + V_CMP_GT_I16 = 140, + V_CMP_NE_I16 = 141, + V_CMP_GE_I16 = 142, V_CMPX_F_I32 = 144, V_CMPX_LT_I32 = 145, V_CMPX_EQ_I32 = 146, @@ -433,6 +446,12 @@ enum class OpcodeVOP3 : u32 { V_CMP_TRU_I64 = 167, V_CMP_T_I64 = 167, V_CMP_CLASS_F64 = 168, + V_CMP_LT_U16 = 169, + V_CMP_EQ_U16 = 170, + V_CMP_LE_U16 = 171, + V_CMP_GT_U16 = 172, + V_CMP_NE_U16 = 173, + V_CMP_GE_U16 = 174, V_CMPX_F_I64 = 176, V_CMPX_LT_I64 = 177, V_CMPX_EQ_I64 = 178, @@ -454,6 +473,14 @@ enum class OpcodeVOP3 : u32 { V_CMP_GE_U32 = 198, V_CMP_TRU_U32 = 199, V_CMP_T_U32 = 199, + V_CMP_F_F16 = 200, + V_CMP_LT_F16 = 201, + V_CMP_EQ_F16 = 202, + V_CMP_LE_F16 = 203, + V_CMP_GT_F16 = 204, + V_CMP_LG_F16 = 205, + V_CMP_GE_F16 = 206, + V_CMP_O_F16 = 207, V_CMPX_F_U32 = 208, V_CMPX_LT_U32 = 209, V_CMPX_EQ_U32 = 210, @@ -534,6 +561,13 @@ enum class OpcodeVOP3 : u32 { V_CVT_PKRTZ_F16_F32 = 303, V_CVT_PK_U16_U32 = 304, V_CVT_PK_I16_I32 = 305, + V_ADD_F16 = 306, + V_SUB_F16 = 307, + V_SUBREV_F16 = 308, + V_MUL_F16 = 309, + V_MAX_F16 = 313, + V_MIN_F16 = 314, + V_LDEXP_F16 = 315, V_MAD_LEGACY_F32 = 320, V_MAD_F32 = 321, V_MAD_I32_I24 = 322, @@ -658,8 +692,28 @@ enum class OpcodeVOP3 : u32 { V_MOVRELSD_B32 = 452, V_LOG_LEGACY_F32 = 453, V_EXP_LEGACY_F32 = 454, + V_RCP_F16 = 468, + V_SQRT_F16 = 469, - OP_RANGE_VOP3 = V_EXP_LEGACY_F32 + 1, + V_ADD_NC_U16 = 771, + V_LSHRREV_B16 = 775, + V_ASHRREV_I16 = 776, + V_ADD_NC_I16 = 781, + V_SUB_NC_I16 = 782, + V_SUB_CO_U32 = 784, + V_LSHLREV_B16 = 788, + V_MAD_F16 = 833, + V_LSHL_ADD_U32 = 838, + V_ADD_LSHL_U32 = 839, + V_MIN3_F16 = 849, + V_MAX3_F16 = 852, + V_MAD_I16 = 862, + V_ADD3_U32 = 877, + V_LSHL_OR_B32 = 879, + V_AND_OR_B32 = 881, + V_OR3_B32 = 882, + + OP_RANGE_VOP3 = 1024, // in neo mode, VOP3 gets an additional bit for OP so round up }; enum class OpcodeVOP3P : u32 { @@ -756,8 +810,10 @@ enum class OpcodeVOP1 : u32 { V_MOVRELSD_B32 = 68, V_LOG_LEGACY_F32 = 69, V_EXP_LEGACY_F32 = 70, + V_RCP_F16 = 84, + V_SQRT_F16 = 85, - OP_RANGE_VOP1 = V_EXP_LEGACY_F32 + 1, + OP_RANGE_VOP1 = V_SQRT_F16 + 1, }; enum class OpcodeVOPC : u32 { @@ -908,6 +964,12 @@ enum class OpcodeVOPC : u32 { V_CMP_TRU_I32 = 135, V_CMP_T_I32 = 135, V_CMP_CLASS_F32 = 136, + V_CMP_LT_I16 = 137, + V_CMP_EQ_I16 = 138, + V_CMP_LE_I16 = 139, + V_CMP_GT_I16 = 140, + V_CMP_NE_I16 = 141, + V_CMP_GE_I16 = 142, V_CMPX_F_I32 = 144, V_CMPX_LT_I32 = 145, V_CMPX_EQ_I32 = 146, @@ -930,6 +992,12 @@ enum class OpcodeVOPC : u32 { V_CMP_TRU_I64 = 167, V_CMP_T_I64 = 167, V_CMP_CLASS_F64 = 168, + V_CMP_LT_U16 = 169, + V_CMP_EQ_U16 = 170, + V_CMP_LE_U16 = 171, + V_CMP_GT_U16 = 172, + V_CMP_NE_U16 = 173, + V_CMP_GE_U16 = 174, V_CMPX_F_I64 = 176, V_CMPX_LT_I64 = 177, V_CMPX_EQ_I64 = 178, @@ -951,6 +1019,14 @@ enum class OpcodeVOPC : u32 { V_CMP_GE_U32 = 198, V_CMP_TRU_U32 = 199, V_CMP_T_U32 = 199, + V_CMP_F_F16 = 200, + V_CMP_LT_F16 = 201, + V_CMP_EQ_F16 = 202, + V_CMP_LE_F16 = 203, + V_CMP_GT_F16 = 204, + V_CMP_LG_F16 = 205, + V_CMP_GE_F16 = 206, + V_CMP_O_F16 = 207, V_CMPX_F_U32 = 208, V_CMPX_LT_U32 = 209, V_CMPX_EQ_U32 = 210, @@ -1653,6 +1729,12 @@ enum class Opcode : u32 { V_CMP_TRU_I32 = 135 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMP_T_I32 = 135 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMP_CLASS_F32 = 136 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LT_I16 = 137 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_EQ_I16 = 138 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LE_I16 = 139 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GT_I16 = 140 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NE_I16 = 141 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GE_I16 = 142 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMPX_F_I32 = 144 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMPX_LT_I32 = 145 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMPX_EQ_I32 = 146 + (u32)OpcodeMap::OP_MAP_VOPC, @@ -1675,6 +1757,12 @@ enum class Opcode : u32 { V_CMP_TRU_I64 = 167 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMP_T_I64 = 167 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMP_CLASS_F64 = 168 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LT_U16 = 169 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_EQ_U16 = 170 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LE_U16 = 171 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GT_U16 = 172 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_NE_U16 = 173 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GE_U16 = 174 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMPX_F_I64 = 176 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMPX_LT_I64 = 177 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMPX_EQ_I64 = 178 + (u32)OpcodeMap::OP_MAP_VOPC, @@ -1695,6 +1783,14 @@ enum class Opcode : u32 { V_CMP_GE_U32 = 198 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMP_TRU_U32 = 199 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMP_T_U32 = 199 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_F_F16 = 200 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LT_F16 = 201 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_EQ_F16 = 202 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LE_F16 = 203 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GT_F16 = 204 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_LG_F16 = 205 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_GE_F16 = 206 + (u32)OpcodeMap::OP_MAP_VOPC, + V_CMP_O_F16 = 207 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMPX_F_U32 = 208 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMPX_LT_U32 = 209 + (u32)OpcodeMap::OP_MAP_VOPC, V_CMPX_EQ_U32 = 210 + (u32)OpcodeMap::OP_MAP_VOPC, @@ -1775,6 +1871,13 @@ enum class Opcode : u32 { V_CVT_PKRTZ_F16_F32 = 47 + (u32)OpcodeMap::OP_MAP_VOP2, V_CVT_PK_U16_U32 = 48 + (u32)OpcodeMap::OP_MAP_VOP2, V_CVT_PK_I16_I32 = 49 + (u32)OpcodeMap::OP_MAP_VOP2, + V_ADD_F16 = 50 + (u32)OpcodeMap::OP_MAP_VOP2, + V_SUB_F16 = 51 + (u32)OpcodeMap::OP_MAP_VOP2, + V_SUBREV_F16 = 52 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MUL_F16 = 53 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MAX_F16 = 57 + (u32)OpcodeMap::OP_MAP_VOP2, + V_MIN_F16 = 58 + (u32)OpcodeMap::OP_MAP_VOP2, + V_LDEXP_F16 = 59 + (u32)OpcodeMap::OP_MAP_VOP2, // VOP1 V_NOP = 0 + (u32)OpcodeMap::OP_MAP_VOP1, V_MOV_B32 = 1 + (u32)OpcodeMap::OP_MAP_VOP1, @@ -1842,6 +1945,8 @@ enum class Opcode : u32 { V_MOVRELSD_B32 = 68 + (u32)OpcodeMap::OP_MAP_VOP1, V_LOG_LEGACY_F32 = 69 + (u32)OpcodeMap::OP_MAP_VOP1, V_EXP_LEGACY_F32 = 70 + (u32)OpcodeMap::OP_MAP_VOP1, + V_RCP_F16 = 84 + (u32)OpcodeMap::OP_MAP_VOP1, + V_SQRT_F16 = 85 + (u32)OpcodeMap::OP_MAP_VOP1, // VOP3 V_MAD_LEGACY_F32 = 320 + (u32)OpcodeMap::OP_MAP_VOP3, V_MAD_F32 = 321 + (u32)OpcodeMap::OP_MAP_VOP3, @@ -1901,6 +2006,23 @@ enum class Opcode : u32 { V_MQSAD_U32_U8 = 373 + (u32)OpcodeMap::OP_MAP_VOP3, V_MAD_U64_U32 = 374 + (u32)OpcodeMap::OP_MAP_VOP3, V_MAD_I64_I32 = 375 + (u32)OpcodeMap::OP_MAP_VOP3, + V_ADD_NC_U16 = 771 + (u32)OpcodeMap::OP_MAP_VOP3, + V_LSHRREV_B16 = 775 + (u32)OpcodeMap::OP_MAP_VOP3, + V_ASHRREV_I16 = 776 + (u32)OpcodeMap::OP_MAP_VOP3, + V_ADD_NC_I16 = 781 + (u32)OpcodeMap::OP_MAP_VOP3, + V_SUB_NC_I16 = 782 + (u32)OpcodeMap::OP_MAP_VOP3, + V_SUB_CO_U32 = 784 + (u32)OpcodeMap::OP_MAP_VOP3, + V_LSHLREV_B16 = 788 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAD_F16 = 833 + (u32)OpcodeMap::OP_MAP_VOP3, + V_LSHL_ADD_U32 = 838 + (u32)OpcodeMap::OP_MAP_VOP3, + V_ADD_LSHL_U32 = 839 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MIN3_F16 = 849 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAX3_F16 = 852 + (u32)OpcodeMap::OP_MAP_VOP3, + V_MAD_I16 = 862 + (u32)OpcodeMap::OP_MAP_VOP3, + V_ADD3_U32 = 877 + (u32)OpcodeMap::OP_MAP_VOP3, + V_LSHL_OR_B32 = 879 + (u32)OpcodeMap::OP_MAP_VOP3, + V_AND_OR_B32 = 881 + (u32)OpcodeMap::OP_MAP_VOP3, + V_OR3_B32 = 882 + (u32)OpcodeMap::OP_MAP_VOP3, // VINTRP V_INTERP_P1_F32 = 0 + (u32)OpcodeMap::OP_MAP_VINTRP, V_INTERP_P2_F32 = 1 + (u32)OpcodeMap::OP_MAP_VINTRP,