VOP decoding for Neo (#4301)

This commit is contained in:
Marcin Mikołajczyk 2026-04-22 08:35:22 +02:00 committed by GitHub
parent c68a8baa94
commit 07a0475d0f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 166 additions and 32 deletions

View File

@ -239,11 +239,11 @@ uint32_t GcnDecodeContext::mapEncodingOp(InstEncoding encoding, Opcode opcode) {
uint32_t op =
static_cast<uint32_t>(opcode) - static_cast<uint32_t>(OpcodeMap::OP_MAP_VOPC);
encodingOp = op + static_cast<uint32_t>(OpMapVOP3VOPX::VOP3_TO_VOPC);
} else if (opcode >= Opcode::V_CNDMASK_B32 && opcode <= Opcode::V_CVT_PK_I16_I32) {
} else if (opcode >= Opcode::V_CNDMASK_B32 && opcode <= Opcode::V_LDEXP_F16) {
uint32_t op =
static_cast<uint32_t>(opcode) - static_cast<uint32_t>(OpcodeMap::OP_MAP_VOP2);
encodingOp = op + static_cast<uint32_t>(OpMapVOP3VOPX::VOP3_TO_VOP2);
} else if (opcode >= Opcode::V_NOP && opcode <= Opcode::V_MOVRELSD_B32) {
} else if (opcode >= Opcode::V_NOP && opcode <= Opcode::V_SQRT_F16) {
uint32_t op =
static_cast<uint32_t>(opcode) - static_cast<uint32_t>(OpcodeMap::OP_MAP_VOP1);
encodingOp = op + static_cast<uint32_t>(OpMapVOP3VOPX::VOP3_TO_VOP1);
@ -440,6 +440,30 @@ void GcnDecodeContext::decodeSubDwordAddressing(InstEncoding encoding, GcnCodeSl
if (encoding == InstEncoding::VOPC) {
SdwaVopc sdwa = *reinterpret_cast<SdwaVopc*>(&m_instruction.src[0].code);
m_instruction.src[0].field =
sdwa.s0 == 0 ? OperandField::VectorGPR : getOperandField(sdwa.src0);
m_instruction.src[0].code = sdwa.src0;
m_instruction.src[0].sdwa_sel = SdwaSelector(sdwa.src0_sel);
m_instruction.src[0].input_modifier.neg = sdwa.src0_neg;
m_instruction.src[0].input_modifier.abs = sdwa.src0_abs;
m_instruction.src[0].input_modifier.sext = sdwa.src0_sext;
m_instruction.src[1].field =
sdwa.s1 == 0 ? OperandField::VectorGPR : getOperandField(m_instruction.src[1].code);
m_instruction.src[1].sdwa_sel = SdwaSelector(sdwa.src1_sel);
m_instruction.src[1].input_modifier.neg = sdwa.src1_neg;
m_instruction.src[1].input_modifier.abs = sdwa.src1_abs;
m_instruction.src[1].input_modifier.sext = sdwa.src1_sext;
m_instruction.dst[0].field = sdwa.sd ? OperandField::ScalarGPR : OperandField::VccLo;
m_instruction.dst[0].code = sdwa.sdst;
} else if (encoding == InstEncoding::VOP1 || encoding == InstEncoding::VOP2) {
SdwaVop12 sdwa = *reinterpret_cast<SdwaVop12*>(&m_instruction.src[0].code);
m_instruction.src[0].field =
sdwa.s0 == 0 ? OperandField::VectorGPR : getOperandField(sdwa.src0);
m_instruction.src[0].code = sdwa.src0;
@ -475,27 +499,6 @@ void GcnDecodeContext::decodeSubDwordAddressing(InstEncoding encoding, GcnCodeSl
m_instruction.dst[0].output_modifier.multiplier = 0.5f;
break;
}
} else if (encoding == InstEncoding::VOP1 || encoding == InstEncoding::VOP2) {
SdwaVop12 sdwa = *reinterpret_cast<SdwaVop12*>(&m_instruction.src[0].code);
m_instruction.src[0].field =
sdwa.s0 == 0 ? OperandField::VectorGPR : getOperandField(sdwa.src0);
m_instruction.src[0].code = sdwa.src0;
m_instruction.src[0].sdwa_sel = SdwaSelector(sdwa.src0_sel);
m_instruction.src[0].input_modifier.neg = sdwa.src0_neg;
m_instruction.src[0].input_modifier.abs = sdwa.src0_abs;
m_instruction.src[0].input_modifier.sext = sdwa.src0_sext;
m_instruction.src[1].field =
sdwa.s1 == 0 ? OperandField::VectorGPR : getOperandField(m_instruction.src[1].code);
m_instruction.src[1].sdwa_sel = SdwaSelector(sdwa.src1_sel);
m_instruction.src[1].input_modifier.neg = sdwa.src1_neg;
m_instruction.src[1].input_modifier.abs = sdwa.src1_abs;
m_instruction.src[1].input_modifier.sext = sdwa.src1_sext;
m_instruction.dst[0].field = sdwa.sd ? OperandField::ScalarGPR : OperandField::VccLo;
m_instruction.dst[0].code = sdwa.sdst;
} else {
LOG_WARNING(Render_Recompiler, "illegal instruction: SDWA used outside VOP1/VOP2/VOPC");
}
@ -692,6 +695,8 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) {
u32 vdst = bit::extract(hexInstruction, 7, 0);
u32 sdst = bit::extract(hexInstruction, 14, 8); // For VOP3B
u32 op = bit::extract(hexInstruction, 25, 17);
u32 op_msb = bit::extract(hexInstruction, 16, 16);
op = op + op_msb * (1 << 9);
u32 src0 = bit::extract(hexInstruction, 40, 32);
u32 src1 = bit::extract(hexInstruction, 49, 41);
u32 src2 = bit::extract(hexInstruction, 58, 50);
@ -703,13 +708,13 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) {
m_instruction.opcode =
static_cast<Opcode>(vopcOp + static_cast<u32>(OpcodeMap::OP_MAP_VOPC));
} else if (op >= static_cast<u32>(OpcodeVOP3::V_CNDMASK_B32) &&
op <= static_cast<u32>(OpcodeVOP3::V_CVT_PK_I16_I32)) {
op <= static_cast<u32>(OpcodeVOP3::V_LDEXP_F16)) {
// Map from VOP3 to VOP2
u32 vop2Op = op - static_cast<u32>(OpMapVOP3VOPX::VOP3_TO_VOP2);
m_instruction.opcode =
static_cast<Opcode>(vop2Op + static_cast<u32>(OpcodeMap::OP_MAP_VOP2));
} else if (op >= static_cast<u32>(OpcodeVOP3::V_NOP) &&
op <= static_cast<u32>(OpcodeVOP3::V_MOVRELSD_B32)) {
op <= static_cast<u32>(OpcodeVOP3::V_SQRT_F16)) {
// Map from VOP3 to VOP1
u32 vop1Op = op - static_cast<u32>(OpMapVOP3VOPX::VOP3_TO_VOP1);
m_instruction.opcode =
@ -769,8 +774,14 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) {
if (control.neg & (1u << i)) {
m_instruction.src[i].input_modifier.neg = true;
}
if (control.op_sel & (1u << i)) {
m_instruction.src[i].op_sel.op_sel = true;
}
}
m_instruction.dst[0].op_sel.op_sel = control.op_sel & (1u << 3);
// update output modifier
auto& outputMod = m_instruction.dst[0].output_modifier;

View File

@ -115,7 +115,8 @@ struct InstControlVOP3 {
u64 : 8;
u64 abs : 3;
u64 clmp : 1;
u64 : 47;
u64 op_sel : 4;
u64 : 43;
u64 omod : 2;
u64 neg : 3;
};
@ -130,7 +131,7 @@ struct InstControlVOP3P {
u64 op_sel_hi_01 : 2;
u64 neg : 3;
bool get_op_sel_hi(int idx) {
bool get_op_sel_hi(int idx) const {
switch (idx) {
case 0:
return (op_sel_hi_01 & 1) == 1;
@ -240,7 +241,7 @@ union InstControl {
InstControlEXP exp;
};
struct SdwaVopc {
struct SdwaVop12 {
u32 src0 : 8;
u32 dst_sel : 3;
u32 dst_u : 2;
@ -261,7 +262,7 @@ struct SdwaVopc {
u32 s1 : 1;
};
struct SdwaVop12 {
struct SdwaVopc {
u32 src0 : 8;
u32 sdst : 7;
u32 sd : 1;

View File

@ -259,8 +259,15 @@ enum class OpcodeVOP2 : u32 {
V_CVT_PKRTZ_F16_F32 = 47,
V_CVT_PK_U16_U32 = 48,
V_CVT_PK_I16_I32 = 49,
V_ADD_F16 = 50,
V_SUB_F16 = 51,
V_SUBREV_F16 = 52,
V_MUL_F16 = 53,
V_MAX_F16 = 57,
V_MIN_F16 = 58,
V_LDEXP_F16 = 59,
OP_RANGE_VOP2 = V_CVT_PK_I16_I32 + 1,
OP_RANGE_VOP2 = V_LDEXP_F16 + 1,
};
enum class OpcodeVOP3 : u32 {
@ -411,6 +418,12 @@ enum class OpcodeVOP3 : u32 {
V_CMP_TRU_I32 = 135,
V_CMP_T_I32 = 135,
V_CMP_CLASS_F32 = 136,
V_CMP_LT_I16 = 137,
V_CMP_EQ_I16 = 138,
V_CMP_LE_I16 = 139,
V_CMP_GT_I16 = 140,
V_CMP_NE_I16 = 141,
V_CMP_GE_I16 = 142,
V_CMPX_F_I32 = 144,
V_CMPX_LT_I32 = 145,
V_CMPX_EQ_I32 = 146,
@ -433,6 +446,12 @@ enum class OpcodeVOP3 : u32 {
V_CMP_TRU_I64 = 167,
V_CMP_T_I64 = 167,
V_CMP_CLASS_F64 = 168,
V_CMP_LT_U16 = 169,
V_CMP_EQ_U16 = 170,
V_CMP_LE_U16 = 171,
V_CMP_GT_U16 = 172,
V_CMP_NE_U16 = 173,
V_CMP_GE_U16 = 174,
V_CMPX_F_I64 = 176,
V_CMPX_LT_I64 = 177,
V_CMPX_EQ_I64 = 178,
@ -454,6 +473,14 @@ enum class OpcodeVOP3 : u32 {
V_CMP_GE_U32 = 198,
V_CMP_TRU_U32 = 199,
V_CMP_T_U32 = 199,
V_CMP_F_F16 = 200,
V_CMP_LT_F16 = 201,
V_CMP_EQ_F16 = 202,
V_CMP_LE_F16 = 203,
V_CMP_GT_F16 = 204,
V_CMP_LG_F16 = 205,
V_CMP_GE_F16 = 206,
V_CMP_O_F16 = 207,
V_CMPX_F_U32 = 208,
V_CMPX_LT_U32 = 209,
V_CMPX_EQ_U32 = 210,
@ -534,6 +561,13 @@ enum class OpcodeVOP3 : u32 {
V_CVT_PKRTZ_F16_F32 = 303,
V_CVT_PK_U16_U32 = 304,
V_CVT_PK_I16_I32 = 305,
V_ADD_F16 = 306,
V_SUB_F16 = 307,
V_SUBREV_F16 = 308,
V_MUL_F16 = 309,
V_MAX_F16 = 313,
V_MIN_F16 = 314,
V_LDEXP_F16 = 315,
V_MAD_LEGACY_F32 = 320,
V_MAD_F32 = 321,
V_MAD_I32_I24 = 322,
@ -658,8 +692,28 @@ enum class OpcodeVOP3 : u32 {
V_MOVRELSD_B32 = 452,
V_LOG_LEGACY_F32 = 453,
V_EXP_LEGACY_F32 = 454,
V_RCP_F16 = 468,
V_SQRT_F16 = 469,
OP_RANGE_VOP3 = V_EXP_LEGACY_F32 + 1,
V_ADD_NC_U16 = 771,
V_LSHRREV_B16 = 775,
V_ASHRREV_I16 = 776,
V_ADD_NC_I16 = 781,
V_SUB_NC_I16 = 782,
V_SUB_CO_U32 = 784,
V_LSHLREV_B16 = 788,
V_MAD_F16 = 833,
V_LSHL_ADD_U32 = 838,
V_ADD_LSHL_U32 = 839,
V_MIN3_F16 = 849,
V_MAX3_F16 = 852,
V_MAD_I16 = 862,
V_ADD3_U32 = 877,
V_LSHL_OR_B32 = 879,
V_AND_OR_B32 = 881,
V_OR3_B32 = 882,
OP_RANGE_VOP3 = 1024, // In Neo mode the VOP3 OP field gains an extra bit (10 bits total), so round the range up to 2^10
};
enum class OpcodeVOP3P : u32 {
@ -756,8 +810,10 @@ enum class OpcodeVOP1 : u32 {
V_MOVRELSD_B32 = 68,
V_LOG_LEGACY_F32 = 69,
V_EXP_LEGACY_F32 = 70,
V_RCP_F16 = 84,
V_SQRT_F16 = 85,
OP_RANGE_VOP1 = V_EXP_LEGACY_F32 + 1,
OP_RANGE_VOP1 = V_SQRT_F16 + 1,
};
enum class OpcodeVOPC : u32 {
@ -908,6 +964,12 @@ enum class OpcodeVOPC : u32 {
V_CMP_TRU_I32 = 135,
V_CMP_T_I32 = 135,
V_CMP_CLASS_F32 = 136,
V_CMP_LT_I16 = 137,
V_CMP_EQ_I16 = 138,
V_CMP_LE_I16 = 139,
V_CMP_GT_I16 = 140,
V_CMP_NE_I16 = 141,
V_CMP_GE_I16 = 142,
V_CMPX_F_I32 = 144,
V_CMPX_LT_I32 = 145,
V_CMPX_EQ_I32 = 146,
@ -930,6 +992,12 @@ enum class OpcodeVOPC : u32 {
V_CMP_TRU_I64 = 167,
V_CMP_T_I64 = 167,
V_CMP_CLASS_F64 = 168,
V_CMP_LT_U16 = 169,
V_CMP_EQ_U16 = 170,
V_CMP_LE_U16 = 171,
V_CMP_GT_U16 = 172,
V_CMP_NE_U16 = 173,
V_CMP_GE_U16 = 174,
V_CMPX_F_I64 = 176,
V_CMPX_LT_I64 = 177,
V_CMPX_EQ_I64 = 178,
@ -951,6 +1019,14 @@ enum class OpcodeVOPC : u32 {
V_CMP_GE_U32 = 198,
V_CMP_TRU_U32 = 199,
V_CMP_T_U32 = 199,
V_CMP_F_F16 = 200,
V_CMP_LT_F16 = 201,
V_CMP_EQ_F16 = 202,
V_CMP_LE_F16 = 203,
V_CMP_GT_F16 = 204,
V_CMP_LG_F16 = 205,
V_CMP_GE_F16 = 206,
V_CMP_O_F16 = 207,
V_CMPX_F_U32 = 208,
V_CMPX_LT_U32 = 209,
V_CMPX_EQ_U32 = 210,
@ -1653,6 +1729,12 @@ enum class Opcode : u32 {
V_CMP_TRU_I32 = 135 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_T_I32 = 135 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_CLASS_F32 = 136 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_LT_I16 = 137 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_EQ_I16 = 138 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_LE_I16 = 139 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_GT_I16 = 140 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_NE_I16 = 141 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_GE_I16 = 142 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMPX_F_I32 = 144 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMPX_LT_I32 = 145 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMPX_EQ_I32 = 146 + (u32)OpcodeMap::OP_MAP_VOPC,
@ -1675,6 +1757,12 @@ enum class Opcode : u32 {
V_CMP_TRU_I64 = 167 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_T_I64 = 167 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_CLASS_F64 = 168 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_LT_U16 = 169 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_EQ_U16 = 170 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_LE_U16 = 171 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_GT_U16 = 172 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_NE_U16 = 173 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_GE_U16 = 174 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMPX_F_I64 = 176 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMPX_LT_I64 = 177 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMPX_EQ_I64 = 178 + (u32)OpcodeMap::OP_MAP_VOPC,
@ -1695,6 +1783,14 @@ enum class Opcode : u32 {
V_CMP_GE_U32 = 198 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_TRU_U32 = 199 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_T_U32 = 199 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_F_F16 = 200 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_LT_F16 = 201 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_EQ_F16 = 202 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_LE_F16 = 203 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_GT_F16 = 204 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_LG_F16 = 205 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_GE_F16 = 206 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMP_O_F16 = 207 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMPX_F_U32 = 208 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMPX_LT_U32 = 209 + (u32)OpcodeMap::OP_MAP_VOPC,
V_CMPX_EQ_U32 = 210 + (u32)OpcodeMap::OP_MAP_VOPC,
@ -1775,6 +1871,13 @@ enum class Opcode : u32 {
V_CVT_PKRTZ_F16_F32 = 47 + (u32)OpcodeMap::OP_MAP_VOP2,
V_CVT_PK_U16_U32 = 48 + (u32)OpcodeMap::OP_MAP_VOP2,
V_CVT_PK_I16_I32 = 49 + (u32)OpcodeMap::OP_MAP_VOP2,
V_ADD_F16 = 50 + (u32)OpcodeMap::OP_MAP_VOP2,
V_SUB_F16 = 51 + (u32)OpcodeMap::OP_MAP_VOP2,
V_SUBREV_F16 = 52 + (u32)OpcodeMap::OP_MAP_VOP2,
V_MUL_F16 = 53 + (u32)OpcodeMap::OP_MAP_VOP2,
V_MAX_F16 = 57 + (u32)OpcodeMap::OP_MAP_VOP2,
V_MIN_F16 = 58 + (u32)OpcodeMap::OP_MAP_VOP2,
V_LDEXP_F16 = 59 + (u32)OpcodeMap::OP_MAP_VOP2,
// VOP1
V_NOP = 0 + (u32)OpcodeMap::OP_MAP_VOP1,
V_MOV_B32 = 1 + (u32)OpcodeMap::OP_MAP_VOP1,
@ -1842,6 +1945,8 @@ enum class Opcode : u32 {
V_MOVRELSD_B32 = 68 + (u32)OpcodeMap::OP_MAP_VOP1,
V_LOG_LEGACY_F32 = 69 + (u32)OpcodeMap::OP_MAP_VOP1,
V_EXP_LEGACY_F32 = 70 + (u32)OpcodeMap::OP_MAP_VOP1,
V_RCP_F16 = 84 + (u32)OpcodeMap::OP_MAP_VOP1,
V_SQRT_F16 = 85 + (u32)OpcodeMap::OP_MAP_VOP1,
// VOP3
V_MAD_LEGACY_F32 = 320 + (u32)OpcodeMap::OP_MAP_VOP3,
V_MAD_F32 = 321 + (u32)OpcodeMap::OP_MAP_VOP3,
@ -1901,6 +2006,23 @@ enum class Opcode : u32 {
V_MQSAD_U32_U8 = 373 + (u32)OpcodeMap::OP_MAP_VOP3,
V_MAD_U64_U32 = 374 + (u32)OpcodeMap::OP_MAP_VOP3,
V_MAD_I64_I32 = 375 + (u32)OpcodeMap::OP_MAP_VOP3,
V_ADD_NC_U16 = 771 + (u32)OpcodeMap::OP_MAP_VOP3,
V_LSHRREV_B16 = 775 + (u32)OpcodeMap::OP_MAP_VOP3,
V_ASHRREV_I16 = 776 + (u32)OpcodeMap::OP_MAP_VOP3,
V_ADD_NC_I16 = 781 + (u32)OpcodeMap::OP_MAP_VOP3,
V_SUB_NC_I16 = 782 + (u32)OpcodeMap::OP_MAP_VOP3,
V_SUB_CO_U32 = 784 + (u32)OpcodeMap::OP_MAP_VOP3,
V_LSHLREV_B16 = 788 + (u32)OpcodeMap::OP_MAP_VOP3,
V_MAD_F16 = 833 + (u32)OpcodeMap::OP_MAP_VOP3,
V_LSHL_ADD_U32 = 838 + (u32)OpcodeMap::OP_MAP_VOP3,
V_ADD_LSHL_U32 = 839 + (u32)OpcodeMap::OP_MAP_VOP3,
V_MIN3_F16 = 849 + (u32)OpcodeMap::OP_MAP_VOP3,
V_MAX3_F16 = 852 + (u32)OpcodeMap::OP_MAP_VOP3,
V_MAD_I16 = 862 + (u32)OpcodeMap::OP_MAP_VOP3,
V_ADD3_U32 = 877 + (u32)OpcodeMap::OP_MAP_VOP3,
V_LSHL_OR_B32 = 879 + (u32)OpcodeMap::OP_MAP_VOP3,
V_AND_OR_B32 = 881 + (u32)OpcodeMap::OP_MAP_VOP3,
V_OR3_B32 = 882 + (u32)OpcodeMap::OP_MAP_VOP3,
// VINTRP
V_INTERP_P1_F32 = 0 + (u32)OpcodeMap::OP_MAP_VINTRP,
V_INTERP_P2_F32 = 1 + (u32)OpcodeMap::OP_MAP_VINTRP,