VOP3P instructions definitions

2026-04-01 18:40:57 -06:00 · 2026-03-31 23:20:37 +02:00 · 2026-03-31 23:20:37 +02:00 · de2f69a4d2
commit de2f69a4d2
parent 969955b8a0
5 changed files with 237 additions and 2 deletions
--- a/src/shader_recompiler/frontend/decode.cpp
+++ b/src/shader_recompiler/frontend/decode.cpp
@ -3,6 +3,7 @@

 #include <algorithm>
 #include "common/assert.h"
+#include "core/libraries/kernel/process.h"
 #include "shader_recompiler/frontend/decode.h"

 #include <magic_enum/magic_enum.hpp>
@ -39,6 +40,7 @@ InstEncoding GetInstructionEncoding(u32 token) {
    encoding = static_cast<InstEncoding>(token & (u32)EncodingMask::MASK_6bit);
    switch (encoding) {
    case InstEncoding::VOP3:
+    case InstEncoding::VOP3P:
    case InstEncoding::EXP:
    case InstEncoding::VINTRP:
    case InstEncoding::DS:
@ -82,7 +84,6 @@ InstEncoding GetInstructionEncoding(u32 token) {
        break;
    }

-    UNREACHABLE();
    return InstEncoding::ILLEGAL;
 }

@ -111,7 +112,7 @@ GcnInst GcnDecodeContext::decodeInstruction(GcnCodeSlice& code) {
    const uint32_t token = code.at(0);

    InstEncoding encoding = GetInstructionEncoding(token);
-    ASSERT_MSG(encoding != InstEncoding::ILLEGAL, "illegal encoding");
+    ASSERT_MSG(encoding != InstEncoding::ILLEGAL, "illegal encoding: {:#x}", token);
    uint32_t encodingLen = getEncodingLength(encoding);

    // Clear the instruction
@ -155,6 +156,7 @@ uint32_t GcnDecodeContext::getEncodingLength(InstEncoding encoding) {
        break;

    case InstEncoding::VOP3:
+    case InstEncoding::VOP3P:
    case InstEncoding::MUBUF:
    case InstEncoding::MTBUF:
    case InstEncoding::MIMG:
@ -189,6 +191,9 @@ uint32_t GcnDecodeContext::getOpMapOffset(InstEncoding encoding) {
    case InstEncoding::VOP3:
        offset = (uint32_t)OpcodeMap::OP_MAP_VOP3;
        break;
+    case InstEncoding::VOP3P:
+        offset = (uint32_t)OpcodeMap::OP_MAP_VOP3P;
+        break;
    case InstEncoding::EXP:
        offset = (uint32_t)OpcodeMap::OP_MAP_EXP;
        break;
@ -381,6 +386,9 @@ void GcnDecodeContext::decodeInstruction64(InstEncoding encoding, GcnCodeSlice&
    case InstEncoding::VOP3:
        decodeInstructionVOP3(hexInstruction);
        break;
+    case InstEncoding::VOP3P:
+        decodeInstructionVOP3P(hexInstruction);
+        break;
    case InstEncoding::MUBUF:
        decodeInstructionMUBUF(hexInstruction);
        break;
@ -712,6 +720,55 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) {
    }
 }

+/// Decodes a 64-bit VOP3P instruction word into m_instruction: opcode, three source operands,
+/// one vector destination, the raw control bits and the per-operand modifier flags.
+void GcnDecodeContext::decodeInstructionVOP3P(uint64_t hexInstruction) {
+    // Raw fields of the instruction word: vdst [7:0], op [22:16], src0 [40:32], src1 [49:41],
+    // src2 [58:50].
+    const u32 dst_reg = bit::extract(hexInstruction, 7, 0);
+    const u32 raw_op = bit::extract(hexInstruction, 22, 16);
+    const u32 raw_src0 = bit::extract(hexInstruction, 40, 32);
+    const u32 raw_src1 = bit::extract(hexInstruction, 49, 41);
+    const u32 raw_src2 = bit::extract(hexInstruction, 58, 50);
+    const u32 raw_srcs[3] = {raw_src0, raw_src1, raw_src2};
+
+    // Rebase the encoding-local opcode into the VOP3P range of the global opcode space.
+    const u32 opcode_base = static_cast<u32>(OpcodeMap::OP_MAP_VOP3P);
+    m_instruction.opcode = static_cast<Opcode>(raw_op + opcode_base);
+
+    // Vector registers are stored relative to VectorGPRMin; every other operand kind keeps its
+    // raw code.
+    for (u32 idx = 0; idx < 3; ++idx) {
+        auto& operand = m_instruction.src[idx];
+        const u32 raw = raw_srcs[idx];
+        operand.field = getOperandField(raw);
+        operand.code = operand.field == OperandField::VectorGPR ? raw - VectorGPRMin : raw;
+    }
+
+    // The destination is always tagged as a vector register and keeps the raw vdst value.
+    auto& dest = m_instruction.dst[0];
+    dest.field = OperandField::VectorGPR;
+    dest.code = dst_reg;
+
+    // Keep the modifier bits by viewing the instruction word through the control layout.
+    m_instruction.control.vop3p = *reinterpret_cast<InstControlVOP3P*>(&hexInstruction);
+    auto& ctrl = m_instruction.control.vop3p;
+
+    // Input modifiers and operand selection: bit `idx` of each control field belongs to source
+    // operand `idx`. Flags are only ever raised here, never cleared.
+    for (u32 idx = 0; idx < 3; ++idx) {
+        auto& operand = m_instruction.src[idx];
+        const u32 mask = 1u << idx;
+
+        operand.input_modifier.neg |= (ctrl.neg & mask) != 0;
+        operand.input_modifier.neg_hi |= (ctrl.neg_hi & mask) != 0;
+        operand.op_sel.op_sel |= (ctrl.op_sel & mask) != 0;
+        operand.op_sel.op_sel_hi |= ctrl.get_op_sel_hi(idx);
+    }
+
+    // Output modifier: only the clamp bit is taken from the control word.
+    dest.output_modifier.clamp = static_cast<bool>(ctrl.clamp);
+}
+
 void GcnDecodeContext::decodeInstructionMUBUF(uint64_t hexInstruction) {
    u32 op = bit::extract(hexInstruction, 24, 18);
    u32 vaddr = bit::extract(hexInstruction, 39, 32);
--- a/src/shader_recompiler/frontend/decode.h
+++ b/src/shader_recompiler/frontend/decode.h
@ -84,6 +84,7 @@ private:
    void decodeInstructionVINTRP(uint32_t hexInstruction);
    // 64 bits encodings
    void decodeInstructionVOP3(uint64_t hexInstruction);
+    void decodeInstructionVOP3P(uint64_t hexInstruction);
    void decodeInstructionMUBUF(uint64_t hexInstruction);
    void decodeInstructionMTBUF(uint64_t hexInstruction);
    void decodeInstructionMIMG(uint64_t hexInstruction);
--- a/src/shader_recompiler/frontend/format.cpp
+++ b/src/shader_recompiler/frontend/format.cpp
@ -1784,6 +1784,88 @@ constexpr std::array<InstFormat, 455> InstructionFormatVOP3 = {{
    {},
 }};

+// Format descriptors for the VOP3P encoding, indexed by the encoding-local opcode
+// (see OpcodeVOP3P). Opcodes 19..31 have no OpcodeVOP3P enumerator and are left as
+// default-constructed entries.
+// NOTE(review): the trailing values read as (src_count, dst_count, src_type, dst_type) judging
+// by the data -- confirm against the InstFormat declaration.
+constexpr std::array<InstFormat, 35> InstructionFormatVOP3P = {{
+    // 0 = V_PK_MAD_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 3, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 1 = V_PK_MUL_LO_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 2 = V_PK_ADD_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 3 = V_PK_SUB_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 4 = V_PK_LSHLREV_B16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 5 = V_PK_LSHRREV_B16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 6 = V_PK_ASHRREV_I16
+    // Arithmetic shift of a signed value: the result is signed, like the other *_I16 entries.
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 7 = V_PK_MAX_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 8 = V_PK_MIN_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 9 = V_PK_MAD_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 3, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 10 = V_PK_ADD_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 11 = V_PK_SUB_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 12 = V_PK_MAX_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 13 = V_PK_MIN_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 14 = V_PK_FMA_F16
+    {InstClass::VectorFpArith16, InstCategory::VectorALU, 3, 1, ScalarType::Float16,
+     ScalarType::Float16},
+    // 15 = V_PK_ADD_F16
+    {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16,
+     ScalarType::Float16},
+    // 16 = V_PK_MUL_F16
+    {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16,
+     ScalarType::Float16},
+    // 17 = V_PK_MIN_F16
+    {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16,
+     ScalarType::Float16},
+    // 18 = V_PK_MAX_F16
+    {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16,
+     ScalarType::Float16},
+    // 19..31 = no opcode defined
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    // 32 = V_MAD_MIX_F32
+    {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
+     ScalarType::Float32},
+    // 33 = V_MAD_MIXLO_F16
+    {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
+     ScalarType::Float32},
+    // 34 = V_MAD_MIXHI_F16
+    {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
+     ScalarType::Float32},
+}};
+
 constexpr std::array<InstFormat, 71> InstructionFormatVOP1 = {{
    // 0 = V_NOP
    {InstClass::VectorMisc, InstCategory::VectorALU, 0, 1, ScalarType::Any, ScalarType::Any},
@ -3705,6 +3787,8 @@ InstFormat InstructionFormat(InstEncoding encoding, uint32_t opcode) {
        return InstructionFormatVOPC[opcode];
    case InstEncoding::VOP3:
        return InstructionFormatVOP3[opcode];
+    case InstEncoding::VOP3P:
+        return InstructionFormatVOP3P[opcode];
    case InstEncoding::EXP:
        return InstructionFormatEXP[opcode];
    case InstEncoding::VINTRP:
--- a/src/shader_recompiler/frontend/instruction.h
+++ b/src/shader_recompiler/frontend/instruction.h
@ -3,6 +3,7 @@

 #pragma once

+#include "common/assert.h"
 #include "shader_recompiler/frontend/opcodes.h"

 namespace Shader::Gcn {
@ -25,6 +26,7 @@ enum OperandFieldRange {
 /// These are applied after loading an operand register.
 struct InputModifiers {
    bool neg = false;
+    // Raised by the VOP3P decoder when this operand's bit in the instruction's neg_hi field is
+    // set. NOTE(review): presumably negates only the high 16-bit half of a packed operand --
+    // confirm against the ISA documentation.
+    bool neg_hi = false;
    bool abs = false;
 };

@ -34,11 +36,18 @@ struct OutputModifiers {
    float multiplier = 0.f;
 };

+/// Per-operand selection flags; the VOP3P decoder raises them from the instruction's op_sel
+/// and op_sel_hi control bits.
+/// NOTE(review): presumably these pick which 16-bit half of the source register feeds the low
+/// and high lanes of a packed operation -- confirm against the ISA documentation.
+struct OperandSelection {
+    bool op_sel = false;
+    bool op_sel_hi = false;
+};
+
 struct InstOperand {
    OperandField field = OperandField::Undefined;
    ScalarType type = ScalarType::Undefined;
    InputModifiers input_modifier = {};
    OutputModifiers output_modifier = {};
+    // Only valid for packed 16-bit operations.
+    OperandSelection op_sel = {};
+    // Operand code; stays at 0xFFFFFFFF until something assigns it.
    u32 code = 0xFFFFFFFF;
 };

@ -90,6 +99,32 @@ struct InstControlVOP3 {
    u64 neg : 3;
 };

+/// Modifier/control bits of a VOP3P instruction, laid out to overlay the raw 64-bit
+/// instruction word. Field widths in declaration order: 8 unnamed, neg_hi 3, op_sel 3,
+/// op_sel_hi_2 1, clamp 1, 43 unnamed, op_sel_hi_01 2, neg 3 -- 64 bits in total.
+/// NOTE(review): the overlay relies on the compiler allocating bit-fields starting from the
+/// least significant bit; that is implementation-defined.
+struct InstControlVOP3P {
+    u64 : 8;
+    u64 neg_hi : 3;
+    u64 op_sel : 3;
+    u64 op_sel_hi_2 : 1;
+    u64 clamp : 1;
+    u64 : 43;
+    u64 op_sel_hi_01 : 2;
+    u64 neg : 3;
+
+    /// Returns the op_sel_hi bit that belongs to source operand `idx` (0, 1 or 2).
+    /// Operands 0 and 1 share the two-bit op_sel_hi_01 field; operand 2 has its own
+    /// op_sel_hi_2 bit. Any other index is a programming error and hits UNREACHABLE_MSG.
+    /// Const-qualified so it can also be called through a const reference.
+    bool get_op_sel_hi(int idx) const {
+        switch (idx) {
+        case 0:
+            return (op_sel_hi_01 & 1) == 1;
+        case 1:
+            return ((op_sel_hi_01 >> 1) & 1) == 1;
+        case 2:
+            return (op_sel_hi_2 & 1) == 1;
+        default:
+            UNREACHABLE_MSG("get_op_sel_hi: {}", idx);
+        }
+    }
+};
+
+// The struct must stay exactly as wide as the 64-bit instruction word it overlays.
+static_assert(sizeof(InstControlVOP3P) == 8);
+
 struct InstControlSMRD {
    u32 offset : 8;
    u32 imm : 1;
@ -174,6 +209,7 @@ union InstControl {
    InstControlSOPK sopk;
    InstControlSOPP sopp;
    InstControlVOP3 vop3;
+    InstControlVOP3P vop3p;
    InstControlSMRD smrd;
    InstControlMUBUF mubuf;
    InstControlMTBUF mtbuf;
--- a/src/shader_recompiler/frontend/opcodes.h
+++ b/src/shader_recompiler/frontend/opcodes.h
@ -662,6 +662,33 @@ enum class OpcodeVOP3 : u32 {
    OP_RANGE_VOP3 = V_EXP_LEGACY_F32 + 1,
 };

+/// Encoding-local opcode numbers of the VOP3P instructions, i.e. the value carried in the
+/// instruction's op field before it is rebased by OpcodeMap::OP_MAP_VOP3P.
+enum class OpcodeVOP3P : u32 {
+    V_PK_MAD_I16 = 0,
+    V_PK_MUL_LO_U16 = 1,
+    V_PK_ADD_I16 = 2,
+    V_PK_SUB_I16 = 3,
+    V_PK_LSHLREV_B16 = 4,
+    V_PK_LSHRREV_B16 = 5,
+    V_PK_ASHRREV_I16 = 6,
+    V_PK_MAX_I16 = 7,
+    V_PK_MIN_I16 = 8,
+    V_PK_MAD_U16 = 9,
+    V_PK_ADD_U16 = 10,
+    V_PK_SUB_U16 = 11,
+    V_PK_MAX_U16 = 12,
+    V_PK_MIN_U16 = 13,
+    V_PK_FMA_F16 = 14,
+    V_PK_ADD_F16 = 15,
+    V_PK_MUL_F16 = 16,
+    V_PK_MIN_F16 = 17,
+    V_PK_MAX_F16 = 18,
+    // 19..31 have no enumerator.
+    V_MAD_MIX_F32 = 32,
+    V_MAD_MIXLO_F16 = 33,
+    V_MAD_MIXHI_F16 = 34,
+
+    // One past the highest opcode (35); matches the entry count of InstructionFormatVOP3P.
+    OP_RANGE_VOP3P = V_MAD_MIXHI_F16 + 1,
+};
+
 enum class OpcodeVOP1 : u32 {
    V_NOP = 0,
    V_MOV_B32 = 1,
@ -1313,6 +1340,7 @@ enum class OpcodeMap : u32 {
    OP_MAP_MTBUF = OP_MAP_MUBUF + (u32)OpcodeMUBUF::OP_RANGE_MUBUF,
    OP_MAP_MIMG = OP_MAP_MTBUF + (u32)OpcodeMTBUF::OP_RANGE_MTBUF,
    OP_MAP_EXP = OP_MAP_MIMG + (u32)OpcodeMIMG::OP_RANGE_MIMG,
+    OP_MAP_VOP3P = OP_MAP_EXP + (u32)OpcodeEXP::OP_RANGE_EXP,
 };

 enum class Opcode : u32 {
@ -2192,6 +2220,29 @@ enum class Opcode : u32 {
    IMAGE_SAMPLE_C_CD_CL_O = 111 + (u32)OpcodeMap::OP_MAP_MIMG,
    // EXP
    EXP = 0 + (u32)OpcodeMap::OP_MAP_EXP,
+    // VOP3P
+    V_PK_MAD_I16 = 0 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MUL_LO_U16 = 1 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_ADD_I16 = 2 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_SUB_I16 = 3 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_LSHLREV_B16 = 4 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_LSHRREV_B16 = 5 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_ASHRREV_I16 = 6 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MAX_I16 = 7 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MIN_I16 = 8 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MAD_U16 = 9 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_ADD_U16 = 10 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_SUB_U16 = 11 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MAX_U16 = 12 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MIN_U16 = 13 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_FMA_F16 = 14 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_ADD_F16 = 15 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MUL_F16 = 16 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MIN_F16 = 17 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MAX_F16 = 18 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_MAD_MIX_F32 = 32 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_MAD_MIXLO_F16 = 33 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_MAD_MIXHI_F16 = 34 + (u32)OpcodeMap::OP_MAP_VOP3P,
 };

 enum class EncodingMask : u32 {
@ -2222,6 +2273,8 @@ enum class InstEncoding : u32 {
    VOP3 = 0x00000034u << 26,
    /// bits [31:26] - (1 1 1 1 1 0)
    EXP = 0x0000003Eu << 26,
+    /// bits [31:26] - (1 1 0 0 1 1)
+    VOP3P = 0x00000033u << 26,
    /// bits [31:26] - (1 1 0 0 1 0)
    VINTRP = 0x00000032u << 26,
    /// bits [31:26] - (1 1 0 1 1 0)
@ -2271,6 +2324,7 @@ enum class InstClass : u32 {
    VectorBitField32,
    VectorThreadMask,
    VectorBitField64,
+    VectorFpArith16,
    VectorFpArith32,
    VectorFpRound32,
    VectorFpField32,
@ -2281,6 +2335,7 @@ enum class InstClass : u32 {
    VectorFpField64,
    VectorFpTran64,
    VectorFpCmp64,
+    VectorIntArith16,
    VectorIntArith32,
    VectorIntArith64,
    VectorIntCmp32,
@ -2360,8 +2415,10 @@ enum class InstCategory : u32 {
 enum class ScalarType : u32 {
    Undefined,
    Any,
+    Uint16,
    Uint32,
    Uint64,
+    Sint16,
    Sint32,
    Sint64,
    Float16,