diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp
index 37e8a0973..ae58f080b 100644
--- a/src/shader_recompiler/frontend/decode.cpp
+++ b/src/shader_recompiler/frontend/decode.cpp
@@ -3,6 +3,7 @@
 
 #include <algorithm>
 #include "common/assert.h"
+#include "core/libraries/kernel/process.h"
 #include "shader_recompiler/frontend/decode.h"
 
 #include <magic_enum/magic_enum.hpp>
@@ -39,6 +40,7 @@ InstEncoding GetInstructionEncoding(u32 token) {
     encoding = static_cast<InstEncoding>(token & (u32)EncodingMask::MASK_6bit);
     switch (encoding) {
     case InstEncoding::VOP3:
+    case InstEncoding::VOP3P:
     case InstEncoding::EXP:
     case InstEncoding::VINTRP:
     case InstEncoding::DS:
@@ -82,7 +84,6 @@ InstEncoding GetInstructionEncoding(u32 token) {
         break;
     }
 
-    UNREACHABLE();
     return InstEncoding::ILLEGAL;
 }
 
@@ -111,7 +112,7 @@ GcnInst GcnDecodeContext::decodeInstruction(GcnCodeSlice& code) {
     const uint32_t token = code.at(0);
 
     InstEncoding encoding = GetInstructionEncoding(token);
-    ASSERT_MSG(encoding != InstEncoding::ILLEGAL, "illegal encoding");
+    ASSERT_MSG(encoding != InstEncoding::ILLEGAL, "illegal encoding: {:#x}", token);
     uint32_t encodingLen = getEncodingLength(encoding);
 
     // Clear the instruction
@@ -155,6 +156,7 @@ uint32_t GcnDecodeContext::getEncodingLength(InstEncoding encoding) {
         break;
 
     case InstEncoding::VOP3:
+    case InstEncoding::VOP3P:
     case InstEncoding::MUBUF:
     case InstEncoding::MTBUF:
     case InstEncoding::MIMG:
@@ -189,6 +191,9 @@ uint32_t GcnDecodeContext::getOpMapOffset(InstEncoding encoding) {
     case InstEncoding::VOP3:
         offset = (uint32_t)OpcodeMap::OP_MAP_VOP3;
         break;
+    case InstEncoding::VOP3P:
+        offset = (uint32_t)OpcodeMap::OP_MAP_VOP3P;
+        break;
     case InstEncoding::EXP:
         offset = (uint32_t)OpcodeMap::OP_MAP_EXP;
         break;
@@ -381,6 +386,9 @@ void GcnDecodeContext::decodeInstruction64(InstEncoding encoding, GcnCodeSlice&
     case InstEncoding::VOP3:
         decodeInstructionVOP3(hexInstruction);
         break;
+    case InstEncoding::VOP3P:
+        decodeInstructionVOP3P(hexInstruction);
+        break;
     case InstEncoding::MUBUF:
         decodeInstructionMUBUF(hexInstruction);
         break;
@@ -712,6 +720,55 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) {
     }
 }
 
+/// Decodes a 64-bit VOP3P word into m_instruction (opcode, operands, control, modifiers).
+void GcnDecodeContext::decodeInstructionVOP3P(uint64_t hexInstruction) {
+    const u32 vdst = bit::extract(hexInstruction, 7, 0);
+    const u32 op = bit::extract(hexInstruction, 22, 16);
+    const u32 src0 = bit::extract(hexInstruction, 40, 32);
+    const u32 src1 = bit::extract(hexInstruction, 49, 41);
+    const u32 src2 = bit::extract(hexInstruction, 58, 50);
+
+    // The op field is 7 bits wide, but no opcode exists at or above OP_RANGE_VOP3P.
+    ASSERT_MSG(op < static_cast<u32>(OpcodeVOP3P::OP_RANGE_VOP3P), "illegal VOP3P opcode: {}", op);
+    m_instruction.opcode = static_cast<Opcode>(op + static_cast<u32>(OpcodeMap::OP_MAP_VOP3P));
+
+    // Vector register sources are stored relative to VectorGPRMin; others keep the raw code.
+    m_instruction.src[0].field = getOperandField(src0);
+    m_instruction.src[0].code =
+        m_instruction.src[0].field == OperandField::VectorGPR ? src0 - VectorGPRMin : src0;
+    m_instruction.src[1].field = getOperandField(src1);
+    m_instruction.src[1].code =
+        m_instruction.src[1].field == OperandField::VectorGPR ? src1 - VectorGPRMin : src1;
+    m_instruction.src[2].field = getOperandField(src2);
+    m_instruction.src[2].code =
+        m_instruction.src[2].field == OperandField::VectorGPR ? src2 - VectorGPRMin : src2;
+    m_instruction.dst[0].field = OperandField::VectorGPR;
+    m_instruction.dst[0].code = vdst;
+
+    // Keep the raw control bits by viewing the encoded word as InstControlVOP3P.
+    m_instruction.control.vop3p = *reinterpret_cast<InstControlVOP3P*>(&hexInstruction);
+
+    // Bit i of neg/neg_hi/op_sel and get_op_sel_hi(i) describe src[i].
+    auto& control = m_instruction.control.vop3p;
+    for (u32 i = 0; i != 3; ++i) {
+        if (control.neg & (1u << i)) {
+            m_instruction.src[i].input_modifier.neg = true;
+        }
+        if (control.neg_hi & (1u << i)) {
+            m_instruction.src[i].input_modifier.neg_hi = true;
+        }
+        if (control.op_sel & (1u << i)) {
+            m_instruction.src[i].op_sel.op_sel = true;
+        }
+        if (control.get_op_sel_hi(i)) {
+            m_instruction.src[i].op_sel.op_sel_hi = true;
+        }
+    }
+
+    // Clamp is the only output modifier this decoder sets.
+    m_instruction.dst[0].output_modifier.clamp = static_cast<bool>(control.clamp);
+}
+
 void GcnDecodeContext::decodeInstructionMUBUF(uint64_t hexInstruction) {
     u32 op = bit::extract(hexInstruction, 24, 18);
     u32 vaddr = bit::extract(hexInstruction, 39, 32);
diff --git a/src/shader_recompiler/frontend/decode.h b/src/shader_recompiler/frontend/decode.h
index 8125ce7fb..605a4e229 100644
--- a/src/shader_recompiler/frontend/decode.h
+++ b/src/shader_recompiler/frontend/decode.h
@@ -84,6 +84,7 @@ private:
     void decodeInstructionVINTRP(uint32_t hexInstruction);
     // 64 bits encodings
     void decodeInstructionVOP3(uint64_t hexInstruction);
+    void decodeInstructionVOP3P(uint64_t hexInstruction);
     void decodeInstructionMUBUF(uint64_t hexInstruction);
     void decodeInstructionMTBUF(uint64_t hexInstruction);
     void decodeInstructionMIMG(uint64_t hexInstruction);
diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp
index 4a90fe358..bc0ab3707 100644
--- a/src/shader_recompiler/frontend/format.cpp
+++ b/src/shader_recompiler/frontend/format.cpp
@@ -1784,6 +1784,88 @@ constexpr std::array<InstFormat, 455> InstructionFormatVOP3 = {{
     {},
 }};
 
+constexpr std::array<InstFormat, 35> InstructionFormatVOP3P = {{ // indexed by OpcodeVOP3P
+    // 0 = V_PK_MAD_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 3, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 1 = V_PK_MUL_LO_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 2 = V_PK_ADD_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 3 = V_PK_SUB_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 4 = V_PK_LSHLREV_B16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 5 = V_PK_LSHRREV_B16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 6 = V_PK_ASHRREV_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 7 = V_PK_MAX_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 8 = V_PK_MIN_I16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16,
+     ScalarType::Sint16},
+    // 9 = V_PK_MAD_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 3, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 10 = V_PK_ADD_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 11 = V_PK_SUB_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 12 = V_PK_MAX_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 13 = V_PK_MIN_U16
+    {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16,
+     ScalarType::Uint16},
+    // 14 = V_PK_FMA_F16
+    {InstClass::VectorFpArith16, InstCategory::VectorALU, 3, 1, ScalarType::Float16,
+     ScalarType::Float16},
+    // 15 = V_PK_ADD_F16
+    {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16,
+     ScalarType::Float16},
+    // 16 = V_PK_MUL_F16
+    {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16,
+     ScalarType::Float16},
+    // 17 = V_PK_MIN_F16
+    {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16,
+     ScalarType::Float16},
+    // 18 = V_PK_MAX_F16
+    {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16,
+     ScalarType::Float16},
+    {}, // 19..31 = no opcode defined; placeholders keep the index equal to the opcode
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    {},
+    // 32 = V_MAD_MIX_F32
+    {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
+     ScalarType::Float32},
+    // 33 = V_MAD_MIXLO_F16
+    {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
+     ScalarType::Float32},
+    // 34 = V_MAD_MIXHI_F16
+    {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32,
+     ScalarType::Float32},
+}};
+
 constexpr std::array<InstFormat, 71> InstructionFormatVOP1 = {{
     // 0 = V_NOP
     {InstClass::VectorMisc, InstCategory::VectorALU, 0, 1, ScalarType::Any, ScalarType::Any},
@@ -3705,6 +3787,8 @@ InstFormat InstructionFormat(InstEncoding encoding, uint32_t opcode) {
         return InstructionFormatVOPC[opcode];
     case InstEncoding::VOP3:
         return InstructionFormatVOP3[opcode];
+    case InstEncoding::VOP3P:
+        return InstructionFormatVOP3P[opcode];
     case InstEncoding::EXP:
         return InstructionFormatEXP[opcode];
     case InstEncoding::VINTRP:
diff --git a/src/shader_recompiler/frontend/instruction.h b/src/shader_recompiler/frontend/instruction.h
index f4e7bc9f2..05d5ce7b7 100644
--- a/src/shader_recompiler/frontend/instruction.h
+++ b/src/shader_recompiler/frontend/instruction.h
@@ -3,6 +3,7 @@
 
 #pragma once
 
+#include "common/assert.h"
 #include "shader_recompiler/frontend/opcodes.h"
 
 namespace Shader::Gcn {
@@ -25,6 +26,7 @@ enum OperandFieldRange {
 /// These are applied after loading an operand register.
 struct InputModifiers {
     bool neg = false;
+    bool neg_hi = false; // set per source from the VOP3P neg_hi control bits
     bool abs = false;
 };
 
@@ -34,11 +36,18 @@ struct OutputModifiers {
     float multiplier = 0.f;
 };
 
+struct OperandSelection {
+    bool op_sel = false;    // set from bit i of the VOP3P op_sel control field
+    bool op_sel_hi = false; // set from InstControlVOP3P::get_op_sel_hi(i)
+};
+
 struct InstOperand {
     OperandField field = OperandField::Undefined;
     ScalarType type = ScalarType::Undefined;
     InputModifiers input_modifier = {};
     OutputModifiers output_modifier = {};
+    // Only valid for packed 16-bit operations; filled in by the VOP3P decoder.
+    OperandSelection op_sel = {};
     u32 code = 0xFFFFFFFF;
 };
 
@@ -90,6 +99,32 @@ struct InstControlVOP3 {
     u64 neg : 3;
 };
 
+struct InstControlVOP3P { // overlays the raw 64-bit VOP3P word; [hi:lo] assume LSB-first fields
+    u64 : 8;              // [7:0]   skipped here (the decoder reads vdst from these bits)
+    u64 neg_hi : 3;       // [10:8]  one bit per source operand
+    u64 op_sel : 3;       // [13:11] one bit per source operand
+    u64 op_sel_hi_2 : 1;  // [14]    op_sel_hi bit of source 2
+    u64 clamp : 1;        // [15]
+    u64 : 43;             // [58:16] skipped here (opcode and source operand bits)
+    u64 op_sel_hi_01 : 2; // [60:59] op_sel_hi bits of sources 0 and 1
+    u64 neg : 3;          // [63:61] one bit per source operand
+
+    bool get_op_sel_hi(int idx) const { // op_sel_hi bit of source idx; idx must be 0, 1 or 2
+        switch (idx) {
+        case 0:
+            return (op_sel_hi_01 & 1) == 1;
+        case 1:
+            return ((op_sel_hi_01 >> 1) & 1) == 1;
+        case 2:
+            return (op_sel_hi_2 & 1) == 1;
+        default:
+            UNREACHABLE_MSG("get_op_sel_hi: {}", idx);
+        }
+    }
+};
+
+static_assert(sizeof(InstControlVOP3P) == 8);
+
 struct InstControlSMRD {
     u32 offset : 8;
     u32 imm : 1;
@@ -174,6 +209,7 @@ union InstControl {
     InstControlSOPK sopk;
     InstControlSOPP sopp;
     InstControlVOP3 vop3;
+    InstControlVOP3P vop3p;
     InstControlSMRD smrd;
     InstControlMUBUF mubuf;
     InstControlMTBUF mtbuf;
diff --git a/src/shader_recompiler/frontend/opcodes.h b/src/shader_recompiler/frontend/opcodes.h
index 7390a3940..6d6df88a6 100644
--- a/src/shader_recompiler/frontend/opcodes.h
+++ b/src/shader_recompiler/frontend/opcodes.h
@@ -662,6 +662,33 @@ enum class OpcodeVOP3 : u32 {
     OP_RANGE_VOP3 = V_EXP_LEGACY_F32 + 1,
 };
 
+enum class OpcodeVOP3P : u32 {
+    V_PK_MAD_I16 = 0,
+    V_PK_MUL_LO_U16 = 1,
+    V_PK_ADD_I16 = 2,
+    V_PK_SUB_I16 = 3,
+    V_PK_LSHLREV_B16 = 4,
+    V_PK_LSHRREV_B16 = 5,
+    V_PK_ASHRREV_I16 = 6,
+    V_PK_MAX_I16 = 7,
+    V_PK_MIN_I16 = 8,
+    V_PK_MAD_U16 = 9,
+    V_PK_ADD_U16 = 10,
+    V_PK_SUB_U16 = 11,
+    V_PK_MAX_U16 = 12,
+    V_PK_MIN_U16 = 13,
+    V_PK_FMA_F16 = 14,
+    V_PK_ADD_F16 = 15,
+    V_PK_MUL_F16 = 16,
+    V_PK_MIN_F16 = 17,
+    V_PK_MAX_F16 = 18,
+    V_MAD_MIX_F32 = 32,
+    V_MAD_MIXLO_F16 = 33,
+    V_MAD_MIXHI_F16 = 34,
+    // Values 19..31 are unassigned. OP_RANGE_VOP3P is one past the highest opcode.
+    OP_RANGE_VOP3P = V_MAD_MIXHI_F16 + 1,
+};
+
 enum class OpcodeVOP1 : u32 {
     V_NOP = 0,
     V_MOV_B32 = 1,
@@ -1313,6 +1340,7 @@ enum class OpcodeMap : u32 {
     OP_MAP_MTBUF = OP_MAP_MUBUF + (u32)OpcodeMUBUF::OP_RANGE_MUBUF,
     OP_MAP_MIMG = OP_MAP_MTBUF + (u32)OpcodeMTBUF::OP_RANGE_MTBUF,
     OP_MAP_EXP = OP_MAP_MIMG + (u32)OpcodeMIMG::OP_RANGE_MIMG,
+    OP_MAP_VOP3P = OP_MAP_EXP + (u32)OpcodeEXP::OP_RANGE_EXP,
 };
 
 enum class Opcode : u32 {
@@ -2192,6 +2220,29 @@ enum class Opcode : u32 {
     IMAGE_SAMPLE_C_CD_CL_O = 111 + (u32)OpcodeMap::OP_MAP_MIMG,
     // EXP
     EXP = 0 + (u32)OpcodeMap::OP_MAP_EXP,
+    // VOP3P
+    V_PK_MAD_I16 = 0 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MUL_LO_U16 = 1 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_ADD_I16 = 2 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_SUB_I16 = 3 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_LSHLREV_B16 = 4 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_LSHRREV_B16 = 5 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_ASHRREV_I16 = 6 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MAX_I16 = 7 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MIN_I16 = 8 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MAD_U16 = 9 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_ADD_U16 = 10 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_SUB_U16 = 11 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MAX_U16 = 12 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MIN_U16 = 13 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_FMA_F16 = 14 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_ADD_F16 = 15 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MUL_F16 = 16 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MIN_F16 = 17 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_PK_MAX_F16 = 18 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_MAD_MIX_F32 = 32 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_MAD_MIXLO_F16 = 33 + (u32)OpcodeMap::OP_MAP_VOP3P,
+    V_MAD_MIXHI_F16 = 34 + (u32)OpcodeMap::OP_MAP_VOP3P,
 };
 
 enum class EncodingMask : u32 {
@@ -2222,6 +2273,8 @@ enum class InstEncoding : u32 {
     VOP3 = 0x00000034u << 26,
     /// bits [31:26] - (1 1 1 1 1 0)
     EXP = 0x0000003Eu << 26,
+    /// bits [31:26] - (1 1 0 0 1 1)
+    VOP3P = 0x00000033u << 26,
     /// bits [31:26] - (1 1 0 0 1 0)
     VINTRP = 0x00000032u << 26,
     /// bits [31:26] - (1 1 0 1 1 0)
@@ -2271,6 +2324,7 @@ enum class InstClass : u32 {
     VectorBitField32,
     VectorThreadMask,
     VectorBitField64,
+    VectorFpArith16,
     VectorFpArith32,
     VectorFpRound32,
     VectorFpField32,
@@ -2281,6 +2335,7 @@ enum class InstClass : u32 {
     VectorFpField64,
     VectorFpTran64,
     VectorFpCmp64,
+    VectorIntArith16,
     VectorIntArith32,
     VectorIntArith64,
     VectorIntCmp32,
@@ -2360,8 +2415,10 @@ enum class InstCategory : u32 {
 enum class ScalarType : u32 {
     Undefined,
     Any,
+    Uint16,
     Uint32,
     Uint64,
+    Sint16,
     Sint32,
     Sint64,
     Float16,