diff --git a/src/shader_recompiler/frontend/decode.cpp b/src/shader_recompiler/frontend/decode.cpp index 37e8a0973..ae58f080b 100644 --- a/src/shader_recompiler/frontend/decode.cpp +++ b/src/shader_recompiler/frontend/decode.cpp @@ -3,6 +3,7 @@ #include #include "common/assert.h" +#include "core/libraries/kernel/process.h" #include "shader_recompiler/frontend/decode.h" #include @@ -39,6 +40,7 @@ InstEncoding GetInstructionEncoding(u32 token) { encoding = static_cast(token & (u32)EncodingMask::MASK_6bit); switch (encoding) { case InstEncoding::VOP3: + case InstEncoding::VOP3P: case InstEncoding::EXP: case InstEncoding::VINTRP: case InstEncoding::DS: @@ -82,7 +84,6 @@ InstEncoding GetInstructionEncoding(u32 token) { break; } - UNREACHABLE(); return InstEncoding::ILLEGAL; } @@ -111,7 +112,7 @@ GcnInst GcnDecodeContext::decodeInstruction(GcnCodeSlice& code) { const uint32_t token = code.at(0); InstEncoding encoding = GetInstructionEncoding(token); - ASSERT_MSG(encoding != InstEncoding::ILLEGAL, "illegal encoding"); + ASSERT_MSG(encoding != InstEncoding::ILLEGAL, "illegal encoding: {:#x}", token); uint32_t encodingLen = getEncodingLength(encoding); // Clear the instruction @@ -155,6 +156,7 @@ uint32_t GcnDecodeContext::getEncodingLength(InstEncoding encoding) { break; case InstEncoding::VOP3: + case InstEncoding::VOP3P: case InstEncoding::MUBUF: case InstEncoding::MTBUF: case InstEncoding::MIMG: @@ -189,6 +191,9 @@ uint32_t GcnDecodeContext::getOpMapOffset(InstEncoding encoding) { case InstEncoding::VOP3: offset = (uint32_t)OpcodeMap::OP_MAP_VOP3; break; + case InstEncoding::VOP3P: + offset = (uint32_t)OpcodeMap::OP_MAP_VOP3P; + break; case InstEncoding::EXP: offset = (uint32_t)OpcodeMap::OP_MAP_EXP; break; @@ -381,6 +386,9 @@ void GcnDecodeContext::decodeInstruction64(InstEncoding encoding, GcnCodeSlice& case InstEncoding::VOP3: decodeInstructionVOP3(hexInstruction); break; + case InstEncoding::VOP3P: + decodeInstructionVOP3P(hexInstruction); + break; case InstEncoding::MUBUF: decodeInstructionMUBUF(hexInstruction); break; @@ -712,6 +720,55 @@ void GcnDecodeContext::decodeInstructionVOP3(uint64_t hexInstruction) { } } +void GcnDecodeContext::decodeInstructionVOP3P(uint64_t hexInstruction) { + u32 vdst = bit::extract(hexInstruction, 7, 0); + u32 op = bit::extract(hexInstruction, 22, 16); + u32 src0 = bit::extract(hexInstruction, 40, 32); + u32 src1 = bit::extract(hexInstruction, 49, 41); + u32 src2 = bit::extract(hexInstruction, 58, 50); + + m_instruction.opcode = static_cast(op + static_cast(OpcodeMap::OP_MAP_VOP3P)); + + m_instruction.src[0].field = getOperandField(src0); + m_instruction.src[0].code = + m_instruction.src[0].field == OperandField::VectorGPR ? src0 - VectorGPRMin : src0; + m_instruction.src[1].field = getOperandField(src1); + m_instruction.src[1].code = + m_instruction.src[1].field == OperandField::VectorGPR ? src1 - VectorGPRMin : src1; + m_instruction.src[2].field = getOperandField(src2); + m_instruction.src[2].code = + m_instruction.src[2].field == OperandField::VectorGPR ? src2 - VectorGPRMin : src2; + m_instruction.dst[0].field = OperandField::VectorGPR; + m_instruction.dst[0].code = vdst; + + m_instruction.control.vop3p = *reinterpret_cast(&hexInstruction); + + // update input modifier + auto& control = m_instruction.control.vop3p; + for (u32 i = 0; i != 3; ++i) { + if (control.neg & (1u << i)) { + m_instruction.src[i].input_modifier.neg = true; + } + + if (control.neg_hi & (1u << i)) { + m_instruction.src[i].input_modifier.neg_hi = true; + } + + if (control.op_sel & (1u << i)) { + m_instruction.src[i].op_sel.op_sel = true; + } + + if (control.get_op_sel_hi(i)) { + m_instruction.src[i].op_sel.op_sel_hi = true; + } + } + + // update output modifier + auto& outputMod = m_instruction.dst[0].output_modifier; + + outputMod.clamp = static_cast(control.clamp); +} + void GcnDecodeContext::decodeInstructionMUBUF(uint64_t hexInstruction) { u32 op = bit::extract(hexInstruction, 24, 18); u32 vaddr = bit::extract(hexInstruction, 39, 32); diff --git a/src/shader_recompiler/frontend/decode.h b/src/shader_recompiler/frontend/decode.h index 8125ce7fb..605a4e229 100644 --- a/src/shader_recompiler/frontend/decode.h +++ b/src/shader_recompiler/frontend/decode.h @@ -84,6 +84,7 @@ private: void decodeInstructionVINTRP(uint32_t hexInstruction); // 64 bits encodings void decodeInstructionVOP3(uint64_t hexInstruction); + void decodeInstructionVOP3P(uint64_t hexInstruction); void decodeInstructionMUBUF(uint64_t hexInstruction); void decodeInstructionMTBUF(uint64_t hexInstruction); void decodeInstructionMIMG(uint64_t hexInstruction); diff --git a/src/shader_recompiler/frontend/format.cpp b/src/shader_recompiler/frontend/format.cpp index 4a90fe358..bc0ab3707 100644 --- a/src/shader_recompiler/frontend/format.cpp +++ b/src/shader_recompiler/frontend/format.cpp @@ -1784,6 +1784,88 @@ constexpr std::array InstructionFormatVOP3 = {{ {}, }}; +constexpr std::array InstructionFormatVOP3P = {{ + // 0 = V_PK_MAD_I16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 3, 1, ScalarType::Sint16, + ScalarType::Sint16}, + // 1 = V_PK_MUL_LO_U16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16, + ScalarType::Uint16}, + // 2 = V_PK_ADD_I16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16, + ScalarType::Sint16}, + // 3 = V_PK_SUB_I16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16, + ScalarType::Sint16}, + // 4 = V_PK_LSHLREV_B16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16, + ScalarType::Uint16}, + // 5 = V_PK_LSHRREV_B16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16, + ScalarType::Uint16}, + // 6 = V_PK_ASHRREV_I16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16, + ScalarType::Uint16}, + // 7 = V_PK_MAX_I16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16, + ScalarType::Sint16}, + // 8 = V_PK_MIN_I16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Sint16, + ScalarType::Sint16}, + // 9 = V_PK_MAD_U16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 3, 1, ScalarType::Uint16, + ScalarType::Uint16}, + // 10 = V_PK_ADD_U16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16, + ScalarType::Uint16}, + // 11 = V_PK_SUB_U16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16, + ScalarType::Uint16}, + // 12 = V_PK_MAX_U16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16, + ScalarType::Uint16}, + // 13 = V_PK_MIN_U16 + {InstClass::VectorIntArith16, InstCategory::VectorALU, 2, 1, ScalarType::Uint16, + ScalarType::Uint16}, + // 14 = V_PK_FMA_F16 + {InstClass::VectorFpArith16, InstCategory::VectorALU, 3, 1, ScalarType::Float16, + ScalarType::Float16}, + // 15 = V_PK_ADD_F16 + {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16, + ScalarType::Float16}, + // 16 = V_PK_MUL_F16 + {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16, + ScalarType::Float16}, + // 17 = V_PK_MIN_F16 + {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16, + ScalarType::Float16}, + // 18 = V_PK_MAX_F16 + {InstClass::VectorFpArith16, InstCategory::VectorALU, 2, 1, ScalarType::Float16, + ScalarType::Float16}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + {}, + // 32 = V_MAD_MIX_F32 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 33 = V_MAD_MIXLO_F16 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, + // 34 = V_MAD_MIXHI_F16 + {InstClass::VectorFpArith32, InstCategory::VectorALU, 3, 1, ScalarType::Float32, + ScalarType::Float32}, +}}; + constexpr std::array InstructionFormatVOP1 = {{ // 0 = V_NOP {InstClass::VectorMisc, InstCategory::VectorALU, 0, 1, ScalarType::Any, ScalarType::Any}, @@ -3705,6 +3787,8 @@ InstFormat InstructionFormat(InstEncoding encoding, uint32_t opcode) { return InstructionFormatVOPC[opcode]; case InstEncoding::VOP3: return InstructionFormatVOP3[opcode]; + case InstEncoding::VOP3P: + return InstructionFormatVOP3P[opcode]; case InstEncoding::EXP: return InstructionFormatEXP[opcode]; case InstEncoding::VINTRP: diff --git a/src/shader_recompiler/frontend/instruction.h b/src/shader_recompiler/frontend/instruction.h index f4e7bc9f2..05d5ce7b7 100644 --- a/src/shader_recompiler/frontend/instruction.h +++ b/src/shader_recompiler/frontend/instruction.h @@ -3,6 +3,7 @@ #pragma once +#include "common/assert.h" #include "shader_recompiler/frontend/opcodes.h" namespace Shader::Gcn { @@ -25,6 +26,7 @@ enum OperandFieldRange { /// These are applied after loading an operand register. struct InputModifiers { bool neg = false; + bool neg_hi = false; bool abs = false; }; @@ -34,11 +36,18 @@ struct OutputModifiers { float multiplier = 0.f; }; +struct OperandSelection { + bool op_sel = false; + bool op_sel_hi = false; +}; + struct InstOperand { OperandField field = OperandField::Undefined; ScalarType type = ScalarType::Undefined; InputModifiers input_modifier = {}; OutputModifiers output_modifier = {}; + // only valid for packed 16bit operations + OperandSelection op_sel = {}; u32 code = 0xFFFFFFFF; }; @@ -90,6 +99,32 @@ struct InstControlVOP3 { u64 neg : 3; }; +struct InstControlVOP3P { + u64 : 8; + u64 neg_hi : 3; + u64 op_sel : 3; + u64 op_sel_hi_2 : 1; + u64 clamp : 1; + u64 : 43; + u64 op_sel_hi_01 : 2; + u64 neg : 3; + + bool get_op_sel_hi(int idx) { + switch (idx) { + case 0: + return (op_sel_hi_01 & 1) == 1; + case 1: + return ((op_sel_hi_01 >> 1) & 1) == 1; + case 2: + return (op_sel_hi_2 & 1) == 1; + default: + UNREACHABLE_MSG("get_op_sel_hi: {}", idx); + } + } +}; + +static_assert(sizeof(InstControlVOP3P) == 8); + struct InstControlSMRD { u32 offset : 8; u32 imm : 1; @@ -174,6 +209,7 @@ union InstControl { InstControlSOPK sopk; InstControlSOPP sopp; InstControlVOP3 vop3; + InstControlVOP3P vop3p; InstControlSMRD smrd; InstControlMUBUF mubuf; InstControlMTBUF mtbuf; diff --git a/src/shader_recompiler/frontend/opcodes.h b/src/shader_recompiler/frontend/opcodes.h index 7390a3940..6d6df88a6 100644 --- a/src/shader_recompiler/frontend/opcodes.h +++ b/src/shader_recompiler/frontend/opcodes.h @@ -662,6 +662,33 @@ enum class OpcodeVOP3 : u32 { OP_RANGE_VOP3 = V_EXP_LEGACY_F32 + 1, }; +enum class OpcodeVOP3P : u32 { + V_PK_MAD_I16 = 0, + V_PK_MUL_LO_U16 = 1, + V_PK_ADD_I16 = 2, + V_PK_SUB_I16 = 3, + V_PK_LSHLREV_B16 = 4, + V_PK_LSHRREV_B16 = 5, + V_PK_ASHRREV_I16 = 6, + V_PK_MAX_I16 = 7, + V_PK_MIN_I16 = 8, + V_PK_MAD_U16 = 9, + V_PK_ADD_U16 = 10, + V_PK_SUB_U16 = 11, + V_PK_MAX_U16 = 12, + V_PK_MIN_U16 = 13, + V_PK_FMA_F16 = 14, + V_PK_ADD_F16 = 15, + V_PK_MUL_F16 = 16, + V_PK_MIN_F16 = 17, + V_PK_MAX_F16 = 18, + V_MAD_MIX_F32 = 32, + V_MAD_MIXLO_F16 = 33, + V_MAD_MIXHI_F16 = 34, + + OP_RANGE_VOP3P = V_MAD_MIXHI_F16 + 1, +}; + enum class OpcodeVOP1 : u32 { V_NOP = 0, V_MOV_B32 = 1, @@ -1313,6 +1340,7 @@ enum class OpcodeMap : u32 { OP_MAP_MTBUF = OP_MAP_MUBUF + (u32)OpcodeMUBUF::OP_RANGE_MUBUF, OP_MAP_MIMG = OP_MAP_MTBUF + (u32)OpcodeMTBUF::OP_RANGE_MTBUF, OP_MAP_EXP = OP_MAP_MIMG + (u32)OpcodeMIMG::OP_RANGE_MIMG, + OP_MAP_VOP3P = OP_MAP_EXP + (u32)OpcodeEXP::OP_RANGE_EXP, }; enum class Opcode : u32 { @@ -2192,6 +2220,29 @@ enum class Opcode : u32 { IMAGE_SAMPLE_C_CD_CL_O = 111 + (u32)OpcodeMap::OP_MAP_MIMG, // EXP EXP = 0 + (u32)OpcodeMap::OP_MAP_EXP, + // VOP3P + V_PK_MAD_I16 = 0 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_MUL_LO_U16 = 1 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_ADD_I16 = 2 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_SUB_I16 = 3 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_LSHLREV_B16 = 4 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_LSHRREV_B16 = 5 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_ASHRREV_I16 = 6 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_MAX_I16 = 7 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_MIN_I16 = 8 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_MAD_U16 = 9 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_ADD_U16 = 10 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_SUB_U16 = 11 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_MAX_U16 = 12 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_MIN_U16 = 13 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_FMA_F16 = 14 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_ADD_F16 = 15 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_MUL_F16 = 16 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_MIN_F16 = 17 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_PK_MAX_F16 = 18 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_MAD_MIX_F32 = 32 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_MAD_MIXLO_F16 = 33 + (u32)OpcodeMap::OP_MAP_VOP3P, + V_MAD_MIXHI_F16 = 34 + (u32)OpcodeMap::OP_MAP_VOP3P, }; enum class EncodingMask : u32 { @@ -2222,6 +2273,8 @@ enum class InstEncoding : u32 { VOP3 = 0x00000034u << 26, /// bits [31:26] - (1 1 1 1 1 0) EXP = 0x0000003Eu << 26, + /// bits [31:26] - (1 1 0 0 1 1) + VOP3P = 0x00000033u << 26, /// bits [31:26] - (1 1 0 0 1 0) VINTRP = 0x00000032u << 26, /// bits [31:26] - (1 1 0 1 1 0) @@ -2271,6 +2324,7 @@ enum class InstClass : u32 { VectorBitField32, VectorThreadMask, VectorBitField64, + VectorFpArith16, VectorFpArith32, VectorFpRound32, VectorFpField32, @@ -2281,6 +2335,7 @@ enum class InstClass : u32 { VectorFpField64, VectorFpTran64, VectorFpCmp64, + VectorIntArith16, VectorIntArith32, VectorIntArith64, VectorIntCmp32, @@ -2360,8 +2415,10 @@ enum class InstCategory : u32 { enum class ScalarType : u32 { Undefined, Any, + Uint16, Uint32, Uint64, + Sint16, Sint32, Sint64, Float16,