diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index da84e253c..3b1a364f2 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -193,6 +193,7 @@ Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32
                              u32 comp2, u32 comp3);
 Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
 Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
+Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
 Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
 Id EmitBitCastU16F16(EmitContext& ctx, Id value);
 Id EmitBitCastU32F32(EmitContext& ctx, Id value);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
index 1bfe4ea3c..752074aa4 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -14,6 +14,10 @@ Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
     return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value);
 }
 
+Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
+    return ctx.OpSelect(ctx.U64, cond, true_value, false_value);
+}
+
 Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
     return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value);
 }
diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp
index 23236b702..69ae172db 100644
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -1149,52 +1149,120 @@ void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const
 }
 
+// Translates V_CMP_*_U64 by comparing the operands as {low, high} pairs of 32-bit values.
+// Signed compares and EXEC-writing variants are rejected; the result goes to VCC or an SGPR.
 void Translator::V_CMP_U64(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) {
-    const bool is_zero = inst.src[1].field == OperandField::ConstZero;
-    const bool is_neg_one = inst.src[1].field == OperandField::SignedConstIntNeg;
-    ASSERT(is_zero || is_neg_one);
-    if (is_neg_one) {
-        ASSERT_MSG(-s32(inst.src[1].code) + SignedConstIntNegMin - 1 == -1,
-                   "SignedConstIntNeg must be -1");
+    ASSERT(!is_signed);
+
+    // Reads a 64-bit operand as a {low, high} pair of 32-bit values.
+    // NOTE(review): ScalarGPR/VccLo sources are read as raw 32-bit words here, while the
+    // replaced code used GetThreadBitScalarReg/GetVcc; confirm this is intended.
+    auto read_u64 = [&](const InstOperand& operand) -> std::pair<IR::U32, IR::U32> {
+        using enum OperandField;
+
+        switch (operand.field) {
+        case ScalarGPR:
+            return {ir.GetScalarReg(IR::ScalarReg(operand.code)),
+                    ir.GetScalarReg(IR::ScalarReg(operand.code + 1))};
+
+        case VectorGPR:
+            return {ir.GetVectorReg(IR::VectorReg(operand.code)),
+                    ir.GetVectorReg(IR::VectorReg(operand.code + 1))};
+
+        case ConstZero:
+            return {ir.Imm32(0), ir.Imm32(0)};
+
+        case SignedConstIntPos:
+            // Decode like SignedConstIntNeg below: code SignedConstIntPosMin is the value 1.
+            return {ir.Imm32(operand.code - SignedConstIntPosMin + 1), ir.Imm32(0)};
+
+        case SignedConstIntNeg: {
+            // Negative inline constants are sign-extended into the high half.
+            const s32 v = -s32(operand.code) + SignedConstIntNegMin - 1;
+            return {ir.Imm32(v), ir.Imm32(v < 0 ? -1 : 0)};
+        }
+        case VccLo:
+            return {ir.GetVccLo(), ir.GetVccHi()};
+        default:
+            UNREACHABLE_MSG("Unsupported operand field {}", u32(operand.field));
+        }
+    };
+
+    const auto [a_lo, a_hi] = read_u64(inst.src[0]);
+    const auto [b_lo, b_hi] = read_u64(inst.src[1]);
+
+    // Ordered compares: the high halves decide unless equal, then the low halves decide.
+    IR::U1 cmp_result{};
+    switch (op) {
+    case ConditionOp::EQ: {
+        const IR::U1 hi_eq = ir.IEqual(a_hi, b_hi);
+        const IR::U1 lo_eq = ir.IEqual(a_lo, b_lo);
+        cmp_result = ir.LogicalAnd(hi_eq, lo_eq);
+        break;
     }
 
-    const IR::U1 src0 = [&] {
-        switch (inst.src[0].field) {
-        case OperandField::ScalarGPR:
-            return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
-        case OperandField::VccLo:
-            return ir.GetVcc();
-        default:
-            UNREACHABLE_MSG("src0 = {}", u32(inst.src[0].field));
-        }
-    }();
-    const IR::U1 result = [&] {
-        switch (op) {
-        case ConditionOp::EQ:
-            return is_zero ? ir.LogicalNot(src0) : src0;
-        case ConditionOp::LG: // NE
-            return is_zero ? src0 : ir.LogicalNot(src0);
-        case ConditionOp::GT:
-            ASSERT(is_zero);
-            return ir.GroupAny(ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code)));
-        default:
-            UNREACHABLE_MSG("Unsupported V_CMP_U64 condition operation: {}", u32(op));
-        }
-    }();
+    case ConditionOp::LG: { // NE
+        const IR::U1 hi_ne = ir.INotEqual(a_hi, b_hi);
+        const IR::U1 lo_ne = ir.INotEqual(a_lo, b_lo);
+        cmp_result = ir.LogicalOr(hi_ne, lo_ne);
+        break;
+    }
+    case ConditionOp::GT: {
+        const IR::U1 hi_gt = ir.IGreaterThan(a_hi, b_hi, false);
+        const IR::U1 hi_eq = ir.IEqual(a_hi, b_hi);
+        const IR::U1 lo_gt = ir.IGreaterThan(a_lo, b_lo, false);
+        cmp_result = ir.LogicalOr(hi_gt, ir.LogicalAnd(hi_eq, lo_gt));
+        break;
+    }
+
+    case ConditionOp::LT: {
+        const IR::U1 hi_lt = ir.ILessThan(a_hi, b_hi, false);
+        const IR::U1 hi_eq = ir.IEqual(a_hi, b_hi);
+        const IR::U1 lo_lt = ir.ILessThan(a_lo, b_lo, false);
+        cmp_result = ir.LogicalOr(hi_lt, ir.LogicalAnd(hi_eq, lo_lt));
+        break;
+    }
+
+    case ConditionOp::GE: {
+        const IR::U1 hi_gt = ir.IGreaterThan(a_hi, b_hi, false);
+        const IR::U1 hi_eq = ir.IEqual(a_hi, b_hi);
+        const IR::U1 lo_ge = ir.IGreaterThanEqual(a_lo, b_lo, false);
+        cmp_result = ir.LogicalOr(hi_gt, ir.LogicalAnd(hi_eq, lo_ge));
+        break;
+    }
+
+    case ConditionOp::LE: {
+        const IR::U1 hi_lt = ir.ILessThan(a_hi, b_hi, false);
+        const IR::U1 hi_eq = ir.IEqual(a_hi, b_hi);
+        const IR::U1 lo_le = ir.ILessThanEqual(a_lo, b_lo, false);
+        cmp_result = ir.LogicalOr(hi_lt, ir.LogicalAnd(hi_eq, lo_le));
+        break;
+    }
+
+    default:
+        UNREACHABLE_MSG("Unsupported V_CMP_U64 condition {}", u32(op));
+    }
+
+    // Handle flags
 
     if (is_signed) {
         UNREACHABLE_MSG("V_CMP_U64 with signed integers is not supported");
     }
+
     if (set_exec) {
         UNREACHABLE_MSG("Exec setting for V_CMP_U64 is not supported");
     }
 
     switch (inst.dst[1].field) {
     case OperandField::VccLo:
-        return ir.SetVcc(result);
+        ir.SetVcc(cmp_result);
+        break;
+
     case OperandField::ScalarGPR:
-        return ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), result);
+        ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), cmp_result);
+        break;
+
     default:
-        UNREACHABLE();
+        UNREACHABLE_MSG("Invalid V_CMP_U64 destination");
     }
 }
 
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 922f01b1f..29005af54 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -891,6 +891,8 @@ Value IREmitter::Select(const U1& condition, const Value& true_value, const Valu
         return Inst(Opcode::SelectU1, condition, true_value, false_value);
     case Type::U32:
         return Inst(Opcode::SelectU32, condition, true_value, false_value);
+    case Type::U64:
+        return Inst(Opcode::SelectU64, condition, true_value, false_value);
     case Type::F32:
         return Inst(Opcode::SelectF32, condition, true_value, false_value);
     default:
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index ebbc702b7..735a64383 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -183,6 +183,7 @@ OPCODE(CompositeShuffleF32x4, F32x4, F32x
 // Select operations
 OPCODE(SelectU1,                                            U1,             U1,             U1,             U1,                                             )
 OPCODE(SelectU32,                                           U32,            U1,             U32,            U32,                                            )
+OPCODE(SelectU64,                                           U64,            U1,             U64,            U64,                                            )
 OPCODE(SelectF32,                                           F32,            U1,             F32,            F32,                                            )
 
 // Bitwise conversions