MIP fixes (#4141)

* int32-modifiers

  GCN VOP3 abs/neg modifier bits always operate on the sign bit (bit 31), regardless of instruction type. For integer operands this means:

      abs = clear bit 31  (x & 0x7FFFFFFF)
      neg = toggle bit 31 (x ^ 0x80000000)

* int64-modifiers

  Previously GetSrc64<IR::U64> completely ignored input modifiers for integer operands. It now unpacks the value into two U32s, modifies bit 31 of the high dword (= bit 63 of the 64-bit value), and repacks.

* V_MUL_LEGACY_F32

  GCN V_MUL_LEGACY_F32: if either source is zero, the result is +0.0 regardless of the other operand (even NaN or Inf). A standard IEEE multiply produces NaN for 0*Inf. The fix adds a zero-check select before the multiply.
2026-04-29 23:41:19 -06:00 · 2026-03-18 09:05:20 +01:00 · 2026-03-18 09:05:20 +01:00 · 2ca342970a
commit 2ca342970a
parent 9a3e7b097c
3 changed files with 34 additions and 3 deletions
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -352,10 +352,10 @@ T Translator::GetSrc(const InstOperand& operand) {
        }
    } else {
        if (operand.input_modifier.abs) {
-            value = ir.IAbs(value);
+            value = ir.BitwiseAnd(value, ir.Imm32(0x7FFFFFFFu));
        }
        if (operand.input_modifier.neg) {
-            value = ir.INeg(value);
+            value = ir.BitwiseXor(value, ir.Imm32(0x80000000u));
        }
    }
    return value;
@@ -453,6 +453,23 @@ T Translator::GetSrc64(const InstOperand& operand) {
        if (operand.input_modifier.neg) {
            value = ir.FPNeg(value);
        }
+    } else {
+        // GCN VOP3 abs/neg modifier bits operate on the sign bit (bit 63 for
+        // 64-bit values). Unpack, modify the high dword's bit 31, repack.
+        if (operand.input_modifier.abs) {
+            const auto unpacked = ir.UnpackUint2x32(value);
+            const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)};
+            const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)};
+            const auto hi_abs = ir.BitwiseAnd(hi, ir.Imm32(0x7FFFFFFFu));
+            value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_abs));
+        }
+        if (operand.input_modifier.neg) {
+            const auto unpacked = ir.UnpackUint2x32(value);
+            const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)};
+            const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)};
+            const auto hi_neg = ir.BitwiseXor(hi, ir.Imm32(0x80000000u));
+            value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_neg));
+        }
    }
    return value;
 }
--- a/src/shader_recompiler/frontend/translate/translate.h
+++ b/src/shader_recompiler/frontend/translate/translate.h
@@ -153,6 +153,7 @@ public:
    void V_SUB_F32(const GcnInst& inst);
    void V_SUBREV_F32(const GcnInst& inst);
    void V_MUL_F32(const GcnInst& inst);
+    void V_MUL_LEGACY_F32(const GcnInst& inst);
    void V_MUL_I32_I24(const GcnInst& inst, bool is_signed);
    void V_MIN_F32(const GcnInst& inst, bool is_legacy = false);
    void V_MAX_F32(const GcnInst& inst, bool is_legacy = false);
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@@ -25,7 +25,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
    case Opcode::V_MAC_LEGACY_F32:
        return V_MAC_F32(inst);
    case Opcode::V_MUL_LEGACY_F32:
-        return V_MUL_F32(inst);
+        return V_MUL_LEGACY_F32(inst);
    case Opcode::V_MUL_F32:
        return V_MUL_F32(inst);
    case Opcode::V_MUL_I32_I24:
@@ -493,6 +493,19 @@ void Translator::V_MUL_F32(const GcnInst& inst) {
    SetDst(inst.dst[0], ir.FPMul(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1])));
 }

+void Translator::V_MUL_LEGACY_F32(const GcnInst& inst) {
+    // GCN V_MUL_LEGACY_F32: if either source compares equal to zero, +0.0 is written
+    // regardless of the other operand (even NaN or Inf); a standard IEEE multiply would
+    // give NaN for 0 * Inf. Otherwise the ordinary FPMul product of both sources is written.
+    const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
+    const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
+    const IR::F32 zero{ir.Imm32(0.0f)}; // comparison operand and the forced +0.0 result
+    const IR::U1 src0_zero{ir.FPEqual(src0, zero)};
+    const IR::U1 src1_zero{ir.FPEqual(src1, zero)};
+    const IR::U1 either_zero{ir.LogicalOr(src0_zero, src1_zero)};
+    SetDst(inst.dst[0], IR::F32{ir.Select(either_zero, zero, ir.FPMul(src0, src1))});
+}
+
 void Translator::V_MUL_I32_I24(const GcnInst& inst, bool is_signed) {
    const IR::U32 src0{
        ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), is_signed)};