diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 3aa70e2ec..611070a86 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -352,10 +352,10 @@ T Translator::GetSrc(const InstOperand& operand) { } } else { if (operand.input_modifier.abs) { - value = ir.IAbs(value); + value = ir.BitwiseAnd(value, ir.Imm32(0x7FFFFFFFu)); } if (operand.input_modifier.neg) { - value = ir.INeg(value); + value = ir.BitwiseXor(value, ir.Imm32(0x80000000u)); } } return value; @@ -453,6 +453,23 @@ T Translator::GetSrc64(const InstOperand& operand) { if (operand.input_modifier.neg) { value = ir.FPNeg(value); } + } else { + // GCN VOP3 abs/neg modifier bits operate on the sign bit (bit 63 for + // 64-bit values). Unpack, modify the high dword's bit 31, repack. + if (operand.input_modifier.abs) { + const auto unpacked = ir.UnpackUint2x32(value); + const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)}; + const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)}; + const auto hi_abs = ir.BitwiseAnd(hi, ir.Imm32(0x7FFFFFFFu)); + value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_abs)); + } + if (operand.input_modifier.neg) { + const auto unpacked = ir.UnpackUint2x32(value); + const auto lo = IR::U32{ir.CompositeExtract(unpacked, 0)}; + const auto hi = IR::U32{ir.CompositeExtract(unpacked, 1)}; + const auto hi_neg = ir.BitwiseXor(hi, ir.Imm32(0x80000000u)); + value = ir.PackUint2x32(ir.CompositeConstruct(lo, hi_neg)); + } } return value; } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 08b0192f5..5ee75e336 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -153,6 +153,7 @@ public: void V_SUB_F32(const GcnInst& inst); void V_SUBREV_F32(const GcnInst& inst); void V_MUL_F32(const GcnInst& inst); + void V_MUL_LEGACY_F32(const GcnInst& inst); void V_MUL_I32_I24(const GcnInst& inst, bool is_signed); void V_MIN_F32(const GcnInst& inst, bool is_legacy = false); void V_MAX_F32(const GcnInst& inst, bool is_legacy = false); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 08a0f6527..23236b702 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -25,7 +25,7 @@ void Translator::EmitVectorAlu(const GcnInst& inst) { case Opcode::V_MAC_LEGACY_F32: return V_MAC_F32(inst); case Opcode::V_MUL_LEGACY_F32: - return V_MUL_F32(inst); + return V_MUL_LEGACY_F32(inst); case Opcode::V_MUL_F32: return V_MUL_F32(inst); case Opcode::V_MUL_I32_I24: @@ -493,6 +493,19 @@ void Translator::V_MUL_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPMul(GetSrc(inst.src[0]), GetSrc(inst.src[1]))); } +void Translator::V_MUL_LEGACY_F32(const GcnInst& inst) { + // GCN V_MUL_LEGACY_F32: if either source is zero, the result is +0.0 + // regardless of the other operand (even if NaN or Inf). + // Standard IEEE multiply would produce NaN for 0 * Inf. + const IR::F32 src0{GetSrc(inst.src[0])}; + const IR::F32 src1{GetSrc(inst.src[1])}; + const IR::F32 zero{ir.Imm32(0.0f)}; + const IR::U1 src0_zero{ir.FPEqual(src0, zero)}; + const IR::U1 src1_zero{ir.FPEqual(src1, zero)}; + const IR::U1 either_zero{ir.LogicalOr(src0_zero, src1_zero)}; + SetDst(inst.dst[0], IR::F32{ir.Select(either_zero, zero, ir.FPMul(src0, src1))}); +} + void Translator::V_MUL_I32_I24(const GcnInst& inst, bool is_signed) { const IR::U32 src0{ ir.BitFieldExtract(GetSrc(inst.src[0]), ir.Imm32(0), ir.Imm32(24), is_signed)};