shader_recompiler: add GetExecLo / GetExecHi and accept EXEC_LO / EXEC_HI as source operands

What this patch does, file by file:

  - frontend/opcodes.h
      Spells out `ExecHi = 127`. The enumerator previously followed
      `ExecLo = 126` implicitly, so its numeric value does not change.
  - frontend/translate/translate.cpp
      Translator::GetSrc gains cases for OperandField::ExecLo and
      OperandField::ExecHi. They have the same shape as the existing VccLo
      case: the U32 value is bit-cast when `is_float` is set and used as-is
      otherwise.
  - ir/opcodes.inc, ir/ir_emitter.h, ir/ir_emitter.cpp
      Declares two new opcodes, GetExecLo and GetExecHi (return type U32, no
      arguments), plus the IREmitter wrappers that emit them.
  - backend/spirv/emit_spirv.cpp, backend/spirv/emit_spirv_instructions.h
      EmitGetExecLo / EmitGetExecHi emit OpGroupNonUniformBallot at Subgroup
      scope with a constant-true predicate and return component 0 / 1 of the
      four-component U32 result.

NOTE(review): this patch was recovered from a copy in which every line break
had been collapsed and angle-bracket template arguments had been stripped.
  - The template arguments <u32>, <IR::F32>, <U32> and <U1> below were
    restored from the surrounding declarations (return types, the `is_float`
    branch). Confirm against the tree.
  - Leading indentation and blank context lines were reconstructed from the
    hunk line counts; spacing inside a line is as found. Context lines may
    therefore differ from the tree in whitespace only (ir/opcodes.inc most
    likely uses column alignment). If a hunk is rejected for that reason,
    retry with `git apply --ignore-space-change` or regenerate the hunk.
  - The position of the blank context line in the ir_emitter.h hunk is an
    assumption. TODO confirm.

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 6a1eb4d88..2a628e1ed 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -663,6 +663,18 @@ void EmitGetExec(EmitContext& ctx) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
 
+Id EmitGetExecLo(EmitContext& ctx) {
+    const Id scope_subgroup = ctx.ConstU32(static_cast<u32>(spv::Scope::Subgroup));
+    const Id ballot = ctx.OpGroupNonUniformBallot(ctx.U32[4], scope_subgroup, ctx.true_value);
+    return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0u);
+}
+
+Id EmitGetExecHi(EmitContext& ctx) {
+    const Id scope_subgroup = ctx.ConstU32(static_cast<u32>(spv::Scope::Subgroup));
+    const Id ballot = ctx.OpGroupNonUniformBallot(ctx.U32[4], scope_subgroup, ctx.true_value);
+    return ctx.OpCompositeExtract(ctx.U32[1], ballot, 1u);
+}
+
 void EmitGetVcc(EmitContext& ctx) {
     UNREACHABLE_MSG("Unreachable instruction");
 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 080f6dd92..78a17719a 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -30,6 +30,8 @@ void EmitPhiMove(EmitContext&);
 void EmitJoin(EmitContext& ctx);
 void EmitGetScc(EmitContext& ctx);
 void EmitGetExec(EmitContext& ctx);
+Id EmitGetExecLo(EmitContext& ctx);
+Id EmitGetExecHi(EmitContext& ctx);
 void EmitGetVcc(EmitContext& ctx);
 void EmitGetSccLo(EmitContext& ctx);
 void EmitGetVccLo(EmitContext& ctx);
diff --git a/src/shader_recompiler/frontend/opcodes.h b/src/shader_recompiler/frontend/opcodes.h
index 64b20e1d9..b35452204 100644
--- a/src/shader_recompiler/frontend/opcodes.h
+++ b/src/shader_recompiler/frontend/opcodes.h
@@ -2565,7 +2565,7 @@ enum class OperandField : u32 {
     VccHi,
     M0 = 124,
     ExecLo = 126,
-    ExecHi,
+    ExecHi = 127,
     ConstZero,
     SignedConstIntPos = 129,
     SignedConstIntNeg = 193,
diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp
index 043c973c8..e53072f54 100644
--- a/src/shader_recompiler/frontend/translate/translate.cpp
+++ b/src/shader_recompiler/frontend/translate/translate.cpp
@@ -341,6 +341,20 @@ T Translator::GetSrc(const InstOperand& operand) {
         UNREACHABLE_MSG("unhandled SDWA");
     case OperandField::Dpp:
         UNREACHABLE_MSG("unhandled DPP");
+    case OperandField::ExecLo:
+        if constexpr (is_float) {
+            value = ir.BitCast<IR::F32>(ir.GetExecLo());
+        } else {
+            value = ir.GetExecLo();
+        }
+        break;
+    case OperandField::ExecHi:
+        if constexpr (is_float) {
+            value = ir.BitCast<IR::F32>(ir.GetExecHi());
+        } else {
+            value = ir.GetExecHi();
+        }
+        break;
     case OperandField::VccLo:
         if constexpr (is_float) {
             value = ir.BitCast<IR::F32>(ir.GetVccLo());
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
index 63732fb96..9bbaa9fba 100644
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -210,6 +210,14 @@ U1 IREmitter::GetExec() {
     return Inst<U1>(Opcode::GetExec);
 }
 
+U32 IREmitter::GetExecLo() {
+    return Inst<U32>(Opcode::GetExecLo);
+}
+
+U32 IREmitter::GetExecHi() {
+    return Inst<U32>(Opcode::GetExecHi);
+}
+
 U1 IREmitter::GetVcc() {
     return Inst<U1>(Opcode::GetVcc);
 }
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
index 17a58bcb5..faf24b5b1 100644
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@@ -67,6 +67,8 @@ public:
 
     [[nodiscard]] U1 GetScc();
     [[nodiscard]] U1 GetExec();
+    [[nodiscard]] U32 GetExecLo();
+    [[nodiscard]] U32 GetExecHi();
     [[nodiscard]] U1 GetVcc();
     [[nodiscard]] U32 GetVccLo();
     [[nodiscard]] U32 GetVccHi();
diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc
index 3893ef6f5..93aa702a5 100644
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@@ -84,6 +84,8 @@ OPCODE(ReadTcsGenericOuputAttribute, F32, U32,
 // Flags
 OPCODE(GetScc, U1, Void, )
 OPCODE(GetExec, U1, Void, )
+OPCODE(GetExecLo, U32, Void, )
+OPCODE(GetExecHi, U32, Void, )
 OPCODE(GetVcc, U1, Void, )
 OPCODE(GetVccLo, U32, Void, )
 OPCODE(GetVccHi, U32, Void, )