Skip to content

[AMDGPU] Every convergent operation needs post-isel processing #80102

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ class AMDGPUInst <dag outs, dag ins, string asm = "",

let TSFlags{63} = isRegisterLoad;
let TSFlags{62} = isRegisterStore;

assert !if(!eq(isConvergent, 1), !eq(hasPostISelHook, 1), true),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- assert !if(!eq(isConvergent, 1), !eq(hasPostISelHook, 1), true),
+ assert !if(isConvergent, hasPostISelHook, true),

!strconcat(NAME, ": Every convergent operation needs post-isel processing.");
}

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/DSInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag,
let mayLoad = 0;
let mayStore = 0;
let isConvergent = 1;
let hasPostISelHook = 1;

let has_data1 = 0;
let has_gds = 0;
Expand Down Expand Up @@ -582,7 +583,7 @@ defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<"ds_wrxchg_rtn_b64", VReg_64>;
defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;

let isConvergent = 1, usesCustomInserter = 1 in {
let isConvergent = 1, hasPostISelHook = 1, usesCustomInserter = 1 in {
def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init"> {
let mayLoad = 0;
}
Expand Down Expand Up @@ -627,7 +628,7 @@ def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
} // End SubtargetPredicate = HasDsSrc2Insts

let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1, hasPostISelHook = 1 in {
def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
}

Expand Down Expand Up @@ -677,7 +678,7 @@ let SubtargetPredicate = isGFX7Plus in {
defm DS_WRAP_RTN_B32 : DS_1A2D_RET_mc<"ds_wrap_rtn_b32", VGPR_32>;
defm DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET_mc<"ds_condxchg32_rtn_b64", VReg_64>;

let isConvergent = 1, usesCustomInserter = 1 in {
let isConvergent = 1, hasPostISelHook = 1, usesCustomInserter = 1 in {
def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;
}

Expand Down
17 changes: 14 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def WWM_COPY : SPseudoInstSI <
let hasSideEffects = 0;
let isAsCheapAsAMove = 1;
let isConvergent = 1;
let hasPostISelHook = 1;
}

def ENTER_STRICT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> {
Expand Down Expand Up @@ -246,7 +247,7 @@ def FPTRUNC_DOWNWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),

// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
let Defs = [SCC], isConvergent = 1 in {
let Defs = [SCC], isConvergent = 1, hasPostISelHook = 1 in {
def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VSrc_b32: $src, VSrc_b32:$inactive),
[(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
Expand Down Expand Up @@ -369,6 +370,7 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
let mayLoad = 0;
let mayStore = 0;
let isConvergent = 1;
let hasPostISelHook = 1;
let FixedSize = 1;
let Size = 0;
let isMeta = 1;
Expand All @@ -382,6 +384,7 @@ def SCHED_BARRIER : SPseudoInstSI<(outs), (ins i32imm:$mask),
let mayLoad = 0;
let mayStore = 0;
let isConvergent = 1;
let hasPostISelHook = 1;
let FixedSize = 1;
let Size = 0;
let isMeta = 1;
Expand All @@ -397,6 +400,7 @@ def SCHED_GROUP_BARRIER : SPseudoInstSI<
let mayLoad = 0;
let mayStore = 0;
let isConvergent = 1;
let hasPostISelHook = 1;
let FixedSize = 1;
let Size = 0;
let isMeta = 1;
Expand All @@ -410,6 +414,7 @@ def IGLP_OPT : SPseudoInstSI<(outs), (ins i32imm:$mask),
let mayLoad = 0;
let mayStore = 0;
let isConvergent = 1;
let hasPostISelHook = 1;
let FixedSize = 1;
let Size = 0;
let isMeta = 1;
Expand Down Expand Up @@ -509,6 +514,7 @@ multiclass PseudoInstKill <dag ins> {
let Defs = [EXEC,SCC] in
def _PSEUDO : PseudoInstSI <(outs), ins> {
let isConvergent = 1;
let hasPostISelHook = 1;
let usesCustomInserter = 1;
}

Expand Down Expand Up @@ -626,6 +632,7 @@ def SI_CALL_ISEL : SPseudoInstSI <
let usesCustomInserter = 1;
// TODO: Should really base this on the call target
let isConvergent = 1;
let hasPostISelHook = 1;
}

def : GCNPat<
Expand All @@ -644,6 +651,7 @@ def SI_CALL : SPseudoInstSI <
let SchedRW = [WriteBranch];
// TODO: Should really base this on the call target
let isConvergent = 1;
let hasPostISelHook = 1;
}

class SI_TCRETURN_Pseudo<RegisterClass rc, SDNode sd> : SPseudoInstSI <(outs),
Expand All @@ -659,6 +667,7 @@ class SI_TCRETURN_Pseudo<RegisterClass rc, SDNode sd> : SPseudoInstSI <(outs),
let SchedRW = [WriteBranch];
// TODO: Should really base this on the call target
let isConvergent = 1;
let hasPostISelHook = 1;
}

// Tail call handling pseudo
Expand Down Expand Up @@ -693,6 +702,7 @@ class SI_CS_CHAIN_TC<
let UseNamedOperandTable = 1;
let SchedRW = [WriteBranch];
let isConvergent = 1;
let hasPostISelHook = 1;

let WaveSizePredicate = wavesizepred;
}
Expand Down Expand Up @@ -931,7 +941,7 @@ defm SI_SPILL_S384 : SI_SPILL_SGPR <SReg_384>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;

let SGPRSpill = 1, VALU = 1, isConvergent = 1 in {
let SGPRSpill = 1, VALU = 1, isConvergent = 1, hasPostISelHook = 1 in {
def SI_SPILL_S32_TO_VGPR : PseudoInstSI <(outs VGPR_32:$vdst),
(ins SReg_32:$src0, i32imm:$src1, VGPR_32:$vdst_in)> {
let Size = 4;
Expand Down Expand Up @@ -1031,7 +1041,7 @@ defm SI_SPILL_AV384 : SI_SPILL_VGPR <AV_384, 1>;
defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;
defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;

let isConvergent = 1 in {
let isConvergent = 1, hasPostISelHook = 1 in {
defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR <VGPR_32>;
defm SI_SPILL_WWM_AV32 : SI_SPILL_VGPR <AV_32, 1>;
}
Expand Down Expand Up @@ -3974,6 +3984,7 @@ def G_SI_CALL : AMDGPUGenericInstruction {
let SchedRW = [WriteBranch];
// TODO: Should really base this on the call target
let isConvergent = 1;
let hasPostISelHook = 1;
}

def G_FPTRUNC_ROUND_UPWARD : AMDGPUGenericInstruction {
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -448,77 +448,89 @@ def S_BARRIER_SIGNAL_M0 : SOP1_Pseudo <"s_barrier_signal m0", (outs), (ins),
"", [(int_amdgcn_s_barrier_signal_var M0)]>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
let hasPostISelHook = 1;
}

// SOP1 pseudo printed as "s_barrier_signal_isfirst m0". It has no explicit
// outs/ins; it is selected from int_amdgcn_s_barrier_signal_isfirst_var applied
// to M0, and the selected result is SCC.
def S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_Pseudo <"s_barrier_signal_isfirst m0", (outs), (ins),
"", [(set SCC, (int_amdgcn_s_barrier_signal_isfirst_var M0))]>{
// SCC is written implicitly (it is the value produced by the pattern above).
let Defs = [SCC];
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}

// SOP1 pseudo printed as "s_barrier_init m0". It has no explicit outs/ins and
// an empty selection pattern list, so it is not matched from a DAG pattern
// here.
def S_BARRIER_INIT_M0 : SOP1_Pseudo <"s_barrier_init m0", (outs), (ins),
"", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}

// SOP1 pseudo printed as "s_barrier_init $src0", where $src0 is an explicit
// SplitBarrier operand. The selection pattern list is empty, so it is not
// matched from a DAG pattern here.
def S_BARRIER_INIT_IMM : SOP1_Pseudo <"s_barrier_init", (outs),
(ins SplitBarrier:$src0), "$src0", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}

// SOP1 pseudo printed as "s_barrier_join m0". It has no explicit outs/ins and
// an empty selection pattern list.
def S_BARRIER_JOIN_M0 : SOP1_Pseudo <"s_barrier_join m0", (outs), (ins),
"", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}

// SOP1 pseudo printed as "s_wakeup_barrier m0". It has no explicit outs/ins
// and an empty selection pattern list.
def S_WAKEUP_BARRIER_M0 : SOP1_Pseudo <"s_wakeup_barrier m0", (outs), (ins),
"", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}
} // End Uses = [M0]

// SOP1 pseudo printed as "s_barrier_signal $src0", where $src0 is an explicit
// SplitBarrier operand. Selected from int_amdgcn_s_barrier_signal with a
// target-immediate (timm) argument.
def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs),
(ins SplitBarrier:$src0), "$src0", [(int_amdgcn_s_barrier_signal timm:$src0)]>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}

// SOP1 pseudo printed as "s_barrier_signal_isfirst $src0", where $src0 is an
// explicit SplitBarrier operand. Selected from
// int_amdgcn_s_barrier_signal_isfirst with a target-immediate (timm) argument;
// the selected result is SCC.
def S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_Pseudo <"s_barrier_signal_isfirst", (outs),
(ins SplitBarrier:$src0), "$src0", [(set SCC, (int_amdgcn_s_barrier_signal_isfirst timm:$src0))]>{
// SCC is written implicitly (it is the value produced by the pattern above).
let Defs = [SCC];
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}

// SOP1 pseudo printed as "s_barrier_join $src0", where $src0 is an explicit
// SplitBarrier operand. The selection pattern list is empty.
def S_BARRIER_JOIN_IMM : SOP1_Pseudo <"s_barrier_join", (outs),
(ins SplitBarrier:$src0), "$src0", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}

// SOP1 pseudo printed as "s_wakeup_barrier $src0", where $src0 is an explicit
// SplitBarrier operand. The selection pattern list is empty.
def S_WAKEUP_BARRIER_IMM : SOP1_Pseudo <"s_wakeup_barrier", (outs),
(ins SplitBarrier:$src0), "$src0", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}
} // End has_sdst = 0

// SOP1 pseudo printed as "s_get_barrier_state $sdst, $src0". Unlike the
// barrier pseudos that have no outs, this one has an explicit SSrc_b32
// destination ($sdst); $src0 is a SplitBarrier operand. The selection pattern
// list is empty.
def S_GET_BARRIER_STATE_IMM : SOP1_Pseudo <"s_get_barrier_state", (outs SSrc_b32:$sdst),
(ins SplitBarrier:$src0), "$sdst, $src0", []>{
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}

// SOP1 pseudo whose mnemonic string is "s_get_barrier_state $sdst, m0": the
// operand text is part of the opName argument and the separate asm-operand
// string is empty. It has an explicit SSrc_b32 destination ($sdst), no
// explicit inputs, and an empty selection pattern list.
def S_GET_BARRIER_STATE_M0 : SOP1_Pseudo <"s_get_barrier_state $sdst, m0", (outs SSrc_b32:$sdst),
(ins), "", []>{
// M0 is read implicitly; it is not listed in (ins).
let Uses = [M0];
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}
} // End hasSideEffects = 1

Expand Down Expand Up @@ -1561,12 +1573,14 @@ def S_BARRIER : SOPP_Pseudo <"s_barrier", (ins), "",
let simm16 = 0;
let fixed_imm = 1;
let isConvergent = 1;
let hasPostISelHook = 1;
}

// SOPP pseudo printed as "s_barrier_wait $simm16", taking one explicit i16
// immediate operand. Selected from int_amdgcn_s_barrier_wait with a
// target-immediate (timm) argument.
def S_BARRIER_WAIT : SOPP_Pseudo <"s_barrier_wait", (ins i16imm:$simm16), "$simm16",
[(int_amdgcn_s_barrier_wait timm:$simm16)]> {
let SchedRW = [WriteBarrier];
let isConvergent = 1;
// Set together with isConvergent: the assert in AMDGPUInst rejects any
// convergent instruction that does not request post-isel processing.
let hasPostISelHook = 1;
}

def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins), "",
Expand All @@ -1575,6 +1589,7 @@ def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins), "",
let simm16 = 0;
let fixed_imm = 1;
let isConvergent = 1;
let hasPostISelHook = 1;
let Defs = [SCC];
}

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ def V_READFIRSTLANE_B32 :
let VALU = 1;
let Uses = [EXEC];
let isConvergent = 1;
let hasPostISelHook = 1;

bits<8> vdst;
bits<9> src0;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag,
} // End isCommutable = 1

// These are special and do not read the exec mask.
let isConvergent = 1, Uses = []<Register> in {
let isConvergent = 1, hasPostISelHook = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
[(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ class MAIInst<string OpName, VOPProfile P, SDPatternOperator node>

multiclass MAIInst<string OpName, string P, SDPatternOperator node,
bit NoDstOverlap = !cast<VOPProfileMAI>("VOPProfileMAI_" # P).NoDstOverlap> {
let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
let isConvergent = 1, hasPostISelHook = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
// FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in {
def _e64 : MAIInst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P),
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/AMDGPU/VOPCInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ multiclass VOPC_Pseudos <string opName,
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let SchedRW = P.Schedule;
let isConvergent = DefExec;
let hasPostISelHook = DefExec;
let isCompare = 1;
let isCommutable = 1;
}
Expand All @@ -304,6 +305,7 @@ multiclass VOPC_Pseudos <string opName,
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = P.Schedule;
let isConvergent = DefExec;
let hasPostISelHook = DefExec;
let isCompare = 1;
}

Expand All @@ -313,6 +315,7 @@ multiclass VOPC_Pseudos <string opName,
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let SchedRW = P.Schedule;
let isConvergent = DefExec;
let hasPostISelHook = DefExec;
let isCompare = 1;
let VOPC = 1;
let Constraints = "";
Expand Down Expand Up @@ -342,6 +345,7 @@ multiclass VOPCX_Pseudos <string opName,
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
let isConvergent = 1;
let hasPostISelHook = 1;
let isCompare = 1;
let isCommutable = 1;
let SubtargetPredicate = HasNoSdstCMPX;
Expand All @@ -365,6 +369,7 @@ multiclass VOPCX_Pseudos <string opName,
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
let isConvergent = 1;
let hasPostISelHook = 1;
let isCompare = 1;
let SubtargetPredicate = HasNoSdstCMPX;
}
Expand All @@ -375,6 +380,7 @@ multiclass VOPCX_Pseudos <string opName,
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
let isConvergent = 1;
let hasPostISelHook = 1;
let isCompare = 1;
let VOPC = 1;
let Constraints = "";
Expand Down Expand Up @@ -846,6 +852,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
!if(DefVcc, [VCC], []));
let SchedRW = p.Schedule;
let isConvergent = DefExec;
let hasPostISelHook = DefExec;
}

def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret>,
Expand All @@ -860,6 +867,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
!if(DefVcc, [VCC], []));
let SchedRW = p.Schedule;
let isConvergent = DefExec;
let hasPostISelHook = DefExec;
}

let SubtargetPredicate = isGFX11Plus in {
Expand All @@ -869,6 +877,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
!if(DefVcc, [VCC], []));
let SchedRW = p.Schedule;
let isConvergent = DefExec;
let hasPostISelHook = DefExec;
let VOPC = 1;
let Constraints = "";
}
Expand All @@ -892,6 +901,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
let isConvergent = 1;
let hasPostISelHook = 1;
let SubtargetPredicate = HasNoSdstCMPX;
}

Expand All @@ -907,6 +917,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
let isConvergent = 1;
let hasPostISelHook = 1;
let SubtargetPredicate = HasNoSdstCMPX;
}

Expand All @@ -916,6 +927,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
let isConvergent = 1;
let hasPostISelHook = 1;
let VOPC = 1;
let Constraints = "";
}
Expand Down
Loading