Skip to content

Commit 95dd0b0

Browse files
AMDGPU/SILowerI1Copies process phi incomings in specific order (#72375)
When merging lane masks, value from block that is always visited first (PrevReg in buildMergeLaneMasks) needs to exist because we do on-the-fly constant folding. For PrevReg to exist, basic block that should contain PrevReg definition must be processed first. Sort the incomings such that incoming values that dominate other incoming values are processed first. Sorting of phi incomings makes no changes for phis created by SDAG because SDAG adds phi incomings as it selects basic blocks in reversed post order traversal. This change is required by upcoming lane mask merging implementation for GlobalISel that leaves phi incomings as they are in IR.
1 parent 77ecb9a commit 95dd0b0

File tree

2 files changed

+161
-0
lines changed

2 files changed

+161
-0
lines changed

llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,7 @@ bool SILowerI1Copies::lowerPhis() {
559559
if (Vreg1Phis.empty())
560560
return false;
561561

562+
DT->getBase().updateDFSNumbers();
562563
MachineBasicBlock *PrevMBB = nullptr;
563564
for (MachineInstr *MI : Vreg1Phis) {
564565
MachineBasicBlock &MBB = *MI->getParent();
@@ -593,6 +594,15 @@ bool SILowerI1Copies::lowerPhis() {
593594
Incomings.emplace_back(IncomingReg, IncomingMBB, Register{});
594595
}
595596

597+
// Sort the incomings such that incoming values that dominate other incoming
598+
// values are sorted earlier. This allows us to do some amount of on-the-fly
599+
// constant folding.
600+
// Incoming with smaller DFSNumIn goes first, DFSNumIn is 0 for entry block.
601+
llvm::sort(Incomings, [this](Incoming LHS, Incoming RHS) {
602+
return DT->getNode(LHS.Block)->getDFSNumIn() <
603+
DT->getNode(RHS.Block)->getDFSNumIn();
604+
});
605+
596606
#ifndef NDEBUG
597607
PhiRegisters.insert(DstReg);
598608
#endif
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -run-pass=si-i1-copies -o - %s | FileCheck -check-prefixes=GCN %s
3+
4+
# The order in which SILowerI1Copies builds instructions to merge lane masks should
5+
# not depend on order of incoming operands in phi instruction.
6+
# SDAG adds phi incomings as it processes basic blocks in reversed post order
7+
# traversal. Because of that, incomings in phis created by SDAG are sorted,
8+
# compared to how the phi looked in IR, in a way convenient for lowerPhis.
9+
10+
# Here incomings for %20:vreg_1 = PHI %19, %bb.1, %26, %bb.2 are swapped
11+
# to verify that SILowerI1Copies sorts incomings from phi appropriately before
12+
# it starts merging lane masks.
13+
14+
---
15+
name: phi
16+
tracksRegLiveness: true
17+
body: |
18+
; GCN-LABEL: name: phi
19+
; GCN: bb.0:
20+
; GCN-NEXT: successors: %bb.1(0x80000000)
21+
; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr4
22+
; GCN-NEXT: {{ $}}
23+
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
24+
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
25+
; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
26+
; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
27+
; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
28+
; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
29+
; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
30+
; GCN-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
31+
; GCN-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
32+
; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
33+
; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
34+
; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
35+
; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
36+
; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
37+
; GCN-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
38+
; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $exec_lo
39+
; GCN-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
40+
; GCN-NEXT: {{ $}}
41+
; GCN-NEXT: bb.1:
42+
; GCN-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
43+
; GCN-NEXT: {{ $}}
44+
; GCN-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF5]], %bb.0, %20, %bb.3
45+
; GCN-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[COPY6]], %bb.0, %37, %bb.3
46+
; GCN-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_1]], %bb.0, %16, %bb.3
47+
; GCN-NEXT: [[PHI3:%[0-9]+]]:vreg_64 = PHI [[COPY5]], %bb.0, %18, %bb.3
48+
; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[PHI1]]
49+
; GCN-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[PHI]], $exec_lo, implicit-def $scc
50+
; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[PHI1]], $exec_lo, implicit-def $scc
51+
; GCN-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_]], [[S_AND_B32_]], implicit-def $scc
52+
; GCN-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF [[COPY7]], %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
53+
; GCN-NEXT: S_BRANCH %bb.2
54+
; GCN-NEXT: {{ $}}
55+
; GCN-NEXT: bb.2:
56+
; GCN-NEXT: successors: %bb.3(0x80000000)
57+
; GCN-NEXT: {{ $}}
58+
; GCN-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PHI3]], 0, 0, implicit $exec :: (load (s32), addrspace 1)
59+
; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
60+
; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 killed [[GLOBAL_LOAD_DWORD]], killed [[S_MOV_B32_2]], implicit $exec
61+
; GCN-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_]], $exec_lo, implicit-def $scc
62+
; GCN-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_EQ_U32_e64_]], $exec_lo, implicit-def $scc
63+
; GCN-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_1]], [[S_AND_B32_1]], implicit-def $scc
64+
; GCN-NEXT: {{ $}}
65+
; GCN-NEXT: bb.3:
66+
; GCN-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
67+
; GCN-NEXT: {{ $}}
68+
; GCN-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.1, [[S_OR_B32_1]], %bb.2
69+
; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
70+
; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4
71+
; GCN-NEXT: [[V_ADD_U:%[0-9]+]]:vreg_64 = V_ADD_U64_PSEUDO [[PHI3]], killed [[S_MOV_B64_]], implicit-def dead $vcc, implicit $exec
72+
; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1
73+
; GCN-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = nsw S_ADD_I32 [[PHI2]], killed [[S_MOV_B32_3]], implicit-def dead $scc
74+
; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 9
75+
; GCN-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[PHI1]], $exec_lo, implicit-def $scc
76+
; GCN-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[PHI4]], $exec_lo, implicit-def $scc
77+
; GCN-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_2]], [[S_AND_B32_2]], implicit-def $scc
78+
; GCN-NEXT: S_CMP_GT_I32 [[S_ADD_I32_]], killed [[S_MOV_B32_4]], implicit-def $scc
79+
; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
80+
; GCN-NEXT: S_BRANCH %bb.4
81+
; GCN-NEXT: {{ $}}
82+
; GCN-NEXT: bb.4:
83+
; GCN-NEXT: [[S_MOV_B32_5:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
84+
; GCN-NEXT: [[S_MOV_B32_6:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
85+
; GCN-NEXT: [[COPY8:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[PHI1]]
86+
; GCN-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed [[S_MOV_B32_5]]
87+
; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[S_MOV_B32_6]], 0, [[COPY9]], [[COPY8]], implicit $exec
88+
; GCN-NEXT: FLAT_STORE_DWORD [[COPY4]], killed [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
89+
; GCN-NEXT: SI_RETURN
90+
bb.0:
91+
successors: %bb.1(0x80000000)
92+
liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr4
93+
94+
%0:vgpr_32 = COPY $vgpr4
95+
%1:vgpr_32 = COPY $vgpr3
96+
%2:vgpr_32 = COPY $vgpr2
97+
%3:vgpr_32 = COPY $vgpr1
98+
%4:sgpr_32 = IMPLICIT_DEF
99+
%5:sgpr_32 = IMPLICIT_DEF
100+
%6:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1
101+
%7:sgpr_32 = IMPLICIT_DEF
102+
%8:sgpr_32 = IMPLICIT_DEF
103+
%9:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
104+
%10:sreg_32 = S_MOV_B32 -1
105+
%11:sreg_32 = S_MOV_B32 -1
106+
%12:vreg_64 = COPY %6
107+
%13:vreg_64 = COPY %9
108+
%14:vreg_1 = COPY %10, implicit $exec
109+
110+
bb.1:
111+
successors: %bb.2(0x40000000), %bb.3(0x40000000)
112+
113+
%15:sreg_32 = PHI %11, %bb.0, %16, %bb.3
114+
%17:vreg_64 = PHI %13, %bb.0, %18, %bb.3
115+
%19:vreg_1 = PHI %14, %bb.0, %20, %bb.3
116+
%21:sreg_32 = COPY %19
117+
%22:sreg_32 = SI_IF %21, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
118+
S_BRANCH %bb.2
119+
120+
bb.2:
121+
successors: %bb.3(0x80000000)
122+
123+
%23:vgpr_32 = GLOBAL_LOAD_DWORD %17, 0, 0, implicit $exec :: (load (s32), addrspace 1)
124+
%24:sreg_32 = S_MOV_B32 0
125+
%25:sreg_32 = V_CMP_EQ_U32_e64 killed %23, killed %24, implicit $exec
126+
%26:vreg_1 = COPY %25
127+
128+
bb.3:
129+
successors: %bb.4(0x04000000), %bb.1(0x7c000000)
130+
131+
%20:vreg_1 = PHI %26, %bb.2, %19, %bb.1 ;%20:vreg_1 = PHI %19, %bb.1, %26, %bb.2 - this is original phi created by SDAG
132+
SI_END_CF %22, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
133+
%27:sreg_64 = S_MOV_B64 4
134+
%18:vreg_64 = V_ADD_U64_PSEUDO %17, killed %27, implicit-def dead $vcc, implicit $exec
135+
%28:sreg_32 = S_MOV_B32 1
136+
%16:sreg_32 = nsw S_ADD_I32 %15, killed %28, implicit-def dead $scc
137+
%29:sreg_32 = S_MOV_B32 9
138+
S_CMP_GT_I32 %16, killed %29, implicit-def $scc
139+
S_CBRANCH_SCC1 %bb.1, implicit $scc
140+
S_BRANCH %bb.4
141+
142+
bb.4:
143+
%30:vreg_1 = PHI %19, %bb.3
144+
%31:sgpr_32 = S_MOV_B32 1065353216
145+
%32:sgpr_32 = S_MOV_B32 0
146+
%33:sreg_32_xm0_xexec = COPY %30
147+
%34:vgpr_32 = COPY killed %31
148+
%35:vgpr_32 = V_CNDMASK_B32_e64 0, killed %32, 0, %34, %33, implicit $exec
149+
FLAT_STORE_DWORD %12, killed %35, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
150+
SI_RETURN
151+
...

0 commit comments

Comments
 (0)