Skip to content

Commit 09d38dd

Browse files
committed
AMDGPU: Fix assert when trying to overextend liverange
This was trying to add segments beyond the new use at the "and" instruction (AndIdx), so skip any additional segments that start after it. Previously this would hit the assertion (S < E && "Cannot create empty or backwards segment").
1 parent 7b7ec60 commit 09d38dd

File tree

3 files changed

+280
-1
lines changed

3 files changed

+280
-1
lines changed

llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
226226
auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
227227
assert(DefSegment != SelLI->end() &&
228228
"No live interval segment covering definition?");
229-
for (auto I = DefSegment; I != SelLI->end(); ++I) {
229+
for (auto I = DefSegment; I != SelLI->end() && I->start <= AndIdx; ++I) {
230230
SlotIndex Start = I->start < SelIdx.getRegSlot() ?
231231
SelIdx.getRegSlot() : I->start;
232232
SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?
Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck %s
3+
4+
define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1 %arg1, i1 %arg2, i1 %arg3, i1 %arg4, i1 %arg5) {
5+
; CHECK-LABEL: cannot_create_empty_or_backwards_segment:
6+
; CHECK: ; %bb.0: ; %bb
7+
; CHECK-NEXT: s_mov_b64 s[26:27], s[2:3]
8+
; CHECK-NEXT: s_mov_b64 s[24:25], s[0:1]
9+
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
10+
; CHECK-NEXT: s_add_u32 s24, s24, s7
11+
; CHECK-NEXT: s_addc_u32 s25, s25, 0
12+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
13+
; CHECK-NEXT: s_bitcmp1_b32 s0, 0
14+
; CHECK-NEXT: s_cselect_b64 s[14:15], -1, 0
15+
; CHECK-NEXT: s_bitcmp1_b32 s0, 8
16+
; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0
17+
; CHECK-NEXT: s_bitcmp1_b32 s0, 16
18+
; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
19+
; CHECK-NEXT: s_bitcmp1_b32 s0, 24
20+
; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
21+
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
22+
; CHECK-NEXT: s_xor_b64 s[2:3], s[6:7], -1
23+
; CHECK-NEXT: s_bitcmp1_b32 s1, 0
24+
; CHECK-NEXT: s_cselect_b64 s[10:11], -1, 0
25+
; CHECK-NEXT: s_bitcmp1_b32 s1, 8
26+
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[14:15]
27+
; CHECK-NEXT: s_cselect_b64 s[12:13], -1, 0
28+
; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1
29+
; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3]
30+
; CHECK-NEXT: s_and_b64 s[4:5], exec, s[8:9]
31+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
32+
; CHECK-NEXT: s_branch .LBB0_3
33+
; CHECK-NEXT: .LBB0_1: ; in Loop: Header=BB0_3 Depth=1
34+
; CHECK-NEXT: s_mov_b64 s[18:19], -1
35+
; CHECK-NEXT: s_mov_b64 s[16:17], 0
36+
; CHECK-NEXT: s_mov_b64 s[20:21], -1
37+
; CHECK-NEXT: s_mov_b64 s[22:23], -1
38+
; CHECK-NEXT: .LBB0_2: ; %Flow7
39+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
40+
; CHECK-NEXT: s_and_b64 vcc, exec, s[22:23]
41+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_12
42+
; CHECK-NEXT: .LBB0_3: ; %bb7
43+
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
44+
; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
45+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_1
46+
; CHECK-NEXT: ; %bb.4: ; %bb8
47+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
48+
; CHECK-NEXT: s_mov_b64 vcc, s[2:3]
49+
; CHECK-NEXT: s_cbranch_vccz .LBB0_6
50+
; CHECK-NEXT: ; %bb.5: ; %bb9
51+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
52+
; CHECK-NEXT: s_mov_b64 s[18:19], 0
53+
; CHECK-NEXT: s_mov_b64 s[16:17], -1
54+
; CHECK-NEXT: s_mov_b64 s[22:23], s[8:9]
55+
; CHECK-NEXT: s_cbranch_execz .LBB0_7
56+
; CHECK-NEXT: s_branch .LBB0_8
57+
; CHECK-NEXT: .LBB0_6: ; in Loop: Header=BB0_3 Depth=1
58+
; CHECK-NEXT: s_mov_b64 s[18:19], -1
59+
; CHECK-NEXT: s_mov_b64 s[16:17], 0
60+
; CHECK-NEXT: s_mov_b64 s[22:23], 0
61+
; CHECK-NEXT: .LBB0_7: ; %bb10
62+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
63+
; CHECK-NEXT: s_mov_b64 s[18:19], 0
64+
; CHECK-NEXT: s_mov_b64 s[16:17], -1
65+
; CHECK-NEXT: s_mov_b64 s[22:23], s[12:13]
66+
; CHECK-NEXT: .LBB0_8: ; %Flow9
67+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
68+
; CHECK-NEXT: s_mov_b64 s[20:21], -1
69+
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[22:23]
70+
; CHECK-NEXT: s_mov_b64 s[22:23], -1
71+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_2
72+
; CHECK-NEXT: ; %bb.9: ; %bb13
73+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
74+
; CHECK-NEXT: s_mov_b64 vcc, s[4:5]
75+
; CHECK-NEXT: s_cbranch_vccz .LBB0_11
76+
; CHECK-NEXT: ; %bb.10: ; %bb16
77+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
78+
; CHECK-NEXT: s_mov_b64 s[16:17], 0
79+
; CHECK-NEXT: s_mov_b64 s[20:21], -1
80+
; CHECK-NEXT: s_mov_b64 s[22:23], s[10:11]
81+
; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17]
82+
; CHECK-NEXT: s_branch .LBB0_2
83+
; CHECK-NEXT: .LBB0_11: ; in Loop: Header=BB0_3 Depth=1
84+
; CHECK-NEXT: s_mov_b64 s[22:23], -1
85+
; CHECK-NEXT: s_mov_b64 s[20:21], 0
86+
; CHECK-NEXT: ; implicit-def: $sgpr16_sgpr17
87+
; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17]
88+
; CHECK-NEXT: s_branch .LBB0_2
89+
; CHECK-NEXT: .LBB0_12: ; %loop.exit.guard6
90+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
91+
; CHECK-NEXT: s_xor_b64 s[14:15], s[20:21], -1
92+
; CHECK-NEXT: s_mov_b64 s[20:21], -1
93+
; CHECK-NEXT: s_and_b64 vcc, exec, s[14:15]
94+
; CHECK-NEXT: s_cbranch_vccz .LBB0_16
95+
; CHECK-NEXT: ; %bb.13: ; %bb14
96+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
97+
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[14:15]
98+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_15
99+
; CHECK-NEXT: ; %bb.14: ; %bb15
100+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
101+
; CHECK-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:4
102+
; CHECK-NEXT: buffer_store_dword v1, off, s[24:27], 0
103+
; CHECK-NEXT: .LBB0_15: ; %Flow
104+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
105+
; CHECK-NEXT: s_mov_b64 s[20:21], 0
106+
; CHECK-NEXT: .LBB0_16: ; %Flow13
107+
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
108+
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[20:21]
109+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_3
110+
; CHECK-NEXT: ; %bb.17: ; %loop.exit.guard
111+
; CHECK-NEXT: s_and_b64 vcc, exec, s[18:19]
112+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_22
113+
; CHECK-NEXT: ; %bb.18: ; %loop.exit.guard5
114+
; CHECK-NEXT: s_and_b64 vcc, exec, s[16:17]
115+
; CHECK-NEXT: s_cbranch_vccnz .LBB0_22
116+
; CHECK-NEXT: ; %bb.19: ; %bb17
117+
; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
118+
; CHECK-NEXT: s_cbranch_vccz .LBB0_21
119+
; CHECK-NEXT: ; %bb.20: ; %bb19
120+
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0
121+
; CHECK-NEXT: s_cbranch_vccz .LBB0_22
122+
; CHECK-NEXT: .LBB0_21: ; %bb21
123+
; CHECK-NEXT: s_endpgm
124+
; CHECK-NEXT: .LBB0_22: ; %UnifiedUnreachableBlock
125+
bb:
126+
br label %bb6
127+
128+
bb6: ; preds = %bb15, %bb14, %bb
129+
br label %bb7
130+
131+
bb7: ; preds = %bb16, %bb6
132+
br i1 %arg2, label %bb8, label %bb20
133+
134+
bb8: ; preds = %bb7
135+
br i1 %arg3, label %bb10, label %bb9
136+
137+
bb9: ; preds = %bb8
138+
br i1 %arg1, label %bb13, label %bb12
139+
140+
bb10: ; preds = %bb8
141+
br i1 %arg5, label %bb11, label %bb12
142+
143+
bb11: ; preds = %bb10
144+
br label %bb13
145+
146+
bb12: ; preds = %bb10, %bb9
147+
unreachable
148+
149+
bb13: ; preds = %bb11, %bb9
150+
br i1 %arg1, label %bb16, label %bb14
151+
152+
bb14: ; preds = %bb13
153+
br i1 %arg, label %bb15, label %bb6
154+
155+
bb15: ; preds = %bb14
156+
store double 0.000000e+00, ptr addrspace(5) null, align 2147483648
157+
br label %bb6
158+
159+
bb16: ; preds = %bb13
160+
br i1 %arg4, label %bb17, label %bb7
161+
162+
bb17: ; preds = %bb16
163+
br i1 %arg3, label %bb19, label %bb18
164+
165+
bb18: ; preds = %bb17
166+
ret void
167+
168+
bb19: ; preds = %bb17
169+
br i1 %arg, label %bb20, label %bb21
170+
171+
bb20: ; preds = %bb19, %bb7
172+
unreachable
173+
174+
bb21: ; preds = %bb19
175+
ret void
176+
}

llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,3 +686,106 @@ body: |
686686
bb.3:
687687
688688
...
689+
690+
# This was trying to extend the live range of %0 farther than needed,
691+
# following %1's segment into bb.3
692+
693+
---
694+
name: cannot_create_empty_or_backwards_segment
695+
tracksRegLiveness: true
696+
body: |
697+
; CHECK-LABEL: name: cannot_create_empty_or_backwards_segment
698+
; CHECK: bb.0:
699+
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
700+
; CHECK-NEXT: liveins: $sgpr4_sgpr5
701+
; CHECK-NEXT: {{ $}}
702+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
703+
; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec
704+
; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[COPY]], implicit-def dead $scc
705+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
706+
; CHECK-NEXT: {{ $}}
707+
; CHECK-NEXT: bb.1:
708+
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
709+
; CHECK-NEXT: {{ $}}
710+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
711+
; CHECK-NEXT: {{ $}}
712+
; CHECK-NEXT: bb.2:
713+
; CHECK-NEXT: S_ENDPGM 0
714+
; CHECK-NEXT: {{ $}}
715+
; CHECK-NEXT: bb.3:
716+
; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
717+
bb.0:
718+
liveins: $sgpr4_sgpr5
719+
720+
%0:sreg_64_xexec = COPY $sgpr4_sgpr5
721+
%1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
722+
%2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
723+
$vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
724+
S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
725+
726+
bb.1:
727+
S_CBRANCH_VCCNZ %bb.3, implicit killed undef $vcc
728+
729+
bb.2:
730+
S_ENDPGM 0
731+
732+
bb.3:
733+
S_ENDPGM 0, implicit %1
734+
...
735+
736+
---
737+
name: cannot_create_empty_or_backwards_segment_2
738+
tracksRegLiveness: true
739+
body: |
740+
; CHECK-LABEL: name: cannot_create_empty_or_backwards_segment_2
741+
; CHECK: bb.0:
742+
; CHECK-NEXT: successors: %bb.1(0x80000000)
743+
; CHECK-NEXT: liveins: $sgpr4_sgpr5
744+
; CHECK-NEXT: {{ $}}
745+
; CHECK-NEXT: {{ $}}
746+
; CHECK-NEXT: bb.1:
747+
; CHECK-NEXT: successors: %bb.2(0x80000000)
748+
; CHECK-NEXT: liveins: $sgpr4_sgpr5
749+
; CHECK-NEXT: {{ $}}
750+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
751+
; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec
752+
; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[COPY]], implicit-def dead $scc
753+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
754+
; CHECK-NEXT: {{ $}}
755+
; CHECK-NEXT: bb.2:
756+
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
757+
; CHECK-NEXT: liveins: $sgpr4_sgpr5
758+
; CHECK-NEXT: {{ $}}
759+
; CHECK-NEXT: S_NOP 0, implicit-def dead [[V_CNDMASK_B32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
760+
; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc
761+
; CHECK-NEXT: S_BRANCH %bb.1
762+
; CHECK-NEXT: {{ $}}
763+
; CHECK-NEXT: bb.3:
764+
; CHECK-NEXT: S_ENDPGM 0
765+
; CHECK-NEXT: {{ $}}
766+
; CHECK-NEXT: bb.4:
767+
; CHECK-NEXT: S_ENDPGM 0
768+
bb.0:
769+
liveins: $sgpr4_sgpr5
770+
771+
bb.1:
772+
liveins: $sgpr4_sgpr5
773+
774+
%0:sreg_64_xexec = COPY $sgpr4_sgpr5
775+
%1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
776+
%2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec
777+
$vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
778+
S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
779+
780+
bb.2:
781+
liveins: $sgpr4_sgpr5
782+
S_NOP 0, implicit-def %1, implicit %1
783+
S_CBRANCH_VCCNZ %bb.4, implicit killed undef $vcc
784+
S_BRANCH %bb.1
785+
786+
bb.3:
787+
S_ENDPGM 0
788+
789+
bb.4:
790+
S_ENDPGM 0
791+
...

0 commit comments

Comments
 (0)