Skip to content

Commit d708bfb

Browse files
authored
AMDGPU: Fix si-fix-sgpr-copies asserting on VReg_1 phi (#128903)
1 parent 829e2a5 commit d708bfb

File tree

3 files changed

+162
-0
lines changed

3 files changed

+162
-0
lines changed

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,9 @@ bool SIFixSGPRCopies::run(MachineFunction &MF) {
669669
if (!MO.isReg() || !MO.getReg().isVirtual())
670670
continue;
671671
const TargetRegisterClass *SrcRC = MRI->getRegClass(MO.getReg());
672+
if (SrcRC == &AMDGPU::VReg_1RegClass)
673+
continue;
674+
672675
if (TRI->hasVectorRegisters(SrcRC)) {
673676
const TargetRegisterClass *DestRC =
674677
TRI->getEquivalentSGPRClass(SrcRC);

llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,68 @@ endloop: ; preds = %if1, %Flow2
121121
ret void
122122
}
123123

124+
define amdgpu_ps void @i1_copy_assert(i1 %v4) {
125+
; ISA-LABEL: i1_copy_assert:
126+
; ISA: ; %bb.0: ; %start
127+
; ISA-NEXT: v_and_b32_e32 v0, 1, v0
128+
; ISA-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
129+
; ISA-NEXT: s_mov_b32 s8, 0
130+
; ISA-NEXT: s_mov_b64 s[0:1], 0
131+
; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
132+
; ISA-NEXT: ; implicit-def: $sgpr2_sgpr3
133+
; ISA-NEXT: s_branch .LBB1_3
134+
; ISA-NEXT: .LBB1_1: ; %endif1
135+
; ISA-NEXT: ; in Loop: Header=BB1_3 Depth=1
136+
; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
137+
; ISA-NEXT: s_and_b64 s[8:9], vcc, exec
138+
; ISA-NEXT: s_mov_b64 s[6:7], 0
139+
; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
140+
; ISA-NEXT: .LBB1_2: ; %Flow
141+
; ISA-NEXT: ; in Loop: Header=BB1_3 Depth=1
142+
; ISA-NEXT: s_and_b64 s[8:9], exec, s[4:5]
143+
; ISA-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
144+
; ISA-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
145+
; ISA-NEXT: s_and_b64 s[6:7], s[6:7], exec
146+
; ISA-NEXT: s_mov_b32 s8, 1
147+
; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
148+
; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1]
149+
; ISA-NEXT: s_cbranch_execz .LBB1_5
150+
; ISA-NEXT: .LBB1_3: ; %loop
151+
; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
152+
; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec
153+
; ISA-NEXT: s_cmp_lg_u32 s8, 0
154+
; ISA-NEXT: s_cbranch_scc1 .LBB1_1
155+
; ISA-NEXT: ; %bb.4: ; in Loop: Header=BB1_3 Depth=1
156+
; ISA-NEXT: s_mov_b64 s[6:7], -1
157+
; ISA-NEXT: s_branch .LBB1_2
158+
; ISA-NEXT: .LBB1_5: ; %Flow2
159+
; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
160+
; ISA-NEXT: v_mov_b32_e32 v0, 0
161+
; ISA-NEXT: v_cndmask_b32_e64 v1, 0, 1.0, s[2:3]
162+
; ISA-NEXT: exp mrt0 off, off, off, off
163+
; ISA-NEXT: s_endpgm
164+
start:
165+
br label %loop
166+
167+
loop: ; preds = %Flow, %start
168+
%v1 = phi i32 [ 0, %start ], [ 1, %Flow ]
169+
%v2 = icmp ugt i32 %v1, 0
170+
br i1 %v2, label %endif1, label %Flow
171+
172+
Flow2: ; preds = %Flow
173+
%spec.select = select i1 %i1, float 1.000000e+00, float 0.000000e+00
174+
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %spec.select, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i1 false, i1 false)
175+
ret void
176+
177+
endif1: ; preds = %loop
178+
br label %Flow
179+
180+
Flow: ; preds = %endif1, %loop
181+
%i = phi i1 [ %v4, %endif1 ], [ true, %loop ]
182+
%i1 = phi i1 [ false, %endif1 ], [ true, %loop ]
183+
br i1 %i, label %Flow2, label %loop
184+
}
185+
124186
; Function Attrs: nounwind readnone speculatable willreturn
125187
declare float @llvm.sqrt.f32(float) #0
126188

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=si-fix-sgpr-copies,si-i1-copies -o - %s | FileCheck %s
3+
4+
# Make sure SIFixSGPRCopies does not assert on a phi with vreg_1
5+
# inputs.
6+
7+
---
8+
name: i1_copy_assert
9+
tracksRegLiveness: true
10+
body: |
11+
; CHECK-LABEL: name: i1_copy_assert
12+
; CHECK: bb.0:
13+
; CHECK-NEXT: successors: %bb.1(0x80000000)
14+
; CHECK-NEXT: liveins: $vgpr0
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
17+
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[COPY]], 1, implicit $exec
18+
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
19+
; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
20+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: bb.1:
23+
; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000)
24+
; CHECK-NEXT: {{ $}}
25+
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %14, %bb.4
26+
; CHECK-NEXT: [[PHI1:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.0, %8, %bb.4
27+
; CHECK-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
28+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_MOV_B64_1]]
29+
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
30+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
31+
; CHECK-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[PHI]], $exec, implicit-def $scc
32+
; CHECK-NEXT: S_CMP_LG_U32 [[DEF2]], killed [[S_MOV_B32_]], implicit-def $scc
33+
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit $scc
34+
; CHECK-NEXT: S_BRANCH %bb.4
35+
; CHECK-NEXT: {{ $}}
36+
; CHECK-NEXT: bb.2:
37+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY %8
38+
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
39+
; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, [[V_MOV_B32_e32_]], [[COPY2]], implicit $exec
40+
; CHECK-NEXT: S_ENDPGM 0, implicit killed [[V_CNDMASK_B32_e64_]]
41+
; CHECK-NEXT: {{ $}}
42+
; CHECK-NEXT: bb.3:
43+
; CHECK-NEXT: successors: %bb.4(0x80000000)
44+
; CHECK-NEXT: {{ $}}
45+
; CHECK-NEXT: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 0
46+
; CHECK-NEXT: [[S_ANDN2_B64_:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[S_OR_B64_]], $exec, implicit-def $scc
47+
; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], $exec, implicit-def $scc
48+
; CHECK-NEXT: [[S_OR_B64_1:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_]], [[S_AND_B64_]], implicit-def $scc
49+
; CHECK-NEXT: {{ $}}
50+
; CHECK-NEXT: bb.4:
51+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
52+
; CHECK-NEXT: {{ $}}
53+
; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_64 = PHI [[S_OR_B64_]], %bb.1, [[S_OR_B64_1]], %bb.3
54+
; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_64 = PHI [[COPY1]], %bb.1, [[S_MOV_B64_2]], %bb.3
55+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_64 = COPY [[PHI2]]
56+
; CHECK-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
57+
; CHECK-NEXT: [[S_ANDN2_B64_1:%[0-9]+]]:sreg_64 = S_ANDN2_B64 [[PHI1]], $exec, implicit-def $scc
58+
; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[PHI3]], $exec, implicit-def $scc
59+
; CHECK-NEXT: [[S_OR_B64_2:%[0-9]+]]:sreg_64 = S_OR_B64 [[S_ANDN2_B64_1]], [[S_AND_B64_1]], implicit-def $scc
60+
; CHECK-NEXT: SI_LOOP [[DEF3]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
61+
; CHECK-NEXT: S_BRANCH %bb.2
62+
bb.0:
63+
liveins: $vgpr0
64+
65+
%0:vgpr_32 = COPY $vgpr0
66+
%1:sreg_64 = V_CMP_EQ_U32_e64 killed %0, 1, implicit $exec
67+
%2:sreg_64 = S_MOV_B64 0
68+
%3:vreg_1 = COPY %1
69+
70+
bb.1:
71+
%4:sreg_64 = S_MOV_B64 -1
72+
%5:vreg_1 = COPY %4
73+
%6:sreg_32 = S_MOV_B32 0
74+
%7:sreg_32 = IMPLICIT_DEF
75+
S_CMP_LG_U32 %7, killed %6, implicit-def $scc
76+
S_CBRANCH_SCC1 %bb.3, implicit $scc
77+
S_BRANCH %bb.4
78+
79+
bb.2:
80+
%8:vreg_1 = PHI %9, %bb.4
81+
%10:sreg_64_xexec = COPY %8
82+
%11:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
83+
%12:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %11, %10, implicit $exec
84+
S_ENDPGM 0, implicit killed %12
85+
86+
bb.3:
87+
%13:sreg_64 = S_MOV_B64 0
88+
89+
bb.4:
90+
%14:vreg_1 = PHI %5, %bb.1, %3, %bb.3
91+
%9:sreg_64 = PHI %5, %bb.1, %13, %bb.3
92+
%15:sreg_64 = COPY %14
93+
%16:sreg_64 = IMPLICIT_DEF
94+
SI_LOOP %16, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
95+
S_BRANCH %bb.2
96+
97+
...

0 commit comments

Comments
 (0)