Skip to content

Commit b2f42f5

Browse files
committed
[LV] Add test variant without sdiv by undef and uses.
Add a variant of @PR34687 with a sdiv with non-undef operands and actual uses, to avoid the SDIV and SELECT being folded away trivially.
1 parent 1cbd52f commit b2f42f5

File tree

1 file changed

+96
-14
lines changed

1 file changed

+96
-14
lines changed

llvm/test/Transforms/LoopVectorize/reduction-small-size.ll

Lines changed: 96 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,17 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
1818
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1919
; CHECK: vector.body:
2020
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
21-
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
21+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
2222
; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2323
; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
2424
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT2]]
25-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8>
26-
; CHECK-NEXT: [[TMP5]] = zext <4 x i8> [[TMP4]] to <4 x i32>
25+
; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8>
26+
; CHECK-NEXT: [[TMP4]] = zext <4 x i8> [[TMP3]] to <4 x i32>
2727
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
28-
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
29-
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
28+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
29+
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3030
; CHECK: middle.block:
31-
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8>
31+
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
3232
; CHECK-NEXT: [[TMP7:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP6]])
3333
; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
3434
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
@@ -49,7 +49,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
4949
; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1
5050
; CHECK-NEXT: [[R_NEXT]] = add nuw nsw i32 [[T1]], [[X]]
5151
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
52-
; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
52+
; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
5353
; CHECK: for.end:
5454
; CHECK-NEXT: [[T2:%.*]] = phi i32 [ [[R_NEXT]], [[IF_END]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
5555
; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i8
@@ -80,6 +80,88 @@ for.end:
8080
ret i8 %t3
8181
}
8282

83+
; Variant of @PR34687 in which the conditional sdiv has non-undef operands
; (99 / %x) and its result is actually used: it reaches the reduction add
; through the phi %p in if.end. This keeps the sdiv and the select guarding
; its divisor from being folded away trivially.
;
; The scalar loop accumulates (%r & 255) + %p and truncates the final value to
; i8. The expected output below is a 4-wide vector loop where:
;   - the divisor is select(%c, %x, 1) for every lane,
;   - the phi %p becomes a select between the sdiv result and zero,
;   - the reduction value is truncated to <4 x i8> and zero-extended back in
;     each iteration, then reduced with llvm.vector.reduce.add.v4i8.
define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) {
84+
; CHECK-LABEL: @PR34687_no_undef(
85+
; CHECK-NEXT: entry:
86+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], 4
87+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
88+
; CHECK: vector.ph:
89+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4
90+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
91+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0
92+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
93+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
94+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
95+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
96+
; CHECK: vector.body:
97+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
98+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
99+
; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
100+
; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> <i32 99, i32 99, i32 99, i32 99>, [[TMP0]]
101+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true, i1 true, i1 true>
102+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer
103+
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
104+
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[PREDPHI]]
105+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
106+
; CHECK-NEXT: [[TMP6]] = zext <4 x i8> [[TMP5]] to <4 x i32>
107+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
108+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
109+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
110+
; CHECK: middle.block:
111+
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i8>
112+
; CHECK-NEXT: [[TMP9:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP8]])
113+
; CHECK-NEXT: [[TMP10:%.*]] = zext i8 [[TMP9]] to i32
114+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
115+
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
116+
; CHECK: scalar.ph:
117+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
118+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
119+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
120+
; CHECK: for.body:
121+
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
122+
; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[R_NEXT:%.*]], [[IF_END]] ]
123+
; CHECK-NEXT: br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
124+
; CHECK: if.then:
125+
; CHECK-NEXT: [[T0:%.*]] = sdiv i32 99, [[X]]
126+
; CHECK-NEXT: br label [[IF_END]]
127+
; CHECK: if.end:
128+
; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[FOR_BODY]] ], [ [[T0]], [[IF_THEN]] ]
129+
; CHECK-NEXT: [[T1:%.*]] = and i32 [[R]], 255
130+
; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1
131+
; CHECK-NEXT: [[R_NEXT]] = add nuw nsw i32 [[T1]], [[P]]
132+
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], [[N]]
133+
; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
134+
; CHECK: for.end:
135+
; CHECK-NEXT: [[T2:%.*]] = phi i32 [ [[R_NEXT]], [[IF_END]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
136+
; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i8
137+
; CHECK-NEXT: ret i8 [[T3]]
138+
;
139+
entry:
140+
br label %for.body
141+
142+
for.body:
143+
%i = phi i32 [ 0, %entry ], [ %i.next, %if.end ]
144+
%r = phi i32 [ 0, %entry ], [ %r.next, %if.end ]
145+
br i1 %c, label %if.then, label %if.end
146+
147+
if.then:
148+
%t0 = sdiv i32 99, %x
149+
br label %if.end
150+
151+
if.end:
152+
%p = phi i32 [ 0, %for.body ], [ %t0, %if.then ]
153+
%t1 = and i32 %r, 255
154+
%i.next = add nsw i32 %i, 1
155+
%r.next = add nuw nsw i32 %t1, %p
156+
%cond = icmp eq i32 %i.next, %n
157+
br i1 %cond, label %for.end, label %for.body
158+
159+
for.end:
160+
%t2 = phi i32 [ %r.next, %if.end ]
161+
%t3 = trunc i32 %t2 to i8
162+
ret i8 %t3
163+
}
164+
83165
define i32 @PR35734(i32 %x, i32 %y) {
84166
; CHECK-LABEL: @PR35734(
85167
; CHECK-NEXT: entry:
@@ -96,16 +178,16 @@ define i32 @PR35734(i32 %x, i32 %y) {
96178
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
97179
; CHECK: vector.body:
98180
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
99-
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
181+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
100182
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
101183
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], <i32 -1, i32 -1, i32 -1, i32 -1>
102-
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1>
103-
; CHECK-NEXT: [[TMP7]] = sext <4 x i1> [[TMP6]] to <4 x i32>
184+
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1>
185+
; CHECK-NEXT: [[TMP6]] = sext <4 x i1> [[TMP5]] to <4 x i32>
104186
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
105-
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
106-
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
187+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
188+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
107189
; CHECK: middle.block:
108-
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1>
190+
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i1>
109191
; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> [[TMP8]])
110192
; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i32
111193
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
@@ -121,7 +203,7 @@ define i32 @PR35734(i32 %x, i32 %y) {
121203
; CHECK-NEXT: [[R_NEXT]] = add i32 [[T0]], -1
122204
; CHECK-NEXT: [[I_NEXT]] = add nsw i32 [[I]], 1
123205
; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[I]], 77
124-
; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
206+
; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
125207
; CHECK: for.end:
126208
; CHECK-NEXT: [[T1:%.*]] = phi i32 [ [[R_NEXT]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
127209
; CHECK-NEXT: ret i32 [[T1]]

0 commit comments

Comments
 (0)