Skip to content

Commit b9cb338

Browse files
committed
Precommit tests
1 parent 8fddef8 commit b9cb338

File tree

2 files changed

+205
-1
lines changed

2 files changed

+205
-1
lines changed

llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll

Lines changed: 111 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
2+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
33

44

55
; Make sure we don't emit a pair of shifts for the zext in the preheader. We
@@ -127,3 +127,113 @@ for.body: ; preds = %for.body, %for.body
127127
%niter.ncmp.1 = icmp eq i64 %niter.next.1, %unroll_iter
128128
br i1 %niter.ncmp.1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body
129129
}
130+
131+
; Any-of reduction in an EVL-controlled loop. The <vscale x 4 x i1> phi starts
; at zero, is OR-ed with (x != 0) for every chunk read by vp.load, and is fed
; back through vp.merge. The exit block collapses the accumulated mask with
; vector.reduce.or. The assertions below are the generated RISC-V assembly.
define i1 @widen_anyof_rdx(ptr %p, i64 %n) {
132+
; CHECK-LABEL: widen_anyof_rdx:
133+
; CHECK: # %bb.0: # %entry
134+
; CHECK-NEXT: li a2, 0
135+
; CHECK-NEXT: vsetvli a3, zero, e64, m4, ta, ma
136+
; CHECK-NEXT: vmclr.m v12
137+
; CHECK-NEXT: vid.v v8
138+
; CHECK-NEXT: .LBB2_1: # %loop
139+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
140+
; CHECK-NEXT: sub a3, a1, a2
141+
; CHECK-NEXT: slli a4, a2, 2
142+
; CHECK-NEXT: vsetvli a3, a3, e8, mf2, ta, ma
143+
; CHECK-NEXT: add a4, a0, a4
144+
; CHECK-NEXT: vle32.v v14, (a4)
145+
; CHECK-NEXT: vsetvli a4, zero, e64, m4, ta, ma
146+
; CHECK-NEXT: vmv.v.x v16, a3
147+
; CHECK-NEXT: vmsleu.vv v13, v16, v8
148+
; CHECK-NEXT: vmsltu.vx v16, v8, a3
149+
; CHECK-NEXT: vmand.mm v13, v12, v13
150+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
151+
; CHECK-NEXT: vmsne.vi v17, v14, 0
152+
; CHECK-NEXT: vmor.mm v12, v12, v17
153+
; CHECK-NEXT: vmand.mm v12, v12, v16
154+
; CHECK-NEXT: add a2, a2, a3
155+
; CHECK-NEXT: vmor.mm v12, v12, v13
156+
; CHECK-NEXT: blt a2, a1, .LBB2_1
157+
; CHECK-NEXT: # %bb.2: # %exit
158+
; CHECK-NEXT: vcpop.m a0, v12
159+
; CHECK-NEXT: snez a0, a0
160+
; CHECK-NEXT: ret
161+
entry:
162+
br label %loop
163+
loop:
164+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
165+
%phi = phi <vscale x 4 x i1> [ zeroinitializer, %entry ], [ %rec, %loop ]
166+
%avl = sub i64 %n, %iv
167+
%evl = call i32 @llvm.experimental.get.vector.length(i64 %avl, i32 4, i1 true)
168+
169+
%gep = getelementptr i32, ptr %p, i64 %iv
170+
%x = call <vscale x 4 x i32> @llvm.vp.load(ptr %gep, <vscale x 4 x i1> splat (i1 true), i32 %evl)
171+
%cmp = icmp ne <vscale x 4 x i32> %x, zeroinitializer
172+
%or = or <vscale x 4 x i1> %phi, %cmp
173+
; With an all-true mask, lanes below %evl take %or and the rest keep %phi.
%rec = call <vscale x 4 x i1> @llvm.vp.merge(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> %or, <vscale x 4 x i1> %phi, i32 %evl)
174+
175+
; Advance the induction variable by the number of elements just processed.
%evl.zext = zext i32 %evl to i64
176+
%iv.next = add i64 %iv, %evl.zext
177+
%done = icmp sge i64 %iv.next, %n
178+
br i1 %done, label %exit, label %loop
179+
exit:
180+
%res = call i1 @llvm.vector.reduce.or(<vscale x 4 x i1> %rec)
181+
ret i1 %res
182+
}
183+
184+
185+
; Same any-of reduction loop as @widen_anyof_rdx, except that the merged mask
; %rec is also stored to memory inside the loop, so the recurrence has a user
; other than the phi and the final vector.reduce.or. The assertions below are
; the generated RISC-V assembly.
define i1 @widen_anyof_rdx_use_in_loop(ptr %p, i64 %n) {
186+
; CHECK-LABEL: widen_anyof_rdx_use_in_loop:
187+
; CHECK: # %bb.0: # %entry
188+
; CHECK-NEXT: li a2, 0
189+
; CHECK-NEXT: vsetvli a3, zero, e64, m4, ta, ma
190+
; CHECK-NEXT: vmclr.m v12
191+
; CHECK-NEXT: vid.v v8
192+
; CHECK-NEXT: .LBB3_1: # %loop
193+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
194+
; CHECK-NEXT: sub a3, a1, a2
195+
; CHECK-NEXT: slli a4, a2, 2
196+
; CHECK-NEXT: vsetvli a3, a3, e8, mf2, ta, ma
197+
; CHECK-NEXT: add a4, a0, a4
198+
; CHECK-NEXT: vle32.v v14, (a4)
199+
; CHECK-NEXT: vsetvli a5, zero, e64, m4, ta, ma
200+
; CHECK-NEXT: vmv.v.x v16, a3
201+
; CHECK-NEXT: vmsleu.vv v13, v16, v8
202+
; CHECK-NEXT: vmsltu.vx v16, v8, a3
203+
; CHECK-NEXT: vmand.mm v13, v12, v13
204+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
205+
; CHECK-NEXT: vmsne.vi v17, v14, 0
206+
; CHECK-NEXT: vmor.mm v12, v12, v17
207+
; CHECK-NEXT: vmand.mm v12, v12, v16
208+
; CHECK-NEXT: vmor.mm v12, v12, v13
209+
; CHECK-NEXT: add a2, a2, a3
210+
; CHECK-NEXT: vsm.v v12, (a4)
211+
; CHECK-NEXT: blt a2, a1, .LBB3_1
212+
; CHECK-NEXT: # %bb.2: # %exit
213+
; CHECK-NEXT: vcpop.m a0, v12
214+
; CHECK-NEXT: snez a0, a0
215+
; CHECK-NEXT: ret
216+
entry:
217+
br label %loop
218+
loop:
219+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
220+
%phi = phi <vscale x 4 x i1> [ zeroinitializer, %entry ], [ %rec, %loop ]
221+
%avl = sub i64 %n, %iv
222+
%evl = call i32 @llvm.experimental.get.vector.length(i64 %avl, i32 4, i1 true)
223+
224+
%gep = getelementptr i32, ptr %p, i64 %iv
225+
%x = call <vscale x 4 x i32> @llvm.vp.load(ptr %gep, <vscale x 4 x i1> splat (i1 true), i32 %evl)
226+
%cmp = icmp ne <vscale x 4 x i32> %x, zeroinitializer
227+
%or = or <vscale x 4 x i1> %phi, %cmp
228+
; With an all-true mask, lanes below %evl take %or and the rest keep %phi.
%rec = call <vscale x 4 x i1> @llvm.vp.merge(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> %or, <vscale x 4 x i1> %phi, i32 %evl)
229+
230+
; Extra in-loop use of the recurrence: the mask itself is written to memory.
store <vscale x 4 x i1> %rec, ptr %gep
231+
232+
; Advance the induction variable by the number of elements just processed.
%evl.zext = zext i32 %evl to i64
233+
%iv.next = add i64 %iv, %evl.zext
234+
%done = icmp sge i64 %iv.next, %n
235+
br i1 %done, label %exit, label %loop
236+
exit:
237+
%res = call i1 @llvm.vector.reduce.or(<vscale x 4 x i1> %rec)
238+
ret i1 %res
239+
}

llvm/test/CodeGen/RISCV/riscv-codegenprepare.ll

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,97 @@ define i64 @bug(i32 %x) {
103103
%b = and i64 %a, 4294967295
104104
ret i64 %b
105105
}
106+
107+
; Any-of reduction in an EVL-controlled loop. The <vscale x 4 x i1> phi starts
; at zero, is OR-ed with (x != 0) for every chunk read by vp.load, and is fed
; back through vp.merge. The exit block collapses the accumulated mask with
; vector.reduce.or. The assertions below expect the same instruction sequence
; as the input IR, with the intrinsic names carrying their type suffixes.
define i1 @widen_anyof_rdx(ptr %p, i64 %n) {
108+
; CHECK-LABEL: @widen_anyof_rdx(
109+
; CHECK-NEXT: entry:
110+
; CHECK-NEXT: br label [[LOOP:%.*]]
111+
; CHECK: loop:
112+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
113+
; CHECK-NEXT: [[PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[ENTRY]] ], [ [[REC:%.*]], [[LOOP]] ]
114+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[N:%.*]], [[IV]]
115+
; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
116+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IV]]
117+
; CHECK-NEXT: [[X:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr [[GEP]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
118+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <vscale x 4 x i32> [[X]], zeroinitializer
119+
; CHECK-NEXT: [[OR:%.*]] = or <vscale x 4 x i1> [[PHI]], [[CMP]]
120+
; CHECK-NEXT: [[REC]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[OR]], <vscale x 4 x i1> [[PHI]], i32 [[EVL]])
121+
; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
122+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[EVL_ZEXT]]
123+
; CHECK-NEXT: [[DONE:%.*]] = icmp sge i64 [[IV_NEXT]], [[N]]
124+
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
125+
; CHECK: exit:
126+
; CHECK-NEXT: [[RES:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[REC]])
127+
; CHECK-NEXT: ret i1 [[RES]]
128+
;
129+
entry:
130+
br label %loop
131+
loop:
132+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
133+
%phi = phi <vscale x 4 x i1> [ zeroinitializer, %entry ], [ %rec, %loop ]
134+
%avl = sub i64 %n, %iv
135+
%evl = call i32 @llvm.experimental.get.vector.length(i64 %avl, i32 4, i1 true)
136+
137+
%gep = getelementptr i32, ptr %p, i64 %iv
138+
%x = call <vscale x 4 x i32> @llvm.vp.load(ptr %gep, <vscale x 4 x i1> splat (i1 true), i32 %evl)
139+
%cmp = icmp ne <vscale x 4 x i32> %x, zeroinitializer
140+
%or = or <vscale x 4 x i1> %phi, %cmp
141+
; With an all-true mask, lanes below %evl take %or and the rest keep %phi.
%rec = call <vscale x 4 x i1> @llvm.vp.merge(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> %or, <vscale x 4 x i1> %phi, i32 %evl)
142+
143+
; Advance the induction variable by the number of elements just processed.
%evl.zext = zext i32 %evl to i64
144+
%iv.next = add i64 %iv, %evl.zext
145+
%done = icmp sge i64 %iv.next, %n
146+
br i1 %done, label %exit, label %loop
147+
exit:
148+
%res = call i1 @llvm.vector.reduce.or(<vscale x 4 x i1> %rec)
149+
ret i1 %res
150+
}
151+
152+
153+
; Same any-of reduction loop as @widen_anyof_rdx, except that the merged mask
; %rec is also stored to memory inside the loop, so the recurrence has a user
; other than the phi and the final vector.reduce.or. The assertions below
; expect the same instruction sequence as the input IR, with the intrinsic
; names carrying their type suffixes and the store given an explicit alignment.
define i1 @widen_anyof_rdx_use_in_loop(ptr %p, i64 %n) {
154+
; CHECK-LABEL: @widen_anyof_rdx_use_in_loop(
155+
; CHECK-NEXT: entry:
156+
; CHECK-NEXT: br label [[LOOP:%.*]]
157+
; CHECK: loop:
158+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
159+
; CHECK-NEXT: [[PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[ENTRY]] ], [ [[REC:%.*]], [[LOOP]] ]
160+
; CHECK-NEXT: [[AVL:%.*]] = sub i64 [[N:%.*]], [[IV]]
161+
; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
162+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[IV]]
163+
; CHECK-NEXT: [[X:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr [[GEP]], <vscale x 4 x i1> splat (i1 true), i32 [[EVL]])
164+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne <vscale x 4 x i32> [[X]], zeroinitializer
165+
; CHECK-NEXT: [[OR:%.*]] = or <vscale x 4 x i1> [[PHI]], [[CMP]]
166+
; CHECK-NEXT: [[REC]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[OR]], <vscale x 4 x i1> [[PHI]], i32 [[EVL]])
167+
; CHECK-NEXT: store <vscale x 4 x i1> [[REC]], ptr [[GEP]], align 1
168+
; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
169+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[EVL_ZEXT]]
170+
; CHECK-NEXT: [[DONE:%.*]] = icmp sge i64 [[IV_NEXT]], [[N]]
171+
; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
172+
; CHECK: exit:
173+
; CHECK-NEXT: [[RES:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[REC]])
174+
; CHECK-NEXT: ret i1 [[RES]]
175+
;
176+
entry:
177+
br label %loop
178+
loop:
179+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
180+
%phi = phi <vscale x 4 x i1> [ zeroinitializer, %entry ], [ %rec, %loop ]
181+
%avl = sub i64 %n, %iv
182+
%evl = call i32 @llvm.experimental.get.vector.length(i64 %avl, i32 4, i1 true)
183+
184+
%gep = getelementptr i32, ptr %p, i64 %iv
185+
%x = call <vscale x 4 x i32> @llvm.vp.load(ptr %gep, <vscale x 4 x i1> splat (i1 true), i32 %evl)
186+
%cmp = icmp ne <vscale x 4 x i32> %x, zeroinitializer
187+
%or = or <vscale x 4 x i1> %phi, %cmp
188+
; With an all-true mask, lanes below %evl take %or and the rest keep %phi.
%rec = call <vscale x 4 x i1> @llvm.vp.merge(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> %or, <vscale x 4 x i1> %phi, i32 %evl)
189+
190+
; Extra in-loop use of the recurrence: the mask itself is written to memory.
store <vscale x 4 x i1> %rec, ptr %gep
191+
192+
; Advance the induction variable by the number of elements just processed.
%evl.zext = zext i32 %evl to i64
193+
%iv.next = add i64 %iv, %evl.zext
194+
%done = icmp sge i64 %iv.next, %n
195+
br i1 %done, label %exit, label %loop
196+
exit:
197+
%res = call i1 @llvm.vector.reduce.or(<vscale x 4 x i1> %rec)
198+
ret i1 %res
199+
}

0 commit comments

Comments
 (0)