Skip to content

Commit 78ab771

Browse files
committed
[RISCV][NFC] Add more tests for shuffles with exact vlen, NFC
1 parent 994457f commit 78ab771

File tree

1 file changed

+146
-10
lines changed

1 file changed

+146
-10
lines changed

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll

Lines changed: 146 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
3-
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
44

55
define <4 x i64> @m2_splat_0(<4 x i64> %v1) vscale_range(2,2) {
66
; CHECK-LABEL: m2_splat_0:
@@ -69,16 +69,18 @@ define <4 x i64> @m2_pair_swap_vl4(<4 x i64> %v1) vscale_range(2,2) {
6969
define <8 x i32> @m2_pair_swap_vl8(<8 x i32> %v1) vscale_range(2,2) {
7070
; RV32-LABEL: m2_pair_swap_vl8:
7171
; RV32: # %bb.0:
72+
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
73+
; RV32-NEXT: vmv.v.i v10, 0
7274
; RV32-NEXT: li a0, 32
73-
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
75+
; RV32-NEXT: li a1, 63
76+
; RV32-NEXT: vwsubu.vx v12, v10, a0
77+
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
7478
; RV32-NEXT: vmv.v.x v10, a0
75-
; RV32-NEXT: li a0, 63
76-
; RV32-NEXT: vand.vx v12, v10, a0
77-
; RV32-NEXT: vsll.vv v12, v8, v12
78-
; RV32-NEXT: vrsub.vi v10, v10, 0
79-
; RV32-NEXT: vand.vx v10, v10, a0
80-
; RV32-NEXT: vsrl.vv v8, v8, v10
81-
; RV32-NEXT: vor.vv v8, v12, v8
79+
; RV32-NEXT: vand.vx v12, v12, a1
80+
; RV32-NEXT: vand.vx v10, v10, a1
81+
; RV32-NEXT: vsrl.vv v12, v8, v12
82+
; RV32-NEXT: vsll.vv v8, v8, v10
83+
; RV32-NEXT: vor.vv v8, v8, v12
8284
; RV32-NEXT: ret
8385
;
8486
; RV64-LABEL: m2_pair_swap_vl8:
@@ -176,3 +178,137 @@ define <4 x i64> @m2_splat_into_slide_two_source(<4 x i64> %v1, <4 x i64> %v2) v
176178
%res = shufflevector <4 x i64> %v1, <4 x i64> %v2, <4 x i32> <i32 0, i32 0, i32 5, i32 6>
177179
ret <4 x i64> %res
178180
}
181+
182+
; shuffle1: loads a <3 x i32> from %explicit_0 at i32 index 63 (align 1), takes
; its elements 1 and 2, and blends them into lanes 4 and 6 of an <8 x i32>
; whose lanes 0-3, 5 and 7 come from a constant and are all 0. The result is
; stored at <8 x i32> index 21 from %explicit_1 (align 32).
; vscale_range(2,2) sets both the minimum and maximum vscale to 2.
define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) {
183+
; CHECK-LABEL: shuffle1:
184+
; CHECK: # %bb.0:
185+
; CHECK-NEXT: addi a0, a0, 252
186+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
187+
; CHECK-NEXT: vid.v v8
188+
; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
189+
; CHECK-NEXT: vle32.v v9, (a0)
190+
; CHECK-NEXT: li a0, 175
191+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
192+
; CHECK-NEXT: vsrl.vi v8, v8, 1
193+
; CHECK-NEXT: vmv.s.x v0, a0
194+
; CHECK-NEXT: vadd.vi v8, v8, 1
195+
; CHECK-NEXT: vrgather.vv v11, v9, v8
196+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
197+
; CHECK-NEXT: vmerge.vim v8, v10, 0, v0
198+
; CHECK-NEXT: addi a0, a1, 672
199+
; CHECK-NEXT: vs2r.v v8, (a0)
200+
; CHECK-NEXT: ret
201+
%1 = getelementptr i32, ptr %explicit_0, i64 63
202+
%2 = load <3 x i32>, ptr %1, align 1
203+
; Keep elements 1 and 2 of the loaded vector.
%3 = shufflevector <3 x i32> %2, <3 x i32> undef, <2 x i32> <i32 1, i32 2>
204+
; Widen to 8 lanes; only lanes 0 and 1 are defined.
%4 = shufflevector <2 x i32> %3, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
205+
; Mask indices 8 and 9 select lanes 0 and 1 of %4 (into result lanes 4 and 6);
; every other index selects from the constant first operand.
%5 = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 9, i32 7>
206+
%6 = getelementptr inbounds <8 x i32>, ptr %explicit_1, i64 21
207+
store <8 x i32> %5, ptr %6, align 32
208+
ret void
209+
}
210+
211+
; shuffle2: returns a <16 x float> that is 0.0 in every lane except lane 5,
; which receives element 2 of %a, and lane 6, which receives element 0 of %a.
; vscale_range(2,2) sets both the minimum and maximum vscale to 2.
define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) {
212+
; CHECK-LABEL: shuffle2:
213+
; CHECK: # %bb.0:
214+
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
215+
; CHECK-NEXT: vid.v v9
216+
; CHECK-NEXT: li a0, -97
217+
; CHECK-NEXT: vadd.vv v9, v9, v9
218+
; CHECK-NEXT: vrsub.vi v9, v9, 4
219+
; CHECK-NEXT: vmv.s.x v0, a0
220+
; CHECK-NEXT: vrgather.vv v13, v8, v9
221+
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
222+
; CHECK-NEXT: vmerge.vim v8, v12, 0, v0
223+
; CHECK-NEXT: ret
224+
%b = extractelement <4 x float> %a, i32 2
225+
; Lane 5 is the only undef lane of the constant and is the lane written here.
%c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
226+
%b1 = extractelement <4 x float> %a, i32 0
227+
%c1 = insertelement <16 x float> %c, float %b1, i32 6
228+
ret <16 x float>%c1
229+
}
230+
231+
; extract_any_extend_vector_inreg_v16i64: takes element 15 of %a0, inserts it
; at lane 4 of an all-zero <16 x i64>, and returns the lane of that vector
; selected by the run-time index %a1.
; The riscv32 and riscv64 runs have separate expected output below; the riscv64
; output stores the vector to a stack slot and loads the result back from it.
; vscale_range(2,2) sets both the minimum and maximum vscale to 2.
define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vscale_range(2,2) {
232+
; RV32-LABEL: extract_any_extend_vector_inreg_v16i64:
233+
; RV32: # %bb.0:
234+
; RV32-NEXT: li a1, 16
235+
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
236+
; RV32-NEXT: vmv.v.i v16, 0
237+
; RV32-NEXT: vmv.s.x v0, a1
238+
; RV32-NEXT: li a1, 32
239+
; RV32-NEXT: vrgather.vi v16, v8, 15, v0.t
240+
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
241+
; RV32-NEXT: vslidedown.vx v8, v16, a0
242+
; RV32-NEXT: vmv.x.s a0, v8
243+
; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma
244+
; RV32-NEXT: vsrl.vx v8, v8, a1
245+
; RV32-NEXT: vmv.x.s a1, v8
246+
; RV32-NEXT: ret
247+
;
248+
; RV64-LABEL: extract_any_extend_vector_inreg_v16i64:
249+
; RV64: # %bb.0:
250+
; RV64-NEXT: addi sp, sp, -256
251+
; RV64-NEXT: .cfi_def_cfa_offset 256
252+
; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
253+
; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
254+
; RV64-NEXT: sd s2, 232(sp) # 8-byte Folded Spill
255+
; RV64-NEXT: .cfi_offset ra, -8
256+
; RV64-NEXT: .cfi_offset s0, -16
257+
; RV64-NEXT: .cfi_offset s2, -24
258+
; RV64-NEXT: addi s0, sp, 256
259+
; RV64-NEXT: .cfi_def_cfa s0, 0
260+
; RV64-NEXT: andi sp, sp, -128
261+
; RV64-NEXT: li a1, -17
262+
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
263+
; RV64-NEXT: vmv.s.x v0, a1
264+
; RV64-NEXT: vrgather.vi v16, v8, 15
265+
; RV64-NEXT: vmerge.vim v8, v16, 0, v0
266+
; RV64-NEXT: mv s2, sp
267+
; RV64-NEXT: vs8r.v v8, (s2)
268+
; RV64-NEXT: andi a0, a0, 15
269+
; RV64-NEXT: li a1, 8
270+
; RV64-NEXT: call __muldi3
271+
; RV64-NEXT: add a0, s2, a0
272+
; RV64-NEXT: ld a0, 0(a0)
273+
; RV64-NEXT: addi sp, s0, -256
274+
; RV64-NEXT: .cfi_def_cfa sp, 256
275+
; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
276+
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
277+
; RV64-NEXT: ld s2, 232(sp) # 8-byte Folded Reload
278+
; RV64-NEXT: .cfi_restore ra
279+
; RV64-NEXT: .cfi_restore s0
280+
; RV64-NEXT: .cfi_restore s2
281+
; RV64-NEXT: addi sp, sp, 256
282+
; RV64-NEXT: .cfi_def_cfa_offset 0
283+
; RV64-NEXT: ret
284+
%1 = extractelement <16 x i64> %a0, i32 15
285+
%2 = insertelement <16 x i64> zeroinitializer, i64 %1, i32 4
286+
; The index is a function argument, so the selected lane is not known at compile time.
%3 = extractelement <16 x i64> %2, i32 %a1
287+
ret i64 %3
288+
}
289+
290+
; shuffles_add: builds two two-source shuffles of %0 and %1, one with mask
; <undef, 2, 4, 6> and one with mask <undef, 3, 5, 7>, and returns their
; element-wise fadd. Lane 0 of both shuffle masks is undef.
; vscale_range(2,2) sets both the minimum and maximum vscale to 2.
define <4 x double> @shuffles_add(<4 x double> %0, <4 x double> %1) vscale_range(2,2) {
291+
; CHECK-LABEL: shuffles_add:
292+
; CHECK: # %bb.0:
293+
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
294+
; CHECK-NEXT: vrgather.vi v12, v8, 2
295+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
296+
; CHECK-NEXT: vid.v v14
297+
; CHECK-NEXT: vmv.v.i v0, 12
298+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
299+
; CHECK-NEXT: vrgather.vi v16, v8, 3
300+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
301+
; CHECK-NEXT: vadd.vv v8, v14, v14
302+
; CHECK-NEXT: vadd.vi v9, v8, -4
303+
; CHECK-NEXT: vadd.vi v8, v8, -3
304+
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
305+
; CHECK-NEXT: vrgatherei16.vv v12, v10, v9, v0.t
306+
; CHECK-NEXT: vrgatherei16.vv v16, v10, v8, v0.t
307+
; CHECK-NEXT: vfadd.vv v8, v12, v16
308+
; CHECK-NEXT: ret
309+
; Mask indices 0-3 select from %0 and 4-7 from %1.
%3 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
310+
%4 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
311+
%5 = fadd <4 x double> %3, %4
312+
ret <4 x double> %5
313+
}
314+

0 commit comments

Comments
 (0)