Commit 7795282

lukel97 authored and PhilippRados committed
[RISCV] Lower fixed-length strided VP loads and stores for zvfhmin/zvfbfmin (llvm#114750)
Similarly to llvm#114731, these don't actually require any instructions from the extensions. The motivation for this and llvm#114731 is to eventually enable isLegalElementTypeForRVV for f16 with zvfhmin and bf16 with zvfbfmin in order to enable scalable vectorization. Although the scalable codegen support for f16 and bf16 is now complete enough for anything the loop vectorizer may emit, enabling isLegalElementTypeForRVV would make certian hooks like isLegalInterleavedAccessType and isLegalStridedLoadStore return true for f16 and bf16. This means SLP would start emitting these intrinsics, so we need to add fixed-length codegen support.
1 parent 0cf29b7 commit 7795282

File tree

3 files changed: +166 -38 lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 5 additions & 4 deletions
@@ -1347,6 +1347,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
                          Custom);
 
+      setOperationAction({ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+                          ISD::EXPERIMENTAL_VP_STRIDED_STORE},
+                         VT, Custom);
+
       if (VT.getVectorElementType() == MVT::f16 &&
           !Subtarget.hasVInstructionsF16()) {
         setOperationAction(ISD::BITCAST, VT, Custom);
@@ -1411,10 +1415,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(
           {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
 
-      setOperationAction({ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
-                          ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
-                          ISD::VP_SCATTER},
-                         VT, Custom);
+      setOperationAction({ISD::VP_GATHER, ISD::VP_SCATTER}, VT, Custom);
 
       setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
                          ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll

Lines changed: 103 additions & 28 deletions
@@ -1,16 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+optimized-zero-stride-load \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
 ; RUN:   -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+optimized-zero-stride-load \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
 ; RUN:   -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
 ; RUN:   -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN
 
 declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr, i8, <2 x i1>, i32)
 
@@ -278,6 +290,62 @@ define <8 x i64> @strided_vpload_v8i64(ptr %ptr, i32 signext %stride, <8 x i1> %
   ret <8 x i64> %load
 }
 
+declare <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr, i32, <2 x i1>, i32)
+
+define <2 x bfloat> @strided_vpload_v2bf16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
+  ret <2 x bfloat> %load
+}
+
+define <2 x bfloat> @strided_vpload_v2bf16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v2bf16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1
+; CHECK-NEXT:    ret
+  %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
+  ret <2 x bfloat> %load
+}
+
+declare <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr, i32, <4 x i1>, i32)
+
+define <4 x bfloat> @strided_vpload_v4bf16(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
+  ret <4 x bfloat> %load
+}
+
+declare <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr, i32, <8 x i1>, i32)
+
+define <8 x bfloat> @strided_vpload_v8bf16(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %load
+}
+
+define <8 x bfloat> @strided_vpload_v8bf16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpload_v8bf16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %load
+}
+
 declare <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr, i32, <2 x i1>, i32)
 
 define <2 x half> @strided_vpload_v2f16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
@@ -477,10 +545,10 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    vmv1r.v v9, v0
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB40_2
+; CHECK-NEXT:    bltu a2, a4, .LBB45_2
 ; CHECK-NEXT:    # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB40_2:
+; CHECK-NEXT:  .LBB45_2:
 ; CHECK-NEXT:    mul a4, a3, a1
 ; CHECK-NEXT:    add a4, a0, a4
 ; CHECK-NEXT:    addi a5, a2, -16
@@ -505,10 +573,10 @@ define <32 x double> @strided_vpload_v32f64_allones_mask(ptr %ptr, i32 signext %
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB41_2
+; CHECK-NEXT:    bltu a2, a4, .LBB46_2
 ; CHECK-NEXT:    # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB41_2:
+; CHECK-NEXT:  .LBB46_2:
 ; CHECK-NEXT:    mul a4, a3, a1
 ; CHECK-NEXT:    add a4, a0, a4
 ; CHECK-NEXT:    addi a5, a2, -16
@@ -533,21 +601,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV32-NEXT:    li a5, 32
 ; CHECK-RV32-NEXT:    vmv1r.v v8, v0
 ; CHECK-RV32-NEXT:    mv a3, a4
-; CHECK-RV32-NEXT:    bltu a4, a5, .LBB42_2
+; CHECK-RV32-NEXT:    bltu a4, a5, .LBB47_2
 ; CHECK-RV32-NEXT:    # %bb.1:
 ; CHECK-RV32-NEXT:    li a3, 32
-; CHECK-RV32-NEXT:  .LBB42_2:
+; CHECK-RV32-NEXT:  .LBB47_2:
 ; CHECK-RV32-NEXT:    mul a6, a3, a2
 ; CHECK-RV32-NEXT:    addi a5, a4, -32
 ; CHECK-RV32-NEXT:    sltu a7, a4, a5
 ; CHECK-RV32-NEXT:    addi a7, a7, -1
 ; CHECK-RV32-NEXT:    and a7, a7, a5
 ; CHECK-RV32-NEXT:    li a5, 16
 ; CHECK-RV32-NEXT:    add a6, a1, a6
-; CHECK-RV32-NEXT:    bltu a7, a5, .LBB42_4
+; CHECK-RV32-NEXT:    bltu a7, a5, .LBB47_4
 ; CHECK-RV32-NEXT:    # %bb.3:
 ; CHECK-RV32-NEXT:    li a7, 16
-; CHECK-RV32-NEXT:  .LBB42_4:
+; CHECK-RV32-NEXT:  .LBB47_4:
 ; CHECK-RV32-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-RV32-NEXT:    vslidedown.vi v0, v8, 4
 ; CHECK-RV32-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
@@ -556,10 +624,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV32-NEXT:    sltu a3, a3, a6
 ; CHECK-RV32-NEXT:    addi a3, a3, -1
 ; CHECK-RV32-NEXT:    and a3, a3, a6
-; CHECK-RV32-NEXT:    bltu a4, a5, .LBB42_6
+; CHECK-RV32-NEXT:    bltu a4, a5, .LBB47_6
 ; CHECK-RV32-NEXT:    # %bb.5:
 ; CHECK-RV32-NEXT:    li a4, 16
-; CHECK-RV32-NEXT:  .LBB42_6:
+; CHECK-RV32-NEXT:  .LBB47_6:
 ; CHECK-RV32-NEXT:    mul a5, a4, a2
 ; CHECK-RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-RV32-NEXT:    vslidedown.vi v0, v8, 2
@@ -583,21 +651,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV64-NEXT:    li a5, 32
 ; CHECK-RV64-NEXT:    vmv1r.v v8, v0
 ; CHECK-RV64-NEXT:    mv a4, a3
-; CHECK-RV64-NEXT:    bltu a3, a5, .LBB42_2
+; CHECK-RV64-NEXT:    bltu a3, a5, .LBB47_2
 ; CHECK-RV64-NEXT:    # %bb.1:
 ; CHECK-RV64-NEXT:    li a4, 32
-; CHECK-RV64-NEXT:  .LBB42_2:
+; CHECK-RV64-NEXT:  .LBB47_2:
 ; CHECK-RV64-NEXT:    mul a6, a4, a2
 ; CHECK-RV64-NEXT:    addi a5, a3, -32
 ; CHECK-RV64-NEXT:    sltu a7, a3, a5
 ; CHECK-RV64-NEXT:    addi a7, a7, -1
 ; CHECK-RV64-NEXT:    and a7, a7, a5
 ; CHECK-RV64-NEXT:    li a5, 16
 ; CHECK-RV64-NEXT:    add a6, a1, a6
-; CHECK-RV64-NEXT:    bltu a7, a5, .LBB42_4
+; CHECK-RV64-NEXT:    bltu a7, a5, .LBB47_4
 ; CHECK-RV64-NEXT:    # %bb.3:
 ; CHECK-RV64-NEXT:    li a7, 16
-; CHECK-RV64-NEXT:  .LBB42_4:
+; CHECK-RV64-NEXT:  .LBB47_4:
 ; CHECK-RV64-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-RV64-NEXT:    vslidedown.vi v0, v8, 4
 ; CHECK-RV64-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
@@ -606,10 +674,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
 ; CHECK-RV64-NEXT:    sltu a4, a4, a6
 ; CHECK-RV64-NEXT:    addi a4, a4, -1
 ; CHECK-RV64-NEXT:    and a4, a4, a6
-; CHECK-RV64-NEXT:    bltu a3, a5, .LBB42_6
+; CHECK-RV64-NEXT:    bltu a3, a5, .LBB47_6
 ; CHECK-RV64-NEXT:    # %bb.5:
 ; CHECK-RV64-NEXT:    li a3, 16
-; CHECK-RV64-NEXT:  .LBB42_6:
+; CHECK-RV64-NEXT:  .LBB47_6:
 ; CHECK-RV64-NEXT:    mul a5, a3, a2
 ; CHECK-RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-RV64-NEXT:    vslidedown.vi v0, v8, 2
@@ -659,12 +727,19 @@ define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
 ; CHECK-OPT-NEXT:    vlse16.v v8, (a0), zero
 ; CHECK-OPT-NEXT:    ret
 ;
-; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
-; CHECK-NO-OPT:       # %bb.0:
-; CHECK-NO-OPT-NEXT:    flh fa5, 0(a0)
-; CHECK-NO-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
-; CHECK-NO-OPT-NEXT:    vfmv.v.f v8, fa5
-; CHECK-NO-OPT-NEXT:    ret
+; CHECK-NO-OPT-ZVFH-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK-NO-OPT-ZVFH:       # %bb.0:
+; CHECK-NO-OPT-ZVFH-NEXT:    flh fa5, 0(a0)
+; CHECK-NO-OPT-ZVFH-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NO-OPT-ZVFH-NEXT:    vfmv.v.f v8, fa5
+; CHECK-NO-OPT-ZVFH-NEXT:    ret
+;
+; CHECK-NO-OPT-ZVFHMIN-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK-NO-OPT-ZVFHMIN:       # %bb.0:
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    lh a0, 0(a0)
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    vmv.v.x v8, a0
+; CHECK-NO-OPT-ZVFHMIN-NEXT:    ret
   %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
   ret <4 x half> %load
 }

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll

Lines changed: 58 additions & 6 deletions
@@ -1,8 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
 ; RUN:   -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
+; RUN:   -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
 ; RUN:   -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
 
@@ -232,6 +238,52 @@ define void @strided_vpstore_v8i64(<8 x i64> %val, ptr %ptr, i32 signext %stride
   ret void
 }
 
+declare void @llvm.experimental.vp.strided.store.v2bf16.p0.i32(<2 x bfloat>, ptr, i32, <2 x i1>, i32)
+
+define void @strided_vpstore_v2bf16(<2 x bfloat> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.v2bf16.p0.i32(<2 x bfloat> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.v4bf16.p0.i32(<4 x bfloat>, ptr, i32, <4 x i1>, i32)
+
+define void @strided_vpstore_v4bf16(<4 x bfloat> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.v4bf16.p0.i32(<4 x bfloat> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat>, ptr, i32, <8 x i1>, i32)
+
+define void @strided_vpstore_v8bf16(<8 x bfloat> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
+; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @strided_vpstore_v8bf16_unit_stride(<8 x bfloat> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: strided_vpstore_v8bf16_unit_stride:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat> %val, ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
+  ret void
+}
+
 declare void @llvm.experimental.vp.strided.store.v2f16.p0.i32(<2 x half>, ptr, i32, <2 x i1>, i32)
 
 define void @strided_vpstore_v2f16(<2 x half> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
@@ -409,10 +461,10 @@ define void @strided_store_v32f64(<32 x double> %v, ptr %ptr, i32 signext %strid
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB34_2
+; CHECK-NEXT:    bltu a2, a4, .LBB38_2
 ; CHECK-NEXT:    # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB34_2:
+; CHECK-NEXT:  .LBB38_2:
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
 ; CHECK-NEXT:    mul a3, a3, a1
@@ -435,10 +487,10 @@ define void @strided_store_v32f64_allones_mask(<32 x double> %v, ptr %ptr, i32 s
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a4, 16
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:    bltu a2, a4, .LBB35_2
+; CHECK-NEXT:    bltu a2, a4, .LBB39_2
 ; CHECK-NEXT:    # %bb.1:
 ; CHECK-NEXT:    li a3, 16
-; CHECK-NEXT:  .LBB35_2:
+; CHECK-NEXT:  .LBB39_2:
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; CHECK-NEXT:    vsse64.v v8, (a0), a1
 ; CHECK-NEXT:    mul a3, a3, a1