; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
- ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+optimized-zero-stride-load \
+ ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
; RUN: -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
- ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+optimized-zero-stride-load \
+ ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
; RUN: -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
- ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh \
+ ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
; RUN: -verify-machineinstrs < %s \
- ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT
- ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh \
+ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
+ ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
; RUN: -verify-machineinstrs < %s \
- ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT
+ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
+ ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
+ ; RUN: -verify-machineinstrs < %s \
+ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
+ ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
+ ; RUN: -verify-machineinstrs < %s \
+ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+ ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
+ ; RUN: -verify-machineinstrs < %s \
+ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN
+ ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
+ ; RUN: -verify-machineinstrs < %s \
+ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN

declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr, i8, <2 x i1>, i32)

@@ -278,6 +290,62 @@ define <8 x i64> @strided_vpload_v8i64(ptr %ptr, i32 signext %stride, <8 x i1> %
ret <8 x i64> %load
}

+ declare <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr, i32, <2 x i1>, i32)
+
+ define <2 x bfloat> @strided_vpload_v2bf16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
+ ; CHECK-LABEL: strided_vpload_v2bf16:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+ ; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
+ ; CHECK-NEXT: ret
+ %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
+ ret <2 x bfloat> %load
+ }
+
+ define <2 x bfloat> @strided_vpload_v2bf16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
+ ; CHECK-LABEL: strided_vpload_v2bf16_allones_mask:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
+ ; CHECK-NEXT: vlse16.v v8, (a0), a1
+ ; CHECK-NEXT: ret
+ %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
+ ret <2 x bfloat> %load
+ }
+
+ declare <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr, i32, <4 x i1>, i32)
+
+ define <4 x bfloat> @strided_vpload_v4bf16(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
+ ; CHECK-LABEL: strided_vpload_v4bf16:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
+ ; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
+ ; CHECK-NEXT: ret
+ %load = call <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
+ ret <4 x bfloat> %load
+ }
+
+ declare <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr, i32, <8 x i1>, i32)
+
+ define <8 x bfloat> @strided_vpload_v8bf16(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
+ ; CHECK-LABEL: strided_vpload_v8bf16:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
+ ; CHECK-NEXT: vlse16.v v8, (a0), a1, v0.t
+ ; CHECK-NEXT: ret
+ %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
+ ret <8 x bfloat> %load
+ }
+
+ define <8 x bfloat> @strided_vpload_v8bf16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+ ; CHECK-LABEL: strided_vpload_v8bf16_unit_stride:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+ ; CHECK-NEXT: vle16.v v8, (a0), v0.t
+ ; CHECK-NEXT: ret
+ %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
+ ret <8 x bfloat> %load
+ }
+

declare <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x half> @strided_vpload_v2f16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
@@ -477,10 +545,10 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: mv a3, a2
- ; CHECK-NEXT: bltu a2, a4, .LBB40_2
+ ; CHECK-NEXT: bltu a2, a4, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 16
- ; CHECK-NEXT: .LBB40_2:
+ ; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: mul a4, a3, a1
; CHECK-NEXT: add a4, a0, a4
; CHECK-NEXT: addi a5, a2, -16
@@ -505,10 +573,10 @@ define <32 x double> @strided_vpload_v32f64_allones_mask(ptr %ptr, i32 signext %
; CHECK: # %bb.0:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: mv a3, a2
- ; CHECK-NEXT: bltu a2, a4, .LBB41_2
+ ; CHECK-NEXT: bltu a2, a4, .LBB46_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a3, 16
- ; CHECK-NEXT: .LBB41_2:
+ ; CHECK-NEXT: .LBB46_2:
; CHECK-NEXT: mul a4, a3, a1
; CHECK-NEXT: add a4, a0, a4
; CHECK-NEXT: addi a5, a2, -16
@@ -533,21 +601,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
; CHECK-RV32-NEXT: li a5, 32
; CHECK-RV32-NEXT: vmv1r.v v8, v0
; CHECK-RV32-NEXT: mv a3, a4
- ; CHECK-RV32-NEXT: bltu a4, a5, .LBB42_2
+ ; CHECK-RV32-NEXT: bltu a4, a5, .LBB47_2
; CHECK-RV32-NEXT: # %bb.1:
; CHECK-RV32-NEXT: li a3, 32
- ; CHECK-RV32-NEXT: .LBB42_2:
+ ; CHECK-RV32-NEXT: .LBB47_2:
; CHECK-RV32-NEXT: mul a6, a3, a2
; CHECK-RV32-NEXT: addi a5, a4, -32
; CHECK-RV32-NEXT: sltu a7, a4, a5
; CHECK-RV32-NEXT: addi a7, a7, -1
; CHECK-RV32-NEXT: and a7, a7, a5
; CHECK-RV32-NEXT: li a5, 16
; CHECK-RV32-NEXT: add a6, a1, a6
- ; CHECK-RV32-NEXT: bltu a7, a5, .LBB42_4
+ ; CHECK-RV32-NEXT: bltu a7, a5, .LBB47_4
; CHECK-RV32-NEXT: # %bb.3:
; CHECK-RV32-NEXT: li a7, 16
- ; CHECK-RV32-NEXT: .LBB42_4:
+ ; CHECK-RV32-NEXT: .LBB47_4:
; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4
; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma
@@ -556,10 +624,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
; CHECK-RV32-NEXT: sltu a3, a3, a6
; CHECK-RV32-NEXT: addi a3, a3, -1
; CHECK-RV32-NEXT: and a3, a3, a6
- ; CHECK-RV32-NEXT: bltu a4, a5, .LBB42_6
+ ; CHECK-RV32-NEXT: bltu a4, a5, .LBB47_6
; CHECK-RV32-NEXT: # %bb.5:
; CHECK-RV32-NEXT: li a4, 16
- ; CHECK-RV32-NEXT: .LBB42_6:
+ ; CHECK-RV32-NEXT: .LBB47_6:
; CHECK-RV32-NEXT: mul a5, a4, a2
; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2
@@ -583,21 +651,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
; CHECK-RV64-NEXT: li a5, 32
; CHECK-RV64-NEXT: vmv1r.v v8, v0
; CHECK-RV64-NEXT: mv a4, a3
- ; CHECK-RV64-NEXT: bltu a3, a5, .LBB42_2
+ ; CHECK-RV64-NEXT: bltu a3, a5, .LBB47_2
; CHECK-RV64-NEXT: # %bb.1:
; CHECK-RV64-NEXT: li a4, 32
- ; CHECK-RV64-NEXT: .LBB42_2:
+ ; CHECK-RV64-NEXT: .LBB47_2:
; CHECK-RV64-NEXT: mul a6, a4, a2
; CHECK-RV64-NEXT: addi a5, a3, -32
; CHECK-RV64-NEXT: sltu a7, a3, a5
; CHECK-RV64-NEXT: addi a7, a7, -1
; CHECK-RV64-NEXT: and a7, a7, a5
; CHECK-RV64-NEXT: li a5, 16
; CHECK-RV64-NEXT: add a6, a1, a6
- ; CHECK-RV64-NEXT: bltu a7, a5, .LBB42_4
+ ; CHECK-RV64-NEXT: bltu a7, a5, .LBB47_4
; CHECK-RV64-NEXT: # %bb.3:
; CHECK-RV64-NEXT: li a7, 16
- ; CHECK-RV64-NEXT: .LBB42_4:
+ ; CHECK-RV64-NEXT: .LBB47_4:
; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4
; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma
@@ -606,10 +674,10 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
; CHECK-RV64-NEXT: sltu a4, a4, a6
; CHECK-RV64-NEXT: addi a4, a4, -1
; CHECK-RV64-NEXT: and a4, a4, a6
- ; CHECK-RV64-NEXT: bltu a3, a5, .LBB42_6
+ ; CHECK-RV64-NEXT: bltu a3, a5, .LBB47_6
; CHECK-RV64-NEXT: # %bb.5:
; CHECK-RV64-NEXT: li a3, 16
- ; CHECK-RV64-NEXT: .LBB42_6:
+ ; CHECK-RV64-NEXT: .LBB47_6:
; CHECK-RV64-NEXT: mul a5, a3, a2
; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2
@@ -659,12 +727,19 @@ define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero
; CHECK-OPT-NEXT: ret
;
- ; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
- ; CHECK-NO-OPT: # %bb.0:
- ; CHECK-NO-OPT-NEXT: flh fa5, 0(a0)
- ; CHECK-NO-OPT-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
- ; CHECK-NO-OPT-NEXT: vfmv.v.f v8, fa5
- ; CHECK-NO-OPT-NEXT: ret
+ ; CHECK-NO-OPT-ZVFH-LABEL: zero_strided_unmasked_vpload_4f16:
+ ; CHECK-NO-OPT-ZVFH: # %bb.0:
+ ; CHECK-NO-OPT-ZVFH-NEXT: flh fa5, 0(a0)
+ ; CHECK-NO-OPT-ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
+ ; CHECK-NO-OPT-ZVFH-NEXT: vfmv.v.f v8, fa5
+ ; CHECK-NO-OPT-ZVFH-NEXT: ret
+ ;
+ ; CHECK-NO-OPT-ZVFHMIN-LABEL: zero_strided_unmasked_vpload_4f16:
+ ; CHECK-NO-OPT-ZVFHMIN: # %bb.0:
+ ; CHECK-NO-OPT-ZVFHMIN-NEXT: lh a0, 0(a0)
+ ; CHECK-NO-OPT-ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
+ ; CHECK-NO-OPT-ZVFHMIN-NEXT: vmv.v.x v8, a0
+ ; CHECK-NO-OPT-ZVFHMIN-NEXT: ret
%load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
ret <4 x half> %load
}