; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b | FileCheck %s --check-prefixes=CHECK,V
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH

- declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
declare void @g()

- ; TODO: Merging scalars into vectors is unprofitable because we have no
- ; vector CSRs which creates additional spills around the call.
define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
@@ -16,40 +13,40 @@ define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: .cfi_offset s1, -24
; CHECK-NEXT: .cfi_offset s2, -32
- ; CHECK-NEXT: csrr a6, vlenb
- ; CHECK-NEXT: sub sp, sp, a6
- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 1 * vlenb
+ ; CHECK-NEXT: .cfi_offset s3, -40
+ ; CHECK-NEXT: .cfi_offset s4, -48
; CHECK-NEXT: mv s0, a5
; CHECK-NEXT: mv s1, a4
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vse64.v v8, (a1)
- ; CHECK-NEXT: vle64.v v8, (a2)
- ; CHECK-NEXT: addi a0, sp, 16
- ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+ ; CHECK-NEXT: ld s3, 0(a2)
+ ; CHECK-NEXT: ld s4, 8(a2)
; CHECK-NEXT: mv s2, a3
; CHECK-NEXT: call g
- ; CHECK-NEXT: addi a0, sp, 16
- ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+ ; CHECK-NEXT: sd s3, 0(s2)
+ ; CHECK-NEXT: sd s4, 8(s2)
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
- ; CHECK-NEXT: vse64.v v8, (s2)
; CHECK-NEXT: vle64.v v8, (s1)
; CHECK-NEXT: vse64.v v8, (s0)
- ; CHECK-NEXT: csrr a0, vlenb
- ; CHECK-NEXT: add sp, sp, a0
- ; CHECK-NEXT: .cfi_def_cfa sp, 48
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT: .cfi_restore ra
; CHECK-NEXT: .cfi_restore s0
; CHECK-NEXT: .cfi_restore s1
; CHECK-NEXT: .cfi_restore s2
+ ; CHECK-NEXT: .cfi_restore s3
+ ; CHECK-NEXT: .cfi_restore s4
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
@@ -78,13 +75,13 @@ define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
  ret void
}

- define void @f1(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
+ define void @f1(ptr %p, ptr %q, double %t) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
- ; CHECK-NEXT: vle64.v v8, (a2)
+ ; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: fcvt.wu.d a0, fa0, rtz
- ; CHECK-NEXT: vse64.v v8, (a3)
+ ; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
  %x0 = load i64, ptr %p
  %p.1 = getelementptr i64, ptr %p, i64 1
@@ -93,7 +90,6 @@ define void @f1(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
  store i64 %x0, ptr %q
  %q.1 = getelementptr i64, ptr %q, i64 1
  store i64 %x1, ptr %q.1
-
  ret void
}

@@ -515,28 +511,26 @@ define void @two_half_unaligned(ptr %p, ptr %q) {
; ZVFH-NEXT: .cfi_def_cfa_offset 32
; ZVFH-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; ZVFH-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+ ; ZVFH-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
+ ; ZVFH-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill
; ZVFH-NEXT: .cfi_offset ra, -8
; ZVFH-NEXT: .cfi_offset s0, -16
- ; ZVFH-NEXT: csrr a2, vlenb
- ; ZVFH-NEXT: sub sp, sp, a2
- ; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
- ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
- ; ZVFH-NEXT: vle16.v v8, (a0)
- ; ZVFH-NEXT: addi a0, sp, 16
- ; ZVFH-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+ ; ZVFH-NEXT: .cfi_offset fs0, -24
+ ; ZVFH-NEXT: .cfi_offset fs1, -32
+ ; ZVFH-NEXT: flh fs0, 0(a0)
+ ; ZVFH-NEXT: flh fs1, 2(a0)
; ZVFH-NEXT: mv s0, a1
; ZVFH-NEXT: call g
- ; ZVFH-NEXT: addi a0, sp, 16
- ; ZVFH-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
- ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
- ; ZVFH-NEXT: vse16.v v8, (s0)
- ; ZVFH-NEXT: csrr a0, vlenb
- ; ZVFH-NEXT: add sp, sp, a0
- ; ZVFH-NEXT: .cfi_def_cfa sp, 32
+ ; ZVFH-NEXT: fsh fs0, 0(s0)
+ ; ZVFH-NEXT: fsh fs1, 2(s0)
; ZVFH-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; ZVFH-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+ ; ZVFH-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
+ ; ZVFH-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload
; ZVFH-NEXT: .cfi_restore ra
; ZVFH-NEXT: .cfi_restore s0
+ ; ZVFH-NEXT: .cfi_restore fs0
+ ; ZVFH-NEXT: .cfi_restore fs1
; ZVFH-NEXT: addi sp, sp, 32
; ZVFH-NEXT: .cfi_def_cfa_offset 0
; ZVFH-NEXT: ret
@@ -552,9 +546,6 @@ define void @two_half_unaligned(ptr %p, ptr %q) {
  ret void
}

-
- ; TODO: This one is currently a vector which is unprofitable, we should
- ; use i64 instead.
define void @two_float(ptr %p, ptr %q) {
; CHECK-LABEL: two_float:
; CHECK: # %bb.0:
@@ -598,28 +589,26 @@ define void @two_float_unaligned(ptr %p, ptr %q) {
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
- ; CHECK-NEXT: csrr a2, vlenb
- ; CHECK-NEXT: sub sp, sp, a2
- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
- ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
- ; CHECK-NEXT: vle32.v v8, (a0)
- ; CHECK-NEXT: addi a0, sp, 16
- ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+ ; CHECK-NEXT: .cfi_offset fs0, -24
+ ; CHECK-NEXT: .cfi_offset fs1, -32
+ ; CHECK-NEXT: flw fs0, 0(a0)
+ ; CHECK-NEXT: flw fs1, 4(a0)
; CHECK-NEXT: mv s0, a1
; CHECK-NEXT: call g
- ; CHECK-NEXT: addi a0, sp, 16
- ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
- ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
- ; CHECK-NEXT: vse32.v v8, (s0)
- ; CHECK-NEXT: csrr a0, vlenb
- ; CHECK-NEXT: add sp, sp, a0
- ; CHECK-NEXT: .cfi_def_cfa sp, 32
+ ; CHECK-NEXT: fsw fs0, 0(s0)
+ ; CHECK-NEXT: fsw fs1, 4(s0)
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT: .cfi_restore ra
; CHECK-NEXT: .cfi_restore s0
+ ; CHECK-NEXT: .cfi_restore fs0
+ ; CHECK-NEXT: .cfi_restore fs1
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
@@ -679,28 +668,26 @@ define void @two_double(ptr %p, ptr %q) {
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: fsd fs1, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
- ; CHECK-NEXT: csrr a2, vlenb
- ; CHECK-NEXT: sub sp, sp, a2
- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
- ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
- ; CHECK-NEXT: vle64.v v8, (a0)
- ; CHECK-NEXT: addi a0, sp, 16
- ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+ ; CHECK-NEXT: .cfi_offset fs0, -24
+ ; CHECK-NEXT: .cfi_offset fs1, -32
+ ; CHECK-NEXT: fld fs0, 0(a0)
+ ; CHECK-NEXT: fld fs1, 8(a0)
; CHECK-NEXT: mv s0, a1
; CHECK-NEXT: call g
- ; CHECK-NEXT: addi a0, sp, 16
- ; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
- ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
- ; CHECK-NEXT: vse64.v v8, (s0)
- ; CHECK-NEXT: csrr a0, vlenb
- ; CHECK-NEXT: add sp, sp, a0
- ; CHECK-NEXT: .cfi_def_cfa sp, 32
+ ; CHECK-NEXT: fsd fs0, 0(s0)
+ ; CHECK-NEXT: fsd fs1, 8(s0)
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: fld fs1, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT: .cfi_restore ra
; CHECK-NEXT: .cfi_restore s0
+ ; CHECK-NEXT: .cfi_restore fs0
+ ; CHECK-NEXT: .cfi_restore fs1
; CHECK-NEXT: addi sp, sp, 32
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret