@@ -713,14 +713,12 @@ define <7 x half> @exp_v7f16(<7 x half> %a) {
713
713
; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
714
714
; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload
715
715
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
716
- ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
716
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload
717
717
; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload
718
- ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
719
- ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
718
+ ; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
720
719
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
721
- ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
722
- ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
723
- ; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
720
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload
721
+ ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
724
722
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
725
723
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
726
724
; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
@@ -963,14 +961,12 @@ define <8 x half> @exp_v8f16(<8 x half> %a) {
963
961
; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload
964
962
; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload
965
963
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
966
- ; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
964
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload
967
965
; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload
968
- ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
969
- ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
966
+ ; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
970
967
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
971
- ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
972
- ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
973
- ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
968
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
969
+ ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
974
970
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
975
971
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
976
972
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
@@ -1994,14 +1990,12 @@ define <7 x half> @exp2_v7f16(<7 x half> %a) {
1994
1990
; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
1995
1991
; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload
1996
1992
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
1997
- ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
1993
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload
1998
1994
; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload
1999
- ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
2000
- ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
1995
+ ; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
2001
1996
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
2002
- ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
2003
- ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
2004
- ; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
1997
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload
1998
+ ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
2005
1999
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
2006
2000
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
2007
2001
; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
@@ -2244,14 +2238,12 @@ define <8 x half> @exp2_v8f16(<8 x half> %a) {
2244
2238
; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload
2245
2239
; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload
2246
2240
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
2247
- ; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
2241
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload
2248
2242
; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload
2249
- ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
2250
- ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
2243
+ ; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
2251
2244
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
2252
- ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
2253
- ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
2254
- ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
2245
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
2246
+ ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
2255
2247
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
2256
2248
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
2257
2249
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
@@ -3275,14 +3267,12 @@ define <7 x half> @log_v7f16(<7 x half> %a) {
3275
3267
; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
3276
3268
; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload
3277
3269
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
3278
- ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
3270
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload
3279
3271
; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload
3280
- ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
3281
- ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
3272
+ ; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
3282
3273
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
3283
- ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
3284
- ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
3285
- ; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
3274
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload
3275
+ ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
3286
3276
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
3287
3277
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
3288
3278
; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
@@ -3525,14 +3515,12 @@ define <8 x half> @log_v8f16(<8 x half> %a) {
3525
3515
; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload
3526
3516
; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload
3527
3517
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
3528
- ; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
3518
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload
3529
3519
; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload
3530
- ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
3531
- ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
3520
+ ; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
3532
3521
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
3533
- ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
3534
- ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
3535
- ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
3522
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
3523
+ ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
3536
3524
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
3537
3525
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
3538
3526
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
@@ -4556,14 +4544,12 @@ define <7 x half> @log2_v7f16(<7 x half> %a) {
4556
4544
; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
4557
4545
; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload
4558
4546
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
4559
- ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
4547
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload
4560
4548
; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload
4561
- ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
4562
- ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
4549
+ ; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
4563
4550
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
4564
- ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
4565
- ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
4566
- ; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
4551
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload
4552
+ ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
4567
4553
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
4568
4554
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
4569
4555
; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
@@ -4806,14 +4792,12 @@ define <8 x half> @log2_v8f16(<8 x half> %a) {
4806
4792
; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload
4807
4793
; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload
4808
4794
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
4809
- ; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
4795
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload
4810
4796
; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload
4811
- ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
4812
- ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
4797
+ ; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
4813
4798
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
4814
- ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
4815
- ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
4816
- ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
4799
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
4800
+ ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
4817
4801
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
4818
4802
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
4819
4803
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
@@ -5837,14 +5821,12 @@ define <7 x half> @log10_v7f16(<7 x half> %a) {
5837
5821
; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload
5838
5822
; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload
5839
5823
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
5840
- ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
5824
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload
5841
5825
; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload
5842
- ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
5843
- ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
5826
+ ; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
5844
5827
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
5845
- ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
5846
- ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
5847
- ; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
5828
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload
5829
+ ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
5848
5830
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
5849
5831
; CHECK-GI-NEXT: mov v1.h[6], v0.h[0]
5850
5832
; CHECK-GI-NEXT: mov v1.h[7], v0.h[0]
@@ -6087,14 +6069,12 @@ define <8 x half> @log10_v8f16(<8 x half> %a) {
6087
6069
; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload
6088
6070
; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload
6089
6071
; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
6090
- ; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload
6072
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #48] // 32-byte Folded Reload
6091
6073
; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload
6092
- ; CHECK-GI-NEXT: mov v1.h[2], v2.h[0]
6093
- ; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload
6074
+ ; CHECK-GI-NEXT: mov v1.h[2], v3.h[0]
6094
6075
; CHECK-GI-NEXT: mov v1.h[3], v2.h[0]
6095
- ; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
6096
- ; CHECK-GI-NEXT: mov v1.h[4], v2.h[0]
6097
- ; CHECK-GI-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
6076
+ ; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload
6077
+ ; CHECK-GI-NEXT: mov v1.h[4], v3.h[0]
6098
6078
; CHECK-GI-NEXT: mov v1.h[5], v2.h[0]
6099
6079
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
6100
6080
; CHECK-GI-NEXT: mov v1.h[6], v2.h[0]
0 commit comments