@@ -223,18 +223,31 @@ entry:
223
223
}
224
224
225
225
define arm_aapcs_vfpcc <8 x i16 > @shuffle3_i16 (<8 x i16 > %src ) {
226
- ; CHECK-LABEL: shuffle3_i16:
227
- ; CHECK: @ %bb.0: @ %entry
228
- ; CHECK-NEXT: vmovx.f16 s5, s3
229
- ; CHECK-NEXT: vmovx.f16 s6, s1
230
- ; CHECK-NEXT: vmovx.f16 s4, s0
231
- ; CHECK-NEXT: vins.f16 s1, s0
232
- ; CHECK-NEXT: vins.f16 s6, s4
233
- ; CHECK-NEXT: vins.f16 s5, s3
234
- ; CHECK-NEXT: vmov.f32 s4, s2
235
- ; CHECK-NEXT: vmov.f32 s7, s1
236
- ; CHECK-NEXT: vmov q0, q1
237
- ; CHECK-NEXT: bx lr
226
+ ; CHECK-LV-LABEL: shuffle3_i16:
227
+ ; CHECK-LV: @ %bb.0: @ %entry
228
+ ; CHECK-LV-NEXT: vmovx.f16 s5, s3
229
+ ; CHECK-LV-NEXT: vmovx.f16 s6, s1
230
+ ; CHECK-LV-NEXT: vmovx.f16 s4, s0
231
+ ; CHECK-LV-NEXT: vins.f16 s1, s0
232
+ ; CHECK-LV-NEXT: vins.f16 s6, s4
233
+ ; CHECK-LV-NEXT: vins.f16 s5, s3
234
+ ; CHECK-LV-NEXT: vmov.f32 s4, s2
235
+ ; CHECK-LV-NEXT: vmov.f32 s7, s1
236
+ ; CHECK-LV-NEXT: vmov q0, q1
237
+ ; CHECK-LV-NEXT: bx lr
238
+
239
+ ; CHECK-LIS-LABEL: shuffle3_i16:
240
+ ; CHECK-LIS: @ %bb.0: @ %entry
241
+ ; CHECK-LIS-NEXT: vmov q1, q0
242
+ ; CHECK-LIS-NEXT: vmovx.f16 s2, s5
243
+ ; CHECK-LIS-NEXT: vmovx.f16 s0, s4
244
+ ; CHECK-LIS-NEXT: vins.f16 s5, s4
245
+ ; CHECK-LIS-NEXT: vins.f16 s2, s0
246
+ ; CHECK-LIS-NEXT: vmov.f32 s0, s6
247
+ ; CHECK-LIS-NEXT: vmovx.f16 s1, s7
248
+ ; CHECK-LIS-NEXT: vmov.f32 s3, s5
249
+ ; CHECK-LIS-NEXT: vins.f16 s1, s7
250
+ ; CHECK-LIS-NEXT: bx lr
238
251
entry:
239
252
%out = shufflevector <8 x i16 > %src , <8 x i16 > undef , <8 x i32 > <i32 4 , i32 5 , i32 7 , i32 6 , i32 3 , i32 1 , i32 2 , i32 0 >
240
253
ret <8 x i16 > %out
@@ -1145,18 +1158,31 @@ entry:
1145
1158
}
1146
1159
1147
1160
define arm_aapcs_vfpcc <8 x half > @shuffle3_f16 (<8 x half > %src ) {
1148
- ; CHECK-LABEL: shuffle3_f16:
1149
- ; CHECK: @ %bb.0: @ %entry
1150
- ; CHECK-NEXT: vmovx.f16 s5, s3
1151
- ; CHECK-NEXT: vmovx.f16 s6, s1
1152
- ; CHECK-NEXT: vmovx.f16 s4, s0
1153
- ; CHECK-NEXT: vins.f16 s1, s0
1154
- ; CHECK-NEXT: vins.f16 s6, s4
1155
- ; CHECK-NEXT: vins.f16 s5, s3
1156
- ; CHECK-NEXT: vmov.f32 s4, s2
1157
- ; CHECK-NEXT: vmov.f32 s7, s1
1158
- ; CHECK-NEXT: vmov q0, q1
1159
- ; CHECK-NEXT: bx lr
1161
+ ; CHECK-LV-LABEL: shuffle3_f16:
1162
+ ; CHECK-LV: @ %bb.0: @ %entry
1163
+ ; CHECK-LV-NEXT: vmovx.f16 s5, s3
1164
+ ; CHECK-LV-NEXT: vmovx.f16 s6, s1
1165
+ ; CHECK-LV-NEXT: vmovx.f16 s4, s0
1166
+ ; CHECK-LV-NEXT: vins.f16 s1, s0
1167
+ ; CHECK-LV-NEXT: vins.f16 s6, s4
1168
+ ; CHECK-LV-NEXT: vins.f16 s5, s3
1169
+ ; CHECK-LV-NEXT: vmov.f32 s4, s2
1170
+ ; CHECK-LV-NEXT: vmov.f32 s7, s1
1171
+ ; CHECK-LV-NEXT: vmov q0, q1
1172
+ ; CHECK-LV-NEXT: bx lr
1173
+
1174
+ ; CHECK-LIS-LABEL: shuffle3_f16:
1175
+ ; CHECK-LIS: @ %bb.0: @ %entry
1176
+ ; CHECK-LIS-NEXT: vmov q1, q0
1177
+ ; CHECK-LIS-NEXT: vmovx.f16 s2, s5
1178
+ ; CHECK-LIS-NEXT: vmovx.f16 s0, s4
1179
+ ; CHECK-LIS-NEXT: vins.f16 s5, s4
1180
+ ; CHECK-LIS-NEXT: vins.f16 s2, s0
1181
+ ; CHECK-LIS-NEXT: vmov.f32 s0, s6
1182
+ ; CHECK-LIS-NEXT: vmovx.f16 s1, s7
1183
+ ; CHECK-LIS-NEXT: vmov.f32 s3, s5
1184
+ ; CHECK-LIS-NEXT: vins.f16 s1, s7
1185
+ ; CHECK-LIS-NEXT: bx lr
1160
1186
entry:
1161
1187
%out = shufflevector <8 x half > %src , <8 x half > undef , <8 x i32 > <i32 4 , i32 5 , i32 7 , i32 6 , i32 3 , i32 1 , i32 2 , i32 0 >
1162
1188
ret <8 x half > %out
@@ -1467,27 +1493,47 @@ entry:
1467
1493
ret <2 x double > %out
1468
1494
}
1469
1495
define arm_aapcs_vfpcc <8 x double > @shuffle9_f64 (<4 x double > %src1 , <4 x double > %src2 ) {
1470
- ; CHECK-LABEL: shuffle9_f64:
1471
- ; CHECK: @ %bb.0: @ %entry
1472
- ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1473
- ; CHECK-NEXT: vpush {d8, d9, d10, d11}
1474
- ; CHECK-NEXT: vmov q5, q2
1475
- ; CHECK-NEXT: vmov.f32 s16, s0
1476
- ; CHECK-NEXT: vmov.f32 s18, s20
1477
- ; CHECK-NEXT: vmov.f32 s20, s2
1478
- ; CHECK-NEXT: vmov.f32 s10, s12
1479
- ; CHECK-NEXT: vmov.f32 s19, s21
1480
- ; CHECK-NEXT: vmov.f32 s8, s4
1481
- ; CHECK-NEXT: vmov.f32 s17, s1
1482
- ; CHECK-NEXT: vmov.f32 s21, s3
1483
- ; CHECK-NEXT: vmov q0, q4
1484
- ; CHECK-NEXT: vmov.f32 s12, s6
1485
- ; CHECK-NEXT: vmov.f32 s11, s13
1486
- ; CHECK-NEXT: vmov.f32 s9, s5
1487
- ; CHECK-NEXT: vmov.f32 s13, s7
1488
- ; CHECK-NEXT: vmov q1, q5
1489
- ; CHECK-NEXT: vpop {d8, d9, d10, d11}
1490
- ; CHECK-NEXT: bx lr
1496
+ ; CHECK-LV-LABEL: shuffle9_f64:
1497
+ ; CHECK-LV: @ %bb.0: @ %entry
1498
+ ; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
1499
+ ; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
1500
+ ; CHECK-LV-NEXT: vmov q5, q2
1501
+ ; CHECK-LV-NEXT: vmov.f32 s16, s0
1502
+ ; CHECK-LV-NEXT: vmov.f32 s18, s20
1503
+ ; CHECK-LV-NEXT: vmov.f32 s20, s2
1504
+ ; CHECK-LV-NEXT: vmov.f32 s10, s12
1505
+ ; CHECK-LV-NEXT: vmov.f32 s19, s21
1506
+ ; CHECK-LV-NEXT: vmov.f32 s8, s4
1507
+ ; CHECK-LV-NEXT: vmov.f32 s17, s1
1508
+ ; CHECK-LV-NEXT: vmov.f32 s21, s3
1509
+ ; CHECK-LV-NEXT: vmov q0, q4
1510
+ ; CHECK-LV-NEXT: vmov.f32 s12, s6
1511
+ ; CHECK-LV-NEXT: vmov.f32 s11, s13
1512
+ ; CHECK-LV-NEXT: vmov.f32 s9, s5
1513
+ ; CHECK-LV-NEXT: vmov.f32 s13, s7
1514
+ ; CHECK-LV-NEXT: vmov q1, q5
1515
+ ; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
1516
+ ; CHECK-LV-NEXT: bx lr
1517
+
1518
+ ; CHECK-LIS-LABEL: shuffle9_f64:
1519
+ ; CHECK-LIS: @ %bb.0: @ %entry
1520
+ ; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
1521
+ ; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
1522
+ ; CHECK-LIS-NEXT: vmov q5, q2
1523
+ ; CHECK-LIS-NEXT: vmov q4, q0
1524
+ ; CHECK-LIS-NEXT: vmov.f32 s2, s20
1525
+ ; CHECK-LIS-NEXT: vmov.f32 s20, s18
1526
+ ; CHECK-LIS-NEXT: vmov.f32 s10, s12
1527
+ ; CHECK-LIS-NEXT: vmov.f32 s3, s21
1528
+ ; CHECK-LIS-NEXT: vmov.f32 s8, s4
1529
+ ; CHECK-LIS-NEXT: vmov.f32 s21, s19
1530
+ ; CHECK-LIS-NEXT: vmov.f32 s12, s6
1531
+ ; CHECK-LIS-NEXT: vmov.f32 s11, s13
1532
+ ; CHECK-LIS-NEXT: vmov.f32 s9, s5
1533
+ ; CHECK-LIS-NEXT: vmov.f32 s13, s7
1534
+ ; CHECK-LIS-NEXT: vmov q1, q5
1535
+ ; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
1536
+ ; CHECK-LIS-NEXT: bx lr
1491
1537
entry:
1492
1538
%out = shufflevector <4 x double > %src1 , <4 x double > %src2 , <8 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 , i32 2 , i32 6 , i32 3 , i32 7 >
1493
1539
ret <8 x double > %out
@@ -1560,27 +1606,47 @@ entry:
1560
1606
ret <2 x i64 > %out
1561
1607
}
1562
1608
define arm_aapcs_vfpcc <8 x i64 > @shuffle9_i64 (<4 x i64 > %src1 , <4 x i64 > %src2 ) {
1563
- ; CHECK-LABEL: shuffle9_i64:
1564
- ; CHECK: @ %bb.0: @ %entry
1565
- ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1566
- ; CHECK-NEXT: vpush {d8, d9, d10, d11}
1567
- ; CHECK-NEXT: vmov q5, q2
1568
- ; CHECK-NEXT: vmov.f32 s16, s0
1569
- ; CHECK-NEXT: vmov.f32 s18, s20
1570
- ; CHECK-NEXT: vmov.f32 s20, s2
1571
- ; CHECK-NEXT: vmov.f32 s10, s12
1572
- ; CHECK-NEXT: vmov.f32 s19, s21
1573
- ; CHECK-NEXT: vmov.f32 s8, s4
1574
- ; CHECK-NEXT: vmov.f32 s17, s1
1575
- ; CHECK-NEXT: vmov.f32 s21, s3
1576
- ; CHECK-NEXT: vmov q0, q4
1577
- ; CHECK-NEXT: vmov.f32 s12, s6
1578
- ; CHECK-NEXT: vmov.f32 s11, s13
1579
- ; CHECK-NEXT: vmov.f32 s9, s5
1580
- ; CHECK-NEXT: vmov.f32 s13, s7
1581
- ; CHECK-NEXT: vmov q1, q5
1582
- ; CHECK-NEXT: vpop {d8, d9, d10, d11}
1583
- ; CHECK-NEXT: bx lr
1609
+ ; CHECK-LV-LABEL: shuffle9_i64:
1610
+ ; CHECK-LV: @ %bb.0: @ %entry
1611
+ ; CHECK-LV-NEXT: .vsave {d8, d9, d10, d11}
1612
+ ; CHECK-LV-NEXT: vpush {d8, d9, d10, d11}
1613
+ ; CHECK-LV-NEXT: vmov q5, q2
1614
+ ; CHECK-LV-NEXT: vmov.f32 s16, s0
1615
+ ; CHECK-LV-NEXT: vmov.f32 s18, s20
1616
+ ; CHECK-LV-NEXT: vmov.f32 s20, s2
1617
+ ; CHECK-LV-NEXT: vmov.f32 s10, s12
1618
+ ; CHECK-LV-NEXT: vmov.f32 s19, s21
1619
+ ; CHECK-LV-NEXT: vmov.f32 s8, s4
1620
+ ; CHECK-LV-NEXT: vmov.f32 s17, s1
1621
+ ; CHECK-LV-NEXT: vmov.f32 s21, s3
1622
+ ; CHECK-LV-NEXT: vmov q0, q4
1623
+ ; CHECK-LV-NEXT: vmov.f32 s12, s6
1624
+ ; CHECK-LV-NEXT: vmov.f32 s11, s13
1625
+ ; CHECK-LV-NEXT: vmov.f32 s9, s5
1626
+ ; CHECK-LV-NEXT: vmov.f32 s13, s7
1627
+ ; CHECK-LV-NEXT: vmov q1, q5
1628
+ ; CHECK-LV-NEXT: vpop {d8, d9, d10, d11}
1629
+ ; CHECK-LV-NEXT: bx lr
1630
+
1631
+ ; CHECK-LIS-LABEL: shuffle9_i64:
1632
+ ; CHECK-LIS: @ %bb.0: @ %entry
1633
+ ; CHECK-LIS-NEXT: .vsave {d8, d9, d10, d11}
1634
+ ; CHECK-LIS-NEXT: vpush {d8, d9, d10, d11}
1635
+ ; CHECK-LIS-NEXT: vmov q5, q2
1636
+ ; CHECK-LIS-NEXT: vmov q4, q0
1637
+ ; CHECK-LIS-NEXT: vmov.f32 s2, s20
1638
+ ; CHECK-LIS-NEXT: vmov.f32 s20, s18
1639
+ ; CHECK-LIS-NEXT: vmov.f32 s10, s12
1640
+ ; CHECK-LIS-NEXT: vmov.f32 s3, s21
1641
+ ; CHECK-LIS-NEXT: vmov.f32 s8, s4
1642
+ ; CHECK-LIS-NEXT: vmov.f32 s21, s19
1643
+ ; CHECK-LIS-NEXT: vmov.f32 s12, s6
1644
+ ; CHECK-LIS-NEXT: vmov.f32 s11, s13
1645
+ ; CHECK-LIS-NEXT: vmov.f32 s9, s5
1646
+ ; CHECK-LIS-NEXT: vmov.f32 s13, s7
1647
+ ; CHECK-LIS-NEXT: vmov q1, q5
1648
+ ; CHECK-LIS-NEXT: vpop {d8, d9, d10, d11}
1649
+ ; CHECK-LIS-NEXT: bx lr
1584
1650
entry:
1585
1651
%out = shufflevector <4 x i64 > %src1 , <4 x i64 > %src2 , <8 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 , i32 2 , i32 6 , i32 3 , i32 7 >
1586
1652
ret <8 x i64 > %out
0 commit comments