Skip to content

Commit 4bd2fce

Browse files
committed
Lower [x]vshuf.d to [x]vshuf4i.d if possible
1 parent 96eeb6c commit 4bd2fce

File tree

9 files changed

+100
-43
lines changed

9 files changed

+100
-43
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,37 +1026,39 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
10261026
MVT VT, SDValue V1, SDValue V2,
10271027
SelectionDAG &DAG) {
10281028

1029-
// When the size is less than 4, lower cost instructions may be used.
1030-
if (Mask.size() < 4)
1031-
return SDValue();
1029+
unsigned SubVecSize = 4;
1030+
if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
1031+
VT == MVT::v4i64) {
1032+
SubVecSize = 2;
1033+
}
10321034

10331035
int SubMask[4] = {-1, -1, -1, -1};
1034-
for (unsigned i = 0; i < 4; ++i) {
1035-
for (unsigned j = i; j < Mask.size(); j += 4) {
1036-
int Idx = Mask[j];
1036+
for (unsigned i = 0; i < SubVecSize; ++i) {
1037+
for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1038+
int M = Mask[j];
10371039

10381040
// Convert from vector index to 4-element subvector index
10391041
// If an index refers to an element outside of the subvector then give up
1040-
if (Idx != -1) {
1041-
Idx -= 4 * (j / 4);
1042-
if (Idx < 0 || Idx >= 4)
1042+
if (M != -1) {
1043+
M -= 4 * (j / SubVecSize);
1044+
if (M < 0 || M >= 4)
10431045
return SDValue();
10441046
}
10451047

10461048
// If the mask has an undef, replace it with the current index.
10471049
// Note that it might still be undef if the current index is also undef
10481050
if (SubMask[i] == -1)
1049-
SubMask[i] = Idx;
1051+
SubMask[i] = M;
10501052
// Check that non-undef values are the same as in the mask. If they
10511053
// aren't then give up
1052-
else if (Idx != -1 && Idx != SubMask[i])
1054+
else if (M != -1 && M != SubMask[i])
10531055
return SDValue();
10541056
}
10551057
}
10561058

10571059
// Calculate the immediate. Replace any remaining undefs with zero
10581060
APInt Imm(64, 0);
1059-
for (int i = 3; i >= 0; --i) {
1061+
for (int i = SubVecSize - 1; i >= 0; --i) {
10601062
int Idx = SubMask[i];
10611063

10621064
if (Idx == -1)
@@ -1066,6 +1068,12 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
10661068
Imm |= Idx & 0x3;
10671069
}
10681070

1071+
// Return vshuf4i.d and xvshuf4i.d
1072+
if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
1073+
VT == MVT::v4i64)
1074+
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1075+
DAG.getConstant(Imm, DL, MVT::i64));
1076+
10691077
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
10701078
DAG.getConstant(Imm, DL, MVT::i64));
10711079
}
@@ -1375,6 +1383,11 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
13751383
/* V2 = V1; */
13761384
}
13771385

1386+
if (VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) {
1387+
if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1388+
return Result;
1389+
}
1390+
13781391
// It is recommended not to change the pattern comparison order for better
13791392
// performance.
13801393
if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
@@ -1447,10 +1460,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
14471460
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
14481461
MVT VT, SDValue V1, SDValue V2,
14491462
SelectionDAG &DAG) {
1450-
// When the size is less than or equal to 4, lower cost instructions may be
1451-
// used.
1452-
if (Mask.size() <= 4)
1453-
return SDValue();
14541463
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
14551464
}
14561465

@@ -1818,6 +1827,11 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
18181827
/* V2 = V1; */
18191828
}
18201829

1830+
if (VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) {
1831+
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1832+
return Result;
1833+
}
1834+
18211835
// It is recommended not to change the pattern comparison order for better
18221836
// performance.
18231837
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,6 +1729,10 @@ def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8),
17291729
(XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>;
17301730
def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8),
17311731
(XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>;
1732+
def : Pat<(loongarch_vshuf4i_d v4i64:$xj, v4i64:$xk, immZExt8:$ui8),
1733+
(XVSHUF4I_D v4i64:$xj, v4i64:$xk, immZExt8:$ui8)>;
1734+
def : Pat<(loongarch_vshuf4i_d v4f64:$xj, v4f64:$xk, immZExt8:$ui8),
1735+
(XVSHUF4I_D v4f64:$xj, v4f64:$xk, immZExt8:$ui8)>;
17321736

17331737
// XVREPL128VEI_{B/H/W/D}
17341738
def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4),

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
2323
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
2424
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
2525
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
26+
def SDT_LoongArchVShuf4i_D
27+
: SDTypeProfile<1, 3,
28+
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
29+
SDTCisVT<3, i64>]>;
2630
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
2731
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
2832
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -53,6 +57,8 @@ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
5357
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
5458

5559
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
60+
def loongarch_vshuf4i_d
61+
: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchVShuf4i_D>;
5662
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
5763
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
5864

@@ -1914,6 +1920,10 @@ def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
19141920
(VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>;
19151921
def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
19161922
(VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>;
1923+
def : Pat<(loongarch_vshuf4i_d v2i64:$vj, v2i64:$vk, immZExt8:$ui8),
1924+
(VSHUF4I_D v2i64:$vj, v2i64:$vk, immZExt8:$ui8)>;
1925+
def : Pat<(loongarch_vshuf4i_d v2f64:$vj, v2f64:$vk, immZExt8:$ui8),
1926+
(VSHUF4I_D v2f64:$vj, v2f64:$vk, immZExt8:$ui8)>;
19171927

19181928
// VREPLVEI_{B/H/W/D}
19191929
def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,23 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
4141
%c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
4242
ret <8 x float> %c
4343
}
44+
45+
;; xvshuf4i.d
46+
define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
47+
; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
50+
; CHECK-NEXT: ret
51+
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
52+
ret <4 x i64> %c
53+
}
54+
55+
;; xvshuf4i.d
56+
define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
57+
; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
58+
; CHECK: # %bb.0:
59+
; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
60+
; CHECK-NEXT: ret
61+
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
62+
ret <4 x double> %c
63+
}

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
3535
define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) {
3636
; CHECK-LABEL: shufflevector_pack_ev_v2i64:
3737
; CHECK: # %bb.0:
38-
; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
38+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
3939
; CHECK-NEXT: ret
4040
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
4141
ret <2 x i64> %c
@@ -55,7 +55,7 @@ define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b)
5555
define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) {
5656
; CHECK-LABEL: shufflevector_pack_ev_v2f64:
5757
; CHECK: # %bb.0:
58-
; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
58+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
5959
; CHECK-NEXT: ret
6060
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
6161
ret <2 x double> %c
@@ -95,7 +95,7 @@ define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
9595
define <2 x i64> @shufflodector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) {
9696
; CHECK-LABEL: shufflodector_pack_od_v2i64:
9797
; CHECK: # %bb.0:
98-
; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
98+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
9999
; CHECK-NEXT: ret
100100
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
101101
ret <2 x i64> %c
@@ -115,7 +115,7 @@ define <4 x float> @shufflodector_pack_od_v4f32(<4 x float> %a, <4 x float> %b)
115115
define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) {
116116
; CHECK-LABEL: shufflodector_pack_od_v2f64:
117117
; CHECK: # %bb.0:
118-
; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
118+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
119119
; CHECK-NEXT: ret
120120
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
121121
ret <2 x double> %c

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,7 @@ define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
4242
define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
4343
; CHECK-LABEL: shufflevector_v2i64:
4444
; CHECK: # %bb.0:
45-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
46-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI3_0)
47-
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
48-
; CHECK-NEXT: vori.b $vr0, $vr2, 0
45+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
4946
; CHECK-NEXT: ret
5047
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
5148
ret <2 x i64> %c
@@ -68,10 +65,7 @@ define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
6865
define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
6966
; CHECK-LABEL: shufflevector_v2f64:
7067
; CHECK: # %bb.0:
71-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
72-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0)
73-
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
74-
; CHECK-NEXT: vori.b $vr0, $vr2, 0
68+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
7569
; CHECK-NEXT: ret
7670
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
7771
ret <2 x double> %c

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
33

4-
;; vilvh.b
4+
;; vshuf4i.b
55
define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
66
; CHECK-LABEL: shufflevector_vshuf4i_v16i8:
77
; CHECK: # %bb.0:
@@ -11,7 +11,7 @@ define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
1111
ret <16 x i8> %c
1212
}
1313

14-
;; vilvh.h
14+
;; vshuf4i.h
1515
define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
1616
; CHECK-LABEL: shufflevector_vshuf4i_v8i4:
1717
; CHECK: # %bb.0:
@@ -21,7 +21,7 @@ define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
2121
ret <8 x i16> %c
2222
}
2323

24-
;; vilvh.w
24+
;; vshuf4i.w
2525
define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
2626
; CHECK-LABEL: shufflevector_vshuf4i_v4i32:
2727
; CHECK: # %bb.0:
@@ -31,7 +31,7 @@ define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
3131
ret <4 x i32> %c
3232
}
3333

34-
;; vilvh.w
34+
;; vshuf4i.w
3535
define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) {
3636
; CHECK-LABEL: shufflevector_vshuf4i_v4f32:
3737
; CHECK: # %bb.0:
@@ -40,3 +40,23 @@ define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b)
4040
%c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
4141
ret <4 x float> %c
4242
}
43+
44+
;; vshuf4i.d
45+
define <2 x i64> @shufflevector_vshuf4i_v2d64(<2 x i64> %a, <2 x i64> %b) {
46+
; CHECK-LABEL: shufflevector_vshuf4i_v2d64:
47+
; CHECK: # %bb.0:
48+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
49+
; CHECK-NEXT: ret
50+
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
51+
ret <2 x i64> %c
52+
}
53+
54+
;; vshuf4i.d
55+
define <2 x double> @shufflevector_vshuf4i_v2f64(<2 x double> %a, <2 x double> %b) {
56+
; CHECK-LABEL: shufflevector_vshuf4i_v2f64:
57+
; CHECK: # %bb.0:
58+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
59+
; CHECK-NEXT: ret
60+
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
61+
ret <2 x double> %c
62+
}

llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,7 @@ define <4 x i32> @byte_rotate_v4i32_3(<4 x i32> %a) nounwind {
103103
define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
104104
; CHECK-LABEL: byte_rotate_v2i64_1:
105105
; CHECK: # %bb.0:
106-
; CHECK-NEXT: vbsrl.v $vr1, $vr1, 8
107-
; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
108-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
106+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 3
109107
; CHECK-NEXT: ret
110108
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
111109
ret <2 x i64> %shuffle
@@ -114,9 +112,7 @@ define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
114112
define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
115113
; CHECK-LABEL: byte_rotate_v2i64_2:
116114
; CHECK: # %bb.0:
117-
; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
118-
; CHECK-NEXT: vbsll.v $vr1, $vr1, 8
119-
; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
115+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
120116
; CHECK-NEXT: ret
121117
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
122118
ret <2 x i64> %shuffle
@@ -125,9 +121,7 @@ define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
125121
define <2 x i64> @byte_rotate_v2i64_3(<2 x i64> %a) nounwind {
126122
; CHECK-LABEL: byte_rotate_v2i64_3:
127123
; CHECK: # %bb.0:
128-
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
129-
; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
130-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
124+
; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1
131125
; CHECK-NEXT: ret
132126
%shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
133127
ret <2 x i64> %shuffle

llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ define <2 x i64> @shuffle_2i64_vbsll_v_8(<2 x i64> %a) nounwind {
230230
; CHECK-LABEL: shuffle_2i64_vbsll_v_8:
231231
; CHECK: # %bb.0:
232232
; CHECK-NEXT: vrepli.b $vr1, 0
233-
; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1
233+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 2
234234
; CHECK-NEXT: ret
235235
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
236236
ret <2 x i64> %shuffle
@@ -464,7 +464,8 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
464464
define <2 x i64> @shuffle_2i64_vbsrl_v_8(<2 x i64> %a) nounwind {
465465
; CHECK-LABEL: shuffle_2i64_vbsrl_v_8:
466466
; CHECK: # %bb.0:
467-
; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
467+
; CHECK-NEXT: vrepli.b $vr1, 0
468+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
468469
; CHECK-NEXT: ret
469470
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
470471
ret <2 x i64> %shuffle

0 commit comments

Comments
 (0)