Skip to content

Commit cfb057a

Browse files
authored
[LoongArch] Lower [x]vshuf.d to [x]vshuf4i.d if possible. (#137918)
1 parent c0f5b0a commit cfb057a

File tree

9 files changed

+98
-54
lines changed

9 files changed

+98
-54
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 30 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1026,46 +1026,54 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
10261026
MVT VT, SDValue V1, SDValue V2,
10271027
SelectionDAG &DAG) {
10281028

1029-
// When the size is less than 4, lower cost instructions may be used.
1030-
if (Mask.size() < 4)
1031-
return SDValue();
1029+
unsigned SubVecSize = 4;
1030+
if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
1031+
VT == MVT::v4i64) {
1032+
SubVecSize = 2;
1033+
}
10321034

10331035
int SubMask[4] = {-1, -1, -1, -1};
1034-
for (unsigned i = 0; i < 4; ++i) {
1035-
for (unsigned j = i; j < Mask.size(); j += 4) {
1036-
int Idx = Mask[j];
1036+
for (unsigned i = 0; i < SubVecSize; ++i) {
1037+
for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1038+
int M = Mask[j];
10371039

10381040
// Convert from vector index to 4-element subvector index
10391041
// If an index refers to an element outside of the subvector then give up
1040-
if (Idx != -1) {
1041-
Idx -= 4 * (j / 4);
1042-
if (Idx < 0 || Idx >= 4)
1042+
if (M != -1) {
1043+
M -= 4 * (j / SubVecSize);
1044+
if (M < 0 || M >= 4)
10431045
return SDValue();
10441046
}
10451047

10461048
// If the mask has an undef, replace it with the current index.
10471049
// Note that it might still be undef if the current index is also undef
10481050
if (SubMask[i] == -1)
1049-
SubMask[i] = Idx;
1051+
SubMask[i] = M;
10501052
// Check that non-undef values are the same as in the mask. If they
10511053
// aren't then give up
1052-
else if (Idx != -1 && Idx != SubMask[i])
1054+
else if (M != -1 && M != SubMask[i])
10531055
return SDValue();
10541056
}
10551057
}
10561058

10571059
// Calculate the immediate. Replace any remaining undefs with zero
10581060
APInt Imm(64, 0);
1059-
for (int i = 3; i >= 0; --i) {
1060-
int Idx = SubMask[i];
1061+
for (int i = SubVecSize - 1; i >= 0; --i) {
1062+
int M = SubMask[i];
10611063

1062-
if (Idx == -1)
1063-
Idx = 0;
1064+
if (M == -1)
1065+
M = 0;
10641066

10651067
Imm <<= 2;
1066-
Imm |= Idx & 0x3;
1068+
Imm |= M & 0x3;
10671069
}
10681070

1071+
// Return vshuf4i.d and xvshuf4i.d
1072+
if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
1073+
VT == MVT::v4i64)
1074+
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1075+
DAG.getConstant(Imm, DL, MVT::i64));
1076+
10691077
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
10701078
DAG.getConstant(Imm, DL, MVT::i64));
10711079
}
@@ -1389,6 +1397,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
13891397
return Result;
13901398
if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
13911399
return Result;
1400+
if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1401+
(Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
1402+
return Result;
13921403
if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
13931404
Zeroable)))
13941405
return Result;
@@ -1447,10 +1458,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
14471458
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
14481459
MVT VT, SDValue V1, SDValue V2,
14491460
SelectionDAG &DAG) {
1450-
// When the size is less than or equal to 4, lower cost instructions may be
1451-
// used.
1452-
if (Mask.size() <= 4)
1453-
return SDValue();
14541461
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
14551462
}
14561463

@@ -1832,6 +1839,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
18321839
return Result;
18331840
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
18341841
return Result;
1842+
if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
1843+
(Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1844+
return Result;
18351845
if ((Result =
18361846
lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG, Zeroable)))
18371847
return Result;

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,6 +1729,10 @@ def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8),
17291729
(XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>;
17301730
def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8),
17311731
(XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>;
1732+
def : Pat<(loongarch_vshuf4i_d v4i64:$xj, v4i64:$xk, immZExt8:$ui8),
1733+
(XVSHUF4I_D v4i64:$xj, v4i64:$xk, immZExt8:$ui8)>;
1734+
def : Pat<(loongarch_vshuf4i_d v4f64:$xj, v4f64:$xk, immZExt8:$ui8),
1735+
(XVSHUF4I_D v4f64:$xj, v4f64:$xk, immZExt8:$ui8)>;
17321736

17331737
// XVREPL128VEI_{B/H/W/D}
17341738
def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4),

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
2323
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
2424
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
2525
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
26+
def SDT_LoongArchVShuf4i_D
27+
: SDTypeProfile<1, 3,
28+
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
29+
SDTCisVT<3, i64>]>;
2630
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
2731
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
2832
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -53,6 +57,8 @@ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
5357
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
5458

5559
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
60+
def loongarch_vshuf4i_d
61+
: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchVShuf4i_D>;
5662
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
5763
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
5864

@@ -1914,6 +1920,10 @@ def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
19141920
(VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>;
19151921
def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
19161922
(VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>;
1923+
def : Pat<(loongarch_vshuf4i_d v2i64:$vj, v2i64:$vk, immZExt8:$ui8),
1924+
(VSHUF4I_D v2i64:$vj, v2i64:$vk, immZExt8:$ui8)>;
1925+
def : Pat<(loongarch_vshuf4i_d v2f64:$vj, v2f64:$vk, immZExt8:$ui8),
1926+
(VSHUF4I_D v2f64:$vj, v2f64:$vk, immZExt8:$ui8)>;
19171927

19181928
// VREPLVEI_{B/H/W/D}
19191929
def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,23 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
4141
%c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
4242
ret <8 x float> %c
4343
}
44+
45+
;; xvshuf4i.d
46+
define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
47+
; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
50+
; CHECK-NEXT: ret
51+
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
52+
ret <4 x i64> %c
53+
}
54+
55+
;; xvshuf4i.d
56+
define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
57+
; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
58+
; CHECK: # %bb.0:
59+
; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
60+
; CHECK-NEXT: ret
61+
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
62+
ret <4 x double> %c
63+
}

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,7 @@ define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
4242
define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
4343
; CHECK-LABEL: shufflevector_v2i64:
4444
; CHECK: # %bb.0:
45-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
46-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI3_0)
47-
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
48-
; CHECK-NEXT: vori.b $vr0, $vr2, 0
45+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
4946
; CHECK-NEXT: ret
5047
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
5148
ret <2 x i64> %c
@@ -68,10 +65,7 @@ define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
6865
define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
6966
; CHECK-LABEL: shufflevector_v2f64:
7067
; CHECK: # %bb.0:
71-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
72-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0)
73-
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
74-
; CHECK-NEXT: vori.b $vr0, $vr2, 0
68+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
7569
; CHECK-NEXT: ret
7670
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
7771
ret <2 x double> %c

llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
33

4-
;; vilvh.b
4+
;; vshuf4i.b
55
define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
66
; CHECK-LABEL: shufflevector_vshuf4i_v16i8:
77
; CHECK: # %bb.0:
@@ -11,7 +11,7 @@ define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
1111
ret <16 x i8> %c
1212
}
1313

14-
;; vilvh.h
14+
;; vshuf4i.h
1515
define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
1616
; CHECK-LABEL: shufflevector_vshuf4i_v8i4:
1717
; CHECK: # %bb.0:
@@ -21,7 +21,7 @@ define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
2121
ret <8 x i16> %c
2222
}
2323

24-
;; vilvh.w
24+
;; vshuf4i.w
2525
define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
2626
; CHECK-LABEL: shufflevector_vshuf4i_v4i32:
2727
; CHECK: # %bb.0:
@@ -31,7 +31,7 @@ define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
3131
ret <4 x i32> %c
3232
}
3333

34-
;; vilvh.w
34+
;; vshuf4i.w
3535
define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) {
3636
; CHECK-LABEL: shufflevector_vshuf4i_v4f32:
3737
; CHECK: # %bb.0:
@@ -40,3 +40,23 @@ define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b)
4040
%c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
4141
ret <4 x float> %c
4242
}
43+
44+
;; vshuf4i.d
45+
define <2 x i64> @shufflevector_vshuf4i_v2d64(<2 x i64> %a, <2 x i64> %b) {
46+
; CHECK-LABEL: shufflevector_vshuf4i_v2d64:
47+
; CHECK: # %bb.0:
48+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
49+
; CHECK-NEXT: ret
50+
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
51+
ret <2 x i64> %c
52+
}
53+
54+
;; vshuf4i.d
55+
define <2 x double> @shufflevector_vshuf4i_v2f64(<2 x double> %a, <2 x double> %b) {
56+
; CHECK-LABEL: shufflevector_vshuf4i_v2f64:
57+
; CHECK: # %bb.0:
58+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
59+
; CHECK-NEXT: ret
60+
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
61+
ret <2 x double> %c
62+
}

llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,7 @@ define <4 x i32> @byte_rotate_v4i32_3(<4 x i32> %a) nounwind {
103103
define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
104104
; CHECK-LABEL: byte_rotate_v2i64_1:
105105
; CHECK: # %bb.0:
106-
; CHECK-NEXT: vbsrl.v $vr1, $vr1, 8
107-
; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
108-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
106+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 3
109107
; CHECK-NEXT: ret
110108
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
111109
ret <2 x i64> %shuffle
@@ -114,9 +112,7 @@ define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
114112
define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
115113
; CHECK-LABEL: byte_rotate_v2i64_2:
116114
; CHECK: # %bb.0:
117-
; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
118-
; CHECK-NEXT: vbsll.v $vr1, $vr1, 8
119-
; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
115+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
120116
; CHECK-NEXT: ret
121117
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
122118
ret <2 x i64> %shuffle
@@ -125,9 +121,7 @@ define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
125121
define <2 x i64> @byte_rotate_v2i64_3(<2 x i64> %a) nounwind {
126122
; CHECK-LABEL: byte_rotate_v2i64_3:
127123
; CHECK: # %bb.0:
128-
; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
129-
; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
130-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
124+
; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1
131125
; CHECK-NEXT: ret
132126
%shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
133127
ret <2 x i64> %shuffle

llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,8 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
464464
define <2 x i64> @shuffle_2i64_vbsrl_v_8(<2 x i64> %a) nounwind {
465465
; CHECK-LABEL: shuffle_2i64_vbsrl_v_8:
466466
; CHECK: # %bb.0:
467-
; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
467+
; CHECK-NEXT: vrepli.b $vr1, 0
468+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
468469
; CHECK-NEXT: ret
469470
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
470471
ret <2 x i64> %shuffle

llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,7 @@ define <16 x i8> @widen_shuffle_mask_v16i8_to_v4i32(<16 x i8> %a, <16 x i8> %b)
2828
define <16 x i8> @widen_shuffle_mask_v16i8_to_v2i64(<16 x i8> %a, <16 x i8> %b) {
2929
; CHECK-LABEL: widen_shuffle_mask_v16i8_to_v2i64:
3030
; CHECK: # %bb.0:
31-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
32-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI2_0)
33-
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
34-
; CHECK-NEXT: vori.b $vr0, $vr2, 0
31+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
3532
; CHECK-NEXT: ret
3633
%r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
3734
ret <16 x i8> %r
@@ -52,10 +49,7 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v4i32(<8 x i16> %a, <8 x i16> %b)
5249
define <8 x i16> @widen_shuffle_mask_v8i16_to_v2i64(<8 x i16> %a, <8 x i16> %b) {
5350
; CHECK-LABEL: widen_shuffle_mask_v8i16_to_v2i64:
5451
; CHECK: # %bb.0:
55-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
56-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI4_0)
57-
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
58-
; CHECK-NEXT: vori.b $vr0, $vr2, 0
52+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
5953
; CHECK-NEXT: ret
6054
%r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
6155
ret <8 x i16> %r
@@ -64,10 +58,7 @@ define <8 x i16> @widen_shuffle_mask_v8i16_to_v2i64(<8 x i16> %a, <8 x i16> %b)
6458
define <4 x i32> @widen_shuffle_mask_v4i32_to_v2i64(<4 x i32> %a, <4 x i32> %b) {
6559
; CHECK-LABEL: widen_shuffle_mask_v4i32_to_v2i64:
6660
; CHECK: # %bb.0:
67-
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
68-
; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0)
69-
; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
70-
; CHECK-NEXT: vori.b $vr0, $vr2, 0
61+
; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
7162
; CHECK-NEXT: ret
7263
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
7364
ret <4 x i32> %r

0 commit comments

Comments
 (0)