Skip to content

Commit 948a847

Browse files
authored
[WebAssembly] Recognise EXTEND_HIGH (llvm#123325)
When lowering EXTEND_VECTOR_INREG, check whether the operand is a shuffle that moves the top half of a vector into the lower half. If so, we can apply EXTEND_HIGH directly to the shuffle's input instead.
1 parent 94585dc commit 948a847

File tree

3 files changed

+263
-8
lines changed

3 files changed

+263
-8
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2266,6 +2266,32 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
22662266
Op.getOperand(1));
22672267
}
22682268

2269+
// Try to replace "extend_low of a shuffle that moves the high half of a vector
// into the low half" with a single EXTEND_HIGH of the shuffle's input.
//   Op      - the value being extended; only ISD::VECTOR_SHUFFLE is handled.
//   UserOpc - the extend opcode the caller would otherwise use; asserted to be
//             WebAssemblyISD::EXTEND_LOW_U or EXTEND_LOW_S once Op is known to
//             be a shuffle.
//   VT      - result type given to the new node.
//   DAG     - used to build the replacement node.
// Returns the new EXTEND_HIGH_S / EXTEND_HIGH_U node, or an empty SDValue when
// the pattern does not match.
static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2270+
SelectionDAG &DAG) {
2271+
// Anything that is not a shuffle cannot match; report "no match".
if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2272+
return SDValue();
2273+
2274+
assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2275+
UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2276+
"expected extend_low");
2277+
auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2278+
2279+
ArrayRef<int> Mask = Shuffle->getMask();
2280+
// Look for a shuffle which moves from the high half to the low half.
2281+
// Only the first Mask.size() / 2 entries are inspected: entry i must equal
// Mask.size() / 2 + i. The remaining entries are not checked.
size_t FirstIdx = Mask.size() / 2;
2282+
for (size_t i = 0; i < Mask.size() / 2; ++i) {
2283+
if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2284+
return SDValue();
2285+
}
2286+
}
2287+
2288+
SDLoc DL(Op);
2289+
// Keep the signedness of the caller's extend: _S maps to EXTEND_HIGH_S,
// otherwise EXTEND_HIGH_U.
unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2290+
? WebAssemblyISD::EXTEND_HIGH_S
2291+
: WebAssemblyISD::EXTEND_HIGH_U;
2292+
// The extend is applied to the shuffle's first operand, bypassing the shuffle.
// NOTE(review): relies on the matched mask indices (all < Mask.size())
// selecting lanes from operand 0 — confirm against ShuffleVectorSDNode.
return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2293+
}
2294+
22692295
SDValue
22702296
WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
22712297
SelectionDAG &DAG) const {
@@ -2295,6 +2321,12 @@ WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
22952321
break;
22962322
}
22972323

2324+
if (Scale == 2) {
2325+
// See if we can use EXTEND_HIGH.
2326+
if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2327+
return ExtendHigh;
2328+
}
2329+
22982330
SDValue Ret = Src;
22992331
while (Scale != 1) {
23002332
Ret = DAG.getNode(Ext, DL,
Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: llc < %s -mtriple=wasm32 -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=SIMD128
4+
5+
target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20"
6+
target triple = "wasm32"
7+
8+
define <4 x i32> @sext_high_v4i8(<8 x i8> %in) {
9+
; SIMD128-LABEL: sext_high_v4i8:
10+
; SIMD128: .functype sext_high_v4i8 (v128) -> (v128)
11+
; SIMD128-NEXT: # %bb.0:
12+
; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
13+
; SIMD128-NEXT: i16x8.extend_low_i8x16_s $push1=, $pop0
14+
; SIMD128-NEXT: i32x4.extend_low_i16x8_s $push2=, $pop1
15+
; SIMD128-NEXT: return $pop2
16+
%shuffle = shufflevector <8 x i8> %in, <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
17+
%res = sext <4 x i8> %shuffle to <4 x i32>
18+
ret <4 x i32> %res
19+
}
20+
21+
define <4 x i32> @zext_high_v4i8(<8 x i8> %in) {
22+
; SIMD128-LABEL: zext_high_v4i8:
23+
; SIMD128: .functype zext_high_v4i8 (v128) -> (v128)
24+
; SIMD128-NEXT: # %bb.0:
25+
; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
26+
; SIMD128-NEXT: i16x8.extend_low_i8x16_u $push1=, $pop0
27+
; SIMD128-NEXT: i32x4.extend_low_i16x8_u $push2=, $pop1
28+
; SIMD128-NEXT: return $pop2
29+
%shuffle = shufflevector <8 x i8> %in, <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
30+
%res = zext <4 x i8> %shuffle to <4 x i32>
31+
ret <4 x i32> %res
32+
}
33+
34+
define <8 x i16> @sext_high_v8i8(<16 x i8> %in) {
35+
; SIMD128-LABEL: sext_high_v8i8:
36+
; SIMD128: .functype sext_high_v8i8 (v128) -> (v128)
37+
; SIMD128-NEXT: # %bb.0:
38+
; SIMD128-NEXT: i16x8.extend_high_i8x16_s $push0=, $0
39+
; SIMD128-NEXT: return $pop0
40+
%shuffle = shufflevector <16 x i8> %in, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
41+
%res = sext <8 x i8> %shuffle to <8 x i16>
42+
ret <8 x i16> %res
43+
}
44+
45+
define <8 x i16> @zext_high_v8i8(<16 x i8> %in) {
46+
; SIMD128-LABEL: zext_high_v8i8:
47+
; SIMD128: .functype zext_high_v8i8 (v128) -> (v128)
48+
; SIMD128-NEXT: # %bb.0:
49+
; SIMD128-NEXT: i16x8.extend_high_i8x16_u $push0=, $0
50+
; SIMD128-NEXT: return $pop0
51+
%shuffle = shufflevector <16 x i8> %in, <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
52+
%res = zext <8 x i8> %shuffle to <8 x i16>
53+
ret <8 x i16> %res
54+
}
55+
56+
define <2 x i32> @sext_high_v2i16(<4 x i16> %in) {
57+
; SIMD128-LABEL: sext_high_v2i16:
58+
; SIMD128: .functype sext_high_v2i16 (v128) -> (v128)
59+
; SIMD128-NEXT: # %bb.0:
60+
; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
61+
; SIMD128-NEXT: i32x4.extend_low_i16x8_s $push1=, $pop0
62+
; SIMD128-NEXT: return $pop1
63+
%shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
64+
%res = sext <2 x i16> %shuffle to <2 x i32>
65+
ret <2 x i32> %res
66+
}
67+
68+
define <2 x i32> @zext_high_v2i16(<4 x i16> %in) {
69+
; SIMD128-LABEL: zext_high_v2i16:
70+
; SIMD128: .functype zext_high_v2i16 (v128) -> (v128)
71+
; SIMD128-NEXT: # %bb.0:
72+
; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 4, 5, 6, 7, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
73+
; SIMD128-NEXT: i32x4.extend_low_i16x8_u $push1=, $pop0
74+
; SIMD128-NEXT: return $pop1
75+
%shuffle = shufflevector <4 x i16> %in, <4 x i16> poison, <2 x i32> <i32 2, i32 3>
76+
%res = zext <2 x i16> %shuffle to <2 x i32>
77+
ret <2 x i32> %res
78+
}
79+
80+
define <4 x i32> @sext_high_v4i16(<8 x i16> %in) {
81+
; SIMD128-LABEL: sext_high_v4i16:
82+
; SIMD128: .functype sext_high_v4i16 (v128) -> (v128)
83+
; SIMD128-NEXT: # %bb.0:
84+
; SIMD128-NEXT: i32x4.extend_high_i16x8_s $push0=, $0
85+
; SIMD128-NEXT: return $pop0
86+
%shuffle = shufflevector <8 x i16> %in, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
87+
%res = sext <4 x i16> %shuffle to <4 x i32>
88+
ret <4 x i32> %res
89+
}
90+
91+
define <4 x i32> @zext_high_v4i16(<8 x i16> %in) {
92+
; SIMD128-LABEL: zext_high_v4i16:
93+
; SIMD128: .functype zext_high_v4i16 (v128) -> (v128)
94+
; SIMD128-NEXT: # %bb.0:
95+
; SIMD128-NEXT: i32x4.extend_high_i16x8_u $push0=, $0
96+
; SIMD128-NEXT: return $pop0
97+
%shuffle = shufflevector <8 x i16> %in, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
98+
%res = zext <4 x i16> %shuffle to <4 x i32>
99+
ret <4 x i32> %res
100+
}
101+
102+
define <2 x i64> @sext_high_v2i32(<4 x i32> %in) {
103+
; SIMD128-LABEL: sext_high_v2i32:
104+
; SIMD128: .functype sext_high_v2i32 (v128) -> (v128)
105+
; SIMD128-NEXT: # %bb.0:
106+
; SIMD128-NEXT: i64x2.extend_high_i32x4_s $push0=, $0
107+
; SIMD128-NEXT: return $pop0
108+
%shuffle = shufflevector <4 x i32> %in, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
109+
%res = sext <2 x i32> %shuffle to <2 x i64>
110+
ret <2 x i64> %res
111+
}
112+
113+
define <2 x i64> @zext_high_v2i32(<4 x i32> %in) {
114+
; SIMD128-LABEL: zext_high_v2i32:
115+
; SIMD128: .functype zext_high_v2i32 (v128) -> (v128)
116+
; SIMD128-NEXT: # %bb.0:
117+
; SIMD128-NEXT: i64x2.extend_high_i32x4_u $push0=, $0
118+
; SIMD128-NEXT: return $pop0
119+
%shuffle = shufflevector <4 x i32> %in, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
120+
%res = zext <2 x i32> %shuffle to <2 x i64>
121+
ret <2 x i64> %res
122+
}
123+
124+
define <4 x i32> @sext_low_v4i8(<8 x i8> %in) {
125+
; SIMD128-LABEL: sext_low_v4i8:
126+
; SIMD128: .functype sext_low_v4i8 (v128) -> (v128)
127+
; SIMD128-NEXT: # %bb.0:
128+
; SIMD128-NEXT: i16x8.extend_low_i8x16_s $push0=, $0
129+
; SIMD128-NEXT: i32x4.extend_low_i16x8_s $push1=, $pop0
130+
; SIMD128-NEXT: return $pop1
131+
%shuffle = shufflevector <8 x i8> %in, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
132+
%res = sext <4 x i8> %shuffle to <4 x i32>
133+
ret <4 x i32> %res
134+
}
135+
136+
define <4 x i32> @zext_low_v4i8(<8 x i8> %in) {
137+
; SIMD128-LABEL: zext_low_v4i8:
138+
; SIMD128: .functype zext_low_v4i8 (v128) -> (v128)
139+
; SIMD128-NEXT: # %bb.0:
140+
; SIMD128-NEXT: i16x8.extend_low_i8x16_u $push0=, $0
141+
; SIMD128-NEXT: i32x4.extend_low_i16x8_u $push1=, $pop0
142+
; SIMD128-NEXT: return $pop1
143+
%shuffle = shufflevector <8 x i8> %in, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
144+
%res = zext <4 x i8> %shuffle to <4 x i32>
145+
ret <4 x i32> %res
146+
}
147+
148+
define <8 x i16> @sext_low_v8i8(<16 x i8> %in) {
149+
; SIMD128-LABEL: sext_low_v8i8:
150+
; SIMD128: .functype sext_low_v8i8 (v128) -> (v128)
151+
; SIMD128-NEXT: # %bb.0:
152+
; SIMD128-NEXT: i16x8.extend_low_i8x16_s $push0=, $0
153+
; SIMD128-NEXT: return $pop0
154+
%shuffle = shufflevector <16 x i8> %in, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
155+
%res = sext <8 x i8> %shuffle to <8 x i16>
156+
ret <8 x i16> %res
157+
}
158+
159+
define <8 x i16> @zext_low_v8i8(<16 x i8> %in) {
160+
; SIMD128-LABEL: zext_low_v8i8:
161+
; SIMD128: .functype zext_low_v8i8 (v128) -> (v128)
162+
; SIMD128-NEXT: # %bb.0:
163+
; SIMD128-NEXT: i16x8.extend_low_i8x16_u $push0=, $0
164+
; SIMD128-NEXT: return $pop0
165+
%shuffle = shufflevector <16 x i8> %in, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
166+
%res = zext <8 x i8> %shuffle to <8 x i16>
167+
ret <8 x i16> %res
168+
}
169+
170+
define <4 x i32> @sext_low_v4i16(<8 x i16> %in) {
171+
; SIMD128-LABEL: sext_low_v4i16:
172+
; SIMD128: .functype sext_low_v4i16 (v128) -> (v128)
173+
; SIMD128-NEXT: # %bb.0:
174+
; SIMD128-NEXT: i32x4.extend_low_i16x8_s $push0=, $0
175+
; SIMD128-NEXT: return $pop0
176+
%shuffle = shufflevector <8 x i16> %in, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
177+
%res = sext <4 x i16> %shuffle to <4 x i32>
178+
ret <4 x i32> %res
179+
}
180+
181+
define <4 x i32> @zext_low_v4i16(<8 x i16> %in) {
182+
; SIMD128-LABEL: zext_low_v4i16:
183+
; SIMD128: .functype zext_low_v4i16 (v128) -> (v128)
184+
; SIMD128-NEXT: # %bb.0:
185+
; SIMD128-NEXT: i32x4.extend_low_i16x8_u $push0=, $0
186+
; SIMD128-NEXT: return $pop0
187+
%shuffle = shufflevector <8 x i16> %in, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
188+
%res = zext <4 x i16> %shuffle to <4 x i32>
189+
ret <4 x i32> %res
190+
}
191+
192+
define <2 x i64> @sext_low_v2i32(<4 x i32> %in) {
193+
; SIMD128-LABEL: sext_low_v2i32:
194+
; SIMD128: .functype sext_low_v2i32 (v128) -> (v128)
195+
; SIMD128-NEXT: # %bb.0:
196+
; SIMD128-NEXT: i64x2.extend_low_i32x4_s $push0=, $0
197+
; SIMD128-NEXT: return $pop0
198+
%shuffle = shufflevector <4 x i32> %in, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
199+
%res = sext <2 x i32> %shuffle to <2 x i64>
200+
ret <2 x i64> %res
201+
}
202+
203+
define <2 x i64> @zext_low_v2i32(<4 x i32> %in) {
204+
; SIMD128-LABEL: zext_low_v2i32:
205+
; SIMD128: .functype zext_low_v2i32 (v128) -> (v128)
206+
; SIMD128-NEXT: # %bb.0:
207+
; SIMD128-NEXT: i64x2.extend_low_i32x4_u $push0=, $0
208+
; SIMD128-NEXT: return $pop0
209+
%shuffle = shufflevector <4 x i32> %in, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
210+
%res = zext <2 x i32> %shuffle to <2 x i64>
211+
ret <2 x i64> %res
212+
}
213+
214+
define <2 x i64> @multi_use_ext_v2i32(<4 x i32> %in) {
215+
; SIMD128-LABEL: multi_use_ext_v2i32:
216+
; SIMD128: .functype multi_use_ext_v2i32 (v128) -> (v128)
217+
; SIMD128-NEXT: # %bb.0:
218+
; SIMD128-NEXT: i64x2.extend_high_i32x4_u $push1=, $0
219+
; SIMD128-NEXT: i64x2.extend_high_i32x4_s $push0=, $0
220+
; SIMD128-NEXT: i64x2.add $push2=, $pop1, $pop0
221+
; SIMD128-NEXT: return $pop2
222+
%shuffle = shufflevector <4 x i32> %in, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
223+
%zext = zext <2 x i32> %shuffle to <2 x i64>
224+
%sext = sext <2 x i32> %shuffle to <2 x i64>
225+
%res = add <2 x i64> %zext, %sext
226+
ret <2 x i64> %res
227+
}

llvm/test/CodeGen/WebAssembly/int-mac-reduction-loops.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,8 @@ define hidden i64 @i64_mac_s32(ptr nocapture noundef readonly %a, ptr nocapture
151151
; MAX-BANDWIDTH: v128.load
152152
; MAX-BANDWIDTH: v128.load
153153
; MAX-BANDWIDTH: i32x4.mul
154-
; MAX-BANDWIDTH: i64x2.extend_low_i32x4_s
154+
; MAX-BANDWIDTH: i64x2.extend_high_i32x4_s
155155
; MAX-BANDWIDTH: i64x2.add
156-
; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
157156
; MAX-BANDWIDTH: i64x2.extend_low_i32x4_s
158157
; MAX-BANDWIDTH: i64x2.add
159158

@@ -272,11 +271,9 @@ define hidden i32 @i32_mac_u16_s16(ptr nocapture noundef readonly %a, ptr nocapt
272271
; CHECK: i32x4.add
273272

274273
; MAX-BANDWIDTH: v128.load
275-
; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
276-
; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
274+
; MAX-BANDWIDTH: i32x4.extend_high_i16x8_s
277275
; MAX-BANDWIDTH: v128.load
278-
; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 0, 1, 0, 1, 0, 1
279-
; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u
276+
; MAX-BANDWIDTH: i32x4.extend_high_i16x8_u
280277
; MAX-BANDWIDTH: i32x4.mul
281278
; MAX-BANDWIDTH: i32x4.extend_low_i16x8_s
282279
; MAX-BANDWIDTH: i32x4.extend_low_i16x8_u
@@ -377,9 +374,8 @@ define hidden i64 @i64_mac_u32(ptr nocapture noundef readonly %a, ptr nocapture
377374
; MAX-BANDWIDTH: v128.load
378375
; MAX-BANDWIDTH: v128.load
379376
; MAX-BANDWIDTH: i32x4.mul
380-
; MAX-BANDWIDTH: i64x2.extend_low_i32x4_u
377+
; MAX-BANDWIDTH: i64x2.extend_high_i32x4_u
381378
; MAX-BANDWIDTH: i64x2.add
382-
; MAX-BANDWIDTH: i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
383379
; MAX-BANDWIDTH: i64x2.extend_low_i32x4_u
384380
; MAX-BANDWIDTH: i64x2.add
385381

0 commit comments

Comments
 (0)