Skip to content

Commit 4acdb8e

Browse files
authored
[VectorCombine] Scalarize extracts of ZExt if profitable. (#142976)
Add a new scalarization transform that tries to convert extracts of a vector ZExt to a set of scalar shift and mask operations. This can be profitable if the cost of extracting is the same or higher than the cost of 2 scalar ops. This is the case on AArch64 for example. For AArch64, this shows up in a number of workloads, including av1aom, gmsh, minizinc and astc-encoder. PR: #142976
1 parent 3efa461 commit 4acdb8e

File tree

2 files changed

+173
-24
lines changed

2 files changed

+173
-24
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ class VectorCombine {
123123
bool foldBinopOfReductions(Instruction &I);
124124
bool foldSingleElementStore(Instruction &I);
125125
bool scalarizeLoadExtract(Instruction &I);
126+
bool scalarizeExtExtract(Instruction &I);
126127
bool foldConcatOfBoolMasks(Instruction &I);
127128
bool foldPermuteOfBinops(Instruction &I);
128129
bool foldShuffleOfBinops(Instruction &I);
@@ -1777,6 +1778,73 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
17771778
return true;
17781779
}
17791780

1781+
/// Try to convert a fixed-width vector zext that feeds only constant-index
/// extractelements into scalar shift-and-mask operations on the source vector
/// reinterpreted as one wide integer:
///
///   extractelement (zext <N x iM> %src), Idx
///     --> (bitcast(%src) >> ShiftAmt(Idx)) & ((1 << M) - 1)
///
/// The transform only applies when the whole source vector has as many bits as
/// one destination element, and is only performed if the scalar sequence is
/// not more expensive than the vector zext plus the extracts.
///
/// \param I the instruction to inspect; anything but a ZExtInst is rejected.
/// \returns true if the extracts were rewritten, false if nothing changed.
bool VectorCombine::scalarizeExtExtract(Instruction &I) {
  auto *Ext = dyn_cast<ZExtInst>(&I);
  if (!Ext)
    return false;

  auto *SrcTy = dyn_cast<FixedVectorType>(Ext->getOperand(0)->getType());
  if (!SrcTy)
    return false;
  auto *DstTy = cast<FixedVectorType>(Ext->getType());

  // The packed source must fit exactly into one scalar destination element so
  // that the shift and mask can be done directly in the destination type.
  Type *ScalarDstTy = DstTy->getElementType();
  if (DL->getTypeSizeInBits(SrcTy) != DL->getTypeSizeInBits(ScalarDstTy))
    return false;

  const uint64_t NumElts = SrcTy->getNumElements();
  // After the bitcast, lane 0 occupies the least significant bits on
  // little-endian targets and the most significant bits on big-endian ones.
  // Exactly one lane therefore needs no shift.
  const bool IsBigEndian = DL->isBigEndian();
  const uint64_t NoShiftLane = IsBigEndian ? NumElts - 1 : 0;

  InstructionCost VectorCost =
      TTI.getCastInstrCost(Instruction::ZExt, DstTy, SrcTy,
                           TTI::CastContextHint::None, CostKind, Ext);
  unsigned ExtCnt = 0;
  bool HasNoShiftLane = false;
  for (User *U : Ext->users()) {
    const APInt *Idx;
    if (!match(U, m_ExtractElt(m_Value(), m_APInt(Idx))))
      return false;
    // Dead extracts cost nothing and are not rewritten below.
    if (cast<Instruction>(U)->use_empty())
      continue;
    // An out-of-range extract is poison; do not try to model it (this also
    // guarantees the index fits in 64 bits and the shift amount is in range).
    if (Idx->uge(NumElts))
      return false;
    const uint64_t Lane = Idx->getZExtValue();
    ExtCnt += 1;
    HasNoShiftLane |= Lane == NoShiftLane;
    VectorCost += TTI.getVectorInstrCost(Instruction::ExtractElement, DstTy,
                                         CostKind, Lane, U);
  }

  // Nothing to rewrite; avoid emitting a dead freeze/bitcast and reporting a
  // change that did not happen.
  if (ExtCnt == 0)
    return false;

  // One mask per live extract, one shift per live extract except for the lane
  // that already sits in the low bits.
  InstructionCost ScalarCost =
      ExtCnt * TTI.getArithmeticInstrCost(
                   Instruction::And, ScalarDstTy, CostKind,
                   {TTI::OK_AnyValue, TTI::OP_None},
                   {TTI::OK_NonUniformConstantValue, TTI::OP_None}) +
      (ExtCnt - HasNoShiftLane) *
          TTI.getArithmeticInstrCost(
              Instruction::LShr, ScalarDstTy, CostKind,
              {TTI::OK_AnyValue, TTI::OP_None},
              {TTI::OK_NonUniformConstantValue, TTI::OP_None});
  if (ScalarCost > VectorCost)
    return false;

  // A single poison lane would poison the whole packed integer and thus every
  // extracted value, so freeze the source unless it is known not to be poison.
  Value *ScalarV = Ext->getOperand(0);
  if (!isGuaranteedNotToBePoison(ScalarV, &AC, dyn_cast<Instruction>(ScalarV),
                                 &DT))
    ScalarV = Builder.CreateFreeze(ScalarV);

  const uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy).getFixedValue();
  const uint64_t SrcEltSizeInBits =
      DL->getTypeSizeInBits(SrcTy->getElementType()).getFixedValue();
  ScalarV = Builder.CreateBitCast(
      ScalarV, IntegerType::get(SrcTy->getContext(), TotalBits));

  // Build the mask as an APInt: a 64-bit "(1 << Size) - 1" is undefined for
  // 64-bit elements and cannot represent masks for wider elements.
  const APInt EltBitMask = APInt::getLowBitsSet(TotalBits, SrcEltSizeInBits);
  for (User *U : Ext->users()) {
    auto *Extract = cast<ExtractElementInst>(U);
    if (Extract->use_empty())
      continue;
    const uint64_t Lane =
        cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
    const uint64_t ShiftAmt =
        IsBigEndian ? TotalBits - (Lane + 1) * SrcEltSizeInBits
                    : Lane * SrcEltSizeInBits;
    Value *LShr = Builder.CreateLShr(ScalarV, ShiftAmt);
    Value *And = Builder.CreateAnd(LShr, EltBitMask);
    Extract->replaceAllUsesWith(And);
  }
  return true;
}
1847+
17801848
/// Try to fold "(or (zext (bitcast X)), (shl (zext (bitcast Y)), C))"
17811849
/// to "(bitcast (concat X, Y))"
17821850
/// where X/Y are bitcasted from i1 mask vectors.
@@ -3665,6 +3733,7 @@ bool VectorCombine::run() {
36653733
if (IsVectorType) {
36663734
MadeChange |= scalarizeOpOrCmp(I);
36673735
MadeChange |= scalarizeLoadExtract(I);
3736+
MadeChange |= scalarizeExtExtract(I);
36683737
MadeChange |= scalarizeVPIntrinsic(I);
36693738
MadeChange |= foldInterleaveIntrinsics(I);
36703739
}

llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll

Lines changed: 104 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,23 @@ define void @zext_v4i8_all_lanes_used(<4 x i8> %src) {
99
; CHECK-LABEL: define void @zext_v4i8_all_lanes_used(
1010
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
1111
; CHECK-NEXT: [[ENTRY:.*:]]
12+
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
13+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
14+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
15+
; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
16+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
17+
; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP1]], 8
18+
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 255
19+
; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP1]], 255
1220
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
1321
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT9]], i64 0
1422
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT9]], i64 1
1523
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT9]], i64 2
1624
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT9]], i64 3
17-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_0]])
18-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_1]])
19-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_2]])
20-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_3]])
25+
; CHECK-NEXT: call void @use.i32(i32 [[TMP9]])
26+
; CHECK-NEXT: call void @use.i32(i32 [[TMP7]])
27+
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
28+
; CHECK-NEXT: call void @use.i32(i32 [[TMP2]])
2129
; CHECK-NEXT: ret void
2230
;
2331
entry:
@@ -68,13 +76,20 @@ define void @zext_v4i8_3_lanes_used_1(<4 x i8> %src) {
6876
; CHECK-LABEL: define void @zext_v4i8_3_lanes_used_1(
6977
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
7078
; CHECK-NEXT: [[ENTRY:.*:]]
79+
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
80+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
81+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
82+
; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
83+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
84+
; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP1]], 8
85+
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 255
7186
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
7287
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT9]], i64 1
7388
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT9]], i64 2
7489
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT9]], i64 3
75-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_1]])
76-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_2]])
77-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_3]])
90+
; CHECK-NEXT: call void @use.i32(i32 [[TMP7]])
91+
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
92+
; CHECK-NEXT: call void @use.i32(i32 [[TMP2]])
7893
; CHECK-NEXT: ret void
7994
;
8095
entry:
@@ -93,13 +108,19 @@ define void @zext_v4i8_3_lanes_used_2(<4 x i8> %src) {
93108
; CHECK-LABEL: define void @zext_v4i8_3_lanes_used_2(
94109
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
95110
; CHECK-NEXT: [[ENTRY:.*:]]
111+
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
112+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
113+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
114+
; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 8
115+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
116+
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP1]], 255
96117
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
97118
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT9]], i64 0
98119
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT9]], i64 1
99120
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT9]], i64 3
100-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_0]])
101-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_1]])
102-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_3]])
121+
; CHECK-NEXT: call void @use.i32(i32 [[TMP7]])
122+
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
123+
; CHECK-NEXT: call void @use.i32(i32 [[TMP2]])
103124
; CHECK-NEXT: ret void
104125
;
105126
entry:
@@ -118,11 +139,17 @@ define void @zext_v4i8_2_lanes_used_1(<4 x i8> %src) {
118139
; CHECK-LABEL: define void @zext_v4i8_2_lanes_used_1(
119140
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
120141
; CHECK-NEXT: [[ENTRY:.*:]]
142+
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
143+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
144+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 16
145+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255
146+
; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 8
147+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
121148
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
122149
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT9]], i64 1
123150
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT9]], i64 2
124-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_1]])
125-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_2]])
151+
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
152+
; CHECK-NEXT: call void @use.i32(i32 [[TMP3]])
126153
; CHECK-NEXT: ret void
127154
;
128155
entry:
@@ -139,11 +166,16 @@ define void @zext_v4i8_2_lanes_used_2(<4 x i8> %src) {
139166
; CHECK-LABEL: define void @zext_v4i8_2_lanes_used_2(
140167
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
141168
; CHECK-NEXT: [[ENTRY:.*:]]
169+
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
170+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
171+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 16
172+
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255
173+
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP1]], 255
142174
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
143175
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT9]], i64 0
144176
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT9]], i64 2
145-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_0]])
146-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_2]])
177+
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
178+
; CHECK-NEXT: call void @use.i32(i32 [[TMP3]])
147179
; CHECK-NEXT: ret void
148180
;
149181
entry:
@@ -160,15 +192,22 @@ define void @zext_v4i8_all_lanes_used_noundef(<4 x i8> noundef %src) {
160192
; CHECK-LABEL: define void @zext_v4i8_all_lanes_used_noundef(
161193
; CHECK-SAME: <4 x i8> noundef [[SRC:%.*]]) {
162194
; CHECK-NEXT: [[ENTRY:.*:]]
195+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[SRC]] to i32
196+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
197+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP0]], 16
198+
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
199+
; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP0]], 8
200+
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
201+
; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP0]], 255
163202
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
164203
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT9]], i64 0
165204
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT9]], i64 1
166205
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT9]], i64 2
167206
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT9]], i64 3
168-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_0]])
169-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_1]])
170-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_2]])
171-
; CHECK-NEXT: call void @use.i32(i32 [[EXT_3]])
207+
; CHECK-NEXT: call void @use.i32(i32 [[TMP8]])
208+
; CHECK-NEXT: call void @use.i32(i32 [[TMP6]])
209+
; CHECK-NEXT: call void @use.i32(i32 [[TMP4]])
210+
; CHECK-NEXT: call void @use.i32(i32 [[TMP1]])
172211
; CHECK-NEXT: ret void
173212
;
174213
entry:
@@ -221,15 +260,23 @@ define void @zext_v4i16_all_lanes_used(<4 x i16> %src) {
221260
; CHECK-LABEL: define void @zext_v4i16_all_lanes_used(
222261
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) {
223262
; CHECK-NEXT: [[ENTRY:.*:]]
263+
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i16> [[SRC]]
264+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
265+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 48
266+
; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP1]], 32
267+
; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65535
268+
; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP1]], 16
269+
; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 65535
270+
; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP1]], 65535
224271
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i16> [[SRC]] to <4 x i64>
225272
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i64> [[EXT9]], i64 0
226273
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i64> [[EXT9]], i64 1
227274
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i64> [[EXT9]], i64 2
228275
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i64> [[EXT9]], i64 3
229-
; CHECK-NEXT: call void @use.i64(i64 [[EXT_0]])
230-
; CHECK-NEXT: call void @use.i64(i64 [[EXT_1]])
231-
; CHECK-NEXT: call void @use.i64(i64 [[EXT_2]])
232-
; CHECK-NEXT: call void @use.i64(i64 [[EXT_3]])
276+
; CHECK-NEXT: call void @use.i64(i64 [[TMP9]])
277+
; CHECK-NEXT: call void @use.i64(i64 [[TMP7]])
278+
; CHECK-NEXT: call void @use.i64(i64 [[TMP5]])
279+
; CHECK-NEXT: call void @use.i64(i64 [[TMP2]])
233280
; CHECK-NEXT: ret void
234281
;
235282
entry:
@@ -250,11 +297,15 @@ define void @zext_v2i32_all_lanes_used(<2 x i32> %src) {
250297
; CHECK-LABEL: define void @zext_v2i32_all_lanes_used(
251298
; CHECK-SAME: <2 x i32> [[SRC:%.*]]) {
252299
; CHECK-NEXT: [[ENTRY:.*:]]
300+
; CHECK-NEXT: [[TMP0:%.*]] = freeze <2 x i32> [[SRC]]
301+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
302+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 32
303+
; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP1]], 4294967295
253304
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <2 x i32> [[SRC]] to <2 x i64>
254305
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <2 x i64> [[EXT9]], i64 0
255306
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <2 x i64> [[EXT9]], i64 1
256-
; CHECK-NEXT: call void @use.i64(i64 [[EXT_0]])
257-
; CHECK-NEXT: call void @use.i64(i64 [[EXT_1]])
307+
; CHECK-NEXT: call void @use.i64(i64 [[TMP5]])
308+
; CHECK-NEXT: call void @use.i64(i64 [[TMP2]])
258309
; CHECK-NEXT: ret void
259310
;
260311
entry:
@@ -266,3 +317,32 @@ entry:
266317
call void @use.i64(i64 %ext.1)
267318
ret void
268319
}
320+
321+
; Negative test: the zext source is a scalable vector. The CHECK lines below
; expect the zext and all four extractelements to be left unchanged.
define void @zext_nxv4i8_all_lanes_used(<vscale x 4 x i8> %src) {
322+
; CHECK-LABEL: define void @zext_nxv4i8_all_lanes_used(
323+
; CHECK-SAME: <vscale x 4 x i8> [[SRC:%.*]]) {
324+
; CHECK-NEXT: [[ENTRY:.*:]]
325+
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <vscale x 4 x i8> [[SRC]] to <vscale x 4 x i32>
326+
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <vscale x 4 x i32> [[EXT9]], i64 0
327+
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <vscale x 4 x i32> [[EXT9]], i64 1
328+
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <vscale x 4 x i32> [[EXT9]], i64 2
329+
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <vscale x 4 x i32> [[EXT9]], i64 3
330+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_0]])
331+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_1]])
332+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_2]])
333+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_3]])
334+
; CHECK-NEXT: ret void
335+
;
336+
entry:
337+
%ext9 = zext nneg <vscale x 4 x i8> %src to <vscale x 4 x i32>
338+
%ext.0 = extractelement <vscale x 4 x i32> %ext9, i64 0
339+
%ext.1 = extractelement <vscale x 4 x i32> %ext9, i64 1
340+
%ext.2 = extractelement <vscale x 4 x i32> %ext9, i64 2
341+
%ext.3 = extractelement <vscale x 4 x i32> %ext9, i64 3
342+
343+
call void @use.i32(i32 %ext.0)
344+
call void @use.i32(i32 %ext.1)
345+
call void @use.i32(i32 %ext.2)
346+
call void @use.i32(i32 %ext.3)
347+
ret void
348+
}

0 commit comments

Comments
 (0)