Skip to content

Commit e894c3d

Browse files
committed
[SLP]Improve stores vectorization.
Use an O(N log N) instead of an O(N^2) (N <= 32) sorting approach and do not try to revectorize all possible combinations of stores if they definitely cannot be combined because of mem/data dependencies.

Compile time (O3 + lto, skylake_avx512):
External/SPEC/CINT2006/483.xalancbmk/483.xalancbmk.test 117.15 120.11 2.5%
External/SPEC/CINT2017speed/623.xalancbmk_s/623.xalancbmk_s.test 203.67 207.42 1.8%
External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 232.43 235.01 1.1%
External/SPEC/CINT2017rate/523.xalancbmk_r/523.xalancbmk_r.test 205.49 207.25 0.9%
External/SPEC/CFP2017rate/510.parest_r/510.parest_r.test 310.46 306.23 -1.4%

Link time (O3+lto, skylake_avx512):
External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 1383.69 1475.94 6.7%

Other changes are too small, cannot rely on them.

size..text
Program size..text results results0 diff
test-suite :: SingleSource/Regression/C/Regression-C-sumarray.test 392.00 1439.00 267.1%
test-suite :: MultiSource/Applications/JM/ldecod/ldecod.test 394258.00 394818.00 0.1%
test-suite :: MultiSource/Applications/JM/lencod/lencod.test 846355.00 847075.00 0.1%
test-suite :: External/SPEC/CINT2006/464.h264ref/464.h264ref.test 782816.00 783360.00 0.1%
test-suite :: External/SPEC/CFP2017rate/508.namd_r/508.namd_r.test 779667.00 779923.00 0.0%
test-suite :: MultiSource/Benchmarks/mafft/pairlocalalign.test 224398.00 224446.00 0.0%
test-suite :: MultiSource/Applications/oggenc/oggenc.test 185019.00 185035.00 0.0%
test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test 12487610.00 12488010.00 0.0%
test-suite :: MultiSource/Benchmarks/7zip/7zip-benchmark.test 1051772.00 1051804.00 0.0%
test-suite :: MultiSource/Applications/SPASS/SPASS.test 529586.00 529602.00 0.0%
test-suite :: External/SPEC/CINT2006/400.perlbench/400.perlbench.test 1084684.00 1084716.00 0.0%
test-suite :: MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4.test 1014245.00 1014261.00 0.0%
test-suite :: MultiSource/Benchmarks/MallocBench/espresso/espresso.test 223494.00 223478.00 -0.0%
test-suite :: External/SPEC/CINT2017speed/625.x264_s/625.x264_s.test 660843.00 660795.00 -0.0%
test-suite :: External/SPEC/CINT2017rate/525.x264_r/525.x264_r.test 660843.00 660795.00 -0.0%
test-suite :: MultiSource/Applications/ClamAV/clamscan.test 568824.00 568760.00 -0.0%

espresso - 2 more stores vectorized.
x264 - small number of changes in 3-4 functions, generated a bit more vector stores (2 4x zeroinitializer stores + some other small variations).
clamscan - emitted 32xi8 store instead of several scalar stores + several 4x-8x stores.

Differential Revision: https://reviews.llvm.org/D155246
1 parent f1fc29b commit e894c3d

File tree

3 files changed

+204
-134
lines changed

3 files changed

+204
-134
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 202 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,6 @@ static cl::opt<unsigned>
140140
MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
141141
cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
142142

143-
static cl::opt<int>
144-
MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden,
145-
cl::desc("Maximum depth of the lookup for consecutive stores."));
146-
147143
/// Limits the size of scheduling regions in a block.
148144
/// It avoids long compile times for _very_ large blocks where vector
149145
/// instructions are spread over a wide range.
@@ -12439,139 +12435,206 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
1243912435
BoUpSLP::ValueSet VectorizedStores;
1244012436
bool Changed = false;
1244112437

12442-
int E = Stores.size();
12443-
SmallBitVector Tails(E, false);
12444-
int MaxIter = MaxStoreLookup.getValue();
12445-
SmallVector<std::pair<int, int>, 16> ConsecutiveChain(
12446-
E, std::make_pair(E, INT_MAX));
12447-
SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false));
12448-
int IterCnt;
12449-
auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,
12450-
&CheckedPairs,
12451-
&ConsecutiveChain](int K, int Idx) {
12452-
if (IterCnt >= MaxIter)
12453-
return true;
12454-
if (CheckedPairs[Idx].test(K))
12455-
return ConsecutiveChain[K].second == 1 &&
12456-
ConsecutiveChain[K].first == Idx;
12457-
++IterCnt;
12458-
CheckedPairs[Idx].set(K);
12459-
CheckedPairs[K].set(Idx);
12460-
std::optional<int> Diff = getPointersDiff(
12461-
Stores[K]->getValueOperand()->getType(), Stores[K]->getPointerOperand(),
12462-
Stores[Idx]->getValueOperand()->getType(),
12463-
Stores[Idx]->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true);
12464-
if (!Diff || *Diff == 0)
12465-
return false;
12466-
int Val = *Diff;
12467-
if (Val < 0) {
12468-
if (ConsecutiveChain[Idx].second > -Val) {
12469-
Tails.set(K);
12470-
ConsecutiveChain[Idx] = std::make_pair(K, -Val);
12471-
}
12472-
return false;
12438+
// Stores the pair of stores (first_store, last_store) in a range, that were
12439+
// already tried to be vectorized. Allows skipping the store ranges that were
12440+
// already tried to be vectorized but the attempts were unsuccessful.
12441+
DenseSet<std::pair<Value *, Value *>> TriedSequences;
12442+
struct StoreDistCompare {
12443+
bool operator()(const std::pair<unsigned, int> &Op1,
12444+
const std::pair<unsigned, int> &Op2) const {
12445+
return Op1.second < Op2.second;
1247312446
}
12474-
if (ConsecutiveChain[K].second <= Val)
12475-
return false;
12476-
12477-
Tails.set(Idx);
12478-
ConsecutiveChain[K] = std::make_pair(Idx, Val);
12479-
return Val == 1;
1248012447
};
12481-
// Do a quadratic search on all of the given stores in reverse order and find
12482-
// all of the pairs of stores that follow each other.
12483-
for (int Idx = E - 1; Idx >= 0; --Idx) {
12484-
// If a store has multiple consecutive store candidates, search according
12485-
// to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
12486-
// This is because usually pairing with immediate succeeding or preceding
12487-
// candidate create the best chance to find slp vectorization opportunity.
12488-
const int MaxLookDepth = std::max(E - Idx, Idx + 1);
12489-
IterCnt = 0;
12490-
for (int Offset = 1, F = MaxLookDepth; Offset < F; ++Offset)
12491-
if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) ||
12492-
(Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx)))
12493-
break;
12494-
}
12495-
12496-
// Tracks if we tried to vectorize stores starting from the given tail
12497-
// already.
12498-
SmallBitVector TriedTails(E, false);
12499-
// For stores that start but don't end a link in the chain:
12500-
for (int Cnt = E; Cnt > 0; --Cnt) {
12501-
int I = Cnt - 1;
12502-
if (ConsecutiveChain[I].first == E || Tails.test(I))
12503-
continue;
12504-
// We found a store instr that starts a chain. Now follow the chain and try
12505-
// to vectorize it.
12448+
// A set of pairs (index of store in Stores array ref, Distance of the store
12449+
// address relative to base store address in units).
12450+
using StoreIndexToDistSet =
12451+
std::set<std::pair<unsigned, int>, StoreDistCompare>;
12452+
auto TryToVectorize = [&](const StoreIndexToDistSet &Set) {
12453+
int PrevDist = -1;
1250612454
BoUpSLP::ValueList Operands;
1250712455
// Collect the chain into a list.
12508-
while (I != E && !VectorizedStores.count(Stores[I])) {
12509-
Operands.push_back(Stores[I]);
12510-
Tails.set(I);
12511-
if (ConsecutiveChain[I].second != 1) {
12512-
// Mark the new end in the chain and go back, if required. It might be
12513-
// required if the original stores come in reversed order, for example.
12514-
if (ConsecutiveChain[I].first != E &&
12515-
Tails.test(ConsecutiveChain[I].first) && !TriedTails.test(I) &&
12516-
!VectorizedStores.count(Stores[ConsecutiveChain[I].first])) {
12517-
TriedTails.set(I);
12518-
Tails.reset(ConsecutiveChain[I].first);
12519-
if (Cnt < ConsecutiveChain[I].first + 2)
12520-
Cnt = ConsecutiveChain[I].first + 2;
12456+
for (auto [Idx, Data] : enumerate(Set)) {
12457+
if (Operands.empty() || Data.second - PrevDist == 1) {
12458+
Operands.push_back(Stores[Data.first]);
12459+
PrevDist = Data.second;
12460+
if (Idx != Set.size() - 1)
12461+
continue;
12462+
}
12463+
if (Operands.size() <= 1) {
12464+
Operands.clear();
12465+
Operands.push_back(Stores[Data.first]);
12466+
PrevDist = Data.second;
12467+
continue;
12468+
}
12469+
12470+
unsigned MaxVecRegSize = R.getMaxVecRegSize();
12471+
unsigned EltSize = R.getVectorElementSize(Operands[0]);
12472+
unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
12473+
12474+
unsigned MaxVF =
12475+
std::min(R.getMaximumVF(EltSize, Instruction::Store), MaxElts);
12476+
auto *Store = cast<StoreInst>(Operands[0]);
12477+
Type *StoreTy = Store->getValueOperand()->getType();
12478+
Type *ValueTy = StoreTy;
12479+
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
12480+
ValueTy = Trunc->getSrcTy();
12481+
unsigned MinVF = TTI->getStoreMinimumVF(
12482+
R.getMinVF(DL->getTypeSizeInBits(ValueTy)), StoreTy, ValueTy);
12483+
12484+
if (MaxVF <= MinVF) {
12485+
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
12486+
<< ") <= "
12487+
<< "MinVF (" << MinVF << ")\n");
12488+
}
12489+
12490+
// FIXME: Is division-by-2 the correct step? Should we assert that the
12491+
// register size is a power-of-2?
12492+
unsigned StartIdx = 0;
12493+
for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
12494+
for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
12495+
ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
12496+
assert(
12497+
all_of(
12498+
Slice,
12499+
[&](Value *V) {
12500+
return cast<StoreInst>(V)->getValueOperand()->getType() ==
12501+
cast<StoreInst>(Slice.front())
12502+
->getValueOperand()
12503+
->getType();
12504+
}) &&
12505+
"Expected all operands of same type.");
12506+
if (!VectorizedStores.count(Slice.front()) &&
12507+
!VectorizedStores.count(Slice.back()) &&
12508+
TriedSequences.insert(std::make_pair(Slice.front(), Slice.back()))
12509+
.second &&
12510+
vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
12511+
// Mark the vectorized stores so that we don't vectorize them again.
12512+
VectorizedStores.insert(Slice.begin(), Slice.end());
12513+
Changed = true;
12514+
// If we vectorized initial block, no need to try to vectorize it
12515+
// again.
12516+
if (Cnt == StartIdx)
12517+
StartIdx += Size;
12518+
Cnt += Size;
12519+
continue;
12520+
}
12521+
++Cnt;
1252112522
}
12522-
break;
12523+
// Check if the whole array was vectorized already - exit.
12524+
if (StartIdx >= Operands.size())
12525+
break;
1252312526
}
12524-
// Move to the next value in the chain.
12525-
I = ConsecutiveChain[I].first;
12527+
Operands.clear();
12528+
Operands.push_back(Stores[Data.first]);
12529+
PrevDist = Data.second;
1252612530
}
12527-
assert(!Operands.empty() && "Expected non-empty list of stores.");
12531+
};
1252812532

12529-
unsigned MaxVecRegSize = R.getMaxVecRegSize();
12530-
unsigned EltSize = R.getVectorElementSize(Operands[0]);
12531-
unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
12532-
12533-
unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
12534-
MaxElts);
12535-
auto *Store = cast<StoreInst>(Operands[0]);
12536-
Type *StoreTy = Store->getValueOperand()->getType();
12537-
Type *ValueTy = StoreTy;
12538-
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
12539-
ValueTy = Trunc->getSrcTy();
12540-
unsigned MinVF = TTI->getStoreMinimumVF(
12541-
R.getMinVF(DL->getTypeSizeInBits(ValueTy)), StoreTy, ValueTy);
12542-
12543-
if (MaxVF <= MinVF) {
12544-
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF << ") <= "
12545-
<< "MinVF (" << MinVF << ")\n");
12546-
}
12547-
12548-
// FIXME: Is division-by-2 the correct step? Should we assert that the
12549-
// register size is a power-of-2?
12550-
unsigned StartIdx = 0;
12551-
for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
12552-
for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
12553-
ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
12554-
if (!VectorizedStores.count(Slice.front()) &&
12555-
!VectorizedStores.count(Slice.back()) &&
12556-
vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
12557-
// Mark the vectorized stores so that we don't vectorize them again.
12558-
VectorizedStores.insert(Slice.begin(), Slice.end());
12559-
Changed = true;
12560-
// If we vectorized initial block, no need to try to vectorize it
12561-
// again.
12562-
if (Cnt == StartIdx)
12563-
StartIdx += Size;
12564-
Cnt += Size;
12565-
continue;
12566-
}
12567-
++Cnt;
12533+
// Stores pair (first: index of the store into Stores array ref, address of
12534+
// which taken as base, second: sorted set of pairs {index, dist}, which are
12535+
// indices of stores in the set and their store location distances relative to
12536+
// the base address).
12537+
12538+
// Need to store the index of the very first store separately, since the set
12539+
// may be reordered after the insertion and the first store may be moved. This
12540+
// container allows reducing the number of calls to getPointersDiff().
12541+
SmallVector<std::pair<unsigned, StoreIndexToDistSet>> SortedStores;
12542+
// Inserts the specified store SI with the given index Idx to the set of the
12543+
// stores. If the store with the same distance is found already - stop
12544+
// insertion, try to vectorize already found stores. If some stores from this
12545+
// sequence were not vectorized - try to vectorize them with the new store
12546+
// later. But this logic is applied only to the stores, that come before the
12547+
// previous store with the same distance.
12548+
// Example:
12549+
// 1. store x, %p
12550+
// 2. store y, %p+1
12551+
// 3. store z, %p+2
12552+
// 4. store a, %p
12553+
// 5. store b, %p+3
12554+
// - Scan this from the last to first store. The very first bunch of stores is
12555+
// {5, {{4, -3}, {2, -2}, {3, -1}, {5, 0}}} (the element in SortedStores
12556+
// vector).
12557+
// - The next store in the list - #1 - has the same distance from store #5 as
12558+
// the store #4.
12559+
// - Try to vectorize sequence of stores 4,2,3,5.
12560+
// - If all these stores are vectorized - just drop them.
12561+
// - If some of them are not vectorized (say, #3 and #5), do extra analysis.
12562+
// - Start new stores sequence.
12563+
// The new bunch of stores is {1, {1, 0}}.
12564+
// - Add the stores from previous sequence, that were not vectorized.
12565+
// Here we consider the stores in reversed order, rather than the order they are used in
12566+
// the IR (Stores are reversed already, see vectorizeStoreChains() function).
12567+
// Store #3 can be added -> comes after store #4 with the same distance as
12568+
// store #1.
12569+
// Store #5 cannot be added - comes before store #4.
12570+
// This logic improves the compile time: we assume that the stores
12571+
// after previous store with the same distance most likely have memory
12572+
// dependencies and no need to waste compile time to try to vectorize them.
12573+
// - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
12574+
auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
12575+
for (std::pair<unsigned, StoreIndexToDistSet> &Set : SortedStores) {
12576+
std::optional<int> Diff = getPointersDiff(
12577+
Stores[Set.first]->getValueOperand()->getType(),
12578+
Stores[Set.first]->getPointerOperand(),
12579+
SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
12580+
/*StrictCheck=*/true);
12581+
if (!Diff)
12582+
continue;
12583+
auto It = Set.second.find(std::make_pair(Idx, *Diff));
12584+
if (It == Set.second.end()) {
12585+
Set.second.emplace(Idx, *Diff);
12586+
return;
1256812587
}
12569-
// Check if the whole array was vectorized already - exit.
12570-
if (StartIdx >= Operands.size())
12571-
break;
12588+
// Try to vectorize the first found set to avoid duplicate analysis.
12589+
TryToVectorize(Set.second);
12590+
StoreIndexToDistSet PrevSet;
12591+
PrevSet.swap(Set.second);
12592+
Set.first = Idx;
12593+
Set.second.emplace(Idx, 0);
12594+
// Insert stores that followed previous match to try to vectorize them
12595+
// with this store.
12596+
unsigned StartIdx = It->first + 1;
12597+
SmallBitVector UsedStores(Idx - StartIdx);
12598+
// Distances to previously found dup store (or this store, since they
12599+
// store to the same addresses).
12600+
SmallVector<int> Dists(Idx - StartIdx, 0);
12601+
for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) {
12602+
// Do not try to vectorize sequences, we already tried.
12603+
if (Pair.first <= It->first ||
12604+
VectorizedStores.contains(Stores[Pair.first]))
12605+
break;
12606+
unsigned BI = Pair.first - StartIdx;
12607+
UsedStores.set(BI);
12608+
Dists[BI] = Pair.second - It->second;
12609+
}
12610+
for (unsigned I = StartIdx; I < Idx; ++I) {
12611+
unsigned BI = I - StartIdx;
12612+
if (UsedStores.test(BI))
12613+
Set.second.emplace(I, Dists[BI]);
12614+
}
12615+
return;
1257212616
}
12617+
auto &Res = SortedStores.emplace_back();
12618+
Res.first = Idx;
12619+
Res.second.emplace(Idx, 0);
12620+
};
12621+
StoreInst *PrevStore = Stores.front();
12622+
for (auto [I, SI] : enumerate(Stores)) {
12623+
// Check that we do not try to vectorize stores of different types.
12624+
if (PrevStore->getValueOperand()->getType() !=
12625+
SI->getValueOperand()->getType()) {
12626+
for (auto &Set : SortedStores)
12627+
TryToVectorize(Set.second);
12628+
SortedStores.clear();
12629+
PrevStore = SI;
12630+
}
12631+
FillStoresSet(I, SI);
1257312632
}
1257412633

12634+
// Final vectorization attempt.
12635+
for (auto &Set : SortedStores)
12636+
TryToVectorize(Set.second);
12637+
1257512638
return Changed;
1257612639
}
1257712640

@@ -15135,6 +15198,12 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
1513515198
// compatible (have the same opcode, same parent), otherwise it is
1513615199
// definitely not profitable to try to vectorize them.
1513715200
auto &&StoreSorter = [this](StoreInst *V, StoreInst *V2) {
15201+
if (V->getValueOperand()->getType()->getTypeID() <
15202+
V2->getValueOperand()->getType()->getTypeID())
15203+
return true;
15204+
if (V->getValueOperand()->getType()->getTypeID() >
15205+
V2->getValueOperand()->getType()->getTypeID())
15206+
return false;
1513815207
if (V->getPointerOperandType()->getTypeID() <
1513915208
V2->getPointerOperandType()->getTypeID())
1514015209
return true;
@@ -15173,6 +15242,8 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
1517315242
auto &&AreCompatibleStores = [this](StoreInst *V1, StoreInst *V2) {
1517415243
if (V1 == V2)
1517515244
return true;
15245+
if (V1->getValueOperand()->getType() != V2->getValueOperand()->getType())
15246+
return false;
1517615247
if (V1->getPointerOperandType() != V2->getPointerOperandType())
1517715248
return false;
1517815249
// Undefs are compatible with any other value.
@@ -15204,8 +15275,13 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
1520415275
if (!isValidElementType(Pair.second.front()->getValueOperand()->getType()))
1520515276
continue;
1520615277

15278+
// Reverse stores to do bottom-to-top analysis. This is important if the
15279+
// values are stored to the same addresses several times; in this case we need
15280+
// to follow the stores order (reversed to meet the memory dependencies).
15281+
SmallVector<StoreInst *> ReversedStores(Pair.second.rbegin(),
15282+
Pair.second.rend());
1520715283
Changed |= tryToVectorizeSequence<StoreInst>(
15208-
Pair.second, StoreSorter, AreCompatibleStores,
15284+
ReversedStores, StoreSorter, AreCompatibleStores,
1520915285
[this, &R](ArrayRef<StoreInst *> Candidates, bool) {
1521015286
return vectorizeStores(Candidates, R);
1521115287
},

llvm/test/Transforms/SLPVectorizer/X86/many_stores.ll

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@ define i32 @test(ptr %p) {
55
; CHECK-LABEL: define i32 @test
66
; CHECK-SAME: (ptr [[P:%.*]]) {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr i32, ptr [[P]], i64 4
9-
; CHECK-NEXT: store i32 0, ptr [[IDX2]], align 4
10-
; CHECK-NEXT: [[IDX3:%.*]] = getelementptr i32, ptr [[P]], i64 6
11-
; CHECK-NEXT: store i32 0, ptr [[IDX3]], align 4
128
; CHECK-NEXT: [[IDX4:%.*]] = getelementptr i32, ptr [[P]], i64 8
139
; CHECK-NEXT: store i32 0, ptr [[IDX4]], align 4
1410
; CHECK-NEXT: [[IDX5:%.*]] = getelementptr i32, ptr [[P]], i64 10
@@ -69,9 +65,7 @@ define i32 @test(ptr %p) {
6965
; CHECK-NEXT: store i32 0, ptr [[IDX33]], align 4
7066
; CHECK-NEXT: store i32 0, ptr [[P]], align 4
7167
; CHECK-NEXT: [[IDX0:%.*]] = getelementptr i32, ptr [[P]], i64 3
72-
; CHECK-NEXT: store i32 0, ptr [[IDX0]], align 4
73-
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr i32, ptr [[P]], i64 5
74-
; CHECK-NEXT: store i32 0, ptr [[IDX1]], align 4
68+
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[IDX0]], align 4
7569
; CHECK-NEXT: ret i32 0
7670
;
7771
entry:

0 commit comments

Comments
 (0)