Skip to content

Commit efd6055

Browse files
committed
Revert "[SLP]Attempt to vectorize long stores, if short one failed."
This reverts commit 7d4e8c1. Contrary to that commit's description, the change does cause large compile-time regressions (up to 10% on individual files).
1 parent 8c9d814 commit efd6055

File tree

2 files changed

+65
-62
lines changed

2 files changed

+65
-62
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 30 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -15237,60 +15237,39 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
1523715237
Size *= 2;
1523815238
});
1523915239
unsigned StartIdx = 0;
15240-
unsigned Repeat = 0;
15241-
constexpr unsigned MaxAttempts = 2;
15242-
while (true) {
15243-
++Repeat;
15244-
for (unsigned Size : CandidateVFs) {
15245-
for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
15246-
ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
15247-
assert(
15248-
all_of(
15249-
Slice,
15250-
[&](Value *V) {
15251-
return cast<StoreInst>(V)->getValueOperand()->getType() ==
15252-
cast<StoreInst>(Slice.front())
15253-
->getValueOperand()
15254-
->getType();
15255-
}) &&
15256-
"Expected all operands of same type.");
15257-
if (!VectorizedStores.count(Slice.front()) &&
15258-
!VectorizedStores.count(Slice.back()) &&
15259-
TriedSequences
15260-
.insert(std::make_pair(Slice.front(), Slice.back()))
15261-
.second &&
15262-
vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
15263-
// Mark the vectorized stores so that we don't vectorize them
15264-
// again.
15265-
VectorizedStores.insert(Slice.begin(), Slice.end());
15266-
Changed = true;
15267-
// If we vectorized initial block, no need to try to vectorize
15268-
// it again.
15269-
if (Cnt == StartIdx)
15270-
StartIdx += Size;
15271-
Cnt += Size;
15272-
continue;
15273-
}
15274-
++Cnt;
15275-
}
15276-
// Check if the whole array was vectorized already - exit.
15277-
if (StartIdx >= Operands.size()) {
15278-
Repeat = MaxAttempts;
15279-
break;
15240+
for (unsigned Size : CandidateVFs) {
15241+
for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
15242+
ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);
15243+
assert(
15244+
all_of(
15245+
Slice,
15246+
[&](Value *V) {
15247+
return cast<StoreInst>(V)->getValueOperand()->getType() ==
15248+
cast<StoreInst>(Slice.front())
15249+
->getValueOperand()
15250+
->getType();
15251+
}) &&
15252+
"Expected all operands of same type.");
15253+
if (!VectorizedStores.count(Slice.front()) &&
15254+
!VectorizedStores.count(Slice.back()) &&
15255+
TriedSequences.insert(std::make_pair(Slice.front(), Slice.back()))
15256+
.second &&
15257+
vectorizeStoreChain(Slice, R, Cnt, MinVF)) {
15258+
// Mark the vectorized stores so that we don't vectorize them again.
15259+
VectorizedStores.insert(Slice.begin(), Slice.end());
15260+
Changed = true;
15261+
// If we vectorized initial block, no need to try to vectorize it
15262+
// again.
15263+
if (Cnt == StartIdx)
15264+
StartIdx += Size;
15265+
Cnt += Size;
15266+
continue;
1528015267
}
15268+
++Cnt;
1528115269
}
15282-
// Check if tried all attempts or no need for the last attempts at all.
15283-
if (Repeat >= MaxAttempts)
15284-
break;
15285-
const unsigned MaxTotalNum = bit_floor(Operands.size() - StartIdx);
15286-
if (MaxVF >= MaxTotalNum)
15270+
// Check if the whole array was vectorized already - exit.
15271+
if (StartIdx >= Operands.size())
1528715272
break;
15288-
// Last attempt to vectorize max number of elements, if all previous
15289-
// attempts were unsuccessful because of the cost issues.
15290-
CandidateVFs.clear();
15291-
for (unsigned Size = MaxTotalNum; Size > MaxVF; Size /= 2) {
15292-
CandidateVFs.push_back(Size);
15293-
}
1529415273
}
1529515274
}
1529615275
};

llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll

Lines changed: 35 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -100,17 +100,41 @@ define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) {
100100
define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) {
101101
; SSE-LABEL: @store_i64(
102102
; SSE-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
103-
; SSE-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
104-
; SSE-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0
105-
; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer
106-
; SSE-NEXT: [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]]
107-
; SSE-NEXT: [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], <i64 15, i64 15, i64 15, i64 15>
108-
; SSE-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
109-
; SSE-NEXT: [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], <i32 255, i32 255, i32 255, i32 255>
110-
; SSE-NEXT: [[TMP12:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
111-
; SSE-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>
112-
; SSE-NEXT: [[TMP14:%.*]] = zext <4 x i32> [[TMP13]] to <4 x i64>
113-
; SSE-NEXT: store <4 x i64> [[TMP14]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
103+
; SSE-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
104+
; SSE-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], [[TMP4]]
105+
; SSE-NEXT: [[TMP7:%.*]] = lshr i64 [[TMP6]], 15
106+
; SSE-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
107+
; SSE-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 255
108+
; SSE-NEXT: [[TMP10:%.*]] = and i64 [[TMP7]], 4294967295
109+
; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], i64 [[TMP10]], i64 255
110+
; SSE-NEXT: store i64 [[TMP11]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
111+
; SSE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
112+
; SSE-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 8, !tbaa [[TBAA5]]
113+
; SSE-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], [[TMP4]]
114+
; SSE-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP14]], 15
115+
; SSE-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
116+
; SSE-NEXT: [[TMP17:%.*]] = icmp ult i32 [[TMP16]], 255
117+
; SSE-NEXT: [[TMP18:%.*]] = and i64 [[TMP15]], 4294967295
118+
; SSE-NEXT: [[TMP19:%.*]] = select i1 [[TMP17]], i64 [[TMP18]], i64 255
119+
; SSE-NEXT: store i64 [[TMP19]], ptr [[TMP12]], align 8, !tbaa [[TBAA5]]
120+
; SSE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
121+
; SSE-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8, !tbaa [[TBAA5]]
122+
; SSE-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], [[TMP4]]
123+
; SSE-NEXT: [[TMP23:%.*]] = lshr i64 [[TMP22]], 15
124+
; SSE-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
125+
; SSE-NEXT: [[TMP25:%.*]] = icmp ult i32 [[TMP24]], 255
126+
; SSE-NEXT: [[TMP26:%.*]] = and i64 [[TMP23]], 4294967295
127+
; SSE-NEXT: [[TMP27:%.*]] = select i1 [[TMP25]], i64 [[TMP26]], i64 255
128+
; SSE-NEXT: store i64 [[TMP27]], ptr [[TMP20]], align 8, !tbaa [[TBAA5]]
129+
; SSE-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 24
130+
; SSE-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP28]], align 8, !tbaa [[TBAA5]]
131+
; SSE-NEXT: [[TMP30:%.*]] = mul i64 [[TMP29]], [[TMP4]]
132+
; SSE-NEXT: [[TMP31:%.*]] = lshr i64 [[TMP30]], 15
133+
; SSE-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP31]] to i32
134+
; SSE-NEXT: [[TMP33:%.*]] = icmp ult i32 [[TMP32]], 255
135+
; SSE-NEXT: [[TMP34:%.*]] = and i64 [[TMP31]], 4294967295
136+
; SSE-NEXT: [[TMP35:%.*]] = select i1 [[TMP33]], i64 [[TMP34]], i64 255
137+
; SSE-NEXT: store i64 [[TMP35]], ptr [[TMP28]], align 8, !tbaa [[TBAA5]]
114138
; SSE-NEXT: ret void
115139
;
116140
; AVX-LABEL: @store_i64(

0 commit comments

Comments
 (0)