Skip to content

Commit 417fe52

Browse files
committed
Revert "[SLP] Check with target before vectorizing GEP Indices."
This reverts commit 1387a13. The reverted commit introduced performance regressions on AArch64 in cases where the cost of a vector GEP plus extracts is offset by the benefits of vectorizing the rest of the tree. The test in llvm/test/Transforms/SLPVectorizer/AArch64/vector-getelementptr.ll illustrates the issue; it was extracted from code for which the reverted commit regressed a SPEC benchmark by 15%.
1 parent bd76284 commit 417fe52

File tree

11 files changed

+412
-805
lines changed

11 files changed

+412
-805
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5250,9 +5250,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
52505250
Depth](ArrayRef<Value *> VL) {
52515251
if (!S.getOpcode() || !S.isAltShuffle() || VL.size() > 2)
52525252
return false;
5253-
if (S.getOpcode() == Instruction::GetElementPtr &&
5254-
!TTI->prefersVectorizedAddressing())
5255-
return true;
52565253
if (VectorizableTree.size() < MinTreeSize)
52575254
return false;
52585255
if (Depth >= RecursionMaxDepth - 1)
@@ -12130,23 +12127,21 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
1213012127
if (!isValidElementType(SI->getValueOperand()->getType()))
1213112128
continue;
1213212129
Stores[getUnderlyingObject(SI->getPointerOperand())].push_back(SI);
12133-
continue;
1213412130
}
1213512131

1213612132
// Ignore getelementptr instructions that have more than one index, a
1213712133
// constant index, or a pointer operand that doesn't point to a scalar
1213812134
// type.
12139-
if (TTI->prefersVectorizedAddressing())
12140-
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
12141-
auto Idx = GEP->idx_begin()->get();
12142-
if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
12143-
continue;
12144-
if (!isValidElementType(Idx->getType()))
12145-
continue;
12146-
if (GEP->getType()->isVectorTy())
12147-
continue;
12148-
GEPs[GEP->getPointerOperand()].push_back(GEP);
12149-
}
12135+
else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
12136+
auto Idx = GEP->idx_begin()->get();
12137+
if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
12138+
continue;
12139+
if (!isValidElementType(Idx->getType()))
12140+
continue;
12141+
if (GEP->getType()->isVectorTy())
12142+
continue;
12143+
GEPs[GEP->getPointerOperand()].push_back(GEP);
12144+
}
1215012145
}
1215112146
}
1215212147

llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll

Lines changed: 23 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,18 @@ define void @test1(<4 x i16> %a, <4 x i16> %b, ptr %p) {
1212
; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
1313
; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
1414
; CHECK-NEXT: [[SUB0:%.*]] = sub <4 x i32> [[Z0]], [[Z1]]
15-
; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[SUB0]], i32 0
16-
; CHECK-NEXT: [[S0:%.*]] = sext i32 [[E0]] to i64
17-
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[S0]]
15+
; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i32> [[SUB0]] to <4 x i64>
16+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
17+
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP1]]
1818
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
19-
; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[SUB0]], i32 1
20-
; CHECK-NEXT: [[S1:%.*]] = sext i32 [[E1]] to i64
21-
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S1]]
19+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP0]], i32 1
20+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP2]]
2221
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
23-
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[SUB0]], i32 2
24-
; CHECK-NEXT: [[S2:%.*]] = sext i32 [[E2]] to i64
25-
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S2]]
22+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 2
23+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP3]]
2624
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
27-
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
28-
; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64
29-
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S3]]
25+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP0]], i32 3
26+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP4]]
3027
; CHECK-NEXT: [[LOAD3:%.*]] = load i64, ptr [[GEP3]], align 4
3128
; CHECK-NEXT: call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]])
3229
; CHECK-NEXT: ret void
@@ -61,25 +58,23 @@ define void @test2(<4 x i16> %a, <4 x i16> %b, i64 %c0, i64 %c1, i64 %c2, i64 %c
6158
; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
6259
; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
6360
; CHECK-NEXT: [[SUB0:%.*]] = sub <4 x i32> [[Z0]], [[Z1]]
64-
; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[SUB0]], i32 0
65-
; CHECK-NEXT: [[S0:%.*]] = sext i32 [[E0]] to i64
66-
; CHECK-NEXT: [[A0:%.*]] = add i64 [[S0]], [[C0:%.*]]
67-
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[A0]]
61+
; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i32> [[SUB0]] to <4 x i64>
62+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[C0:%.*]], i32 0
63+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[C1:%.*]], i32 1
64+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[C2:%.*]], i32 2
65+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[C3:%.*]], i32 3
66+
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP0]], [[TMP4]]
67+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
68+
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP6]]
6869
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
69-
; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[SUB0]], i32 1
70-
; CHECK-NEXT: [[S1:%.*]] = sext i32 [[E1]] to i64
71-
; CHECK-NEXT: [[A1:%.*]] = add i64 [[S1]], [[C1:%.*]]
72-
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A1]]
70+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
71+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP7]]
7372
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
74-
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[SUB0]], i32 2
75-
; CHECK-NEXT: [[S2:%.*]] = sext i32 [[E2]] to i64
76-
; CHECK-NEXT: [[A2:%.*]] = add i64 [[S2]], [[C2:%.*]]
77-
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A2]]
73+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
74+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP8]]
7875
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
79-
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
80-
; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64
81-
; CHECK-NEXT: [[A3:%.*]] = add i64 [[S3]], [[C3:%.*]]
82-
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A3]]
76+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
77+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP9]]
8378
; CHECK-NEXT: [[LOAD3:%.*]] = load i64, ptr [[GEP3]], align 4
8479
; CHECK-NEXT: call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]])
8580
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)