Skip to content

Commit 1387a13

Browse files
committed
[SLP] Check with target before vectorizing GEP Indices.
The target hook prefersVectorizedAddressing() already exists so that a target can indicate whether address computations should be vectorized, so SLPVectorizer should consult it as well before vectorizing GEP indices.

Reviewed By: ABataev, RKSimon

Differential Revision: https://reviews.llvm.org/D144128
1 parent 8347ca7 commit 1387a13

File tree

10 files changed

+775
-397
lines changed

10 files changed

+775
-397
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5191,6 +5191,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
51915191
Depth](ArrayRef<Value *> VL) {
51925192
if (!S.getOpcode() || !S.isAltShuffle() || VL.size() > 2)
51935193
return false;
5194+
if (S.getOpcode() == Instruction::GetElementPtr &&
5195+
!TTI->prefersVectorizedAddressing())
5196+
return true;
51945197
if (VectorizableTree.size() < MinTreeSize)
51955198
return false;
51965199
if (Depth >= RecursionMaxDepth - 1)
@@ -11873,21 +11876,23 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
1187311876
if (!isValidElementType(SI->getValueOperand()->getType()))
1187411877
continue;
1187511878
Stores[getUnderlyingObject(SI->getPointerOperand())].push_back(SI);
11879+
continue;
1187611880
}
1187711881

1187811882
// Ignore getelementptr instructions that have more than one index, a
1187911883
// constant index, or a pointer operand that doesn't point to a scalar
1188011884
// type.
11881-
else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
11882-
auto Idx = GEP->idx_begin()->get();
11883-
if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
11884-
continue;
11885-
if (!isValidElementType(Idx->getType()))
11886-
continue;
11887-
if (GEP->getType()->isVectorTy())
11888-
continue;
11889-
GEPs[GEP->getPointerOperand()].push_back(GEP);
11890-
}
11885+
if (TTI->prefersVectorizedAddressing())
11886+
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
11887+
auto Idx = GEP->idx_begin()->get();
11888+
if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
11889+
continue;
11890+
if (!isValidElementType(Idx->getType()))
11891+
continue;
11892+
if (GEP->getType()->isVectorTy())
11893+
continue;
11894+
GEPs[GEP->getPointerOperand()].push_back(GEP);
11895+
}
1189111896
}
1189211897
}
1189311898

llvm/test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,21 @@ define void @test1(<4 x i16> %a, <4 x i16> %b, ptr %p) {
1212
; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
1313
; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
1414
; CHECK-NEXT: [[SUB0:%.*]] = sub <4 x i32> [[Z0]], [[Z1]]
15-
; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i32> [[SUB0]] to <4 x i64>
16-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
17-
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP1]]
15+
; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[SUB0]], i32 0
16+
; CHECK-NEXT: [[S0:%.*]] = sext i32 [[E0]] to i64
17+
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[S0]]
1818
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
19-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP0]], i32 1
20-
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP2]]
19+
; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[SUB0]], i32 1
20+
; CHECK-NEXT: [[S1:%.*]] = sext i32 [[E1]] to i64
21+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S1]]
2122
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
22-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 2
23-
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP3]]
23+
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[SUB0]], i32 2
24+
; CHECK-NEXT: [[S2:%.*]] = sext i32 [[E2]] to i64
25+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S2]]
2426
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
25-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP0]], i32 3
26-
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP4]]
27+
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
28+
; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64
29+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[S3]]
2730
; CHECK-NEXT: [[LOAD3:%.*]] = load i64, ptr [[GEP3]], align 4
2831
; CHECK-NEXT: call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]])
2932
; CHECK-NEXT: ret void
@@ -58,23 +61,25 @@ define void @test2(<4 x i16> %a, <4 x i16> %b, i64 %c0, i64 %c1, i64 %c2, i64 %c
5861
; CHECK-NEXT: [[Z0:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
5962
; CHECK-NEXT: [[Z1:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
6063
; CHECK-NEXT: [[SUB0:%.*]] = sub <4 x i32> [[Z0]], [[Z1]]
61-
; CHECK-NEXT: [[TMP0:%.*]] = sext <4 x i32> [[SUB0]] to <4 x i64>
62-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[C0:%.*]], i32 0
63-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[C1:%.*]], i32 1
64-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[C2:%.*]], i32 2
65-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[C3:%.*]], i32 3
66-
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP0]], [[TMP4]]
67-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
68-
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[TMP6]]
64+
; CHECK-NEXT: [[E0:%.*]] = extractelement <4 x i32> [[SUB0]], i32 0
65+
; CHECK-NEXT: [[S0:%.*]] = sext i32 [[E0]] to i64
66+
; CHECK-NEXT: [[A0:%.*]] = add i64 [[S0]], [[C0:%.*]]
67+
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[A0]]
6968
; CHECK-NEXT: [[LOAD0:%.*]] = load i64, ptr [[GEP0]], align 4
70-
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
71-
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP7]]
69+
; CHECK-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[SUB0]], i32 1
70+
; CHECK-NEXT: [[S1:%.*]] = sext i32 [[E1]] to i64
71+
; CHECK-NEXT: [[A1:%.*]] = add i64 [[S1]], [[C1:%.*]]
72+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A1]]
7273
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[GEP1]], align 4
73-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
74-
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP8]]
74+
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[SUB0]], i32 2
75+
; CHECK-NEXT: [[S2:%.*]] = sext i32 [[E2]] to i64
76+
; CHECK-NEXT: [[A2:%.*]] = add i64 [[S2]], [[C2:%.*]]
77+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A2]]
7578
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[GEP2]], align 4
76-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
77-
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[TMP9]]
79+
; CHECK-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[SUB0]], i32 3
80+
; CHECK-NEXT: [[S3:%.*]] = sext i32 [[E3]] to i64
81+
; CHECK-NEXT: [[A3:%.*]] = add i64 [[S3]], [[C3:%.*]]
82+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 [[A3]]
7883
; CHECK-NEXT: [[LOAD3:%.*]] = load i64, ptr [[GEP3]], align 4
7984
; CHECK-NEXT: call void @foo(i64 [[LOAD0]], i64 [[LOAD1]], i64 [[LOAD2]], i64 [[LOAD3]])
8085
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)