Skip to content

Commit 3d1c63e

Browse files
authored
[SLP][REVEC] Expand getelementptr into vector form. (#103704)
1 parent 57c1e21 commit 3d1c63e

File tree

2 files changed

+64
-0
lines changed

2 files changed

+64
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13765,6 +13765,27 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1376513765
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
1376613766
return E->VectorizedValue;
1376713767
}
13768+
if (isa<FixedVectorType>(ScalarTy)) {
13769+
assert(SLPReVec && "FixedVectorType is not expected.");
13770+
// CreateMaskedGather expects VecTy and VecPtr to have the same number of
13771+
// elements, so we need to expand VecPtr if ScalarTy is a vector type.
13772+
unsigned ScalarTyNumElements =
13773+
cast<FixedVectorType>(ScalarTy)->getNumElements();
13774+
unsigned VecTyNumElements =
13775+
cast<FixedVectorType>(VecTy)->getNumElements();
13776+
assert(VecTyNumElements % ScalarTyNumElements == 0 &&
13777+
"Cannot expand getelementptr.");
13778+
unsigned VF = VecTyNumElements / ScalarTyNumElements;
13779+
SmallVector<Constant *> Indices(VecTyNumElements);
13780+
transform(seq(VecTyNumElements), Indices.begin(), [=](unsigned I) {
13781+
return Builder.getInt64(I % ScalarTyNumElements);
13782+
});
13783+
VecPtr = Builder.CreateGEP(
13784+
VecTy->getElementType(),
13785+
Builder.CreateShuffleVector(
13786+
VecPtr, createReplicatedMask(ScalarTyNumElements, VF)),
13787+
ConstantVector::get(Indices));
13788+
}
1376813789
// Use the minimum alignment of the gathered loads.
1376913790
Align CommonAlignment = computeCommonAlignment<LoadInst>(E->Scalars);
1377013791
NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment);
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -mtriple=riscv64 -mcpu=sifive-x280 -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s
3+
4+
define i32 @test() {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: br label [[IF_END_I87:%.*]]
8+
; CHECK: if.end.i87:
9+
; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> <ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64036 to ptr), ptr inttoptr (i64 64064 to ptr), ptr inttoptr (i64 64064 to ptr)>, <4 x i64> <i64 0, i64 1, i64 0, i64 1>), i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
10+
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> poison, i64 0)
11+
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP1]], <2 x i32> zeroinitializer, i64 2)
12+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
13+
; CHECK-NEXT: switch i32 0, label [[SW_BB509_I:%.*]] [
14+
; CHECK-NEXT: i32 1, label [[SW_BB509_I]]
15+
; CHECK-NEXT: i32 0, label [[IF_THEN458_I:%.*]]
16+
; CHECK-NEXT: ]
17+
; CHECK: if.then458.i:
18+
; CHECK-NEXT: br label [[SW_BB509_I]]
19+
; CHECK: sw.bb509.i:
20+
; CHECK-NEXT: [[TMP4:%.*]] = phi <4 x i32> [ [[TMP0]], [[IF_THEN458_I]] ], [ [[TMP3]], [[IF_END_I87]] ], [ [[TMP3]], [[IF_END_I87]] ]
21+
; CHECK-NEXT: ret i32 0
22+
;
23+
entry:
24+
%getelementptr0 = getelementptr i8, ptr null, i64 64036
25+
%getelementptr1 = getelementptr i8, ptr null, i64 64064
26+
br label %if.end.i87
27+
28+
if.end.i87: ; preds = %entry
29+
%0 = load <2 x i32>, ptr %getelementptr0, align 4
30+
%1 = load <2 x i32>, ptr %getelementptr1, align 8
31+
switch i32 0, label %sw.bb509.i [
32+
i32 1, label %sw.bb509.i
33+
i32 0, label %if.then458.i
34+
]
35+
36+
if.then458.i: ; preds = %if.end.i87
37+
br label %sw.bb509.i
38+
39+
sw.bb509.i: ; preds = %if.then458.i, %if.end.i87, %if.end.i87
40+
%4 = phi <2 x i32> [ %0, %if.then458.i ], [ %0, %if.end.i87 ], [ %0, %if.end.i87 ]
41+
%5 = phi <2 x i32> [ %1, %if.then458.i ], [ zeroinitializer, %if.end.i87 ], [ zeroinitializer, %if.end.i87 ]
42+
ret i32 0
43+
}

0 commit comments

Comments
 (0)