Skip to content

Commit 8361c5d

Browse files
committed
[SLPVectorizer] Handle external load/store pointer uses with opaque pointers
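With opaque pointers the pointer bitcast may be a no-op that returns the original pointer operand instead of a new instruction, so the new vector load/store itself becomes the external user of the in-tree scalar pointer.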
1 parent d506a9e commit 8361c5d

File tree

2 files changed

+40
-13
lines changed

2 files changed

+40
-13
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -7013,19 +7013,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
70137013
unsigned AS = LI->getPointerAddressSpace();
70147014
Value *PO = LI->getPointerOperand();
70157015
if (E->State == TreeEntry::Vectorize) {
7016-
70177016
Value *VecPtr = Builder.CreateBitCast(PO, VecTy->getPointerTo(AS));
7017+
NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
70187018

70197019
// The pointer operand uses an in-tree scalar so we add the new BitCast
7020-
// to ExternalUses list to make sure that an extract will be generated
7021-
// in the future.
7020+
// or LoadInst to ExternalUses list to make sure that an extract will
7021+
// be generated in the future.
70227022
if (TreeEntry *Entry = getTreeEntry(PO)) {
70237023
// Find which lane we need to extract.
70247024
unsigned FoundLane = Entry->findLaneForValue(PO);
7025-
ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
7025+
ExternalUses.emplace_back(
7026+
PO, PO != VecPtr ? cast<User>(VecPtr) : NewLI, FoundLane);
70267027
}
7027-
7028-
NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
70297028
} else {
70307029
assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
70317030
Value *VecPtr = vectorizeTree(E->getOperand(0));
@@ -7058,17 +7057,18 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
70587057
Value *ScalarPtr = SI->getPointerOperand();
70597058
Value *VecPtr = Builder.CreateBitCast(
70607059
ScalarPtr, VecValue->getType()->getPointerTo(AS));
7061-
StoreInst *ST = Builder.CreateAlignedStore(VecValue, VecPtr,
7062-
SI->getAlign());
7060+
StoreInst *ST =
7061+
Builder.CreateAlignedStore(VecValue, VecPtr, SI->getAlign());
70637062

7064-
// The pointer operand uses an in-tree scalar, so add the new BitCast to
7065-
// ExternalUses to make sure that an extract will be generated in the
7066-
// future.
7063+
// The pointer operand uses an in-tree scalar, so add the new BitCast or
7064+
// StoreInst to ExternalUses to make sure that an extract will be
7065+
// generated in the future.
70677066
if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
70687067
// Find which lane we need to extract.
70697068
unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
7070-
ExternalUses.push_back(
7071-
ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
7069+
ExternalUses.push_back(ExternalUser(
7070+
ScalarPtr, ScalarPtr != VecPtr ? cast<User>(VecPtr) : ST,
7071+
FoundLane));
70727072
}
70737073

70747074
Value *V = propagateMetadata(ST, E->Scalars);

llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,3 +55,30 @@ define void @test(ptr %r, ptr %p, ptr %q) #0 {
5555
%g3 = getelementptr inbounds i32, ptr %r, i64 %sub3
5656
ret void
5757
}
58+
59+
define void @test2(i64* %a, i64* %b) {
60+
; CHECK-LABEL: @test2(
61+
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 2
62+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
63+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[B:%.*]], i32 1
64+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 1, i64 3>
65+
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64>
66+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0
67+
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
68+
; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP4]], [[TMP6]]
69+
; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
70+
; CHECK-NEXT: ret void
71+
;
72+
%a1 = getelementptr inbounds i64, i64* %a, i64 1
73+
%a2 = getelementptr inbounds i64, i64* %a, i64 2
74+
%i1 = ptrtoint i64* %a1 to i64
75+
%b3 = getelementptr inbounds i64, i64* %b, i64 3
76+
%i2 = ptrtoint i64* %b3 to i64
77+
%v1 = load i64, i64* %a1, align 8
78+
%v2 = load i64, i64* %a2, align 8
79+
%add1 = add i64 %i1, %v1
80+
%add2 = add i64 %i2, %v2
81+
store i64 %add1, i64* %a1, align 8
82+
store i64 %add2, i64* %a2, align 8
83+
ret void
84+
}

0 commit comments

Comments
 (0)