Skip to content

Commit 980d2f6

Browse files
committed
Bad SLPVectorization shufflevector replacement, resulting in write to wrong memory location
We see that it might otherwise do: %10 = getelementptr {}**, <2 x {}***> %9, <2 x i32> <i32 10, i32 4> %11 = bitcast <2 x {}***> %10 to <2 x i64*> ... %27 = extractelement <2 x i64*> %11, i32 0 %28 = bitcast i64* %27 to <2 x i64>* store <2 x i64> %22, <2 x i64>* %28, align 4, !tbaa !2 Which is an out-of-bounds store (the extractelement got offset 10 instead of offset 4 as intended). With the fix, we correctly generate extractelement for i32 1 and generate correct code. Differential Revision: https://reviews.llvm.org/D106613
1 parent 8f6e49e commit 980d2f6

File tree

2 files changed

+65
-13
lines changed

2 files changed

+65
-13
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1707,6 +1707,18 @@ class BoUpSLP {
17071707
return true;
17081708
}
17091709

1710+
/// When ReuseShuffleIndices is empty it just returns position of \p V
1711+
/// within vector of Scalars. Otherwise, try to remap on its reuse index.
1712+
int findLaneForValue(Value *V) const {
1713+
unsigned FoundLane = std::distance(Scalars.begin(), find(Scalars, V));
1714+
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
1715+
if (!ReuseShuffleIndices.empty()) {
1716+
FoundLane = std::distance(ReuseShuffleIndices.begin(),
1717+
find(ReuseShuffleIndices, FoundLane));
1718+
}
1719+
return FoundLane;
1720+
}
1721+
17101722
#ifndef NDEBUG
17111723
/// Debug printer.
17121724
LLVM_DUMP_METHOD void dump() const {
@@ -4268,13 +4280,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
42684280
// Add to our 'need-to-extract' list.
42694281
if (TreeEntry *Entry = getTreeEntry(Val)) {
42704282
// Find which lane we need to extract.
4271-
unsigned FoundLane = std::distance(Entry->Scalars.begin(),
4272-
find(Entry->Scalars, Val));
4273-
assert(FoundLane < Entry->Scalars.size() && "Couldn't find extract lane");
4274-
if (!Entry->ReuseShuffleIndices.empty()) {
4275-
FoundLane = std::distance(Entry->ReuseShuffleIndices.begin(),
4276-
find(Entry->ReuseShuffleIndices, FoundLane));
4277-
}
4283+
unsigned FoundLane = Entry->findLaneForValue(Val);
42784284
ExternalUses.push_back(ExternalUser(Val, InsElt, FoundLane));
42794285
}
42804286
}
@@ -4602,8 +4608,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
46024608
// The pointer operand uses an in-tree scalar so we add the new BitCast
46034609
// to ExternalUses list to make sure that an extract will be generated
46044610
// in the future.
4605-
if (getTreeEntry(PO))
4606-
ExternalUses.emplace_back(PO, cast<User>(VecPtr), 0);
4611+
if (TreeEntry *Entry = getTreeEntry(PO)) {
4612+
// Find which lane we need to extract.
4613+
unsigned FoundLane = Entry->findLaneForValue(PO);
4614+
ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
4615+
}
46074616

46084617
NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
46094618
} else {
@@ -4654,8 +4663,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
46544663
// The pointer operand uses an in-tree scalar, so add the new BitCast to
46554664
// ExternalUses to make sure that an extract will be generated in the
46564665
// future.
4657-
if (getTreeEntry(ScalarPtr))
4658-
ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
4666+
if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
4667+
// Find which lane we need to extract.
4668+
unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
4669+
ExternalUses.push_back(
4670+
ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
4671+
}
46594672

46604673
Value *V = propagateMetadata(ST, E->Scalars);
46614674
if (NeedToShuffleReuses)
@@ -4756,8 +4769,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
47564769
// The scalar argument uses an in-tree scalar so we add the new vectorized
47574770
// call to ExternalUses list to make sure that an extract will be
47584771
// generated in the future.
4759-
if (ScalarArg && getTreeEntry(ScalarArg))
4760-
ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
4772+
if (ScalarArg) {
4773+
if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
4774+
// Find which lane we need to extract.
4775+
unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
4776+
ExternalUses.push_back(
4777+
ExternalUser(ScalarArg, cast<User>(V), FoundLane));
4778+
}
4779+
}
47614780

47624781
propagateIRFlags(V, E->Scalars, VL0);
47634782
if (NeedToShuffleReuses)

llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,36 @@ entry:
9999
ret void
100100

101101
}
102+
103+
define void @externally_used_ptrs() {
104+
; CHECK-LABEL: @externally_used_ptrs(
105+
; CHECK-NEXT: entry:
106+
; CHECK-NEXT: [[TMP0:%.*]] = load i64*, i64** @a, align 8
107+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64*> poison, i64* [[TMP0]], i32 0
108+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64*> [[TMP1]], i64* [[TMP0]], i32 1
109+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 56, i64 11>
110+
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64>
111+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 12
112+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1
113+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
114+
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 8
115+
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]]
116+
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
117+
; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 8
118+
; CHECK-NEXT: ret void
119+
;
120+
entry:
121+
%0 = load i64*, i64** @a, align 8
122+
%add.ptr = getelementptr inbounds i64, i64* %0, i64 11
123+
%1 = ptrtoint i64* %add.ptr to i64
124+
%add.ptr1 = getelementptr inbounds i64, i64* %0, i64 56
125+
%2 = ptrtoint i64* %add.ptr1 to i64
126+
%arrayidx2 = getelementptr inbounds i64, i64* %0, i64 12
127+
%3 = load i64, i64* %arrayidx2, align 8
128+
%4 = load i64, i64* %add.ptr, align 8
129+
%5 = add i64 %1, %3
130+
%6 = add i64 %2, %4
131+
store i64 %6, i64* %add.ptr, align 8
132+
store i64 %5, i64* %arrayidx2, align 8
133+
ret void
134+
}

0 commit comments

Comments
 (0)