Skip to content

Commit 32bb956

Browse files
vtjnash authored and tstellar committed
Bad SLPVectorization shufflevector replacement, resulting in write to wrong memory location
We see that it might otherwise do:

    %10 = getelementptr {}**, <2 x {}***> %9, <2 x i32> <i32 10, i32 4>
    %11 = bitcast <2 x {}***> %10 to <2 x i64*>
    ...
    %27 = extractelement <2 x i64*> %11, i32 0
    %28 = bitcast i64* %27 to <2 x i64>*
    store <2 x i64> %22, <2 x i64>* %28, align 4, !tbaa !2

Which is an out-of-bounds store (the extractelement got offset 10 instead of offset 4 as intended). With the fix, we correctly generate extractelement for i32 1 and generate correct code.

Differential Revision: https://reviews.llvm.org/D106613

(cherry picked from commit e27a6db)
1 parent 93edfb2 commit 32bb956

File tree

2 files changed

+20
-8
lines changed

2 files changed

+20
-8
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5430,8 +5430,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
54305430
// The pointer operand uses an in-tree scalar so we add the new BitCast
54315431
// to ExternalUses list to make sure that an extract will be generated
54325432
// in the future.
5433-
if (getTreeEntry(PO))
5434-
ExternalUses.emplace_back(PO, cast<User>(VecPtr), 0);
5433+
if (TreeEntry *Entry = getTreeEntry(PO)) {
5434+
// Find which lane we need to extract.
5435+
unsigned FoundLane = Entry->findLaneForValue(PO);
5436+
ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
5437+
}
54355438

54365439
NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
54375440
} else {
@@ -5474,8 +5477,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
54745477
// The pointer operand uses an in-tree scalar, so add the new BitCast to
54755478
// ExternalUses to make sure that an extract will be generated in the
54765479
// future.
5477-
if (getTreeEntry(ScalarPtr))
5478-
ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
5480+
if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
5481+
// Find which lane we need to extract.
5482+
unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
5483+
ExternalUses.push_back(
5484+
ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
5485+
}
54795486

54805487
Value *V = propagateMetadata(ST, E->Scalars);
54815488

@@ -5577,8 +5584,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
55775584
// The scalar argument uses an in-tree scalar so we add the new vectorized
55785585
// call to ExternalUses list to make sure that an extract will be
55795586
// generated in the future.
5580-
if (ScalarArg && getTreeEntry(ScalarArg))
5581-
ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
5587+
if (ScalarArg) {
5588+
if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
5589+
// Find which lane we need to extract.
5590+
unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
5591+
ExternalUses.push_back(
5592+
ExternalUser(ScalarArg, cast<User>(V), FoundLane));
5593+
}
5594+
}
55825595

55835596
propagateIRFlags(V, E->Scalars, VL0);
55845597
ShuffleBuilder.addMask(E->ReuseShuffleIndices);

llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -108,10 +108,9 @@ define void @externally_used_ptrs() {
108108
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x i64*> [[TMP2]], <2 x i64> <i64 56, i64 11>
109109
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x i64*> [[TMP3]] to <2 x i64>
110110
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 12
111-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 0
111+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1
112112
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
113113
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 8
114-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1
115114
; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]]
116115
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
117116
; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 8

0 commit comments

Comments
 (0)