Skip to content

Commit 689babd

Browse files
committed
[SLP] Don't try to vectorize allocas
While a collection of allocas is technically vectorizable - by forming a wider alloca - this was not a transform SLP actually knows how to do. Instead, we were forming a bundle with missing dependencies, and then relying on the scheduling code to preserve program order if multiple instructions were schedulable at once. I haven't been able to write a test case, but I'm 99% sure this was wrong in some edge case. The unknown op case was flowing down the shufflevector path. This change did result in some splat handling being lost, but the same lack of splat handling is visible in a whole bunch of simple examples for the gather path. I didn't consider this interesting to fix given how narrow the splat of allocas case is.
1 parent 97e0366 commit 689babd

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3910,6 +3910,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
39103910
return;
39113911
}
39123912

3913+
// Avoid attempting to schedule allocas; there are unmodeled dependencies
3914+
// for "static" alloca status and for reordering with stacksave calls.
3915+
for (Value *V : VL) {
3916+
if (isa<AllocaInst>(V)) {
3917+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to alloca.\n");
3918+
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
3919+
return;
3920+
}
3921+
}
3922+
39133923
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
39143924
if (SI->getValueOperand()->getType()->isVectorTy()) {
39153925
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");

llvm/test/Transforms/SLPVectorizer/X86/store_alloca.ll

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ define void @ham() #1 {
3232
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[VAR32]] to <4 x i8*>*
3333
; CHECK-NEXT: store <4 x i8*> [[SHUFFLE]], <4 x i8*>* [[TMP2]], align 4
3434
; CHECK-NEXT: [[VAR36:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 4
35+
; CHECK-NEXT: store i8* [[VAR4]], i8** [[VAR36]], align 4
3536
; CHECK-NEXT: [[VAR37:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 5
37+
; CHECK-NEXT: store i8* [[VAR5]], i8** [[VAR37]], align 4
3638
; CHECK-NEXT: [[VAR38:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 6
39+
; CHECK-NEXT: store i8* [[VAR5]], i8** [[VAR38]], align 4
3740
; CHECK-NEXT: [[VAR39:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 7
38-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8*> [[TMP1]], i8* [[VAR5]], i32 1
39-
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i8*> [[TMP3]], <4 x i8*> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
40-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[VAR36]] to <4 x i8*>*
41-
; CHECK-NEXT: store <4 x i8*> [[SHUFFLE1]], <4 x i8*>* [[TMP4]], align 4
41+
; CHECK-NEXT: store i8* [[VAR5]], i8** [[VAR39]], align 4
4242
; CHECK-NEXT: ret void
4343
;
4444
%var2 = alloca i8
@@ -78,14 +78,13 @@ define void @spam() #1 {
7878
; CHECK-NEXT: [[VAR5:%.*]] = alloca i8, align 1
7979
; CHECK-NEXT: [[VAR12:%.*]] = alloca [12 x i8*], align 4
8080
; CHECK-NEXT: [[VAR36:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 4
81+
; CHECK-NEXT: store i8* [[VAR4]], i8** [[VAR36]], align 4
8182
; CHECK-NEXT: [[VAR37:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 5
83+
; CHECK-NEXT: store i8* [[VAR5]], i8** [[VAR37]], align 4
8284
; CHECK-NEXT: [[VAR38:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 6
85+
; CHECK-NEXT: store i8* [[VAR5]], i8** [[VAR38]], align 4
8386
; CHECK-NEXT: [[VAR39:%.*]] = getelementptr inbounds [12 x i8*], [12 x i8*]* [[VAR12]], i32 0, i32 7
84-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8*> poison, i8* [[VAR4]], i32 0
85-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i8*> [[TMP1]], i8* [[VAR5]], i32 1
86-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i8*> [[TMP2]], <4 x i8*> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
87-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8** [[VAR36]] to <4 x i8*>*
88-
; CHECK-NEXT: store <4 x i8*> [[SHUFFLE]], <4 x i8*>* [[TMP3]], align 4
87+
; CHECK-NEXT: store i8* [[VAR5]], i8** [[VAR39]], align 4
8988
; CHECK-NEXT: ret void
9089
;
9190
%var4 = alloca i8

0 commit comments

Comments
 (0)