Skip to content

Commit fac7c7e

Browse files
[SLP] Fix vector element size for the store chains
The vector element size can differ between store chains. This patch queries the element size from `Operands[0]` instead of `Stores[0]`, which prevents a wrong computation of the maximum number of elements in that case. Differential Revision: https://reviews.llvm.org/D93192
1 parent 6c8ded0 commit fac7c7e

File tree

2 files changed

+15
-22
lines changed

2 files changed

+15
-22
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6076,7 +6076,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
60766076

60776077
// If a vector register can't hold 1 element, we are done.
60786078
unsigned MaxVecRegSize = R.getMaxVecRegSize();
6079-
unsigned EltSize = R.getVectorElementSize(Stores[0]);
6079+
unsigned EltSize = R.getVectorElementSize(Operands[0]);
60806080
if (MaxVecRegSize % EltSize != 0)
60816081
continue;
60826082

llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll

Lines changed: 14 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -23,28 +23,21 @@ define void @foo(i8* %v0, i8* readonly %v1) {
2323
; CHECK-NEXT: [[T252:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 9
2424
; CHECK-NEXT: [[T292:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 10
2525
; CHECK-NEXT: [[T322:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 11
26-
; CHECK-NEXT: [[T19:%.*]] = load i32, i32* [[T14]], align 4
27-
; CHECK-NEXT: [[T23:%.*]] = load i32, i32* [[T18]], align 4
28-
; CHECK-NEXT: [[T27:%.*]] = load i32, i32* [[T22]], align 4
29-
; CHECK-NEXT: [[T30:%.*]] = load i32, i32* [[T26]], align 4
30-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
31-
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
32-
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
26+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[T14]] to <4 x i32>*
27+
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
28+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
3329
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
34-
; CHECK-NEXT: [[T20:%.*]] = add nsw i32 [[T19]], 4
35-
; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], 4
36-
; CHECK-NEXT: [[T28:%.*]] = add nsw i32 [[T27]], 6
37-
; CHECK-NEXT: [[T31:%.*]] = add nsw i32 [[T30]], 7
38-
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP2]], <i64 4, i64 4>
39-
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 6, i64 7>
40-
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
41-
; CHECK-NEXT: store <2 x i64> [[TMP5]], <2 x i64>* [[TMP7]], align 8
42-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
43-
; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP8]], align 8
44-
; CHECK-NEXT: store i32 [[T20]], i32* [[T21]], align 4
45-
; CHECK-NEXT: store i32 [[T24]], i32* [[T25]], align 4
46-
; CHECK-NEXT: store i32 [[T28]], i32* [[T29]], align 4
47-
; CHECK-NEXT: store i32 [[T31]], i32* [[T32]], align 4
30+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
31+
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8
32+
; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 4, i32 4, i32 6, i32 7>
33+
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 4, i64 4>
34+
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[TMP6]], <i64 6, i64 7>
35+
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
36+
; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP10]], align 8
37+
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
38+
; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP11]], align 8
39+
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[T21]] to <4 x i32>*
40+
; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP12]], align 4
4841
; CHECK-NEXT: ret void
4942
;
5043
%t0 = bitcast i8* %v0 to i32*

0 commit comments

Comments
 (0)