Skip to content

Commit 6704faf

Browse files
committed
[SLP] Use StoreTy to compute min VF.
This ensures that MinVF is a power-of-2, even if ValueTy's width is not a power-of-2. This should fix a number of buildbot failures with X86 bootstrapping.
1 parent 0412a86 commit 6704faf

File tree

2 files changed: +110 -1 lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15137,7 +15137,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
15137 15137
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
15138 15138
ValueTy = Trunc->getSrcTy();
15139 15139
unsigned MinVF = TTI->getStoreMinimumVF(
15140-
R.getMinVF(DL->getTypeSizeInBits(ValueTy)), StoreTy, ValueTy);
15140+
R.getMinVF(DL->getTypeSizeInBits(StoreTy)), StoreTy, ValueTy);
15141 15141

15142 15142
if (MaxVF < MinVF) {
15143 15143
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -p slp-vectorizer -S %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
5+
target triple = "x86_64-unknown-linux-gnu"
6+
7+
define void @test_2_i24_stores(ptr %A) {
8+
; CHECK-LABEL: define void @test_2_i24_stores(
9+
; CHECK-SAME: ptr [[A:%.*]]) {
10+
; CHECK-NEXT: [[L:%.*]] = load i24, ptr [[A]], align 4
11+
; CHECK-NEXT: store i24 [[L]], ptr [[A]], align 1
12+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i24, ptr [[A]], i64 1
13+
; CHECK-NEXT: store i24 0, ptr [[GEP]], align 1
14+
; CHECK-NEXT: ret void
15+
;
16+
%l = load i24, ptr %A
17+
store i24 %l, ptr %A, align 1
18+
%gep = getelementptr i24, ptr %A, i64 1
19+
store i24 0, ptr %gep, align 1
20+
ret void
21+
}
22+
23+
define void @test_2_trunc_i24_to_i8(i24 %x, ptr %A) {
24+
; CHECK-LABEL: define void @test_2_trunc_i24_to_i8(
25+
; CHECK-SAME: i24 [[X:%.*]], ptr [[A:%.*]]) {
26+
; CHECK-NEXT: [[T:%.*]] = trunc i24 [[X]] to i8
27+
; CHECK-NEXT: store i8 [[T]], ptr [[A]], align 1
28+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 1
29+
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
30+
; CHECK-NEXT: ret void
31+
;
32+
%t = trunc i24 %x to i8
33+
store i8 %t, ptr %A, align 1
34+
%gep = getelementptr i8, ptr %A, i64 1
35+
store i8 0, ptr %gep, align 1
36+
ret void
37+
}
38+
39+
define void @test_4_trunc_i24_to_i8(i24 %x, ptr %A) {
40+
; CHECK-LABEL: define void @test_4_trunc_i24_to_i8(
41+
; CHECK-SAME: i24 [[X:%.*]], ptr [[A:%.*]]) {
42+
; CHECK-NEXT: [[T:%.*]] = trunc i24 [[X]] to i8
43+
; CHECK-NEXT: store i8 [[T]], ptr [[A]], align 1
44+
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[A]], i64 1
45+
; CHECK-NEXT: store i8 [[T]], ptr [[GEP_1]], align 1
46+
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[A]], i64 2
47+
; CHECK-NEXT: store i8 [[T]], ptr [[GEP_2]], align 1
48+
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[A]], i64 3
49+
; CHECK-NEXT: store i8 [[T]], ptr [[GEP_3]], align 1
50+
; CHECK-NEXT: ret void
51+
;
52+
%t = trunc i24 %x to i8
53+
store i8 %t, ptr %A, align 1
54+
%gep.1 = getelementptr i8, ptr %A, i64 1
55+
store i8 %t, ptr %gep.1, align 1
56+
%gep.2 = getelementptr i8, ptr %A, i64 2
57+
store i8 %t, ptr %gep.2, align 1
58+
%gep.3 = getelementptr i8, ptr %A, i64 3
59+
store i8 %t, ptr %gep.3, align 1
60+
ret void
61+
}
62+
63+
define void @test_8_trunc_i24_to_i8(i24 %x, ptr %A) {
64+
; CHECK-LABEL: define void @test_8_trunc_i24_to_i8(
65+
; CHECK-SAME: i24 [[X:%.*]], ptr [[A:%.*]]) {
66+
; CHECK-NEXT: [[T:%.*]] = trunc i24 [[X]] to i8
67+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[T]], i32 0
68+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> zeroinitializer
69+
; CHECK-NEXT: store <8 x i8> [[TMP2]], ptr [[A]], align 1
70+
; CHECK-NEXT: ret void
71+
;
72+
%t = trunc i24 %x to i8
73+
store i8 %t, ptr %A, align 1
74+
%gep.1 = getelementptr i8, ptr %A, i64 1
75+
store i8 %t, ptr %gep.1, align 1
76+
%gep.2 = getelementptr i8, ptr %A, i64 2
77+
store i8 %t, ptr %gep.2, align 1
78+
%gep.3 = getelementptr i8, ptr %A, i64 3
79+
store i8 %t, ptr %gep.3, align 1
80+
%gep.4 = getelementptr i8, ptr %A, i64 4
81+
store i8 %t, ptr %gep.4, align 1
82+
%gep.5 = getelementptr i8, ptr %A, i64 5
83+
store i8 %t, ptr %gep.5, align 1
84+
%gep.6 = getelementptr i8, ptr %A, i64 6
85+
store i8 %t, ptr %gep.6, align 1
86+
%gep.7 = getelementptr i8, ptr %A, i64 7
87+
store i8 %t, ptr %gep.7, align 1
88+
ret void
89+
}
90+
91+
define void @test_4_trunc_i24_to_i16(i24 %x, ptr %A) {
92+
; CHECK-LABEL: define void @test_4_trunc_i24_to_i16(
93+
; CHECK-SAME: i24 [[X:%.*]], ptr [[A:%.*]]) {
94+
; CHECK-NEXT: [[T:%.*]] = trunc i24 [[X]] to i16
95+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[T]], i32 0
96+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <4 x i32> zeroinitializer
97+
; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[A]], align 1
98+
; CHECK-NEXT: ret void
99+
;
100+
%t = trunc i24 %x to i16
101+
store i16 %t, ptr %A, align 1
102+
%gep.1 = getelementptr i16, ptr %A, i64 1
103+
store i16 %t, ptr %gep.1, align 1
104+
%gep.2 = getelementptr i16, ptr %A, i64 2
105+
store i16 %t, ptr %gep.2, align 1
106+
%gep.3 = getelementptr i16, ptr %A, i64 3
107+
store i16 %t, ptr %gep.3, align 1
108+
ret void
109+
}

0 commit comments

Comments
 (0)