Skip to content

Commit b73476c

Browse files
committed
[SLP] Make sure MinVF is a power-of-2 by using PowerOf2Ceil.
This should ensure we explore the same VFs as before 6d66db3. Fixes #88640.
1 parent: 34013e7; commit: b73476c

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15155,8 +15155,8 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
1515515155
Type *ValueTy = StoreTy;
1515615156
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
1515715157
ValueTy = Trunc->getSrcTy();
15158-
unsigned MinVF = TTI->getStoreMinimumVF(
15159-
R.getMinVF(DL->getTypeSizeInBits(StoreTy)), StoreTy, ValueTy);
15158+
unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF(
15159+
R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy, ValueTy));
1516015160

1516115161
if (MaxVF < MinVF) {
1516215162
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF

llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,3 +107,36 @@ define void @test_4_trunc_i24_to_i16(i24 %x, ptr %A) {
107107
store i16 %t, ptr %gep.3, align 1
108108
ret void
109109
}
110+
111+
%struct.d = type { [3 x i8], [3 x i8], [2 x i8] }
112+
113+
; Test case for https://github.com/llvm/llvm-project/issues/88640.
114+
define void @test_access_i24_directly(ptr %src, ptr noalias %dst) "target-cpu"="btver2" {
115+
; CHECK-LABEL: define void @test_access_i24_directly(
116+
; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
117+
; CHECK-NEXT: entry:
118+
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[SRC]], align 8
119+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i24
120+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [[STRUCT_D:%.*]], ptr [[SRC]], i64 0, i32 1
121+
; CHECK-NEXT: [[BF_LOAD:%.*]] = load i24, ptr [[GEP_SRC]], align 1
122+
; CHECK-NEXT: [[BF_VALUE:%.*]] = and i24 [[TMP1]], 8388607
123+
; CHECK-NEXT: [[BF_CLEAR:%.*]] = and i24 [[BF_LOAD]], -8388608
124+
; CHECK-NEXT: [[BF_SET:%.*]] = or disjoint i24 [[BF_CLEAR]], [[BF_VALUE]]
125+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds [[STRUCT_D]], ptr [[DST]], i64 0, i32 1
126+
; CHECK-NEXT: store i24 [[BF_SET]], ptr [[GEP_DST]], align 1
127+
; CHECK-NEXT: store i24 0, ptr [[DST]], align 8
128+
; CHECK-NEXT: ret void
129+
;
130+
entry:
131+
%0 = load i64, ptr %src, align 8
132+
%1 = trunc i64 %0 to i24
133+
%gep.src = getelementptr inbounds %struct.d, ptr %src, i64 0, i32 1
134+
%bf.load = load i24, ptr %gep.src, align 1
135+
%bf.value = and i24 %1, 8388607
136+
%bf.clear = and i24 %bf.load, -8388608
137+
%bf.set = or disjoint i24 %bf.clear, %bf.value
138+
%gep.dst = getelementptr inbounds %struct.d, ptr %dst, i64 0, i32 1
139+
store i24 %bf.set, ptr %gep.dst, align 1
140+
store i24 0, ptr %dst, align 8
141+
ret void
142+
}

0 commit comments

Comments
 (0)