File tree Expand file tree Collapse file tree 2 files changed +29
-4
lines changed
test/Transforms/SLPVectorizer/X86 Expand file tree Collapse file tree 2 files changed +29
-4
lines changed Original file line number Diff line number Diff line change @@ -19724,10 +19724,18 @@ bool SLPVectorizerPass::vectorizeStores(
19724
19724
Type *ValueTy = StoreTy;
19725
19725
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
19726
19726
ValueTy = Trunc->getSrcTy();
19727
- unsigned MinVF = std::max<unsigned>(
19728
- 2, PowerOf2Ceil(TTI->getStoreMinimumVF(
19729
- R.getMinVF(DL->getTypeStoreSizeInBits(StoreTy)), StoreTy,
19730
- ValueTy)));
19727
+ // When REVEC is enabled, StoreTy and ValueTy may be FixedVectorType. But
19728
+ // getStoreMinimumVF only supports scalar types as arguments. As a result,
19729
+ // we need to use the element type of StoreTy and ValueTy to retrieve the
19730
+ // VF and then transform it back.
19731
+ // Remember: VF is defined as the number of stores we want to vectorize, not the
19732
+ // number of elements in the final vector.
19733
+ Type *StoreScalarTy = StoreTy->getScalarType();
19734
+ unsigned MinVF = PowerOf2Ceil(TTI->getStoreMinimumVF(
19735
+ R.getMinVF(DL->getTypeStoreSizeInBits(StoreScalarTy)), StoreScalarTy,
19736
+ ValueTy->getScalarType()));
19737
+ MinVF /= getNumElements(StoreTy);
19738
+ MinVF = std::max<unsigned>(2, MinVF);
19731
19739
19732
19740
if (MaxVF < MinVF) {
19733
19741
LLVM_DEBUG(dbgs() << "SLP: Vectorization infeasible as MaxVF (" << MaxVF
Original file line number Diff line number Diff line change
1
+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2
+ ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
3
+
4
+ define void @test () {
5
+ ; CHECK-LABEL: @test(
6
+ ; CHECK-NEXT: entry:
7
+ ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> zeroinitializer, i64 0)
8
+ ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP0]], <4 x i8> zeroinitializer, i64 4)
9
+ ; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr null, align 1
10
+ ; CHECK-NEXT: ret void
11
+ ;
12
+ entry:
13
+ %0 = getelementptr i8 , ptr null , i64 4
14
+ store <4 x i8 > zeroinitializer , ptr null , align 1
15
+ store <4 x i8 > zeroinitializer , ptr %0 , align 1
16
+ ret void
17
+ }
You can’t perform that action at this time.
0 commit comments