Skip to content

Commit e4aeeba

Browse files
authored
[SLP][REVEC] When ScalarTy is FixedVectorType, the insertion index should consider the number of elements of ScalarTy. (#114526)
1 parent f484a04 commit e4aeeba

File tree

2 files changed: 64 additions and 9 deletions.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -13814,13 +13814,12 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1381413814
unsigned VF = 0,
1381513815
function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
1381613816
IsFinalized = true;
13817+
unsigned ScalarTyNumElements = getNumElements(ScalarTy);
1381713818
SmallVector<int> NewExtMask(ExtMask);
13818-
if (auto *VecTy = dyn_cast<FixedVectorType>(ScalarTy)) {
13819+
if (ScalarTyNumElements != 1) {
1381913820
assert(SLPReVec && "FixedVectorType is not expected.");
13820-
transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
13821-
CommonMask);
13822-
transformScalarShuffleIndiciesToVector(VecTy->getNumElements(),
13823-
NewExtMask);
13821+
transformScalarShuffleIndiciesToVector(ScalarTyNumElements, CommonMask);
13822+
transformScalarShuffleIndiciesToVector(ScalarTyNumElements, NewExtMask);
1382413823
ExtMask = NewExtMask;
1382513824
}
1382613825
if (Action) {
@@ -13863,12 +13862,14 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1386313862
return !isKnownNonNegative(
1386413863
V, SimplifyQuery(*R.DL));
1386513864
}));
13865+
unsigned InsertionIndex = Idx * ScalarTyNumElements;
1386613866
Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V,
13867-
Builder.getInt64(Idx));
13867+
Builder.getInt64(InsertionIndex));
1386813868
if (!CommonMask.empty()) {
13869-
std::iota(std::next(CommonMask.begin(), Idx),
13870-
std::next(CommonMask.begin(), Idx + E->getVectorFactor()),
13871-
Idx);
13869+
std::iota(std::next(CommonMask.begin(), InsertionIndex),
13870+
std::next(CommonMask.begin(), (Idx + E->getVectorFactor()) *
13871+
ScalarTyNumElements),
13872+
InsertionIndex);
1387213873
}
1387313874
}
1387413875
InVectors.front() = Vec;

llvm/test/Transforms/SLPVectorizer/revec.ll

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -355,3 +355,57 @@ entry:
355355
%10 = icmp ne <2 x i8> %8, zeroinitializer
356356
ret void
357357
}
358+
359+
define void @test12() {
360+
; CHECK-LABEL: @test12(
361+
; CHECK-NEXT: entry:
362+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr null, i64 33
363+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr null, i64 50
364+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr null, i64 75
365+
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4
366+
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
367+
; CHECK-NEXT: [[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4
368+
; CHECK-NEXT: [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0)
369+
; CHECK-NEXT: [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8)
370+
; CHECK-NEXT: [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16)
371+
; CHECK-NEXT: [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double>
372+
; CHECK-NEXT: [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0)
373+
; CHECK-NEXT: [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8)
374+
; CHECK-NEXT: [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16)
375+
; CHECK-NEXT: [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24)
376+
; CHECK-NEXT: [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]]
377+
; CHECK-NEXT: [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float>
378+
; CHECK-NEXT: [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0)
379+
; CHECK-NEXT: [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8)
380+
; CHECK-NEXT: [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16)
381+
; CHECK-NEXT: [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24)
382+
; CHECK-NEXT: [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]]
383+
; CHECK-NEXT: ret void
384+
;
385+
entry:
386+
%0 = getelementptr float, ptr null, i64 33
387+
%1 = getelementptr float, ptr null, i64 41
388+
%2 = getelementptr float, ptr null, i64 50
389+
%3 = getelementptr float, ptr null, i64 75
390+
%4 = load <8 x float>, ptr %0, align 4
391+
%5 = load <8 x float>, ptr %1, align 4
392+
%6 = load <8 x float>, ptr %2, align 4
393+
%7 = load <8 x float>, ptr %3, align 4
394+
%8 = fpext <8 x float> %4 to <8 x double>
395+
%9 = fpext <8 x float> %5 to <8 x double>
396+
%10 = fpext <8 x float> %6 to <8 x double>
397+
%11 = fpext <8 x float> %7 to <8 x double>
398+
%12 = fadd <8 x double> zeroinitializer, %8
399+
%13 = fadd <8 x double> zeroinitializer, %9
400+
%14 = fadd <8 x double> zeroinitializer, %10
401+
%15 = fadd <8 x double> zeroinitializer, %11
402+
%16 = fptrunc <8 x double> %12 to <8 x float>
403+
%17 = fptrunc <8 x double> %13 to <8 x float>
404+
%18 = fptrunc <8 x double> %14 to <8 x float>
405+
%19 = fptrunc <8 x double> %15 to <8 x float>
406+
%20 = fcmp ogt <8 x float> zeroinitializer, %16
407+
%21 = fcmp ogt <8 x float> zeroinitializer, %17
408+
%22 = fcmp ogt <8 x float> zeroinitializer, %18
409+
%23 = fcmp ogt <8 x float> zeroinitializer, %19
410+
ret void
411+
}

0 commit comments

Comments
 (0)