Skip to content

Commit bec6d9e

Browse files
author
git apple-llvm automerger
committed
Merge commit '058ac837bc35' from llvm.org/main into next
2 parents e6f75da + 058ac83 commit bec6d9e

File tree

2 files changed

+50
-6
lines changed

2 files changed

+50
-6
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3045,7 +3045,9 @@ class BoUpSLP {
30453045

30463046
/// \returns a vector from a collection of scalars in \p VL. If \p Root is not
30473047
/// specified, the starting vector value is poison.
3048-
Value *gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy);
3048+
Value *
3049+
gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy,
3050+
function_ref<Value *(Value *, Value *, ArrayRef<int>)> CreateShuffle);
30493051

30503052
/// \returns whether the VectorizableTree is fully vectorizable and will
30513053
/// be beneficial even if the tree height is tiny.
@@ -9167,8 +9169,9 @@ class BaseShuffleAnalysis {
91679169
int VF = Mask.size();
91689170
if (auto *FTy = dyn_cast<FixedVectorType>(V1->getType()))
91699171
VF = FTy->getNumElements();
9170-
if (V2 &&
9171-
!isUndefVector(V2, buildUseMask(VF, Mask, UseMask::SecondArg)).all()) {
9172+
if (V2 && !isUndefVector</*IsPoisonOnly=*/true>(
9173+
V2, buildUseMask(VF, Mask, UseMask::SecondArg))
9174+
.all()) {
91729175
// Peek through shuffles.
91739176
Value *Op1 = V1;
91749177
Value *Op2 = V2;
@@ -13454,7 +13457,9 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
1345413457
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
1345513458
}
1345613459

13457-
Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
13460+
Value *BoUpSLP::gather(
13461+
ArrayRef<Value *> VL, Value *Root, Type *ScalarTy,
13462+
function_ref<Value *(Value *, Value *, ArrayRef<int>)> CreateShuffle) {
1345813463
// List of instructions/lanes from current block and/or the blocks which are
1345913464
// part of the current loop. These instructions will be inserted at the end to
1346013465
// make it possible to optimize loops and hoist invariant instructions out of
@@ -13560,7 +13565,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root, Type *ScalarTy) {
1356013565
if (isa<PoisonValue>(Vec)) {
1356113566
Vec = OriginalRoot;
1356213567
} else {
13563-
Vec = Builder.CreateShuffleVector(Root, Vec, Mask);
13568+
Vec = CreateShuffle(Root, Vec, Mask);
1356413569
if (auto *OI = dyn_cast<Instruction>(OriginalRoot);
1356513570
OI && OI->hasNUses(0))
1356613571
eraseInstruction(OI);
@@ -14022,7 +14027,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1402214027
}
1402314028
Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
1402414029
Value *Root = nullptr) {
14025-
return R.gather(VL, Root, ScalarTy);
14030+
return R.gather(VL, Root, ScalarTy,
14031+
[&](Value *V1, Value *V2, ArrayRef<int> Mask) {
14032+
return createShuffle(V1, V2, Mask);
14033+
});
1402614034
}
1402714035
Value *createFreeze(Value *V) { return Builder.CreateFreeze(V); }
1402814036
/// Finalize emission of the shuffles.
Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @test(i16 %arg) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: i16 [[ARG:%.*]]) {
7+
; CHECK-NEXT: [[BB:.*:]]
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 0, i16 poison>, i16 [[ARG]], i32 1
9+
; CHECK-NEXT: [[TMP1:%.*]] = sitofp <2 x i16> [[TMP0]] to <2 x float>
10+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
11+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> <float 0.000000e+00, float poison, float poison, float poison>, <4 x i32> <i32 4, i32 1, i32 poison, i32 poison>
12+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
13+
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> zeroinitializer, [[TMP4]]
14+
; CHECK-NEXT: [[TMP6:%.*]] = fsub <4 x float> zeroinitializer, [[TMP4]]
15+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
16+
; CHECK-NEXT: [[TMP8:%.*]] = fsub <4 x float> [[TMP7]], [[TMP2]]
17+
; CHECK-NEXT: store <4 x float> [[TMP8]], ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 20), align 4
18+
; CHECK-NEXT: ret void
19+
;
20+
bb:
21+
%sitofp = sitofp i16 %arg to float
22+
%fadd = fadd float 0.000000e+00, 0.000000e+00
23+
%fsub = fsub float 0.000000e+00, %sitofp
24+
%fsub1 = fsub float 0.000000e+00, %sitofp
25+
%fsub2 = fsub float 0.000000e+00, %sitofp
26+
%sitofp3 = sitofp i16 0 to float
27+
%fsub4 = fsub float %fadd, %sitofp3
28+
store float %fsub4, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 20), align 4
29+
%fsub5 = fsub float %fsub, %sitofp
30+
store float %fsub5, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 24), align 4
31+
%fsub6 = fsub float %fsub1, %sitofp
32+
store float %fsub6, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 28), align 4
33+
%fsub7 = fsub float %fsub2, %sitofp
34+
store float %fsub7, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 32), align 4
35+
ret void
36+
}

0 commit comments

Comments
 (0)