Skip to content

Commit 8f8b387

Browse files
sparker-arm authored and yuxuanchen1997 committed
[TTI][WebAssembly] Pairwise reduction expansion (#93948)
Summary: WebAssembly doesn't support horizontal operations, nor does it have a way of expressing fast-math or reassoc flags, so runtimes are currently unable to use pairwise operations when generating code from the existing shuffle patterns. This patch allows the backend to select which (arbitrary) shuffle pattern is used for each reduction intrinsic. The default behaviour is unchanged: the vector is split into a top half and a bottom half. The other pattern introduced is a pairwise shuffle. WebAssembly enables pairwise reductions for int/fp add/sub. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250991
1 parent b47af04 commit 8f8b387

File tree

9 files changed

+1151
-16
lines changed

9 files changed

+1151
-16
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1705,6 +1705,13 @@ class TargetTransformInfo {
17051705
/// into a shuffle sequence.
17061706
bool shouldExpandReduction(const IntrinsicInst *II) const;
17071707

1708+
enum struct ReductionShuffle { SplitHalf, Pairwise };
1709+
1710+
/// \returns The shuffle sequence pattern used to expand the given reduction
1711+
/// intrinsic.
1712+
ReductionShuffle
1713+
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const;
1714+
17081715
/// \returns the size cost of rematerializing a GlobalValue address relative
17091716
/// to a stack reload.
17101717
unsigned getGISelRematGlobalCost() const;
@@ -2156,6 +2163,8 @@ class TargetTransformInfo::Concept {
21562163
virtual bool preferEpilogueVectorization() const = 0;
21572164

21582165
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2166+
virtual ReductionShuffle
2167+
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const = 0;
21592168
virtual unsigned getGISelRematGlobalCost() const = 0;
21602169
virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
21612170
virtual bool enableScalableVectorization() const = 0;
@@ -2898,6 +2907,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
28982907
return Impl.shouldExpandReduction(II);
28992908
}
29002909

2910+
ReductionShuffle
2911+
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
2912+
return Impl.getPreferredExpandedReductionShuffle(II);
2913+
}
2914+
29012915
unsigned getGISelRematGlobalCost() const override {
29022916
return Impl.getGISelRematGlobalCost();
29032917
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,11 @@ class TargetTransformInfoImplBase {
936936

937937
bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
938938

939+
TTI::ReductionShuffle
940+
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const {
941+
return TTI::ReductionShuffle::SplitHalf;
942+
}
943+
939944
unsigned getGISelRematGlobalCost() const { return 1; }
940945

941946
unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

llvm/include/llvm/Transforms/Utils/LoopUtils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "llvm/Analysis/IVDescriptors.h"
1717
#include "llvm/Analysis/LoopAccessAnalysis.h"
18+
#include "llvm/Analysis/TargetTransformInfo.h"
1819
#include "llvm/IR/VectorBuilder.h"
1920
#include "llvm/Transforms/Utils/ValueMapper.h"
2021

@@ -385,6 +386,7 @@ Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
385386
/// Generates a vector reduction using shufflevectors to reduce the value.
386387
/// Fast-math-flags are propagated using the IRBuilder's setting.
387388
Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
389+
TargetTransformInfo::ReductionShuffle RS,
388390
RecurKind MinMaxKind = RecurKind::None);
389391

390392
/// Create a target reduction of the given vector. The reduction operation

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,6 +1317,12 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
13171317
return TTIImpl->shouldExpandReduction(II);
13181318
}
13191319

1320+
TargetTransformInfo::ReductionShuffle
1321+
TargetTransformInfo::getPreferredExpandedReductionShuffle(
1322+
const IntrinsicInst *II) const {
1323+
return TTIImpl->getPreferredExpandedReductionShuffle(II);
1324+
}
1325+
13201326
unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
13211327
return TTIImpl->getGISelRematGlobalCost();
13221328
}

llvm/lib/CodeGen/ExpandReductions.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
5959
isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
6060
Intrinsic::ID ID = II->getIntrinsicID();
6161
RecurKind RK = getMinMaxReductionRecurKind(ID);
62+
TargetTransformInfo::ReductionShuffle RS =
63+
TTI->getPreferredExpandedReductionShuffle(II);
6264

6365
Value *Rdx = nullptr;
6466
IRBuilder<> Builder(II);
@@ -79,7 +81,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
7981
if (!isPowerOf2_32(
8082
cast<FixedVectorType>(Vec->getType())->getNumElements()))
8183
continue;
82-
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
84+
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
8385
Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx,
8486
"bin.rdx");
8587
}
@@ -112,7 +114,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
112114
break;
113115
}
114116
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
115-
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
117+
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
116118
break;
117119
}
118120
case Intrinsic::vector_reduce_add:
@@ -127,7 +129,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
127129
cast<FixedVectorType>(Vec->getType())->getNumElements()))
128130
continue;
129131
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
130-
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
132+
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
131133
break;
132134
}
133135
case Intrinsic::vector_reduce_fmax:
@@ -140,7 +142,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
140142
!FMF.noNaNs())
141143
continue;
142144
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
143-
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
145+
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
144146
break;
145147
}
146148
}

llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,18 @@ WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
9494
return Cost;
9595
}
9696

97+
TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle(
98+
const IntrinsicInst *II) const {
99+
100+
switch (II->getIntrinsicID()) {
101+
default:
102+
break;
103+
case Intrinsic::vector_reduce_fadd:
104+
return TTI::ReductionShuffle::Pairwise;
105+
}
106+
return TTI::ReductionShuffle::SplitHalf;
107+
}
108+
97109
bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
98110
const Function *Callee) const {
99111
// Allow inlining only when the Callee has a subset of the Caller's

llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
7070
TTI::TargetCostKind CostKind,
7171
unsigned Index, Value *Op0, Value *Op1);
7272

73+
TTI::ReductionShuffle
74+
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const;
7375
/// @}
7476

7577
bool areInlineCompatible(const Function *Caller,

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,7 +1077,9 @@ Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
10771077

10781078
// Helper to generate a log2 shuffle reduction.
10791079
Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
1080-
unsigned Op, RecurKind RdxKind) {
1080+
unsigned Op,
1081+
TargetTransformInfo::ReductionShuffle RS,
1082+
RecurKind RdxKind) {
10811083
unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
10821084
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
10831085
// and vector ops, reducing the set of values being computed by half each
@@ -1091,18 +1093,10 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
10911093
// will never be relevant here. Note that it would be generally unsound to
10921094
// propagate these from an intrinsic call to the expansion anyways as we
10931095
// change the order of operations.
1094-
Value *TmpVec = Src;
1095-
SmallVector<int, 32> ShuffleMask(VF);
1096-
for (unsigned i = VF; i != 1; i >>= 1) {
1097-
// Move the upper half of the vector to the lower half.
1098-
for (unsigned j = 0; j != i / 2; ++j)
1099-
ShuffleMask[j] = i / 2 + j;
1100-
1101-
// Fill the rest of the mask with undef.
1102-
std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
1103-
1096+
auto BuildShuffledOp = [&Builder, &Op,
1097+
&RdxKind](SmallVectorImpl<int> &ShuffleMask,
1098+
Value *&TmpVec) -> void {
11041099
Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
1105-
11061100
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
11071101
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
11081102
"bin.rdx");
@@ -1111,6 +1105,30 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
11111105
"Invalid min/max");
11121106
TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
11131107
}
1108+
};
1109+
1110+
Value *TmpVec = Src;
1111+
if (TargetTransformInfo::ReductionShuffle::Pairwise == RS) {
1112+
SmallVector<int, 32> ShuffleMask(VF);
1113+
for (unsigned stride = 1; stride < VF; stride <<= 1) {
1114+
// Initialise the mask with undef.
1115+
std::fill(ShuffleMask.begin(), ShuffleMask.end(), -1);
1116+
for (unsigned j = 0; j < VF; j += stride << 1) {
1117+
ShuffleMask[j] = j + stride;
1118+
}
1119+
BuildShuffledOp(ShuffleMask, TmpVec);
1120+
}
1121+
} else {
1122+
SmallVector<int, 32> ShuffleMask(VF);
1123+
for (unsigned i = VF; i != 1; i >>= 1) {
1124+
// Move the upper half of the vector to the lower half.
1125+
for (unsigned j = 0; j != i / 2; ++j)
1126+
ShuffleMask[j] = i / 2 + j;
1127+
1128+
// Fill the rest of the mask with undef.
1129+
std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
1130+
BuildShuffledOp(ShuffleMask, TmpVec);
1131+
}
11141132
}
11151133
// The result is in the first element of the vector.
11161134
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));

0 commit comments

Comments
 (0)