Skip to content

Commit bdfcb50

Browse files
committed
[TTI][WebAssembly] Pairwise reduction expansion
WebAssembly supports neither horizontal operations nor a way of expressing fast-math or reassoc flags, so runtimes are currently unable to use pairwise operations when generating code from the existing shuffle patterns. This patch allows the backend to select which (arbitrary) shuffle pattern is used for each reduction intrinsic. The default behaviour is unchanged from the existing one, which splits the vector into a top and a bottom half. The other pattern introduced is a pairwise shuffle. WebAssembly enables pairwise reductions for int/fp add.
1 parent 1dbc2aa commit bdfcb50

File tree

9 files changed

+1152
-16
lines changed

9 files changed

+1152
-16
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,6 +1700,13 @@ class TargetTransformInfo {
17001700
/// into a shuffle sequence.
17011701
bool shouldExpandReduction(const IntrinsicInst *II) const;
17021702

1703+
enum struct ReductionShuffle { SplitHalf, Pairwise };
1704+
1705+
/// \returns The shuffle sequence pattern used to expand the given reduction
1706+
/// intrinsic.
1707+
ReductionShuffle
1708+
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const;
1709+
17031710
/// \returns the size cost of rematerializing a GlobalValue address relative
17041711
/// to a stack reload.
17051712
unsigned getGISelRematGlobalCost() const;
@@ -2150,6 +2157,8 @@ class TargetTransformInfo::Concept {
21502157
virtual bool preferEpilogueVectorization() const = 0;
21512158

21522159
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
2160+
virtual ReductionShuffle
2161+
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const = 0;
21532162
virtual unsigned getGISelRematGlobalCost() const = 0;
21542163
virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
21552164
virtual bool enableScalableVectorization() const = 0;
@@ -2889,6 +2898,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
28892898
return Impl.shouldExpandReduction(II);
28902899
}
28912900

2901+
ReductionShuffle
2902+
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override {
2903+
return Impl.getPreferredExpandedReductionShuffle(II);
2904+
}
2905+
28922906
unsigned getGISelRematGlobalCost() const override {
28932907
return Impl.getGISelRematGlobalCost();
28942908
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,11 @@ class TargetTransformInfoImplBase {
929929

930930
bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
931931

932+
TTI::ReductionShuffle
933+
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const {
934+
return TTI::ReductionShuffle::SplitHalf;
935+
}
936+
932937
unsigned getGISelRematGlobalCost() const { return 1; }
933938

934939
unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

llvm/include/llvm/Transforms/Utils/LoopUtils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "llvm/Analysis/IVDescriptors.h"
1717
#include "llvm/Analysis/LoopAccessAnalysis.h"
18+
#include "llvm/Analysis/TargetTransformInfo.h"
1819
#include "llvm/Transforms/Utils/ValueMapper.h"
1920

2021
namespace llvm {
@@ -384,6 +385,7 @@ Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
384385
/// Generates a vector reduction using shufflevectors to reduce the value.
385386
/// Fast-math-flags are propagated using the IRBuilder's setting.
386387
Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
388+
TargetTransformInfo::ReductionShuffle RS,
387389
RecurKind MinMaxKind = RecurKind::None);
388390

389391
/// Create a target reduction of the given vector. The reduction operation

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,6 +1313,12 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
13131313
return TTIImpl->shouldExpandReduction(II);
13141314
}
13151315

1316+
TargetTransformInfo::ReductionShuffle
1317+
TargetTransformInfo::getPreferredExpandedReductionShuffle(
1318+
const IntrinsicInst *II) const {
1319+
return TTIImpl->getPreferredExpandedReductionShuffle(II);
1320+
}
1321+
13161322
unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
13171323
return TTIImpl->getGISelRematGlobalCost();
13181324
}

llvm/lib/CodeGen/ExpandReductions.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
5959
isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
6060
Intrinsic::ID ID = II->getIntrinsicID();
6161
RecurKind RK = getMinMaxReductionRecurKind(ID);
62+
TargetTransformInfo::ReductionShuffle RS =
63+
TTI->getPreferredExpandedReductionShuffle(II);
6264

6365
Value *Rdx = nullptr;
6466
IRBuilder<> Builder(II);
@@ -79,7 +81,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
7981
if (!isPowerOf2_32(
8082
cast<FixedVectorType>(Vec->getType())->getNumElements()))
8183
continue;
82-
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
84+
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
8385
Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx,
8486
"bin.rdx");
8587
}
@@ -112,7 +114,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
112114
break;
113115
}
114116
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
115-
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
117+
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
116118
break;
117119
}
118120
case Intrinsic::vector_reduce_add:
@@ -127,7 +129,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
127129
cast<FixedVectorType>(Vec->getType())->getNumElements()))
128130
continue;
129131
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
130-
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
132+
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
131133
break;
132134
}
133135
case Intrinsic::vector_reduce_fmax:
@@ -140,7 +142,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
140142
!FMF.noNaNs())
141143
continue;
142144
unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
143-
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RK);
145+
Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
144146
break;
145147
}
146148
}

llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,19 @@ WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
9494
return Cost;
9595
}
9696

97+
TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle(
98+
const IntrinsicInst *II) const {
99+
100+
switch (II->getIntrinsicID()) {
101+
default:
102+
break;
103+
case Intrinsic::vector_reduce_add:
104+
case Intrinsic::vector_reduce_fadd:
105+
return TTI::ReductionShuffle::Pairwise;
106+
}
107+
return TTI::ReductionShuffle::SplitHalf;
108+
}
109+
97110
bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
98111
const Function *Callee) const {
99112
// Allow inlining only when the Callee has a subset of the Caller's

llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
7070
TTI::TargetCostKind CostKind,
7171
unsigned Index, Value *Op0, Value *Op1);
7272

73+
TTI::ReductionShuffle
74+
getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const;
7375
/// @}
7476

7577
bool areInlineCompatible(const Function *Caller,

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,7 +1077,9 @@ Value *llvm::getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
10771077

10781078
// Helper to generate a log2 shuffle reduction.
10791079
Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
1080-
unsigned Op, RecurKind RdxKind) {
1080+
unsigned Op,
1081+
TargetTransformInfo::ReductionShuffle RS,
1082+
RecurKind RdxKind) {
10811083
unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
10821084
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
10831085
// and vector ops, reducing the set of values being computed by half each
@@ -1091,18 +1093,10 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
10911093
// will never be relevant here. Note that it would be generally unsound to
10921094
// propagate these from an intrinsic call to the expansion anyways as we
10931095
// change the order of operations.
1094-
Value *TmpVec = Src;
1095-
SmallVector<int, 32> ShuffleMask(VF);
1096-
for (unsigned i = VF; i != 1; i >>= 1) {
1097-
// Move the upper half of the vector to the lower half.
1098-
for (unsigned j = 0; j != i / 2; ++j)
1099-
ShuffleMask[j] = i / 2 + j;
1100-
1101-
// Fill the rest of the mask with undef.
1102-
std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
1103-
1096+
auto BuildShuffledOp = [&Builder, &Op,
1097+
&RdxKind](SmallVectorImpl<int> &ShuffleMask,
1098+
Value *&TmpVec) -> void {
11041099
Value *Shuf = Builder.CreateShuffleVector(TmpVec, ShuffleMask, "rdx.shuf");
1105-
11061100
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
11071101
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
11081102
"bin.rdx");
@@ -1111,6 +1105,30 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
11111105
"Invalid min/max");
11121106
TmpVec = createMinMaxOp(Builder, RdxKind, TmpVec, Shuf);
11131107
}
1108+
};
1109+
1110+
Value *TmpVec = Src;
1111+
if (TargetTransformInfo::ReductionShuffle::Pairwise == RS) {
1112+
SmallVector<int, 32> ShuffleMask(VF);
1113+
for (unsigned stride = 1; stride < VF; stride <<= 1) {
1114+
// Initialise the mask with undef.
1115+
std::fill(ShuffleMask.begin(), ShuffleMask.end(), -1);
1116+
for (unsigned j = 0; j < VF; j += stride << 1) {
1117+
ShuffleMask[j] = j + stride;
1118+
}
1119+
BuildShuffledOp(ShuffleMask, TmpVec);
1120+
}
1121+
} else {
1122+
SmallVector<int, 32> ShuffleMask(VF);
1123+
for (unsigned i = VF; i != 1; i >>= 1) {
1124+
// Move the upper half of the vector to the lower half.
1125+
for (unsigned j = 0; j != i / 2; ++j)
1126+
ShuffleMask[j] = i / 2 + j;
1127+
1128+
// Fill the rest of the mask with undef.
1129+
std::fill(&ShuffleMask[i / 2], ShuffleMask.end(), -1);
1130+
BuildShuffledOp(ShuffleMask, TmpVec);
1131+
}
11141132
}
11151133
// The result is in the first element of the vector.
11161134
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));

0 commit comments

Comments
 (0)