 //
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tensor/Transforms/Transforms.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/PatternMatch.h"
 
+#include "llvm/ADT/TypeSwitch.h"
+
 using namespace mlir;
 using namespace mlir::tensor;
 
@@ -45,9 +48,155 @@ struct GenerateToConstant : public OpRewritePattern<GenerateOp> {
   }
 };
 
+/// Transform a linear index from one indexing space to another given:
+///
+/// - the shape of the source indexing space,
+/// - the strides of the target indexing space,
+/// - a linear index into the source indexing space.
+///
+/// This function is logically a sequence of linearize/delinearize over
+/// different bases but avoids allocating intermediate SmallVectors.
+int64_t transformIndexSpace(ArrayRef<int64_t> inputShape,
+                            ArrayRef<int64_t> outputStrides,
+                            int64_t srcLinearIndex) {
+  assert(inputShape.size() == outputStrides.size());
+
+  int64_t dstLinearIndex = 0;
+
+  for (int64_t dim = inputShape.size() - 1; dim >= 0; --dim) {
+    // Compute the index into the current dimension of the source tensor.
+    // `quotient` is the remaining linear index after accounting for the
+    // current dimension.
+    //
+    // `remainder` is the index into the source tensor for the current
+    // dimension.
+    auto [quotient, remainder] = std::div(srcLinearIndex, inputShape[dim]);
+
+    srcLinearIndex = quotient;
+
+    // Add the contribution of the current dimension to the output using the
+    // corresponding output stride.
+    dstLinearIndex += outputStrides[dim] * remainder;
+  }
+
+  return dstLinearIndex;
+}
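For intuition, the remapping above can be checked on a small case outside of MLIR. The following standalone sketch is not part of the patch; the helper name and the main() driver are illustrative only. It remaps element (1, 1) of a 2x3 source tensor into a 4x5 target space, whose row-major strides are {5, 1}.

#include <cassert>
#include <cstdint>
#include <vector>

// Same idea as transformIndexSpace: walk the dimensions from innermost to
// outermost, peel off the coordinate in the source shape, and accumulate it
// against the target strides.
static int64_t remapLinearIndex(const std::vector<int64_t> &inputShape,
                                const std::vector<int64_t> &outputStrides,
                                int64_t srcLinearIndex) {
  int64_t dstLinearIndex = 0;
  for (int64_t dim = inputShape.size() - 1; dim >= 0; --dim) {
    int64_t coord = srcLinearIndex % inputShape[dim]; // index in this dim
    srcLinearIndex /= inputShape[dim];                // remaining outer dims
    dstLinearIndex += outputStrides[dim] * coord;
  }
  return dstLinearIndex;
}

int main() {
  // Element (1, 1) of a 2x3 tensor has source linear index 1 * 3 + 1 = 4.
  // With target strides {5, 1} it linearizes to 1 * 5 + 1 = 6; the low-pad
  // offset is added separately by the caller.
  assert(remapLinearIndex({2, 3}, {5, 1}, 4) == 6);
  return 0;
}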
+
+template <typename ElemType, typename AttrType>
+Value constantFoldPadOp(PatternRewriter &rewriter, Location loc,
+                        DenseElementsAttr input, AttrType padValue,
+                        ArrayRef<int64_t> padLow, ArrayRef<int64_t> padHigh) {
+  auto inputValues = input.tryGetValues<ElemType>();
+  if (failed(inputValues))
+    return nullptr;
+
+  auto oldShape = input.getType().getShape();
+
+  // Compute the output shape of the new value.
+  auto newShape =
+      llvm::map_to_vector(llvm::zip(oldShape, padLow, padHigh),
+                          [](std::tuple<int64_t, int64_t, int64_t> pack) {
+                            auto [old, low, high] = pack;
+                            return old + low + high;
+                          });
+
+  int64_t outputSize = computeProduct(newShape);
+
+  // Fully initialize the vector with the padding value.
+  // The non-padded area will then be copied.
+  SmallVector<ElemType> values(outputSize, padValue.getValue());
+
+  // Strides for input and output are used to transform between the indexing
+  // space of the input and output tensors.
+  SmallVector<int64_t> outputStrides = computeStrides(newShape);
+
+  // The contribution of the low padding to the offset in the output tensor.
+  // This is the starting position of the source tensor within the padding
+  // tensor.
+  int64_t startingOffset = linearize(padLow, outputStrides);
+
+  // Copy values from the input tensor to the corresponding sub-region
+  // of the output tensor.
+  for (auto [inputIndex, inputValue] : llvm::enumerate(*inputValues)) {
+    auto outputIndex = transformIndexSpace(oldShape, outputStrides, inputIndex);
+    values[outputIndex + startingOffset] = inputValue;
+  }
+
+  // Create an attribute for the folded value.
+  auto newType = input.getType().clone(newShape);
+  auto newAttr = DenseElementsAttr::get(newType, values);
+
+  Operation *constantOp =
+      rewriter.getContext()
+          ->getLoadedDialect<TensorDialect>()
+          ->materializeConstant(rewriter, newAttr, newType, loc);
+
+  return constantOp ? constantOp->getResult(0) : nullptr;
+}
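The overall folding strategy above (size the padded output, fill it entirely with the padding value, then copy the source into the sub-region that starts at startingOffset) can likewise be sketched without MLIR types. This hypothetical standalone version, not part of the patch, pads a 2x3 float tensor by one element on every side; all names and values are illustrative.

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const std::vector<int64_t> oldShape = {2, 3};
  const std::vector<int64_t> padLow = {1, 1};
  const std::vector<int64_t> padHigh = {1, 1};
  const std::vector<float> input = {1, 2, 3, 4, 5, 6};
  const float padValue = 0.0f;

  // newShape = oldShape + padLow + padHigh = {4, 5}.
  std::vector<int64_t> newShape;
  for (size_t i = 0; i < oldShape.size(); ++i)
    newShape.push_back(oldShape[i] + padLow[i] + padHigh[i]);

  // Row-major strides of the output ({5, 1}) and its total size (20).
  std::vector<int64_t> outputStrides(newShape.size(), 1);
  int64_t outputSize = newShape.back();
  for (int64_t i = newShape.size() - 2; i >= 0; --i) {
    outputStrides[i] = outputStrides[i + 1] * newShape[i + 1];
    outputSize *= newShape[i];
  }

  // Fill everything with the padding value first.
  std::vector<float> values(outputSize, padValue);

  // startingOffset = linearize(padLow, outputStrides) = 1 * 5 + 1 * 1 = 6.
  int64_t startingOffset = 0;
  for (size_t i = 0; i < padLow.size(); ++i)
    startingOffset += padLow[i] * outputStrides[i];

  // Copy each source element into its position in the output sub-region.
  for (int64_t idx = 0; idx < static_cast<int64_t>(input.size()); ++idx) {
    int64_t src = idx, dst = 0;
    for (int64_t dim = oldShape.size() - 1; dim >= 0; --dim) {
      dst += outputStrides[dim] * (src % oldShape[dim]);
      src /= oldShape[dim];
    }
    values[dst + startingOffset] = input[idx];
  }

  // The first source element lands at output row 1, column 1; the corners
  // keep the padding value.
  assert(values[6] == 1.0f && values[0] == padValue);
  return 0;
}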
+
+struct PadOpToConstant final : public OpRewritePattern<PadOp> {
+  using OpRewritePattern<PadOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(PadOp padTensorOp,
+                                PatternRewriter &rewriter) const override {
+    if (padTensorOp.getNofold())
+      return rewriter.notifyMatchFailure(
+          padTensorOp, "refusing to fold nofold pad operation");
+
+    TypedValue<RankedTensorType> input = padTensorOp.getSource();
+    RankedTensorType resultType = padTensorOp.getResult().getType();
+
+    DenseElementsAttr inputAttr = nullptr;
+    if (!matchPattern(input, m_Constant(&inputAttr)))
+      return failure();
+
+    Value paddingValue = padTensorOp.getConstantPaddingValue();
+
+    // Extract the constant value used for padding or bail out.
+    Attribute paddingAttr = nullptr;
+    if (!paddingValue || !matchPattern(paddingValue, m_Constant(&paddingAttr)))
+      return rewriter.notifyMatchFailure(padTensorOp,
+                                         "unable to get constant value");
+
+    // Try to extract the constant values of the low and high padding.
+    auto lowPad = getConstantIntValues(padTensorOp.getMixedLowPad());
+    auto highPad = getConstantIntValues(padTensorOp.getMixedHighPad());
+
+    // If the padding cannot be extracted, bail out.
+    if (!lowPad || !highPad)
+      return rewriter.notifyMatchFailure(padTensorOp,
+                                         "unable to extract constant padding");
+
+    Location loc = padTensorOp.getLoc();
+
+    // Try constant folding the supported cases of integer and float values.
+    Value newOp =
+        llvm::TypeSwitch<Attribute, Value>(paddingAttr)
+            .Case([&](FloatAttr floatAttr) {
+              return constantFoldPadOp<llvm::APFloat>(
+                  rewriter, loc, inputAttr, floatAttr, *lowPad, *highPad);
+            })
+            .Case([&](IntegerAttr integerAttr) {
+              return constantFoldPadOp<llvm::APInt>(
+                  rewriter, loc, inputAttr, integerAttr, *lowPad, *highPad);
+            })
+            .Default(Value());
+
+    if (!newOp)
+      return rewriter.notifyMatchFailure(padTensorOp,
+                                         "tensor type not supported");
+
+    if (newOp.getType() != resultType)
+      newOp = rewriter.create<tensor::CastOp>(loc, resultType, newOp);
+
+    rewriter.replaceOp(padTensorOp, newOp);
+    return success();
+  }
+};
+
 } // namespace
 
 void mlir::tensor::populateRewriteAsConstantPatterns(
     RewritePatternSet &patterns) {
-  patterns.add<GenerateToConstant>(patterns.getContext());
+  patterns.add<GenerateToConstant, PadOpToConstant>(patterns.getContext());
 }
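As a hedged usage sketch (the driver function below is hypothetical and not part of this change), the extended pattern set is applied the same way as the previous one, for example through the greedy pattern rewriter:

#include "mlir/Dialect/Tensor/Transforms/Transforms.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

// Hypothetical driver: collect the rewrite-as-constant patterns (now
// including PadOpToConstant) and apply them greedily to `op`.
static LogicalResult rewriteTensorOpsAsConstants(Operation *op) {
  RewritePatternSet patterns(op->getContext());
  tensor::populateRewriteAsConstantPatterns(patterns);
  return applyPatternsAndFoldGreedily(op, std::move(patterns));
}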