Skip to content

Commit d6437e9

Browse files
committed
update comments
1 parent 4532a04 commit d6437e9

File tree

2 files changed

+18
-15
lines changed

2 files changed

+18
-15
lines changed

mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@ using namespace mlir;
4343
///
4444
/// %mask = [1, 1, 0, 0, 0, 0]
4545
///
46-
/// will first be padded with number of `intraDataOffset` zeros:
46+
/// will first be padded in the front with `intraDataOffset` zeros, and
47+
/// padded in the back with zeros so that the number of elements is a multiple
48+
/// of `scale` (this simplifies the computation). The new mask will be:
4749
/// %mask = [0, 1, 1, 0, 0, 0, 0, 0]
4850
///
4951
/// then it will return the following new compressed mask:
@@ -54,7 +56,7 @@ static FailureOr<Operation *> getCompressedMaskOp(OpBuilder &rewriter,
5456
int origElements, int scale,
5557
int intraDataOffset = 0) {
5658
assert(intraDataOffset < scale && "intraDataOffset must be less than scale");
57-
auto numElements = (intraDataOffset + origElements + scale - 1) / scale;
59+
auto numElements = llvm::divideCeil(intraDataOffset + origElements, scale);
5860

5961
Operation *maskOp = mask.getDefiningOp();
6062
SmallVector<vector::ExtractOp, 2> extractOps;

mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ func.func @vector_maskedload_i2_dynamic_indexing_mixed(%passthru: vector<3xi2>,
206206
// CHECK: %[[ONE:.+]] = arith.constant dense<true> : vector<2xi1>
207207
// CHECK: %[[ZERO:.+]] = arith.constant dense<0> : vector<8xi2>
208208

209-
// extract passthru vector, and insert into zero vector, this is for constructing a new passthru
209+
// Extract the passthru vector and insert it into the zero vector; this constructs a new passthru.
210210
// CHECK: %[[EX1:.+]] = vector.extract %[[PTH]][0] : i2 from vector<3xi2>
211211
// CHECK: %[[IN1:.+]] = vector.insert %[[EX1]], %[[ZERO]] [%[[LINEAR2]]] : i2 into vector<8xi2>
212212
// CHECK: %[[C1:.+]] = arith.constant 1 : index
@@ -216,32 +216,33 @@ func.func @vector_maskedload_i2_dynamic_indexing_mixed(%passthru: vector<3xi2>,
216216
// CHECK: %[[C2:.+]] = arith.constant 2 : index
217217
// CHECK: %[[INCIDX2:.+]] = arith.addi %[[LINEAR2]], %[[C2]] : index
218218
// CHECK: %[[EX3:.+]] = vector.extract %[[PTH]][2] : i2 from vector<3xi2>
219-
// CHECK: %[[IN3:.+]] = vector.insert %[[EX3]], %[[IN2]] [%[[INCIDX2]]] : i2 into vector<8xi2>
219+
// CHECK: %[[NEW_PASSTHRU:.+]] = vector.insert %[[EX3]], %[[IN2]] [%[[INCIDX2]]] : i2 into vector<8xi2>
220220

221-
// bitcast the new passthru vector to emulated i8 vector
222-
// CHECK: %[[BITCAST:.+]] = vector.bitcast %[[IN3]] : vector<8xi2> to vector<2xi8>
221+
// Bitcast the new passthru vector to an emulated i8 vector
222+
// CHECK: %[[BCAST_PASSTHRU:.+]] = vector.bitcast %[[NEW_PASSTHRU]] : vector<8xi2> to vector<2xi8>
223223

224-
// use the emulated i8 vector to masked load from the memory
225-
// CHECK: %[[MASKEDLOAD:.+]] = vector.maskedload %[[ALLOC]][%[[LINEAR1]]], %[[ONE]], %[[BITCAST]]
224+
// Use the emulated i8 vector for masked load from the source memory
225+
// CHECK: %[[SOURCE:.+]] = vector.maskedload %[[ALLOC]][%[[LINEAR1]]], %[[ONE]], %[[BCAST_PASSTHRU]]
226226
// CHECK-SAME: memref<3xi8>, vector<2xi1>, vector<2xi8> into vector<2xi8>
227227

228-
// bitcast back to i2 vector
229-
// CHECK: %[[BITCAST2:.+]] = vector.bitcast %[[MASKEDLOAD]] : vector<2xi8> to vector<8xi2>
228+
// Bitcast back to i2 vector
229+
// CHECK: %[[BCAST_MASKLOAD:.+]] = vector.bitcast %[[SOURCE]] : vector<2xi8> to vector<8xi2>
230230

231231
// CHECK: %[[CST1:.+]] = arith.constant dense<false> : vector<8xi1>
232232

233-
// create a mask vector and select passthru part from the loaded vector.
234-
// note that if indices are known then we can fold the part generating mask.
233+
// Create a mask vector
234+
// Note that if the indices are known, the part that generates the mask can be folded.
235235
// CHECK: %[[EX4:.+]] = vector.extract %[[MASK]][0] : i1 from vector<3xi1>
236236
// CHECK: %[[IN4:.+]] = vector.insert %[[EX4]], %[[CST1]] [%[[LINEAR2]]] : i1 into vector<8xi1>
237237
// CHECK: %[[EX5:.+]] = vector.extract %[[MASK]][1] : i1 from vector<3xi1>
238238
// CHECK: %[[IN5:.+]] = vector.insert %[[EX5]], %[[IN4]] [%[[INCIDX]]] : i1 into vector<8xi1>
239239
// CHECK: %[[EX6:.+]] = vector.extract %[[MASK]][2] : i1 from vector<3xi1>
240-
// CHECK: %[[IN6:.+]] = vector.insert %[[EX6]], %[[IN5]] [%[[INCIDX2]]] : i1 into vector<8xi1>
240+
// CHECK: %[[NEW_MASK:.+]] = vector.insert %[[EX6]], %[[IN5]] [%[[INCIDX2]]] : i1 into vector<8xi1>
241241

242-
// CHECK: %[[SELECT:.+]] = arith.select %[[IN6]], %[[BITCAST2]], %[[IN3]] : vector<8xi1>, vector<8xi2>
242+
// Select the effective part from the source and passthru vectors
243+
// CHECK: %[[SELECT:.+]] = arith.select %[[NEW_MASK]], %[[BCAST_MASKLOAD]], %[[NEW_PASSTHRU]] : vector<8xi1>, vector<8xi2>
243244

244-
// finally, insert the selected parts into actual passthru vector.
245+
// Finally, insert the selected parts into the actual passthru vector.
245246
// CHECK: %[[EX7:.+]] = vector.extract %[[SELECT]][%[[LINEAR2]]] : i2 from vector<8xi2>
246247
// CHECK: %[[IN7:.+]] = vector.insert %[[EX7]], %[[PTH]] [0] : i2 into vector<3xi2>
247248
// CHECK: %[[EX8:.+]] = vector.extract %[[SELECT]][%[[INCIDX]]] : i2 from vector<8xi2>

0 commit comments

Comments
 (0)