@@ -1414,27 +1414,39 @@ static SmallVector<int64_t> getTiledPackShape(tensor::PackOp packOp,
 /// create an empty destination tensor and create a TransferWriteOp from the
 /// input to the empty tensor. If the destination shape is not the same as the
 /// inputVectorSizes for the first rank(inputVectorSizes) dims, then create a
-/// mask for the write.
+/// mask for the write. If `useInBoundsInsteadOfMasking` is set, then update the
+/// inBounds attribute of the transfer write op instead of masking.
 static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc,
                                            Value input,
                                            SmallVector<OpFoldResult> destSizes,
-                                           ArrayRef<int64_t> inputVectorSizes) {
+                                           ArrayRef<int64_t> inputVectorSizes,
+                                           bool useInBoundsInsteadOfMasking) {
+
   auto inputType = cast<VectorType>(input.getType());
   Value dest = builder.create<tensor::EmptyOp>(loc, destSizes,
                                                inputType.getElementType());
   int64_t rank = cast<ShapedType>(dest.getType()).getRank();
   auto zero = builder.create<arith::ConstantIndexOp>(loc, 0);
+  auto destShape = cast<ShapedType>(dest.getType()).getShape();
+  SmallVector<bool> inBoundsVal(rank, true);
+  if (useInBoundsInsteadOfMasking) {
+    // Update the inBounds attribute.
+    for (unsigned i = 0; i < rank; i++)
+      inBoundsVal[i] = (destShape[i] == inputVectorSizes[i]) &&
+                       !ShapedType::isDynamic(destShape[i]);
+  }
   Operation *write = builder.create<vector::TransferWriteOp>(
       loc,
       /*vector=*/input,
       /*source=*/dest,
       /*indices=*/SmallVector<Value>(rank, zero),
-      /*inBounds=*/SmallVector<bool>(rank, true));
-  auto destShape = cast<ShapedType>(dest.getType()).getShape();
+      /*inBounds=*/inBoundsVal);
   assert(llvm::none_of(
              destShape.drop_front(inputVectorSizes.size()),
              [](int64_t size) { return size == ShapedType::kDynamic; }) &&
          "Only dims aligned with inputVectorSizes may be dynamic");
+  if (useInBoundsInsteadOfMasking)
+    return write;
  bool needMaskForWrite = !llvm::equal(
      inputVectorSizes, destShape.take_front(inputVectorSizes.size()));
  if (needMaskForWrite) {
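A standalone sketch (not part of the patch) of the in-bounds logic added in the hunk above: each write dimension is marked in-bounds only when the destination extent is static and equal to the requested vector size; with useInBoundsInsteadOfMasking set, any other dimension is simply left out-of-bounds rather than masked. The kDynamic constant and the computeInBounds helper below are hypothetical stand-ins for ShapedType::kDynamic and the loop in createWriteOrMaskedWrite, kept free of MLIR types so the example compiles on its own.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

// Hypothetical stand-in for ShapedType::kDynamic.
constexpr int64_t kDynamic = std::numeric_limits<int64_t>::min();

// Mirrors the inBoundsVal loop above: in-bounds iff the dest dim is static
// and matches the requested vector size.
std::vector<bool> computeInBounds(const std::vector<int64_t> &destShape,
                                  const std::vector<int64_t> &vectorSizes) {
  std::vector<bool> inBounds(destShape.size(), true);
  for (std::size_t i = 0; i < destShape.size(); ++i)
    inBounds[i] = destShape[i] != kDynamic && destShape[i] == vectorSizes[i];
  return inBounds;
}

int main() {
  // Static dest equal to the vector sizes: every dim is in-bounds, so the
  // transfer_write needs no mask.
  for (bool b : computeInBounds({128, 96}, {128, 96}))
    std::cout << b << ' ';            // prints: 1 1
  std::cout << '\n';
  // A dynamic or mismatching dest dim is reported as out-of-bounds instead.
  for (bool b : computeInBounds({kDynamic, 64}, {128, 96}))
    std::cout << b << ' ';            // prints: 0 0
  std::cout << '\n';
}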
@@ -1535,9 +1547,9 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
       loc, shapeCastOp.getResult(), destPermutation);
 
   // Create TransferWriteOp.
-  Operation *write =
-      createWriteOrMaskedWrite(rewriter, loc, transposeOp.getResult(),
-                               reifiedReturnShapes[0], inputVectorSizes);
+  Operation *write = createWriteOrMaskedWrite(
+      rewriter, loc, transposeOp.getResult(), reifiedReturnShapes[0],
+      inputVectorSizes, /*useInBoundsInsteadOfMasking=*/false);
   newResults.push_back(write->getResult(0));
   return success();
 }
@@ -1547,7 +1559,9 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
 /// vector::TransposeOp - Transpose the Source tensor
 /// ShapeCastOp - Reshape the data based on the target.
 /// vector::TransferWriteOp. - Write the result vector back to the destination
-/// tensor
+/// tensor. If the vector sizes are not provided, they are determined from the
+/// result tensor shape; in that case, the inBounds attribute is updated
+/// instead of masking.
 static LogicalResult
 vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp,
                           ArrayRef<int64_t> inputVectorSizes,
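To make the read/transpose/shape_cast/write pipeline described in the doc comment above concrete, here is a worked shape example that is not part of the patch: a hypothetical unpack of a packed source tensor<8x8x32x16xf32> (inner_dims_pos = [0, 1], inner_tiles = [32, 16], no outer_dims_perm) into tensor<256x128xf32>. The permutation [0, 2, 1, 3] pairs each outer dim with its inner tile and is meant to illustrate the general shape flow, not the exact permutation the implementation computes.

#include <cstdint>
#include <iostream>
#include <vector>

// Shape flow for a hypothetical unpack: 8x8x32x16 (outer dims, then inner
// tiles) unpacked into 256x128.
int main() {
  std::vector<int64_t> readShape = {8, 8, 32, 16}; // vector.transfer_read
  std::vector<int64_t> perm = {0, 2, 1, 3};        // vector.transpose pairs
                                                   // each outer dim with its tile
  std::vector<int64_t> transposed;
  for (int64_t p : perm)
    transposed.push_back(readShape[p]);            // 8x32x8x16
  // vector.shape_cast collapses each (outer, tile) pair into one dest dim.
  std::vector<int64_t> writeShape = {transposed[0] * transposed[1],
                                     transposed[2] * transposed[3]};
  std::cout << writeShape[0] << 'x' << writeShape[1] << '\n'; // 256x128
}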
@@ -1560,11 +1574,32 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp,
 
   ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
   ArrayRef<int64_t> innerTiles = unpackOp.getStaticInnerTiles();
+  ArrayRef<int64_t> sourceShape = unpackTensorType.getShape();
+  bool useInBoundsInsteadOfMasking = false;
+  ArrayRef<int64_t> outerDimsPerm = unpackOp.getOuterDimsPerm();
+
+  auto destSize = unpackOp.getDestRank();
+
+  // initVectorShape is the shape of the vector that will be read from the
+  // source tensor. It is set like this: let the sourceShape have rank 'M' and
+  // the dest have rank 'N', with N <= M. Then:
+  //   - initVectorShape = sourceShape.take_front(N)
+  //   - if outer_dims_perm is present: apply that permutation to initVectorShape.
+  //   - multiply each entry of initVectorShape pointed to by innerDimPos by
+  //     the corresponding inner tile size.
+  SmallVector<int64_t> initVectorShape{sourceShape.take_front(destSize)};
+  if (inputVectorSizes.empty()) {
+    if (!outerDimsPerm.empty())
+      applyPermutationToVector(initVectorShape, outerDimsPerm);
+    for (auto [i, pos] : llvm::enumerate(innerDimPos))
+      initVectorShape[pos] *= innerTiles[i];
+
+    inputVectorSizes = initVectorShape;
+    useInBoundsInsteadOfMasking = true;
+  }
 
   SmallVector<int64_t> readMaskShape(inputVectorSizes.begin(),
                                      inputVectorSizes.end());
-  ArrayRef<int64_t> outerDimsPerm = unpackOp.getOuterDimsPerm();
-  ArrayRef<int64_t> sourceShape = unpackTensorType.getShape();
 
   // ReadMask is the size of tensor used to read and apply mask. It is
   // set like this: Let's say the vectorSize (VS) array is size 'N' and
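As a worked example of the initVectorShape inference in the hunk above (illustrative, not from the patch): take a hypothetical unpack with packed source tensor<3x8x16x32xf32>, dest tensor<128x96xf32>, inner_dims_pos = [0, 1], inner_tiles = [16, 32], outer_dims_perm = [1, 0]. The applyPerm helper below is a local stand-in for applyPermutationToVector, assumed here to be the gather form out[i] = in[perm[i]]; the inferred sizes come out equal to the destination shape, which is what the updated doc comment states for the no-vector-sizes path.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Gather-style permutation, standing in for applyPermutationToVector.
static std::vector<int64_t> applyPerm(const std::vector<int64_t> &in,
                                      const std::vector<int64_t> &perm) {
  std::vector<int64_t> out;
  for (int64_t p : perm)
    out.push_back(in[p]);
  return out;
}

int main() {
  std::vector<int64_t> sourceShape = {3, 8, 16, 32}; // packed source
  std::size_t destRank = 2;
  std::vector<int64_t> outerDimsPerm = {1, 0};
  std::vector<int64_t> innerDimPos = {0, 1};
  std::vector<int64_t> innerTiles = {16, 32};

  // take_front(destRank): keep only the outer dims of the source.
  std::vector<int64_t> vecShape(sourceShape.begin(),
                                sourceShape.begin() + destRank); // {3, 8}
  vecShape = applyPerm(vecShape, outerDimsPerm);                 // {8, 3}
  // Scale each dest dim pointed to by innerDimPos by its inner tile size.
  for (std::size_t i = 0; i < innerDimPos.size(); ++i)
    vecShape[innerDimPos[i]] *= innerTiles[i];                   // {128, 96}

  std::cout << vecShape[0] << 'x' << vecShape[1] << '\n'; // equals dest shape
}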
@@ -1642,9 +1677,9 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp,
       unpackOp.getDestType().hasStaticShape()
           ? inputVectorSizes
           : shapeCastOp.getResultVectorType().getShape());
-  Operation *write =
-      createWriteOrMaskedWrite(rewriter, loc, shapeCastOp.getResult(),
-                               reifiedRetShapes[0], writeMaskShape);
+  Operation *write = createWriteOrMaskedWrite(
+      rewriter, loc, shapeCastOp.getResult(), reifiedRetShapes[0],
+      writeMaskShape, useInBoundsInsteadOfMasking);
   newResults.push_back(write->getResult(0));
   return success();
 }
@@ -1673,7 +1708,8 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp,
       rewriter, loc, padOp.getSource(), inputVectorSizes, padValue,
       /*useInBoundsInsteadOfMasking=*/false);
   Operation *write = createWriteOrMaskedWrite(
-      rewriter, loc, maskedRead, reifiedReturnShapes[0], inputVectorSizes);
+      rewriter, loc, maskedRead, reifiedReturnShapes[0], inputVectorSizes,
+      /*useInBoundsInsteadOfMasking=*/false);
   newResults.push_back(write->getResult(0));
   return success();
 }
@@ -1755,8 +1791,11 @@ vectorizeUnPackOpPrecondition(tensor::UnPackOp unpackOp,
     LDBG("Inner-tiles must be constant: " << unpackOp << "\n");
     return failure();
   }
-  llvm::ArrayRef<int64_t> resultShape = unpackOp.getDestType().getShape();
-  if (!inputVectorSizes.empty() &&
+  ArrayRef<int64_t> resultShape = unpackOp.getDestType().getShape();
+  bool satisfyEmptyCond = inputVectorSizes.empty() &&
+                          unpackOp.getDestType().hasStaticShape() &&
+                          unpackOp.getSourceType().hasStaticShape();
+  if (!satisfyEmptyCond &&
       failed(vector::isValidMaskedInputVector(resultShape, inputVectorSizes)))
     return failure();
 