@@ -403,9 +403,6 @@ namespace {
403
403
struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
404
404
using OpConversionPattern::OpConversionPattern;
405
405
406
- ConvertVectorStore (MLIRContext *context, bool useAtomicWrites)
407
- : OpConversionPattern<vector::StoreOp>(context) {}
408
-
409
406
LogicalResult
410
407
matchAndRewrite (vector::StoreOp op, OpAdaptor adaptor,
411
408
ConversionPatternRewriter &rewriter) const override {
@@ -416,10 +413,10 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
416
413
" only 1-D vectors are supported ATM" );
417
414
418
415
auto loc = op.getLoc ();
419
- auto convertedType = cast<MemRefType>(adaptor.getBase ().getType ());
420
416
auto valueToStore = cast<VectorValue>(op.getValueToStore ());
421
417
auto oldElementType = valueToStore.getType ().getElementType ();
422
- auto newElementType = convertedType.getElementType ();
418
+ auto newElementType =
419
+ cast<MemRefType>(adaptor.getBase ().getType ()).getElementType ();
423
420
int srcBits = oldElementType.getIntOrFloatBitWidth ();
424
421
int dstBits = newElementType.getIntOrFloatBitWidth ();
425
422
@@ -464,21 +461,24 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
464
461
: getConstantIntValue (linearizedInfo.intraDataOffset );
465
462
466
463
if (!foldedNumFrontPadElems) {
467
- return failure (" subbyte store emulation: dynamic front padding size is "
468
- " not yet implemented" );
464
+ return rewriter.notifyMatchFailure (
465
+ op, " subbyte store emulation: dynamic front padding size is "
466
+ " not yet implemented" );
469
467
}
470
468
471
469
auto memrefBase = cast<MemRefValue>(adaptor.getBase ());
472
470
473
- // Shortcut: conditions when subbyte emulated store at the front is not
474
- // needed:
471
+ // Conditions when subbyte emulated store is not needed:
475
472
// 1. The source vector size (in bits) is a multiple of byte size.
476
473
// 2. The address of the store is aligned to the emulated width boundary.
477
474
//
478
475
// For example, storing a vector<4xi2> to <13xi2> at offset 4 does not
479
476
// need unaligned emulation because the store address is aligned and the
480
477
// source is a whole byte.
481
- if (isAlignedEmulation && *foldedNumFrontPadElems == 0 ) {
478
+ bool emulationRequiresPartialStores =
479
+ !isAlignedEmulation || *foldedNumFrontPadElems != 0 ;
480
+ if (!emulationRequiresPartialStores) {
481
+ // Basic case: storing full bytes.
482
482
auto numElements = origElements / numSrcElemsPerDest;
483
483
auto bitCast = rewriter.create <vector::BitCastOp>(
484
484
loc, VectorType::get (numElements, newElementType),
0 commit comments