@@ -1541,5 +1541,37 @@ func.func @should_fuse_and_preserve_dep_on_constant() {
1541
1541
return
1542
1542
}
1543
1543
// -----

// Producer/consumer fusion where the memref written by the producer is also
// used after the outermost loop.
//
// Input: inside one outer loop, a first inner loop stores to %src and a second
// inner loop loads from it; after the outer loop, affine.dma_start reads %src.
// Expected output (per the FileCheck directives below): a single inner loop
// holding the addf, the store to the original %src alloc and the load, with the
// trailing DMA still operating on that same alloc.
//
// SSA names are matched through FileCheck captures / wildcards rather than
// literal %argN names so the test does not depend on how the printer numbers
// block arguments.

// CHECK-LABEL: @producer_consumer_with_outmost_user
// CHECK-SAME:  (%[[ARG0:[a-zA-Z0-9_]+]]: f16)
func.func @producer_consumer_with_outmost_user(%arg0 : f16) {
  %c0 = arith.constant 0 : index
  %src = memref.alloc() : memref<f16, 1>
  %dst = memref.alloc() : memref<f16>
  %tag = memref.alloc() : memref<1xi32>
  affine.for %arg1 = 4 to 6 {
    // Producer: writes the rank-0 buffer %src.
    affine.for %arg2 = 0 to 1 {
      %0 = arith.addf %arg0, %arg0 : f16
      affine.store %0, %src[] : memref<f16, 1>
    }
    // Consumer: reads the value the producer just stored.
    affine.for %arg3 = 0 to 1 {
      %0 = affine.load %src[] : memref<f16, 1>
    }
  }
  // User of %src outside (after) the outermost loop.
  affine.dma_start %src[], %dst[], %tag[%c0], %c0 : memref<f16, 1>, memref<f16>, memref<1xi32>
  // CHECK:       %[[CST_INDEX:.*]] = arith.constant 0 : index
  // CHECK:       %[[DMA_SRC:.*]] = memref.alloc() : memref<f16, 1>
  // CHECK:       %[[DMA_DST:.*]] = memref.alloc() : memref<f16>
  // CHECK:       %[[DMA_TAG:.*]] = memref.alloc() : memref<1xi32>
  // CHECK:       affine.for %{{.*}} = 4 to 6
  // CHECK-NEXT:    affine.for %{{.*}} = 0 to 1
  // CHECK-NEXT:      %[[RESULT_ADD:.*]] = arith.addf %[[ARG0]], %[[ARG0]] : f16
  // CHECK-NEXT:      affine.store %[[RESULT_ADD]], %[[DMA_SRC]][] : memref<f16, 1>
  // CHECK-NEXT:      affine.load %[[DMA_SRC]][] : memref<f16, 1>
  // CHECK:       affine.dma_start %[[DMA_SRC]][], %[[DMA_DST]][], %[[DMA_TAG]][%[[CST_INDEX]]], %[[CST_INDEX]] : memref<f16, 1>, memref<f16>, memref<1xi32>
  // CHECK-NEXT:  return
  return
}

1544
1576
// Add further tests in mlir/test/Transforms/loop-fusion-4.mlir
1545
1577
0 commit comments