1
1
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=PRODUCER-CONSUMER
2
2
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{fusion-maximal mode=sibling}))' -split-input-file | FileCheck %s --check-prefix=SIBLING-MAXIMAL
3
+ // All fusion: producer-consumer and sibling.
4
+ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion))' -split-input-file | FileCheck %s --check-prefix=ALL
3
5
// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(spirv.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=SPIRV
4
6
5
7
// Part I of fusion tests in mlir/test/Transforms/loop-fusion.mlir.
@@ -108,6 +110,7 @@ func.func @check_src_dst_step(%m : memref<100xf32>,
108
110
func.func @reduce_add_non_maximal_f32_f32 (%arg0: memref <64 x64 xf32 , 1 >, %arg1 : memref <1 x64 xf32 , 1 >, %arg2 : memref <1 x64 xf32 , 1 >) {
109
111
%cst_0 = arith.constant 0.000000e+00 : f32
110
112
%cst_1 = arith.constant 1.000000e+00 : f32
113
+ // This nest writes to %arg1 but can be eliminated post sibling fusion.
111
114
affine.for %arg3 = 0 to 1 {
112
115
affine.for %arg4 = 0 to 64 {
113
116
%accum = affine.for %arg5 = 0 to 64 iter_args (%prevAccum = %cst_0 ) -> f32 {
@@ -137,11 +140,11 @@ func.func @reduce_add_non_maximal_f32_f32(%arg0: memref<64x64xf32, 1>, %arg1 : m
137
140
// since the destination loop and source loop trip counts do not
138
141
// match.
139
142
// SIBLING-MAXIMAL: %[[cst_0:.*]] = arith.constant 0.000000e+00 : f32
140
- // SIBLING-MAXIMAL-NEXT: %[[cst_1:.*]] = arith.constant 1.000000e+00 : f32
141
- // SIBLING-MAXIMAL-NEXT: affine.for %[[idx_0:.*]] = 0 to 1 {
142
- // SIBLING-MAXIMAL-NEXT: affine.for %[[idx_1:.*]] = 0 to 64 {
143
- // SIBLING-MAXIMAL-NEXT: %[[result_1:.*]] = affine.for %[[idx_2:.*]] = 0 to 32 iter_args(%[[iter_0:.*]] = %[[cst_1]]) -> (f32) {
144
- // SIBLING-MAXIMAL-NEXT: %[[result_0:.*]] = affine.for %[[idx_3:.*]] = 0 to 64 iter_args(%[[iter_1:.*]] = %[[cst_0]]) -> (f32) {
143
+ // SIBLING-MAXIMAL-NEXT: %[[cst_1:.*]] = arith.constant 1.000000e+00 : f32
144
+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 1 {
145
+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 64 {
146
+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 32 iter_args(%{{.*}} = %[[cst_1]]) -> (f32) {
147
+ // SIBLING-MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 64 iter_args(%{{.*}} = %[[cst_0]]) -> (f32) {
145
148
146
149
// -----
147
150
@@ -315,11 +318,16 @@ func.func @same_memref_load_store(%producer : memref<32xf32>, %consumer: memref<
315
318
return
316
319
}
317
320
321
+ // -----
322
+
318
323
// PRODUCER-CONSUMER-LABEL: func @same_memref_load_multiple_stores
324
+ // ALL-LABEL: func @same_memref_load_multiple_stores
319
325
func.func @same_memref_load_multiple_stores (%producer : memref <32 xf32 >, %producer_2 : memref <32 xf32 >, %consumer: memref <16 xf32 >){
320
326
%cst = arith.constant 2.000000e+00 : f32
321
- // Source isn't removed.
327
+ // Ensure that the source isn't removed during either producer-consumer
328
+ // fusion or sibling fusion.
322
329
// PRODUCER-CONSUMER: affine.for %{{.*}} = 0 to 32
330
+ // ALL: affine.for %{{.*}} = 0 to 32
323
331
affine.for %arg3 = 0 to 32 {
324
332
%0 = affine.load %producer [%arg3 ] : memref <32 xf32 >
325
333
%2 = arith.mulf %0 , %cst : f32
@@ -343,5 +351,8 @@ func.func @same_memref_load_multiple_stores(%producer : memref<32xf32>, %produce
343
351
// PRODUCER-CONSUMER-NEXT: arith.addf
344
352
// PRODUCER-CONSUMER-NEXT: affine.store
345
353
// PRODUCER-CONSUMER-NEXT: }
354
+ // ALL: affine.for %{{.*}} = 0 to 16
355
+ // ALL: mulf
356
+ // ALL: addf
346
357
return
347
358
}
0 commit comments