@@ -2837,6 +2837,7 @@ func @should_fuse_multi_store_producer_with_scaping_memrefs_and_preserve_src(
2837
2837
}
2838
2838
2839
2839
// -----
2840
+
2840
2841
func @should_not_fuse_due_to_dealloc (%arg0: memref <16 xf32 >){
2841
2842
%A = alloc () : memref <16 xf32 >
2842
2843
%C = alloc () : memref <16 xf32 >
@@ -2866,3 +2867,152 @@ func @should_not_fuse_due_to_dealloc(%arg0: memref<16xf32>){
2866
2867
// CHECK-NEXT: affine.load
2867
2868
// CHECK-NEXT: addf
2868
2869
// CHECK-NEXT: affine.store
2870
+
2871
+ // -----
2872
+
2873
+ // CHECK-LABEL: func @should_fuse_defining_node_has_no_dependence_from_source_node
2874
+ func @should_fuse_defining_node_has_no_dependence_from_source_node (
2875
+ %a : memref <10 xf32 >, %b : memref <f32 >) -> () {
2876
+ affine.for %i0 = 0 to 10 {
2877
+ %0 = affine.load %b [] : memref <f32 >
2878
+ affine.store %0 , %a [%i0 ] : memref <10 xf32 >
2879
+ }
2880
+ %0 = affine.load %b [] : memref <f32 >
2881
+ affine.for %i1 = 0 to 10 {
2882
+ %1 = affine.load %a [%i1 ] : memref <10 xf32 >
2883
+ %2 = divf %0 , %1 : f32
2884
+ }
2885
+
2886
+ // Loops '%i0' and '%i1' should be fused although the load defining '%0' sits
2887
+ // between them: '%i0' never stores to '%b', so that load has no dependence on it.
2888
+ // CHECK: affine.load %{{.*}}[] : memref<f32>
2889
+ // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
2890
+ // CHECK-NEXT: affine.load %{{.*}}[] : memref<f32>
2891
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
2892
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
2893
+ // CHECK-NEXT: divf
2894
+ // CHECK-NEXT: }
2895
+ // CHECK-NOT: affine.for
2896
+ return
2897
+ }
2898
+
2899
+ // -----
2900
+
2901
+ // CHECK-LABEL: func @should_not_fuse_defining_node_has_dependence_from_source_loop
2902
+ func @should_not_fuse_defining_node_has_dependence_from_source_loop (
2903
+ %a : memref <10 xf32 >, %b : memref <f32 >) -> () {
2904
+ %cst = constant 0.000000e+00 : f32
2905
+ affine.for %i0 = 0 to 10 {
2906
+ affine.store %cst , %b [] : memref <f32 >
2907
+ affine.store %cst , %a [%i0 ] : memref <10 xf32 >
2908
+ }
2909
+ %0 = affine.load %b [] : memref <f32 >
2910
+ affine.for %i1 = 0 to 10 {
2911
+ %1 = affine.load %a [%i1 ] : memref <10 xf32 >
2912
+ %2 = divf %0 , %1 : f32
2913
+ }
2914
+
2915
+ // Loops '%i0' and '%i1' should not be fused: '%0', which is used in '%i1', is
2916
+ // loaded from '%b' after '%i0' stores to '%b', so its defining op depends on '%i0'.
2917
+ // CHECK: affine.for %{{.*}} = 0 to 10 {
2918
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[] : memref<f32>
2919
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
2920
+ // CHECK-NEXT: }
2921
+ // CHECK-NEXT: affine.load %{{.*}}[] : memref<f32>
2922
+ // CHECK: affine.for %{{.*}} = 0 to 10 {
2923
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
2924
+ // CHECK-NEXT: divf
2925
+ // CHECK-NEXT: }
2926
+ return
2927
+ }
2928
+
2929
+ // -----
2930
+
2931
+ // CHECK-LABEL: func @should_not_fuse_defining_node_has_transitive_dependence_from_source_loop
2932
+ func @should_not_fuse_defining_node_has_transitive_dependence_from_source_loop (
2933
+ %a : memref <10 xf32 >, %b : memref <10 xf32 >, %c : memref <f32 >) -> () {
2934
+ %cst = constant 0.000000e+00 : f32
2935
+ affine.for %i0 = 0 to 10 {
2936
+ affine.store %cst , %a [%i0 ] : memref <10 xf32 >
2937
+ affine.store %cst , %b [%i0 ] : memref <10 xf32 >
2938
+ }
2939
+ affine.for %i1 = 0 to 10 {
2940
+ %1 = affine.load %b [%i1 ] : memref <10 xf32 >
2941
+ affine.store %1 , %c [] : memref <f32 >
2942
+ }
2943
+ %0 = affine.load %c [] : memref <f32 >
2944
+ affine.for %i2 = 0 to 10 {
2945
+ %1 = affine.load %a [%i2 ] : memref <10 xf32 >
2946
+ %2 = divf %0 , %1 : f32
2947
+ }
2948
+
2949
+ // When loops '%i0' and '%i2' are evaluated first, they should not be fused:
2950
+ // '%0', used in '%i2', is loaded from '%c', which '%i1' stores to after reading
2951
+ // '%b' written by '%i0' -- a transitive dependence on '%i0'. After that, loops
2952
+ // '%i0' and '%i1' are evaluated and fused as usual.
2953
+ // CHECK: affine.for %{{.*}} = 0 to 10 {
2954
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
2955
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
2956
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
2957
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[] : memref<f32>
2958
+ // CHECK-NEXT: }
2959
+ // CHECK-NEXT: affine.load %{{.*}}[] : memref<f32>
2960
+ // CHECK: affine.for %{{.*}} = 0 to 10 {
2961
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
2962
+ // CHECK-NEXT: divf
2963
+ // CHECK-NEXT: }
2964
+ // CHECK-NOT: affine.for
2965
+ return
2966
+ }
2967
+
2968
+ // -----
2969
+
2970
+ // CHECK-LABEL: func @should_not_fuse_dest_loop_nest_return_value
2971
+ func @should_not_fuse_dest_loop_nest_return_value (
2972
+ %a : memref <10 xf32 >) -> () {
2973
+ %cst = constant 0.000000e+00 : f32
2974
+ affine.for %i0 = 0 to 10 {
2975
+ affine.store %cst , %a [%i0 ] : memref <10 xf32 >
2976
+ }
2977
+ %b = affine.for %i1 = 0 to 10 step 2 iter_args (%b_iter = %cst ) -> f32 {
2978
+ %load_a = affine.load %a [%i1 ] : memref <10 xf32 >
2979
+ affine.yield %load_a: f32
2980
+ }
2981
+
+ // The second loop ('%i1') returns a value through iter_args. The CHECK lines
+ // below expect both loops to be left in place, i.e. no fusion takes place.
2982
+ // CHECK: affine.for %{{.*}} = 0 to 10 {
2983
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
2984
+ // CHECK-NEXT: }
2985
+ // CHECK: affine.for %{{.*}} = 0 to 10 step 2 iter_args(%{{.*}} = %{{.*}}) -> (f32) {
2986
+ // CHECK-NEXT: affine.load
2987
+ // CHECK-NEXT: affine.yield
2988
+ // CHECK-NEXT: }
2989
+
2990
+ return
2991
+ }
2992
+
2993
+ // -----
2994
+
2995
+ // CHECK-LABEL: func @should_not_fuse_src_loop_nest_return_value
2996
+ func @should_not_fuse_src_loop_nest_return_value (
2997
+ %a : memref <10 xf32 >) -> () {
2998
+ %cst = constant 1.000000e+00 : f32
2999
+ %b = affine.for %i = 0 to 10 step 2 iter_args (%b_iter = %cst ) -> f32 {
3000
+ %c = addf %b_iter , %b_iter : f32
3001
+ affine.store %c , %a [%i ] : memref <10 xf32 >
3002
+ affine.yield %c: f32
3003
+ }
3004
+ affine.for %i1 = 0 to 10 {
3005
+ %1 = affine.load %a [%i1 ] : memref <10 xf32 >
3006
+ }
3007
+
+ // The first loop ('%i') returns a value through iter_args. The CHECK lines
+ // below expect both loops to be left in place, i.e. no fusion takes place.
3008
+ // CHECK: %{{.*}} = affine.for %{{.*}} = 0 to 10 step 2 iter_args(%{{.*}} = %{{.*}}) -> (f32) {
3009
+ // CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
3010
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
3011
+ // CHECK-NEXT: affine.yield %{{.*}} : f32
3012
+ // CHECK-NEXT: }
3013
+ // CHECK: affine.for %{{.*}} = 0 to 10 {
3014
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
3015
+ // CHECK-NEXT: }
3016
+
3017
+ return
3018
+ }
0 commit comments