@@ -912,3 +912,104 @@ func @interleaved_extract_insert_slice_chain_2(
912
912
913
913
return %15 : tensor <62 x90 xf32 >
914
914
}
915
+
916
+ // -----
917
+
918
+ #accesses = [
919
+ affine_map <(i ) -> (i )>
920
+ ]
921
+ #trait = {
922
+ indexing_maps = #accesses ,
923
+ iterator_types = [" parallel" ]
924
+ }
925
+
926
+ // CHECK-LABEL: func @reading_scf_for
927
+ func @reading_scf_for (%t1: tensor <?xf32 > {linalg.inplaceable = true },
928
+ %s: index , %v: vector <5 xf32 >) -> (tensor <?xf32 >, vector <5 xf32 >) {
929
+
930
+ %c0 = arith.constant 0 : index
931
+ %c1 = arith.constant 1 : index
932
+ %cst = arith.constant 0.0 : f32
933
+
934
+ // Write to %t1. The check below expects this write to be marked out-of-place; presumably because the loop that follows still reads the pre-write contents of %t1 (TODO confirm against the analysis).
935
+ // CHECK: vector.transfer_write
936
+ // CHECK-SAME: __inplace_results_attr__ = ["false"]
937
+ %t3 = vector.transfer_write %v , %t1 [%s ] : vector <5 xf32 >, tensor <?xf32 >
938
+
939
+ // The loop reads the old value of %t1 via an alias: %t1 is passed in as iter_arg %t2, and %e is a slice of %t2.
940
+ // CHECK: scf.for
941
+ %r , %v3 = scf.for %i = %c0 to %s step %c1 iter_args (%t2 = %t1 , %v0 = %v ) -> (tensor <?xf32 >, vector <5 xf32 >) {
942
+ // CHECK: tensor.extract_slice
943
+ // CHECK-SAME: __inplace_results_attr__ = ["true"]
944
+ %e = tensor.extract_slice %t2 [%s ][%s ][1 ] : tensor <?xf32 > to tensor <?xf32 >
945
+
946
+ // Read from %t1 through the alias %e (a slice of the iter_arg %t2, which was initialized with %t1).
947
+ %v2 = vector.transfer_read %e [%s ], %cst : tensor <?xf32 >, vector <5 xf32 >
948
+ scf.yield %e , %v2 : tensor <?xf32 >, vector <5 xf32 >
949
+ }
950
+ // CHECK: __inplace_results_attr__ = ["true", "none"]
951
+
952
+ // Use %t3 as an output only (the body yields %cst and ignores the block argument), so that the write above does not get DCE'd.
953
+ // CHECK: linalg.generic
954
+ // CHECK-SAME: __inplace_results_attr__ = ["true"]
955
+ %o = linalg.generic #trait outs (%t3 : tensor <?xf32 >) {
956
+ ^bb (%0: f32 ) :
957
+ linalg.yield %cst : f32
958
+ } -> (tensor <?xf32 >)
959
+
960
+ return %o , %v3 : tensor <?xf32 >, vector <5 xf32 >
961
+ }
962
+
963
+ // -----
964
+
965
+ #accesses = [
966
+ affine_map <(i ) -> (i )>
967
+ ]
968
+ #trait = {
969
+ indexing_maps = #accesses ,
970
+ iterator_types = [" parallel" ]
971
+ }
972
+
973
+ // CHECK-LABEL: func @non_reading_scf_for
974
+ func @non_reading_scf_for (%t1: tensor <?xf32 > {linalg.inplaceable = true },
975
+ %s: index , %v: vector <5 xf32 >) -> (tensor <?xf32 >, vector <5 xf32 >) {
976
+
977
+ %c0 = arith.constant 0 : index
978
+ %c1 = arith.constant 1 : index
979
+ %cst = arith.constant 0.0 : f32
980
+
981
+ // Write to %t1. The check below expects this write to be marked in-place; presumably this is safe because the loop that follows never reads the pre-write contents of %t1 (TODO confirm against the analysis).
982
+ // CHECK: vector.transfer_write
983
+ // CHECK-SAME: __inplace_results_attr__ = ["true"]
984
+ %t3 = vector.transfer_write %v , %t1 [%s ] : vector <5 xf32 >, tensor <?xf32 >
985
+
986
+ // This loop does not read the original contents of %t1. It only writes to it through an alias and then reads back what it wrote.
987
+ // CHECK: scf.for
988
+ %r , %v3 = scf.for %i = %c0 to %s step %c1 iter_args (%t2 = %t1 , %v0 = %v ) -> (tensor <?xf32 >, vector <5 xf32 >) {
989
+ // CHECK: tensor.extract_slice
990
+ // CHECK-SAME: __inplace_results_attr__ = ["true"]
991
+ %e = tensor.extract_slice %t2 [%s ][%s ][1 ] : tensor <?xf32 > to tensor <?xf32 >
992
+
993
+ // Write to %t1 via the alias %e (a slice of the iter_arg %t2, which was initialized with %t1). (Overwrite %t3.)
994
+ // CHECK: linalg.generic
995
+ // CHECK-SAME: __inplace_results_attr__ = ["true"]
996
+ %o2 = linalg.generic #trait outs (%e : tensor <?xf32 >) {
997
+ ^bb (%0: f32 ) :
998
+ linalg.yield %cst : f32
999
+ } -> (tensor <?xf32 >)
1000
+
1001
+ // Read back the value just written by the linalg.generic (%o2). This is not a read of the original %t1.
1002
+ %v2 = vector.transfer_read %o2 [%s ], %cst : tensor <?xf32 >, vector <5 xf32 >
1003
+ scf.yield %o2 , %v2 : tensor <?xf32 >, vector <5 xf32 >
1004
+ }
1005
+
1006
+ // Use %t3 as an output only (the body yields %cst and ignores the block argument), so that the write above does not get DCE'd.
1007
+ // CHECK: linalg.generic
1008
+ // CHECK-SAME: __inplace_results_attr__ = ["true"]
1009
+ %o = linalg.generic #trait outs (%t3 : tensor <?xf32 >) {
1010
+ ^bb (%0: f32 ) :
1011
+ linalg.yield %cst : f32
1012
+ } -> (tensor <?xf32 >)
1013
+
1014
+ return %o , %v3 : tensor <?xf32 >, vector <5 xf32 >
1015
+ }
0 commit comments