@@ -946,3 +946,38 @@ func.func @index_switch(%pred: index, %b: tensor<5xf32>, %c: tensor<5xf32>) -> t
946
946
// CHECK: return %[[r]]
947
947
return %0 : tensor <5 xf32 >
948
948
}
949
+
950
// -----

// Regression test for
//   https://github.com/llvm/llvm-project/issues/133964
// Checks that the dest operand of tensor.parallel_insert_slice does not have
// read semantics: the function below is expected to bufferize without any
// memref.alloc appearing in the output.
//
// Inputs:
//   %arg0, %arg1 : 2-D dynamically shaped operands of the reduction.
//   %arg2        : 1-D accumulator, marked `bufferization.writable = true`.
// Result:
//   The tensor produced by the scf.forall whose shared output is seeded
//   from %arg2.
func.func @check_scfforall_inplace_bufferizer(
    %arg0 : tensor<?x?xf32>,
    %arg1 : tensor<?x?xf32>,
    %arg2 : tensor<?xf32> {bufferization.writable = true}) -> tensor<?xf32> {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  // %d0 is the length of the accumulator; %d1 is the size of dimension 1 of
  // %arg1. Both are used as the slice sizes below.
  %d0 = tensor.dim %arg2, %c0 : tensor<?xf32>
  %d1 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
  // One-dimensional forall with upper bound %c1; %arg4 is the shared output
  // tied to %arg2.
  %0 = scf.forall (%arg3) in (%c1) shared_outs(%arg4 = %arg2) -> (tensor<?xf32>) {
    // Take the leading %d0 x %d1 window of each 2-D input.
    %1 = tensor.extract_slice %arg0[0, 0] [%d0, %d1] [1, 1]
        : tensor<?x?xf32> to tensor<?x?xf32>
    %2 = tensor.extract_slice %arg1[0, 0] [%d0, %d1] [1, 1]
        : tensor<?x?xf32> to tensor<?x?xf32>
    // out(d0) = in0(d0, d1) * in1(d0, d1) + out(d0), reducing over d1.
    // The shared output %arg4 is used directly as the `outs` operand, so the
    // generic both reads and writes it.
    %3 = linalg.generic {
        indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                         affine_map<(d0, d1) -> (d0, d1)>,
                         affine_map<(d0, d1) -> (d0)>],
        iterator_types = ["parallel", "reduction"]}
        ins(%1, %2 : tensor<?x?xf32>, tensor<?x?xf32>)
        outs(%arg4 : tensor<?xf32>) {
      ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
        %4 = arith.mulf %b0, %b1 : f32
        %5 = arith.addf %4, %b2 : f32
        linalg.yield %5 : f32
    } -> tensor<?xf32>
    // Write the reduction result back over the first %d0 elements of the
    // shared output. This is the insertion whose dest operand is under test.
    scf.forall.in_parallel {
      tensor.parallel_insert_slice %3 into %arg4[0] [%d0] [1]
          : tensor<?xf32> into tensor<?xf32>
    }
  }
  return %0 : tensor<?xf32>
}
// CHECK-LABEL: func @check_scfforall_inplace_bufferizer
// CHECK-NOT: memref.alloc
0 commit comments