@@ -344,3 +344,97 @@ func @reshape_as_consumer_permutation
344
344
// CHECK: %[[T9:.+]] = addi %[[T7]], %[[T8]]
345
345
// CHECK: %[[T10:.+]] = index_cast %[[ARG7]]
346
346
// CHECK: %[[T11:.+]] = addi %[[T9]], %[[T10]]
347
+
348
+ // -----
349
+
350
+ // Test input: %arg0 (33x8x?) is collapsed to 264x? by a tensor_reshape and is
+ // then consumed by an indexed_generic whose input map (d0, d1) drops d2 of the
+ // 3-D iteration space. The region adds all three iteration indices to the
+ // input element.
+ func @reshape_as_producer_projected_permutation(
+     %arg0 : tensor<33x8x?xi32>) -> tensor<264x?x4xi32> {
+   %0 = linalg.tensor_reshape %arg0 [affine_map<(d0, d1, d2) -> (d0, d1)>,
+                                     affine_map<(d0, d1, d2) -> (d2)>]
+     : tensor<33x8x?xi32> into tensor<264x?xi32>
+   %1 = linalg.indexed_generic
+     {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1)>,
+                       affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
+      iterator_types = ["parallel", "parallel", "parallel"]} ins(%0 : tensor<264x?xi32>) {
+   ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: i32):  // no predecessors
+     // %arg1..%arg3 are the iteration indices; %arg4 is the input element.
+     %2 = index_cast %arg1 : index to i32
+     %3 = addi %arg4, %2 : i32
+     %4 = index_cast %arg2 : index to i32
+     %5 = addi %3, %4 : i32
+     %6 = index_cast %arg3 : index to i32
+     %7 = addi %5, %6 : i32
+     linalg.yield %7 : i32
+   } -> tensor<264x?x4xi32>
+   return %1 : tensor<264x?x4xi32>
+ }
370
+
371
+ // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
372
+ // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
373
+ // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1) -> (d0 * 8 + d1)>
374
+ // CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1)>
375
+ // CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0, d1, d2, d3) -> (d2)>
376
+ // CHECK-DAG: #[[MAP5:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)>
377
+ // CHECK: @reshape_as_producer_projected_permutation
378
+ // CHECK-SAME: %[[ARG0:.+]]: tensor<33x8x?xi32>
379
+ // CHECK: %[[RES:.+]] = linalg.indexed_generic
380
+ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]]
381
+ // CHECK-SAME: ins(%[[ARG0]] : tensor<33x8x?xi32>)
382
+ // CHECK: ^{{.+}}(
383
+ // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index,
384
+ // CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index,
385
+ // CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index,
386
+ // CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index,
387
+ // CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: i32)
388
+ // CHECK: %[[T0:.+]] = affine.apply #[[MAP2]](%[[ARG1]], %[[ARG2]])
389
+ // CHECK: %[[T1:.+]] = index_cast %[[T0]] : index to i32
390
+ // CHECK: %[[T2:.+]] = addi %[[ARG5]], %[[T1]] : i32
391
+ // CHECK: %[[T3:.+]] = index_cast %[[ARG3]] : index to i32
392
+ // CHECK: %[[T4:.+]] = addi %[[T2]], %[[T3]] : i32
393
+ // CHECK: %[[T5:.+]] = index_cast %[[ARG4]] : index to i32
394
+ // CHECK: %[[T6:.+]] = addi %[[T4]], %[[T5]] : i32
395
+ // CHECK: linalg.yield %[[T6]] : i32
396
+ // CHECK: %[[RES2:.+]] = linalg.tensor_reshape %[[RES]]
397
+ // CHECK-SAME: [#[[MAP3]], #[[MAP4]], #[[MAP5]]]
398
+ // CHECK-SAME: : tensor<33x8x?x4xi32> into tensor<264x?x4xi32>
399
+ // CHECK: return %[[RES2]] : tensor<264x?x4xi32>
400
+
401
+ // -----
402
+
403
+ // Test input: an elementwise mulf over two ?x? tensors whose result is written
+ // through the transposed map #map1, followed by a tensor_reshape that expands
+ // the second result dimension into three dimensions (? x 4 x 5).
+ #map0 = affine_map<(d0, d1) -> (d0, d1)>
+ #map1 = affine_map<(d0, d1) -> (d1, d0)>
+ func @generic_op_reshape_consumer_fusion_projected(%arg0 : tensor<?x?xf32>,
+                                                    %arg1 : tensor<?x?xf32>) ->
+                                                    tensor<?x?x4x5xf32>
+ {
+   %0 = linalg.generic {
+      indexing_maps = [#map0, #map0, #map1],
+      iterator_types = ["parallel", "parallel"]}
+      ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) {
+     ^bb0(%arg3: f32, %arg4: f32):       // no predecessors
+       %1 = mulf %arg3, %arg4 : f32
+       linalg.yield %1 : f32
+   } -> tensor<?x?xf32>
+   // Result dim 0 maps to i; result dim 1 is split into (j, k, l).
+   %1 = linalg.tensor_reshape %0 [affine_map<(i, j, k, l) -> (i)>,
+                                  affine_map<(i, j, k, l) -> (j, k, l)>] :
+     tensor<?x?xf32> into tensor<?x?x4x5xf32>
+   return %1 : tensor<?x?x4x5xf32>
+ }
422
+
423
+ // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
424
+ // CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)>
425
+ // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
426
+ // CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)>
427
+ // CHECK: func @generic_op_reshape_consumer_fusion_projected
428
+ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
429
+ // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
430
+ // CHECK: %[[T0:.+]] = linalg.tensor_reshape %[[ARG0]]
431
+ // CHECK-SAME: [#[[MAP0]], #[[MAP1]]]
432
+ // CHECK-SAME: tensor<?x?xf32> into tensor<?x4x5x?xf32>
433
+ // CHECK: %[[T1:.+]] = linalg.tensor_reshape %[[ARG1]]
434
+ // CHECK-SAME: [#[[MAP0]], #[[MAP1]]]
435
+ // CHECK-SAME: tensor<?x?xf32> into tensor<?x4x5x?xf32>
436
+ // CHECK: %[[T2:.+]] = linalg.generic
437
+ // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]], #[[MAP3]]]
438
+ // CHECK-SAME: ["parallel", "parallel", "parallel", "parallel"]
439
+ // CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor<?x4x5x?xf32>, tensor<?x4x5x?xf32>)
440
+ // CHECK: return %[[T2]] : tensor<?x?x4x5xf32>
0 commit comments