@@ -43,7 +43,7 @@ module attributes {transform.with_named_sequence} {
43
43
!type = memref <2 x 32 x f32 >
44
44
!type1d = memref <32 x f32 >
45
45
46
- // CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 128)>
46
+ // CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 128)>
47
47
48
48
// CHECK-LABEL: func.func @warpgroup_3d(
49
49
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -61,7 +61,7 @@ func.func @warpgroup_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
61
61
// CHECK: gpu.launch
62
62
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
63
63
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
64
- // CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
64
+ // CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]]()[ %[[TIDX]]]
65
65
// CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C384]] : index
66
66
// CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C1]] : index
67
67
// CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -95,7 +95,7 @@ module attributes {transform.with_named_sequence} {
95
95
!type = memref <2 x 32 x f32 >
96
96
!type1d = memref <32 x f32 >
97
97
98
- // CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 16)>
98
+ // CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 16)>
99
99
100
100
// CHECK-LABEL: func.func @warp_3d(
101
101
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -114,7 +114,7 @@ func.func @warp_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !g
114
114
// CHECK: gpu.launch
115
115
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
116
116
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
117
- // CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
117
+ // CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]]()[ %[[TIDX]]]
118
118
// CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C32]] : index
119
119
// CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C3]] : index
120
120
// CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -354,9 +354,9 @@ module attributes {transform.with_named_sequence} {
354
354
!type = memref <2 x 32 x f32 >
355
355
!type1d = memref <32 x f32 >
356
356
357
- // CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
358
- // CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 32) floordiv 128) mod 2)>
359
- // CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<(d0, d1, d2) -> (d2 + ((d0 + d1 * 32) floordiv 128) floordiv 2)>
357
+ // CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
358
+ // CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 32) floordiv 128) mod 2)>
359
+ // CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<()[s0, s1, s2] -> (s2 + ((s0 + s1 * 32) floordiv 128) floordiv 2)>
360
360
361
361
// CHECK-LABEL: func.func @warpgroup_linear(
362
362
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -376,9 +376,9 @@ func.func @warpgroup_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %st
376
376
// CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
377
377
// CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
378
378
// CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
379
- // CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
380
- // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]](%[[TIDX]], %[[TIDY]])
381
- // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
379
+ // CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
380
+ // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]]()[ %[[TIDX]], %[[TIDY]]]
381
+ // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
382
382
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C768]] : index
383
383
// CHECK: scf.if %[[CMPLIN]]
384
384
// CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -410,9 +410,9 @@ module attributes {transform.with_named_sequence} {
410
410
!type = memref <2 x 32 x f32 >
411
411
!type1d = memref <32 x f32 >
412
412
413
- // CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
414
- // CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) mod 2)>
415
- // CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) floordiv 2)>
413
+ // CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
414
+ // CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) mod 2)>
415
+ // CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) floordiv 2)>
416
416
417
417
// CHECK-LABEL: func.func @warp_linear(
418
418
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -432,9 +432,9 @@ func.func @warp_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
432
432
// CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
433
433
// CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
434
434
// CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
435
- // CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
436
- // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
437
- // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
435
+ // CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
436
+ // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
437
+ // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
438
438
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C192]] : index
439
439
// CHECK: scf.if %[[CMPLIN]]
440
440
// CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -466,12 +466,12 @@ module attributes {transform.with_named_sequence} {
466
466
!type = memref <2 x 32 x f32 >
467
467
!type1d = memref <32 x f32 >
468
468
469
- // CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 18) floordiv 32) mod 3)>
470
- // CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1) -> ((((d0 + d1 * 18) floordiv 32) mod 6) floordiv 3)>
469
+ // CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 18) floordiv 32) mod 3)>
470
+ // CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1] -> ((((s0 + s1 * 18) floordiv 32) mod 6) floordiv 3)>
471
471
472
- // CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 18)>
473
- // CHECK-DAG: #[[$MAPLX:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) mod 10)>
474
- // CHECK-DAG: #[[$MAPLY:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) floordiv 10)>
472
+ // CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<()[s0, s1] -> (s0 + s1 * 18)>
473
+ // CHECK-DAG: #[[$MAPLX:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) mod 10)>
474
+ // CHECK-DAG: #[[$MAPLY:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) floordiv 10)>
475
475
476
476
// CHECK-LABEL: func.func @map_multi_level_linear(
477
477
func.func @map_multi_level_linear (%x: !type , %y: !type , %t: !type1d , %alpha : f32 , %stream : !gpu.async.token ) -> !type {
@@ -504,9 +504,9 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
504
504
memref.store %6 , %y [%i , %j ] : !type
505
505
} { mapping = [#gpu.thread <y >, #gpu.thread <x >]}
506
506
507
- // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]](%[[TIDX]], %[[TIDY]])
508
- // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]])
509
- // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]])
507
+ // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]]()[ %[[TIDX]], %[[TIDY]]]
508
+ // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[ %[[TIDX]], %[[TIDY]]]
509
+ // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[ %[[TIDX]], %[[TIDY]]]
510
510
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[LIN]], %[[C192]] : index
511
511
// CHECK: scf.if %[[CMPLIN]]
512
512
scf.forall (%i , %j , %k ) in (%c3 , %c2 , %c1 ) {
@@ -515,8 +515,8 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
515
515
memref.store %8 , %y [%i , %j ] : !type
516
516
} {mapping = [#gpu.warp <linear_dim_0 >, #gpu.warp <linear_dim_1 >, #gpu.warp <linear_dim_2 >] }
517
517
518
- // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]](%[[TIDX]], %[[TIDY]])
519
- // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]](%[[TIDX]], %[[TIDY]])
518
+ // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]]()[ %[[TIDX]], %[[TIDY]]]
519
+ // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]]()[ %[[TIDX]], %[[TIDY]]]
520
520
// CHECK-DAG: %[[COND:.*]] = arith.cmpi ult, %[[LIN]], %[[C20]] : index
521
521
// CHECK: scf.if %[[COND]]
522
522
// CHECK: memref.load %{{.*}}[%[[LIDX]]] : memref<32xf32>
@@ -648,7 +648,7 @@ module attributes {transform.with_named_sequence} {
648
648
#map1 = affine_map <(d0 ) -> (d0 * 32 )>
649
649
650
650
// CHECK-DAG: #[[$MAPB:.*]] = affine_map<(d0) -> (d0 * 128)>
651
- // CHECK-DAG: #[[$MAPW:.*]] = affine_map<(d0, d1, d2) -> (d2 * 32 + ((d0 + d1 * 4) floordiv 32) * 32)>
651
+ // CHECK-DAG: #[[$MAPW:.*]] = affine_map<()[s0, s1, s2] -> (s2 * 32 + ((s0 + s1 * 4) floordiv 32) * 32)>
652
652
653
653
// CHECK-LABEL: func.func @simple_fill(
654
654
func.func @simple_fill (%arg0: memref <128 xf32 >) -> memref <128 xf32 > {
@@ -667,7 +667,7 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
667
667
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
668
668
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
669
669
// CHECK: %[[TIDZ:.*]] = gpu.thread_id z
670
- // CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
670
+ // CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]]()[ %[[TIDX]], %[[TIDY]], %[[TIDZ]]]
671
671
// CHECK-NOT: scf.if
672
672
// CHECK: memref.subview %{{.*}}[%[[THX]]]
673
673
%1 = affine.apply #map1 (%arg2 )
0 commit comments