
[mlir][affine] Introduce a new symbol rule: the result of a Pure operation whose operands are valid symbolic identifiers #118478


Merged
4 changes: 1 addition & 3 deletions mlir/docs/Dialects/Affine.md
@@ -69,9 +69,7 @@ immediately enclosed by the latter),
3. a value that dominates the `AffineScope` op enclosing the value's
use,
4. the result of a constant operation,
5. the result of an
[`affine.apply` operation](#affineapply-mliraffineapplyop) that recursively takes as
arguments any valid symbolic identifiers, or
5. the result of a `Pure` operation whose operands are valid symbolic identifiers, or
6. the result of a
[`dim` operation](MemRef.md/#memrefdim-mlirmemrefdimop) on either a memref that
is an argument to a `AffineScope` op or a memref where the corresponding
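To illustrate the new rule 5, here is a minimal sketch of IR that becomes legal; it is illustrative only and not part of this patch, and the function and value names are hypothetical.

```mlir
// Hypothetical example assuming the func, arith, memref, and affine dialects.
// %n is a block argument of the function (an AffineScope), so it is a valid
// symbol; the pure arith.muli over two valid symbols now is one as well.
func.func @pure_result_as_symbol(%buf: memref<?xf32>, %n: index) -> f32 {
  %c2 = arith.constant 2 : index
  %idx = arith.muli %n, %c2 : index
  %v = affine.load %buf[symbol(%idx)] : memref<?xf32>
  return %v : f32
}
```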
15 changes: 11 additions & 4 deletions mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -410,7 +410,8 @@ bool mlir::affine::isValidSymbol(Value value) {
/// A value can be used as a symbol for `region` iff it meets one of the
/// following conditions:
/// *) It is a constant.
/// *) It is the result of an affine apply operation with symbol arguments.
/// *) It is a result of a `Pure` operation whose operands are valid symbolic
///    identifiers.
/// *) It is a result of the dim op on a memref whose corresponding size is
/// a valid symbol.
/// *) It is defined at the top level of 'region' or is its argument.
@@ -443,9 +444,15 @@ bool mlir::affine::isValidSymbol(Value value, Region *region) {
if (matchPattern(defOp, m_Constant(&operandCst)))
return true;

// Affine apply operation is ok if all of its operands are ok.
if (auto applyOp = dyn_cast<AffineApplyOp>(defOp))
return applyOp.isValidSymbol(region);
// A `Pure` operation is ok if all of its operands are valid symbolic
// identifiers.
if (isPure(defOp)) {
  bool operandValidSymbol =
      llvm::all_of(defOp->getOperands(), [&](Value operand) {
        return affine::isValidSymbol(operand, region);
      });
  if (operandValidSymbol)
    return true;
}

// Dim op results could be valid symbols at any level.
if (auto dimOp = dyn_cast<ShapedDimOpInterface>(defOp))
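Because the new check calls `affine::isValidSymbol` on every operand, the rule composes: chains of pure operations over valid symbols are accepted recursively. A hedged sketch, not taken from the patch, with hypothetical names:

```mlir
// Illustrative only. Each result feeds the next pure op, and every operand in
// the chain is a valid symbol, so isValidSymbol accepts %b recursively.
func.func @chained_pure_ops(%buf: memref<?xf32>, %n: index) -> f32 {
  %c4 = arith.constant 4 : index
  %a = arith.addi %n, %c4 : index   // pure; %n and %c4 are valid symbols
  %b = arith.muli %a, %a : index    // pure; %a is now a valid symbol too
  %v = affine.load %buf[symbol(%b)] : memref<?xf32>
  return %v : f32
}
```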
@@ -638,13 +638,13 @@ func.func @vecdim_reduction_complex_ub(%in: memref<256x512xf32>, %out: memref<25
return
}

// CHECK: #[[$map3:.*]] = affine_map<([[d0:.*]], [[d1:.*]]) -> ([[d0]], [[d1]] * 2)>
// CHECK: #[[$map3_sub:.*]] = affine_map<([[d0:.*]], [[d1:.*]]) -> ([[d0]] - [[d1]])>
// CHECK: #[[$map3:.*]] = affine_map<(d0, d1) -> (d0, d1 * 2)>
// CHECK: #[[$map3_sub:.*]] = affine_map<(d0)[s0] -> (-d0 + s0)>
// CHECK-LABEL: @vecdim_reduction_complex_ub
// CHECK: %[[vzero:.*]] = arith.constant dense<0.000000e+00> : vector<128xf32>
// CHECK: %{{.*}} = affine.for %[[iv:.*]] = 0 to min #[[$map3]](%[[M:.*]], %[[N:.*]]) step 128 iter_args(%[[red_iter:.*]] = {{.*}}) -> (vector<128xf32>) {
// CHECK: %[[ub:.*]] = affine.min #[[$map3]](%[[M]], %[[N]])
// CHECK: %[[elems_left:.*]] = affine.apply #[[$map3_sub]](%[[ub]], %[[iv]])
// CHECK: %[[elems_left:.*]] = affine.apply #[[$map3_sub]](%[[iv]])[%[[ub]]]
// CHECK: %[[mask:.*]] = vector.create_mask %[[elems_left]] : vector<128xi1>
// CHECK: %[[ld:.*]] = vector.transfer_read %{{.*}} : memref<256x512xf32>, vector<128xf32>
// CHECK: %[[select:.*]] = arith.select %[[mask]], %[[ld]], %[[vzero]] : vector<128xi1>, vector<128xf32>
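The CHECK updates in this vectorization test reflect a downstream effect of the new rule: `affine.min` is a pure operation, so its result is now a valid symbol and the "elements left" computation binds the bound to a symbol position instead of a second dimension. A sketch of the promoted form, with assumed value names and relying on the new rule:

```mlir
// Illustrative sketch, not taken from the test file.
#min = affine_map<(d0, d1) -> (d0, d1 * 2)>
#sub = affine_map<(d0)[s0] -> (-d0 + s0)>
func.func @promoted_min_result(%m: index, %n: index, %iv: index) -> index {
  // %ub comes from a pure affine.min, so it may bind to s0 here; before this
  // change it had to be passed as an extra dimension operand.
  %ub = affine.min #min(%m, %n)
  %left = affine.apply #sub(%iv)[%ub]
  return %left : index
}
```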
44 changes: 0 additions & 44 deletions mlir/test/Dialect/Affine/invalid.mlir
@@ -20,36 +20,6 @@ func.func @affine_apply_resul_non_index(%arg0 : index) {
return
}

// -----

#map = affine_map<(d0)[s0] -> (d0 + s0)>

func.func @affine_for_lower_bound_invalid_dim(%arg : index) {
affine.for %n0 = 0 to 7 {
%dim = arith.addi %arg, %arg : index

// expected-error@+1 {{operand cannot be used as a dimension id}}
affine.for %n1 = 0 to #map(%dim)[%arg] {
}
}
return
}

// -----

#map = affine_map<(d0)[s0] -> (d0 + s0)>

func.func @affine_for_upper_bound_invalid_dim(%arg : index) {
affine.for %n0 = 0 to 7 {
%dim = arith.addi %arg, %arg : index

// expected-error@+1 {{operand cannot be used as a dimension id}}
affine.for %n1 = #map(%dim)[%arg] to 7 {
}
}
return
}

// -----
func.func @affine_load_invalid_dim(%M : memref<10xi32>) {
"unknown"() ({
@@ -93,20 +63,6 @@ func.func @affine_for_upper_bound_invalid_sym()

#set0 = affine_set<(i)[N] : (i >= 0, N - i >= 0)>

func.func @affine_if_invalid_dim(%arg : index) {
affine.for %n0 = 0 to 7 {
%dim = arith.addi %arg, %arg : index

// expected-error@+1 {{operand cannot be used as a dimension id}}
affine.if #set0(%dim)[%n0] {}
}
return
}

// -----

#set0 = affine_set<(i)[N] : (i >= 0, N - i >= 0)>

func.func @affine_if_invalid_sym() {
affine.for %i0 = 0 to 7 {
// expected-error@+1 {{operand cannot be used as a symbol}}
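The negative tests deleted in this file covered IR that is now legal: an `arith.addi` over values that are valid symbols produces a valid symbol, which is in turn accepted where a dimension operand is expected, so the "operand cannot be used as a dimension id" diagnostic no longer fires. A sketch modeled on the first removed test, minus the expected error:

```mlir
#map = affine_map<(d0)[s0] -> (d0 + s0)>

func.func @pure_add_as_bound_operand(%arg : index) {
  affine.for %n0 = 0 to 7 {
    // %dim is the result of a pure op over a valid symbol, so this verifies
    // under the new rule.
    %dim = arith.addi %arg, %arg : index
    affine.for %n1 = 0 to #map(%dim)[%arg] {
    }
  }
  return
}
```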
37 changes: 37 additions & 0 deletions mlir/test/Dialect/Affine/ops.mlir
@@ -324,3 +324,40 @@ module attributes {gpu.container_module} {
// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
// CHECK: }
// CHECK: gpu.return

// -----

#map = affine_map<()[s0] -> (s0 mod 32)>

// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 mod 32)>

// CHECK-LABEL: @affine_thread_id

module {
  gpu.module @gpu {
    gpu.func @affine_thread_id(%arg0: memref<?x?xf32>) kernel {
      %c3 = arith.constant 3 : index
      %dim = memref.dim %arg0, %c3 : memref<?x?xf32>
      %c0 = arith.constant 0 : index
      affine.for %arg3 = %c0 to %dim step 32 {
        %thread_id_x = gpu.thread_id x
        %0 = affine.apply #map()[%thread_id_x]
        %c128 = arith.constant 128 : index
        affine.for %arg4 = %0 to %c128 step 8 {
          %c32 = arith.constant 32 : index
        }
      }
      gpu.return
    }
  }
}

// CHECK-SAME: (%[[VAL_0:.*]]: memref<?x?xf32>) kernel {
// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
// CHECK: %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref<?x?xf32>
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
// CHECK: %[[VAL_5:.*]] = gpu.thread_id x
// CHECK: %[[VAL_6:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[VAL_5]]]
// CHECK: %[[VAL_7:.*]] = arith.constant 128 : index
// CHECK: affine.for %[[VAL_8:.*]] = %[[VAL_6]] to %[[VAL_7]] step 8 {
80 changes: 40 additions & 40 deletions mlir/test/Dialect/GPU/transform-gpu.mlir
@@ -43,7 +43,7 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 128)>
// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 128)>

// CHECK-LABEL: func.func @warpgroup_3d(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -61,7 +61,7 @@ func.func @warpgroup_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
// CHECK: gpu.launch
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
// CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
// CHECK-DAG: %[[WG:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]]
// CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C384]] : index
// CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C1]] : index
// CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -95,7 +95,7 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 floordiv 16)>
// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0] -> (s0 floordiv 16)>

// CHECK-LABEL: func.func @warp_3d(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -114,7 +114,7 @@ func.func @warp_3d(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !g
// CHECK: gpu.launch
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
// CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]](%[[TIDX]])
// CHECK-DAG: %[[W:.*]] = affine.apply #[[$MAP]]()[%[[TIDX]]]
// CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[TIDX]], %[[C32]] : index
// CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[TIDY]], %[[C3]] : index
// CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1
@@ -354,9 +354,9 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 32) floordiv 128) mod 2)>
// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<(d0, d1, d2) -> (d2 + ((d0 + d1 * 32) floordiv 128) floordiv 2)>
// CHECK-DAG: #[[$MAPWGLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
// CHECK-DAG: #[[$MAPWGX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 32) floordiv 128) mod 2)>
// CHECK-DAG: #[[$MAPWGY:.*]] = affine_map<()[s0, s1, s2] -> (s2 + ((s0 + s1 * 32) floordiv 128) floordiv 2)>

// CHECK-LABEL: func.func @warpgroup_linear(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -376,9 +376,9 @@ func.func @warpgroup_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %st
// CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
// CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
// CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWGLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWGX]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWGY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C768]] : index
// CHECK: scf.if %[[CMPLIN]]
// CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -410,9 +410,9 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 32 + d2 * 256)>
// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) mod 2)>
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1, d2) -> ((d1 + d2 * 8 + d0 floordiv 32) floordiv 2)>
// CHECK-DAG: #[[$MAPWLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 32 + s2 * 256)>
// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) mod 2)>
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1, s2] -> ((s1 + s2 * 8 + s0 floordiv 32) floordiv 2)>

// CHECK-LABEL: func.func @warp_linear(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -432,9 +432,9 @@ func.func @warp_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream
// CHECK-DAG: %[[TIDX:.*]] = gpu.thread_id x
// CHECK-DAG: %[[TIDY:.*]] = gpu.thread_id y
// CHECK-DAG: %[[TIDZ:.*]] = gpu.thread_id z
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK-DAG: %[[WIDLIN:.*]] = affine.apply #[[$MAPWLIN]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[WIDLIN]], %[[C192]] : index
// CHECK: scf.if %[[CMPLIN]]
// CHECK: memref.load %[[ARGX]][%[[WIDX]], %[[WIDY]]]
@@ -466,12 +466,12 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 18) floordiv 32) mod 3)>
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1) -> ((((d0 + d1 * 18) floordiv 32) mod 6) floordiv 3)>
// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<()[s0, s1] -> (((s0 + s1 * 18) floordiv 32) mod 3)>
// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<()[s0, s1] -> ((((s0 + s1 * 18) floordiv 32) mod 6) floordiv 3)>

// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 18)>
// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) mod 10)>
// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 18) floordiv 10)>
// CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<()[s0, s1] -> (s0 + s1 * 18)>
// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) mod 10)>
// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<()[s0, s1] -> ((s0 + s1 * 18) floordiv 10)>

// CHECK-LABEL: func.func @map_multi_level_linear(
func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !gpu.async.token) -> !type {
@@ -504,9 +504,9 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
memref.store %6, %y[%i, %j] : !type
} { mapping = [#gpu.thread<y>, #gpu.thread<x>]}

// CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[LIN]], %[[C192]] : index
// CHECK: scf.if %[[CMPLIN]]
scf.forall (%i, %j, %k) in (%c3, %c2, %c1) {
@@ -515,8 +515,8 @@ func.func @map_multi_level_linear(%x: !type, %y: !type, %t: !type1d, %alpha : f3
memref.store %8, %y[%i, %j] : !type
} {mapping = [#gpu.warp<linear_dim_0>, #gpu.warp<linear_dim_1>, #gpu.warp<linear_dim_2>] }

// CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]](%[[TIDX]], %[[TIDY]])
// CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]]()[%[[TIDX]], %[[TIDY]]]
// CHECK-DAG: %[[COND:.*]] = arith.cmpi ult, %[[LIN]], %[[C20]] : index
// CHECK: scf.if %[[COND]]
// CHECK: memref.load %{{.*}}[%[[LIDX]]] : memref<32xf32>
@@ -545,9 +545,9 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<(d0, d1, d2) -> (d0 + d1 * 12 + d2 * 108)>
// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) mod 7)>
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> ((d0 + d1 * 12 + d2 * 108) floordiv 7)>
// CHECK-DAG: #[[$MAPBLIN:.*]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * 12 + s2 * 108)>
// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) mod 7)>
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> ((s0 + s1 * 12 + s2 * 108) floordiv 7)>

// CHECK-LABEL: func.func @block_linear_existing_launch(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -566,9 +566,9 @@ func.func @block_linear_existing_launch(
// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
// CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id z
// CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
// CHECK-DAG: %[[BIDLIN:.*]] = affine.apply #[[$MAPBLIN]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
// CHECK-DAG: %[[CMPLIN:.*]] = arith.cmpi ult, %[[BIDLIN]], %[[C63]] : index
// CHECK: scf.if %[[CMPLIN]]
// CHECK: memref.load %[[ARGX]][%[[BLX]], %[[BLY]]]
@@ -600,8 +600,8 @@ module attributes {transform.with_named_sequence} {
!type = memref<2 x 32 x f32>
!type1d = memref<32 x f32>

// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<(d0) -> (d0 mod 7)>
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<(d0, d1, d2) -> (d1 + d2 * 9 + d0 floordiv 7)>
// CHECK-DAG: #[[$MAPBX:.*]] = affine_map<()[s0] -> (s0 mod 7)>
// CHECK-DAG: #[[$MAPBY:.*]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * 9 + s0 floordiv 7)>

// CHECK-LABEL: func.func @block_linear_generate_launch(
// CHECK-SAME: %[[ARGX:[0-9a-z]+]]: memref<2x32xf32>
@@ -620,8 +620,8 @@ func.func @block_linear_generate_launch(
// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y
// CHECK-DAG: %[[BIDZ:.*]] = gpu.block_id z
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]](%[[BIDX]])
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]](%[[BIDX]], %[[BIDY]], %[[BIDZ]])
// CHECK-DAG: %[[BLX:.*]] = affine.apply #[[$MAPBX]]()[%[[BIDX]]]
// CHECK-DAG: %[[BLY:.*]] = affine.apply #[[$MAPBY]]()[%[[BIDX]], %[[BIDY]], %[[BIDZ]]]
// CHECK: memref.load %[[ARGX]][%[[BLX]], %[[BLY]]]
// CHECK: memref.load %[[ARGY]][%[[BLX]], %[[BLY]]]
scf.forall (%i, %j) in (%c7, %c9) {
Expand All @@ -647,8 +647,8 @@ module attributes {transform.with_named_sequence} {
#map = affine_map<(d0) -> (d0 * 128)>
#map1 = affine_map<(d0) -> (d0 * 32)>

// CHECK-DAG: #[[$MAPB:.*]] = affine_map<(d0) -> (d0 * 128)>
// CHECK-DAG: #[[$MAPW:.*]] = affine_map<(d0, d1, d2) -> (d2 * 32 + ((d0 + d1 * 4) floordiv 32) * 32)>
// CHECK-DAG: #[[$MAPB:.*]] = affine_map<()[s0] -> (s0 * 128)>
// CHECK-DAG: #[[$MAPW:.*]] = affine_map<()[s0, s1, s2] -> (s2 * 32 + ((s0 + s1 * 4) floordiv 32) * 32)>

// CHECK-LABEL: func.func @simple_fill(
func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
@@ -660,14 +660,14 @@ func.func @simple_fill(%arg0: memref<128xf32>) -> memref<128xf32> {
// CHECK: gpu.launch
scf.forall (%arg1) in (1) {
// CHECK: %[[BIDX:.*]] = gpu.block_id x
// CHECK: %[[BLX:.*]] = affine.apply #[[$MAPB]](%[[BIDX]])
// CHECK: %[[BLX:.*]] = affine.apply #[[$MAPB]]()[%[[BIDX]]]
%0 = affine.apply #map(%arg1)
%subview = memref.subview %arg0[%0] [128] [1] : memref<128xf32> to memref<128xf32, strided<[1], offset: ?>>
scf.forall (%arg2) in (4) {
// CHECK: %[[TIDX:.*]] = gpu.thread_id x
// CHECK: %[[TIDY:.*]] = gpu.thread_id y
// CHECK: %[[TIDZ:.*]] = gpu.thread_id z
// CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]](%[[TIDX]], %[[TIDY]], %[[TIDZ]])
// CHECK: %[[THX:.*]] = affine.apply #[[$MAPW]]()[%[[TIDX]], %[[TIDY]], %[[TIDZ]]]
// CHECK-NOT: scf.if
// CHECK: memref.subview %{{.*}}[%[[THX]]]
%1 = affine.apply #map1(%arg2)
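The wholesale d0 → s0 shift in these GPU transform tests follows from the same rule: `gpu.thread_id` and `gpu.block_id` are pure, zero-operand operations, so their results are now valid affine symbols and the generated `affine.apply` operations bind them in symbol positions. A small sketch mirroring the ops.mlir test added above; it is illustrative only, and the module and function names are hypothetical:

```mlir
#map = affine_map<()[s0] -> (s0 floordiv 128)>

module {
  gpu.module @kernels {
    gpu.func @warpgroup_index() kernel {
      // The pure, zero-operand gpu.thread_id trivially satisfies the new
      // rule, so it may appear in the symbol list of the affine.apply.
      %tid = gpu.thread_id x
      %wg = affine.apply #map()[%tid]
      gpu.return
    }
  }
}
```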