@@ -11,16 +11,16 @@ func @gemm1(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
11
11
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
12
12
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
13
13
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
14
- // CHECK: %[[T1 :.*]] = "gpu.block_id"() {dimension = "y"}
15
- // CHECK: %[[T2 :.*]] = "gpu.block_id"() {dimension = "x"}
14
+ // CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
15
+ // CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
16
16
// CHECK: scf.for %[[ARG3:.*]] =
17
- // CHECK: %[[T3 :.*]] = affine.apply #[[MAP0]]()[%[[T1 ]]]
18
- // CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[T3 ]], %[[ARG3]]]
19
- // CHECK: %[[T11 :.*]] = affine.apply #[[MAP0]]()[%[[T2 ]]]
20
- // CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[T11 ]]]
21
- // CHECK: %[[T15 :.*]] = affine.apply #[[MAP0]]()[%[[T1 ]]]
22
- // CHECK: %[[T18 :.*]] = affine.apply #[[MAP0]]()[%[[T2 ]]]
23
- // CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[T15 ]], %[[T18 ]]]
17
+ // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
18
+ // CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
19
+ // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
20
+ // CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
21
+ // CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
22
+ // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
23
+ // CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX]]]
24
24
// CHECK: linalg.matmul %[[SV1]], %[[SV2]], %[[SV3]]
25
25
26
26
// -----
@@ -36,22 +36,22 @@ func @gemm2(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
36
36
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
37
37
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
38
38
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
39
- // CHECK: %[[T3 :.*]] = "gpu.block_id"() {dimension = "y"}
40
- // CHECK: %[[T4 :.*]] = affine.apply #[[MAP0]]()[%[[T3]]]
41
- // CHECK: %[[T5 :.*]] = "gpu.block_id"() {dimension = "x"}
42
- // CHECK: %[[T6 :.*]] = affine.apply #[[MAP0]]()[%[[T5 ]]]
43
- // CHECK: %[[T7 :.*]] = cmpi "slt", %[[T4 ]], %{{.*}}
44
- // CHECK: %[[T8 :.*]] = cmpi "slt", %[[T6 ]], %{{.*}}
45
- // CHECK: %[[T9 :.*]] = and %[[T7 ]], %[[T8 ]]
46
- // CHECK: scf.if %[[T9 ]]
39
+ // CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
40
+ // CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
41
+ // CHECK: %[[ITERY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
42
+ // CHECK: %[[ITERX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
43
+ // CHECK: %[[INBOUNDSY:.*]] = cmpi "slt", %[[ITERY]], %{{.*}}
44
+ // CHECK: %[[INBOUNDSX:.*]] = cmpi "slt", %[[ITERX]], %{{.*}}
45
+ // CHECK: %[[INBOUNDS:.*]] = and %[[INBOUNDSY]], %[[INBOUNDSX]]
46
+ // CHECK: scf.if %[[INBOUNDS]]
47
47
// CHECK: scf.for %[[ARG3:.*]] =
48
- // CHECK: %[[T10 :.*]] = affine.apply #[[MAP0]]()[%[[T3 ]]]
49
- // CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[T10 ]], %[[ARG3]]]
50
- // CHECK: %[[T18 :.*]] = affine.apply #[[MAP0]]()[%[[T5 ]]]
51
- // CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[T18 ]]]
52
- // CHECK: %[[T22 :.*]] = affine.apply #[[MAP0]]()[%[[T3 ]]]
53
- // CHECK: %[[T25 :.*]] = affine.apply #[[MAP0]]()[%[[T5 ]]]
54
- // CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[T22 ]], %[[T25 ]]]
48
+ // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
49
+ // CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
50
+ // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
51
+ // CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
52
+ // CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
53
+ // CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
54
+ // CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
55
55
// CHECK: linalg.matmul %[[SV1]], %[[SV2]], %[[SV3]]
56
56
57
57
// -----
@@ -67,15 +67,15 @@ func @gemm3(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
67
67
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
68
68
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
69
69
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
70
- // CHECK: %[[T3 :.*]] = "gpu.block_id"() {dimension = "y"}
71
- // CHECK: %[[T4 :.*]] = "gpu.grid_dim"() {dimension = "y"}
72
- // CHECK: %[[T5 :.*]] = affine.apply #[[MAP0]]()[%[[T3]]]
73
- // CHECK: %[[T6 :.*]] = affine.apply #[[MAP0]]()[%[[T4]]]
74
- // CHECK: %[[T7 :.*]] = "gpu.block_id"() {dimension = "x"}
75
- // CHECK: %[[T8 :.*]] = "gpu.grid_dim"() {dimension = "x"}
76
- // CHECK: %[[T9 :.*]] = affine.apply #[[MAP0]]()[%[[T7 ]]]
77
- // CHECK: %[[T10 :.*]] = affine.apply #[[MAP0]]()[%[[T8 ]]]
78
- // CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = (%[[T5 ]], %[[T9 ]]) to (%{{.*}}, %{{.*}}) step (%[[T6 ]], %[[T10 ]])
70
+ // CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
71
+ // CHECK: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"}
72
+ // CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
73
+ // CHECK: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"}
74
+ // CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
75
+ // CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]]
76
+ // CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
77
+ // CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]]
78
+ // CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = (%[[LBY]], %[[LBX]]) to (%{{.*}}, %{{.*}}) step (%[[STEPY]], %[[STEPX]])
79
79
// CHECK: scf.for %[[ARG5:.*]] =
80
80
// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[ARG3]], %[[ARG5]]]
81
81
// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG5]], %[[ARG4]]]
@@ -95,19 +95,19 @@ func @gemm4(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
95
95
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
96
96
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
97
97
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
98
- // CHECK: %[[T2 :.*]] = "gpu.block_id"() {dimension = "y"}
99
- // CHECK: %[[T3 :.*]] = "gpu.block_id"() {dimension = "x"}
100
- // CHECK: %[[T4 :.*]] = affine.apply #[[MAP0]]()[%[[T3 ]]]
101
- // CHECK: %[[T5 :.*]] = cmpi "slt", %[[T4 ]], %{{.*}}
102
- // CHECK: scf.if %[[T5 ]]
98
+ // CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
99
+ // CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
100
+ // CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
101
+ // CHECK: %[[INBOUNDS:.*]] = cmpi "slt", %[[LBX]], %{{.*}}
102
+ // CHECK: scf.if %[[INBOUNDS]]
103
103
// CHECK: scf.for %[[ARG3:.*]] =
104
- // CHECK: %[[T6 :.*]] = affine.apply #[[MAP0]]()[%[[T2 ]]]
105
- // CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[T6 ]], %[[ARG3]]]
106
- // CHECK: %[[T14 :.*]] = affine.apply #[[MAP0]]()[%[[T3 ]]]
107
- // CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[T14 ]]]
108
- // CHECK: %[[T18 :.*]] = affine.apply #[[MAP0]]()[%[[T2 ]]]
109
- // CHECK: %[[T21 :.*]] = affine.apply #[[MAP0]]()[%[[T3 ]]]
110
- // CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[T18 ]], %[[T21 ]]]
104
+ // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
105
+ // CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
106
+ // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
107
+ // CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
108
+ // CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
109
+ // CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
110
+ // CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
111
111
// CHECK: linalg.matmul %[[SV1]], %[[SV2]], %[[SV3]]
112
112
113
113
// -----
@@ -123,21 +123,21 @@ func @gemm5(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
123
123
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
124
124
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
125
125
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
126
- // CHECK: %[[T3 :.*]] = "gpu.block_id"() {dimension = "y"}
127
- // CHECK: %[[T4 :.*]] = affine.apply #[[MAP0]]()[%[[T3]]]
128
- // CHECK: %[[T5 :.*]] = "gpu.block_id "() {dimension = "x"}
129
- // CHECK: %[[T6 :.*]] = "gpu.grid_dim"() {dimension = "x"}
130
- // CHECK: %[[T7 :.*]] = affine.apply #[[MAP0]]()[%[[T5 ]]]
131
- // CHECK: %[[T8 :.*]] = affine.apply #[[MAP0]]()[%[[T6 ]]]
132
- // CHECK: %[[T9 :.*]] = cmpi "slt", %[[T4 ]], %{{.*}}
133
- // CHECK: scf.if %[[T9 ]]
134
- // CHECK: scf.parallel (%[[ARG3.*]]) = (%[[T7 ]]) to (%{{.*}}) step (%[[T8 ]])
126
+ // CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
127
+ // CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
128
+ // CHECK: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"}
129
+ // CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
130
+ // CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
131
+ // CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]]
132
+ // CHECK: %[[INBOUNDS:.*]] = cmpi "slt", %[[LBY]], %{{.*}}
133
+ // CHECK: scf.if %[[INBOUNDS]]
134
+ // CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBX]]) to (%{{.*}}) step (%[[STEPX]])
135
135
// CHECK: scf.for %[[ARG4:.*]] =
136
- // CHECK: %[[T10 :.*]] = affine.apply #[[MAP0]]()[%[[T3 ]]]
137
- // CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[T10 ]], %[[ARG4]]]
136
+ // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
137
+ // CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG4]]]
138
138
// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG4]], %[[ARG3]]]
139
- // CHECK: %[[T21 :.*]] = affine.apply #[[MAP0]]()[%[[T3 ]]]
140
- // CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[T21 ]], %[[ARG3]]]
139
+ // CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
140
+ // CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[ARG3]]]
141
141
// CHECK: linalg.matmul %[[SV1]], %[[SV2]], %[[SV3]]
142
142
143
143
// -----
@@ -153,16 +153,16 @@ func @gemm6(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32>)
153
153
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref<?x?xf32>
154
154
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<?x?xf32>
155
155
// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref<?x?xf32>
156
- // CHECK: %[[T2 :.*]] = "gpu.block_id"() {dimension = "y"}
157
- // CHECK: %[[T3 :.*]] = "gpu.grid_dim"() {dimension = "y"}
158
- // CHECK: %[[T4 :.*]] = affine.apply #[[MAP0]]()[%[[T2]]]
159
- // CHECK: %[[T5 :.*]] = affine.apply #[[MAP0]]()[%[[T3 ]]]
160
- // CHECK: %[[T6 :.*]] = "gpu.block_id"() {dimension = "x"}
161
- // CHECK: scf.parallel (%[[ARG3.*]]) = (%[[T4 ]]) to (%{{.*}}) step (%[[T5 ]])
156
+ // CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
157
+ // CHECK: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"}
158
+ // CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
159
+ // CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
160
+ // CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]]
161
+ // CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBY]]) to (%{{.*}}) step (%[[STEPY]])
162
162
// CHECK: scf.for %[[ARG4:.*]] =
163
163
// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[ARG3]], %[[ARG4]]]
164
- // CHECK: %[[T14 :.*]] = affine.apply #[[MAP0]]()[%[[T6 ]]]
165
- // CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG4]], %[[T14 ]]]
166
- // CHECK: %[[T20 :.*]] = affine.apply #[[MAP0]]()[%[[T6 ]]]
167
- // CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[ARG3]], %[[T20 ]]]
164
+ // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
165
+ // CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]]
166
+ // CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
167
+ // CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]]
168
168
// CHECK: linalg.matmul %[[SV1]], %[[SV2]], %[[SV3]]
0 commit comments