@@ -252,4 +252,71 @@ extern "C" void acc_combined(int N) {
252
252
// CHECK-NEXT: acc.yield
253
253
// CHECK-NEXT: } loc
254
254
255
+ #pragma acc parallel loop tile(1, 2, 3)
256
+ for (unsigned I = 0 ; I < N; ++I)
257
+ for (unsigned J = 0 ; J < N; ++J)
258
+ for (unsigned K = 0 ; K < N; ++K);
259
+ // CHECK-NEXT: acc.parallel combined(loop) {
260
+ // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
261
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
262
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
263
+ // CHECK-NEXT: acc.loop combined(parallel) tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
264
+ // CHECK: acc.yield
265
+ // CHECK-NEXT: } loc
266
+ // CHECK-NEXT: acc.yield
267
+ // CHECK-NEXT: } loc
268
+ #pragma acc serial loop tile(2) device_type(radeon)
269
+ for (unsigned I = 0 ; I < N; ++I)
270
+ for (unsigned J = 0 ; J < N; ++J)
271
+ for (unsigned K = 0 ; K < N; ++K);
272
+ // CHECK-NEXT: acc.serial combined(loop) {
273
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
274
+ // CHECK-NEXT: acc.loop combined(serial) tile({%[[TWO_CONST]] : i64}) {
275
+ // CHECK: acc.yield
276
+ // CHECK-NEXT: } loc
277
+ // CHECK-NEXT: acc.yield
278
+ // CHECK-NEXT: } loc
279
+ #pragma acc kernels loop tile(2) device_type(radeon) tile (1, *)
280
+ for (unsigned I = 0 ; I < N; ++I)
281
+ for (unsigned J = 0 ; J < N; ++J)
282
+ for (unsigned K = 0 ; K < N; ++K);
283
+ // CHECK-NEXT: acc.kernels combined(loop) {
284
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
285
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
286
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
287
+ // CHECK-NEXT: acc.loop combined(kernels) tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
288
+ // CHECK: acc.yield
289
+ // CHECK-NEXT: } loc
290
+ // CHECK-NEXT: acc.terminator
291
+ // CHECK-NEXT: } loc
292
+ #pragma acc parallel loop tile(*) device_type(radeon, nvidia) tile (1, 2)
293
+ for (unsigned I = 0 ; I < N; ++I)
294
+ for (unsigned J = 0 ; J < N; ++J)
295
+ for (unsigned K = 0 ; K < N; ++K);
296
+ // CHECK-NEXT: acc.parallel combined(loop) {
297
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
298
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
299
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
300
+ // CHECK-NEXT: acc.loop combined(parallel) tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
301
+ // CHECK: acc.yield
302
+ // CHECK-NEXT: } loc
303
+ // CHECK-NEXT: acc.yield
304
+ // CHECK-NEXT: } loc
305
+ #pragma acc serial loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
306
+ for (unsigned I = 0 ; I < N; ++I)
307
+ for (unsigned J = 0 ; J < N; ++J)
308
+ for (unsigned K = 0 ; K < N; ++K);
309
+ // CHECK-NEXT: acc.serial combined(loop) {
310
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
311
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
312
+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
313
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
314
+ // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
315
+ // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
316
+ // CHECK-NEXT: acc.loop combined(serial) tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
317
+ // CHECK: acc.yield
318
+ // CHECK-NEXT: } loc
319
+ // CHECK-NEXT: acc.yield
320
+ // CHECK-NEXT: } loc
321
+
255
322
}
0 commit comments