@@ -323,4 +323,73 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
323
323
// CHECK: acc.yield
324
324
// CHECK-NEXT: } loc
325
325
}
326
+
327
+ #pragma acc parallel
328
+ // CHECK: acc.parallel {
329
+ {
330
+ #pragma acc loop gang
331
+ for (unsigned I = 0 ; I < N; ++I);
332
+ // CHECK-NEXT: acc.loop gang {
333
+ // CHECK: acc.yield
334
+ // CHECK-NEXT: } loc
335
+ #pragma acc loop gang device_type(nvidia) gang
336
+ for (unsigned I = 0 ; I < N; ++I);
337
+ // CHECK-NEXT: acc.loop gang([#acc.device_type<none>, #acc.device_type<nvidia>]) {
338
+ // CHECK: acc.yield
339
+ // CHECK-NEXT: } loc
340
+ #pragma acc loop gang(dim:1) device_type(nvidia) gang(dim:2)
341
+ for (unsigned I = 0 ; I < N; ++I);
342
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
343
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
344
+ // CHECK-NEXT: acc.loop gang({dim=%[[ONE_CONST]] : i64}, {dim=%[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
345
+ // CHECK: acc.yield
346
+ // CHECK-NEXT: } loc
347
+ #pragma acc loop gang(static:N, dim: 1) device_type(nvidia, radeon) gang(static:*, dim : 2)
348
+ for (unsigned I = 0 ; I < N; ++I);
349
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
350
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
351
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
352
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
353
+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
354
+ // CHECK-NEXT: acc.loop gang({static=%[[N_CONV]] : si32, dim=%[[ONE_CONST]] : i64}, {static=%[[STAR_CONST]] : i64, dim=%[[TWO_CONST]] : i64} [#acc.device_type<nvidia>], {static=%[[STAR_CONST]] : i64, dim=%[[TWO_CONST]] : i64} [#acc.device_type<radeon>]) {
355
+ // CHECK: acc.yield
356
+ // CHECK-NEXT: } loc
357
+ }
358
+ #pragma acc kernels
359
+ // CHECK: acc.kernels {
360
+ {
361
+ #pragma acc loop gang(num:N) device_type(nvidia, radeon) gang(num:N)
362
+ for (unsigned I = 0 ; I < N; ++I);
363
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
364
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
365
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
366
+ // CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32
367
+ // CHECK-NEXT: acc.loop gang({num=%[[N_CONV]] : si32}, {num=%[[N_CONV2]] : si32} [#acc.device_type<nvidia>], {num=%[[N_CONV2]] : si32} [#acc.device_type<radeon>]) {
368
+ // CHECK: acc.yield
369
+ // CHECK-NEXT: } loc
370
+ #pragma acc loop gang(static:N) device_type(nvidia) gang(static:*)
371
+ for (unsigned I = 0 ; I < N; ++I);
372
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
373
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
374
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
375
+ // CHECK-NEXT: acc.loop gang({static=%[[N_CONV]] : si32}, {static=%[[STAR_CONST]] : i64} [#acc.device_type<nvidia>]) {
376
+ // CHECK: acc.yield
377
+ // CHECK-NEXT: } loc
378
+ #pragma acc loop gang(static:N, num: N + 1) device_type(nvidia) gang(static:*, num : N + 2)
379
+ for (unsigned I = 0 ; I < N; ++I);
380
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
381
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
382
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
383
+ // CHECK-NEXT: %[[CIR_ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
384
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[CIR_ONE_CONST]]) nsw : !s32i
385
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
386
+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
387
+ // CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
388
+ // CHECK-NEXT: %[[CIR_TWO_CONST:.*]] = cir.const #cir.int<2> : !s32i
389
+ // CHECK-NEXT: %[[N_PLUS_TWO:.*]] = cir.binop(add, %[[N_LOAD3]], %[[CIR_TWO_CONST]]) nsw : !s32i
390
+ // CHECK-NEXT: %[[N_PLUS_TWO_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_TWO]] : !s32i to si32
391
+ // CHECK-NEXT: acc.loop gang({static=%[[N_CONV]] : si32, num=%[[N_PLUS_ONE_CONV]] : si32}, {static=%[[STAR_CONST]] : i64, num=%[[N_PLUS_TWO_CONV]] : si32} [#acc.device_type<nvidia>]) {
392
+ // CHECK: acc.yield
393
+ // CHECK-NEXT: } loc
394
+ }
326
395
}
0 commit comments