@@ -193,4 +193,134 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
193
193
// CHECK: acc.yield
194
194
// CHECK-NEXT: } loc
195
195
196
+
197
+ #pragma acc kernels
198
+ {
199
+
200
+ #pragma acc loop worker
201
+ for (unsigned I = 0 ; I < N; ++I);
202
+ // CHECK: acc.loop worker {
203
+ // CHECK: acc.yield
204
+ // CHECK-NEXT: } loc
205
+
206
+ #pragma acc loop worker(N)
207
+ for (unsigned I = 0 ; I < N; ++I);
208
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
209
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
210
+ // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32) {
211
+ // CHECK: acc.yield
212
+ // CHECK-NEXT: } loc
213
+
214
+ #pragma acc loop worker device_type(nvidia, radeon) worker
215
+ for (unsigned I = 0 ; I < N; ++I);
216
+ // CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
217
+ // CHECK: acc.yield
218
+ // CHECK-NEXT: } loc
219
+
220
+ #pragma acc loop worker(N) device_type(nvidia, radeon) worker
221
+ for (unsigned I = 0 ; I < N; ++I);
222
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
223
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
224
+ // CHECK-NEXT: acc.loop worker([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
225
+ // CHECK: acc.yield
226
+ // CHECK-NEXT: } loc
227
+
228
+ #pragma acc loop worker device_type(nvidia, radeon) worker(N)
229
+ for (unsigned I = 0 ; I < N; ++I);
230
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
231
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
232
+ // CHECK-NEXT: acc.loop worker([#acc.device_type<none>], %[[N_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_CONV]] : si32 [#acc.device_type<radeon>]) {
233
+ // CHECK: acc.yield
234
+ // CHECK-NEXT: } loc
235
+
236
+ #pragma acc loop worker(N) device_type(nvidia, radeon) worker(N + 1)
237
+ for (unsigned I = 0 ; I < N; ++I);
238
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
239
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
240
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
241
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
242
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
243
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
244
+ // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
245
+ // CHECK: acc.yield
246
+ // CHECK-NEXT: } loc
247
+
248
+ #pragma acc loop device_type(nvidia, radeon) worker(num:N + 1)
249
+ for (unsigned I = 0 ; I < N; ++I);
250
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
251
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
252
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
253
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
254
+ // CHECK-NEXT: acc.loop worker(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
+ // CHECK: acc.yield
+ // CHECK-NEXT: } loc
255
+
256
+ #pragma acc loop vector
257
+ for (unsigned I = 0 ; I < N; ++I);
258
+ // CHECK: acc.loop vector {
259
+ // CHECK: acc.yield
260
+ // CHECK-NEXT: } loc
261
+
262
+ #pragma acc loop vector(N)
263
+ for (unsigned I = 0 ; I < N; ++I);
264
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
265
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
266
+ // CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32) {
267
+ // CHECK: acc.yield
268
+ // CHECK-NEXT: } loc
269
+
270
+ #pragma acc loop vector device_type(nvidia, radeon) vector
271
+ for (unsigned I = 0 ; I < N; ++I);
272
+ // CHECK-NEXT: acc.loop vector([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
273
+ // CHECK: acc.yield
274
+ // CHECK-NEXT: } loc
275
+
276
+ #pragma acc loop vector(N) device_type(nvidia, radeon) vector
277
+ for (unsigned I = 0 ; I < N; ++I);
278
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
279
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
280
+ // CHECK-NEXT: acc.loop vector([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
281
+ // CHECK: acc.yield
282
+ // CHECK-NEXT: } loc
283
+
284
+ #pragma acc loop vector(N) device_type(nvidia, radeon) vector(N + 1)
285
+ for (unsigned I = 0 ; I < N; ++I);
286
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
287
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
288
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
289
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
290
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
291
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
292
+ // CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
293
+ // CHECK: acc.yield
294
+ // CHECK-NEXT: } loc
295
+
296
+ #pragma acc loop device_type(nvidia, radeon) vector(length:N + 1)
297
+ for (unsigned I = 0 ; I < N; ++I);
298
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
299
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
300
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
301
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
302
+ // CHECK-NEXT: acc.loop vector(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
303
+ // CHECK: acc.yield
304
+ // CHECK-NEXT: } loc
305
+
306
+ #pragma acc loop worker vector device_type(nvidia) worker vector
307
+ for (unsigned I = 0 ; I < N; ++I);
308
+ // CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>]) vector([#acc.device_type<none>, #acc.device_type<nvidia>])
309
+ // CHECK: acc.yield
310
+ // CHECK-NEXT: } loc
311
+
312
+ #pragma acc loop worker(N) vector(N) device_type(nvidia) worker(N) vector(N)
313
+ for (unsigned I = 0 ; I < N; ++I);
314
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
315
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
316
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
317
+ // CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32
318
+ // CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
319
+ // CHECK-NEXT: %[[N_CONV3:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD3]] : !s32i to si32
320
+ // CHECK-NEXT: %[[N_LOAD4:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
321
+ // CHECK-NEXT: %[[N_CONV4:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD4]] : !s32i to si32
322
+ // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_CONV3]] : si32 [#acc.device_type<nvidia>]) vector(%[[N_CONV2]] : si32, %[[N_CONV4]] : si32 [#acc.device_type<nvidia>]) {
323
+ // CHECK: acc.yield
324
+ // CHECK-NEXT: } loc
325
+ }
196
326
}
0 commit comments