Skip to content

Commit c8539f7

Browse files
committed
[OpenACC][CIR] Implement independent/auto lowering for combined constructs
These two are identical to 'seq', so their lowering is trivial to implement, and just requires using the previous 'helper' function.
1 parent 790ce0e commit c8539f7

File tree

2 files changed

+56
-2
lines changed

2 files changed

+56
-2
lines changed

clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,19 +385,23 @@ class OpenACCClauseCIREmitter final
385385
void VisitAutoClause(const OpenACCAutoClause &clause) {
386386
if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
387387
operation.addAuto(builder.getContext(), lastDeviceTypeValues);
388+
} else if constexpr (isCombinedType<OpTy>) {
389+
applyToLoopOp(clause);
388390
} else {
389391
// TODO: When we've implemented this for everything, switch this to an
390-
// unreachable. Routine, Combined constructs remain.
392+
// unreachable. Routine construct remains.
391393
return clauseNotImplemented(clause);
392394
}
393395
}
394396

395397
void VisitIndependentClause(const OpenACCIndependentClause &clause) {
396398
if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
397399
operation.addIndependent(builder.getContext(), lastDeviceTypeValues);
400+
} else if constexpr (isCombinedType<OpTy>) {
401+
applyToLoopOp(clause);
398402
} else {
399403
// TODO: When we've implemented this for everything, switch this to an
400-
// unreachable. Routine, Combined constructs remain.
404+
// unreachable. Routine construct remains.
401405
return clauseNotImplemented(clause);
402406
}
403407
}

clang/test/CIR/CodeGenOpenACC/combined.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,54 @@ extern "C" void acc_combined(int N) {
8484
// CHECK: acc.terminator
8585
// CHECK-NEXT: } loc
8686

87+
#pragma acc parallel loop auto
88+
for(unsigned I = 0; I < N; ++I);
89+
// CHECK: acc.parallel combined(loop) {
90+
// CHECK: acc.loop combined(parallel) {
91+
// CHECK: acc.yield
92+
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
93+
// CHECK: acc.yield
94+
// CHECK-NEXT: } loc
95+
#pragma acc serial loop device_type(nvidia, radeon) auto
96+
for(unsigned I = 0; I < N; ++I);
97+
// CHECK: acc.serial combined(loop) {
98+
// CHECK: acc.loop combined(serial) {
99+
// CHECK: acc.yield
100+
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
101+
// CHECK: acc.yield
102+
// CHECK-NEXT: } loc
103+
#pragma acc kernels loop auto device_type(nvidia, radeon)
104+
for(unsigned I = 0; I < N; ++I);
105+
// CHECK: acc.kernels combined(loop) {
106+
// CHECK: acc.loop combined(kernels) {
107+
// CHECK: acc.yield
108+
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
109+
// CHECK: acc.terminator
110+
// CHECK-NEXT: } loc
111+
112+
#pragma acc parallel loop independent
113+
for(unsigned I = 0; I < N; ++I);
114+
// CHECK: acc.parallel combined(loop) {
115+
// CHECK: acc.loop combined(parallel) {
116+
// CHECK: acc.yield
117+
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
118+
// CHECK: acc.yield
119+
// CHECK-NEXT: } loc
120+
#pragma acc serial loop device_type(nvidia, radeon) independent
121+
for(unsigned I = 0; I < N; ++I);
122+
// CHECK: acc.serial combined(loop) {
123+
// CHECK: acc.loop combined(serial) {
124+
// CHECK: acc.yield
125+
// CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
126+
// CHECK: acc.yield
127+
// CHECK-NEXT: } loc
128+
#pragma acc kernels loop independent device_type(nvidia, radeon)
129+
for(unsigned I = 0; I < N; ++I);
130+
// CHECK: acc.kernels combined(loop) {
131+
// CHECK: acc.loop combined(kernels) {
132+
// CHECK: acc.yield
133+
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
134+
// CHECK: acc.terminator
135+
// CHECK-NEXT: } loc
136+
87137
}

0 commit comments

Comments
 (0)