Skip to content

Commit 6ff3b8e

Browse files
authored
[OpenACC][CIR] Impl default/seq lowering for combined constructs (#139263)
This adds two clauses plus the infrastructure for emitting the clauses on combined constructs. Combined constructs require two operations, so this makes sure we emit on the 'correct' one. It DOES require that the combined construct handling picks the correct one to put it on, AND sets up the 'inserter' correctly, but these two clauses don't require an inserter, so a future patch will get those.
1 parent 9818120 commit 6ff3b8e

File tree

4 files changed

+115
-6
lines changed

4 files changed

+115
-6
lines changed

clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h

Lines changed: 46 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,25 @@ constexpr bool isOneOfTypes =
2323
template <typename ToTest, typename T>
2424
constexpr bool isOneOfTypes<ToTest, T> = std::is_same_v<ToTest, T>;
2525

26+
// Holds information for emitting clauses for a combined construct. We
27+
// instantiate the clause emitter with this type so that it can use
28+
// if-constexpr to specially handle these.
29+
template <typename CompOpTy> struct CombinedConstructClauseInfo {
30+
using ComputeOpTy = CompOpTy;
31+
ComputeOpTy computeOp;
32+
mlir::acc::LoopOp loopOp;
33+
};
34+
35+
template <typename ToTest> constexpr bool isCombinedType = false;
36+
template <typename T>
37+
constexpr bool isCombinedType<CombinedConstructClauseInfo<T>> = true;
38+
2639
template <typename OpTy>
2740
class OpenACCClauseCIREmitter final
2841
: public OpenACCClauseVisitor<OpenACCClauseCIREmitter<OpTy>> {
42+
// Necessary for combined constructs.
43+
template <typename FriendOpTy> friend class OpenACCClauseCIREmitter;
44+
2945
OpTy &operation;
3046
CIRGen::CIRGenFunction &cgf;
3147
CIRGen::CIRGenBuilderTy &builder;
@@ -119,6 +135,26 @@ class OpenACCClauseCIREmitter final
119135
llvm_unreachable("unknown gang kind");
120136
}
121137

138+
template <typename U = void,
139+
typename = std::enable_if_t<isCombinedType<OpTy>, U>>
140+
void applyToLoopOp(const OpenACCClause &c) {
141+
// TODO OpenACC: we have to set the insertion scope here correctly still.
142+
OpenACCClauseCIREmitter<mlir::acc::LoopOp> loopEmitter{
143+
operation.loopOp, cgf, builder, dirKind, dirLoc};
144+
loopEmitter.lastDeviceTypeValues = lastDeviceTypeValues;
145+
loopEmitter.Visit(&c);
146+
}
147+
148+
template <typename U = void,
149+
typename = std::enable_if_t<isCombinedType<OpTy>, U>>
150+
void applyToComputeOp(const OpenACCClause &c) {
151+
// TODO OpenACC: we have to set the insertion scope here correctly still.
152+
OpenACCClauseCIREmitter<typename OpTy::ComputeOpTy> computeEmitter{
153+
operation.computeOp, cgf, builder, dirKind, dirLoc};
154+
computeEmitter.lastDeviceTypeValues = lastDeviceTypeValues;
155+
computeEmitter.Visit(&c);
156+
}
157+
122158
public:
123159
OpenACCClauseCIREmitter(OpTy &operation, CIRGen::CIRGenFunction &cgf,
124160
CIRGen::CIRGenBuilderTy &builder,
@@ -145,10 +181,10 @@ class OpenACCClauseCIREmitter final
145181
case OpenACCDefaultClauseKind::Invalid:
146182
break;
147183
}
184+
} else if constexpr (isCombinedType<OpTy>) {
185+
applyToComputeOp(clause);
148186
} else {
149-
// TODO: When we've implemented this for everything, switch this to an
150-
// unreachable. Combined constructs remain.
151-
return clauseNotImplemented(clause);
187+
llvm_unreachable("Unknown construct kind in VisitDefaultClause");
152188
}
153189
}
154190

@@ -175,9 +211,12 @@ class OpenACCClauseCIREmitter final
175211
// Nothing to do here, these constructs don't have any IR for these, as
176212
// they just modify the other clauses IR. So setting of
177213
// `lastDeviceTypeValues` (done above) is all we need.
214+
} else if constexpr (isCombinedType<OpTy>) {
215+
// Nothing to do here either, combined constructs are just going to use
216+
// 'lastDeviceTypeValues' to set the value for the child visitor.
178217
} else {
179218
// TODO: When we've implemented this for everything, switch this to an
180-
// unreachable. update, data, routine, combined constructs remain.
219+
// unreachable. update, data, routine constructs remain.
181220
return clauseNotImplemented(clause);
182221
}
183222
}
@@ -334,9 +373,11 @@ class OpenACCClauseCIREmitter final
334373
void VisitSeqClause(const OpenACCSeqClause &clause) {
335374
if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
336375
operation.addSeq(builder.getContext(), lastDeviceTypeValues);
376+
} else if constexpr (isCombinedType<OpTy>) {
377+
applyToLoopOp(clause);
337378
} else {
338379
// TODO: When we've implemented this for everything, switch this to an
339-
// unreachable. Routine, Combined constructs remain.
380+
// unreachable. Routine construct remains.
340381
return clauseNotImplemented(clause);
341382
}
342383
}

clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,15 @@ mlir::LogicalResult CIRGenFunction::emitOpenACCOpCombinedConstruct(
109109
builder.create<mlir::acc::YieldOp>(end);
110110
}
111111

112+
{
113+
mlir::OpBuilder::InsertionGuard guardCase(builder);
114+
CombinedConstructClauseInfo<Op> inf{computeOp, loopOp};
115+
// We don't bother setting the insertion point, since the clause emitter
116+
// is going to have to do this correctly.
117+
makeClauseEmitter(inf, *this, builder, dirKind, dirLoc)
118+
.VisitClauseList(clauses);
119+
}
120+
112121
builder.create<TermOp>(end);
113122
}
114123

clang/test/CIR/CodeGenOpenACC/combined.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,66 @@ extern "C" void acc_combined(int N) {
2222
// CHECK-NEXT: } loc
2323
// CHECK: acc.yield
2424
// CHECK-NEXT: } loc
25+
2526
#pragma acc kernels loop
2627
for(unsigned I = 0; I < N; ++I);
28+
// CHECK: acc.kernels combined(loop) {
29+
// CHECK: acc.loop combined(kernels) {
30+
// CHECK: acc.yield
31+
// CHECK-NEXT: } loc
32+
// CHECK: acc.terminator
33+
// CHECK-NEXT: } loc
34+
35+
#pragma acc parallel loop default(none)
36+
for(unsigned I = 0; I < N; ++I);
37+
// CHECK: acc.parallel combined(loop) {
38+
// CHECK: acc.loop combined(parallel) {
39+
// CHECK: acc.yield
40+
// CHECK-NEXT: } loc
41+
// CHECK: acc.yield
42+
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>} loc
43+
44+
#pragma acc serial loop default(present)
45+
for(unsigned I = 0; I < N; ++I);
46+
// CHECK: acc.serial combined(loop) {
47+
// CHECK: acc.loop combined(serial) {
48+
// CHECK: acc.yield
49+
// CHECK-NEXT: } loc
50+
// CHECK: acc.yield
51+
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue present>} loc
2752

53+
#pragma acc kernels loop default(none)
54+
for(unsigned I = 0; I < N; ++I);
2855
// CHECK: acc.kernels combined(loop) {
2956
// CHECK: acc.loop combined(kernels) {
3057
// CHECK: acc.yield
3158
// CHECK-NEXT: } loc
3259
// CHECK: acc.terminator
60+
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>} loc
61+
62+
#pragma acc parallel loop seq
63+
for(unsigned I = 0; I < N; ++I);
64+
// CHECK: acc.parallel combined(loop) {
65+
// CHECK: acc.loop combined(parallel) {
66+
// CHECK: acc.yield
67+
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
68+
// CHECK: acc.yield
69+
// CHECK-NEXT: } loc
70+
#pragma acc serial loop device_type(nvidia, radeon) seq
71+
for(unsigned I = 0; I < N; ++I);
72+
// CHECK: acc.serial combined(loop) {
73+
// CHECK: acc.loop combined(serial) {
74+
// CHECK: acc.yield
75+
// CHECK-NEXT: } attributes {seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
76+
// CHECK: acc.yield
77+
// CHECK-NEXT: } loc
78+
#pragma acc kernels loop seq device_type(nvidia, radeon)
79+
for(unsigned I = 0; I < N; ++I);
80+
// CHECK: acc.kernels combined(loop) {
81+
// CHECK: acc.loop combined(kernels) {
82+
// CHECK: acc.yield
83+
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
84+
// CHECK: acc.terminator
3385
// CHECK-NEXT: } loc
86+
3487
}

clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-cir %s -o %t.cir -verify
2-
// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-llvm %s -o %t-cir.ll -verify
32

43
void HelloWorld(int *A, int *B, int *C, int N) {
54

@@ -10,4 +9,11 @@ void HelloWorld(int *A, int *B, int *C, int N) {
109

1110
// expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Declare Construct}}
1211
#pragma acc declare create(A)
12+
13+
// expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: private}}
14+
#pragma acc parallel loop private(A)
15+
for(int i = 0; i <5; ++i);
16+
// expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: async}}
17+
#pragma acc parallel loop async
18+
for(int i = 0; i <5; ++i);
1319
}

0 commit comments

Comments
 (0)