Skip to content

Commit 21bcea7

Browse files
erichkeaneGeorgeARM
authored andcommitted
[OpenACC][CIR] Implement Loop lowering of seq/auto/independent (llvm#138164)
These just add a standard 'device_type' flag to the acc.loop, so implement that lowering. This also modifies the dialect to add helpers for these as well, to be consistent with the previous ones.
1 parent 12b91fa commit 21bcea7

File tree

4 files changed

+137
-2
lines changed

4 files changed

+137
-2
lines changed

clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,13 +147,13 @@ class OpenACCClauseCIREmitter final
147147
decodeDeviceType(clause.getArchitectures()[0].getIdentifierInfo()));
148148
} else if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp,
149149
mlir::acc::SerialOp, mlir::acc::KernelsOp,
150-
mlir::acc::DataOp>) {
150+
mlir::acc::DataOp, mlir::acc::LoopOp>) {
151151
// Nothing to do here, these constructs don't have any IR for these, as
152152
// they just modify the other clauses IR. So setting of
153153
// `lastDeviceTypeValues` (done above) is all we need.
154154
} else {
155155
// TODO: When we've implemented this for everything, switch this to an
156-
// unreachable. update, data, loop, routine, combined constructs remain.
156+
// unreachable. update, data, routine, combined constructs remain.
157157
return clauseNotImplemented(clause);
158158
}
159159
}
@@ -306,6 +306,36 @@ class OpenACCClauseCIREmitter final
306306
llvm_unreachable("set, is only valid device_num constructs");
307307
}
308308
}
309+
310+
void VisitSeqClause(const OpenACCSeqClause &clause) {
311+
if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
312+
operation.addSeq(builder.getContext(), lastDeviceTypeValues);
313+
} else {
314+
// TODO: When we've implemented this for everything, switch this to an
315+
// unreachable. Routine, Combined constructs remain.
316+
return clauseNotImplemented(clause);
317+
}
318+
}
319+
320+
void VisitAutoClause(const OpenACCAutoClause &clause) {
321+
if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
322+
operation.addAuto(builder.getContext(), lastDeviceTypeValues);
323+
} else {
324+
// TODO: When we've implemented this for everything, switch this to an
325+
// unreachable. Routine, Combined constructs remain.
326+
return clauseNotImplemented(clause);
327+
}
328+
}
329+
330+
void VisitIndependentClause(const OpenACCIndependentClause &clause) {
331+
if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
332+
operation.addIndependent(builder.getContext(), lastDeviceTypeValues);
333+
} else {
334+
// TODO: When we've implemented this for everything, switch this to an
335+
// unreachable. Routine, Combined constructs remain.
336+
return clauseNotImplemented(clause);
337+
}
338+
}
309339
};
310340

311341
template <typename OpTy>

clang/test/CIR/CodeGenOpenACC/loop.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,83 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
3030
// CHECK-NEXT: } loc
3131
// CHECK-NEXT: acc.yield
3232
// CHECK-NEXT: } loc
33+
34+
35+
#pragma acc loop seq
36+
for(unsigned I = 0; I < N; ++I);
37+
// CHECK: acc.loop {
38+
// CHECK: acc.yield
39+
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
40+
#pragma acc loop device_type(nvidia, radeon) seq
41+
for(unsigned I = 0; I < N; ++I);
42+
// CHECK: acc.loop {
43+
// CHECK: acc.yield
44+
// CHECK-NEXT: } attributes {seq = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
45+
#pragma acc loop device_type(radeon) seq
46+
for(unsigned I = 0; I < N; ++I);
47+
// CHECK: acc.loop {
48+
// CHECK: acc.yield
49+
// CHECK-NEXT: } attributes {seq = [#acc.device_type<radeon>]} loc
50+
#pragma acc loop seq device_type(nvidia, radeon)
51+
for(unsigned I = 0; I < N; ++I);
52+
// CHECK: acc.loop {
53+
// CHECK: acc.yield
54+
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
55+
#pragma acc loop seq device_type(radeon)
56+
for(unsigned I = 0; I < N; ++I);
57+
// CHECK: acc.loop {
58+
// CHECK: acc.yield
59+
// CHECK-NEXT: } attributes {seq = [#acc.device_type<none>]} loc
60+
61+
#pragma acc loop independent
62+
for(unsigned I = 0; I < N; ++I);
63+
// CHECK: acc.loop {
64+
// CHECK: acc.yield
65+
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
66+
#pragma acc loop device_type(nvidia, radeon) independent
67+
for(unsigned I = 0; I < N; ++I);
68+
// CHECK: acc.loop {
69+
// CHECK: acc.yield
70+
// CHECK-NEXT: } attributes {independent = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
71+
#pragma acc loop device_type(radeon) independent
72+
for(unsigned I = 0; I < N; ++I);
73+
// CHECK: acc.loop {
74+
// CHECK: acc.yield
75+
// CHECK-NEXT: } attributes {independent = [#acc.device_type<radeon>]} loc
76+
#pragma acc loop independent device_type(nvidia, radeon)
77+
for(unsigned I = 0; I < N; ++I);
78+
// CHECK: acc.loop {
79+
// CHECK: acc.yield
80+
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
81+
#pragma acc loop independent device_type(radeon)
82+
for(unsigned I = 0; I < N; ++I);
83+
// CHECK: acc.loop {
84+
// CHECK: acc.yield
85+
// CHECK-NEXT: } attributes {independent = [#acc.device_type<none>]} loc
86+
87+
#pragma acc loop auto
88+
for(unsigned I = 0; I < N; ++I);
89+
// CHECK: acc.loop {
90+
// CHECK: acc.yield
91+
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
92+
#pragma acc loop device_type(nvidia, radeon) auto
93+
for(unsigned I = 0; I < N; ++I);
94+
// CHECK: acc.loop {
95+
// CHECK: acc.yield
96+
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<nvidia>, #acc.device_type<radeon>]} loc
97+
#pragma acc loop device_type(radeon) auto
98+
for(unsigned I = 0; I < N; ++I);
99+
// CHECK: acc.loop {
100+
// CHECK: acc.yield
101+
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<radeon>]} loc
102+
#pragma acc loop auto device_type(nvidia, radeon)
103+
for(unsigned I = 0; I < N; ++I);
104+
// CHECK: acc.loop {
105+
// CHECK: acc.yield
106+
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
107+
#pragma acc loop auto device_type(radeon)
108+
for(unsigned I = 0; I < N; ++I);
109+
// CHECK: acc.loop {
110+
// CHECK: acc.yield
111+
// CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
33112
}

mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2198,6 +2198,14 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
21982198
/// Return the value of the worker clause for the given device_type
21992199
/// if present.
22002200
mlir::Value getGangValue(mlir::acc::GangArgType gangArgType, mlir::acc::DeviceType deviceType);
2201+
2202+
// Add an entry to the 'seq' attribute for each additional device types.
2203+
void addSeq(MLIRContext *, llvm::ArrayRef<DeviceType>);
2204+
// Add an entry to the 'independent' attribute for each additional device
2205+
// types.
2206+
void addIndependent(MLIRContext *, llvm::ArrayRef<DeviceType>);
2207+
// Add an entry to the 'auto' attribute for each additional device types.
2208+
void addAuto(MLIRContext *, llvm::ArrayRef<DeviceType>);
22012209
}];
22022210

22032211
let hasCustomAssemblyFormat = 1;

mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2651,6 +2651,24 @@ void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
26512651
p.printRegion(region, /*printEntryBlockArgs=*/false);
26522652
}
26532653

2654+
void acc::LoopOp::addSeq(MLIRContext *context,
2655+
llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
2656+
setSeqAttr(addDeviceTypeAffectedOperandHelper(context, getSeqAttr(),
2657+
effectiveDeviceTypes));
2658+
}
2659+
2660+
void acc::LoopOp::addIndependent(
2661+
MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
2662+
setIndependentAttr(addDeviceTypeAffectedOperandHelper(
2663+
context, getIndependentAttr(), effectiveDeviceTypes));
2664+
}
2665+
2666+
void acc::LoopOp::addAuto(MLIRContext *context,
2667+
llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
2668+
setAuto_Attr(addDeviceTypeAffectedOperandHelper(context, getAuto_Attr(),
2669+
effectiveDeviceTypes));
2670+
}
2671+
26542672
//===----------------------------------------------------------------------===//
26552673
// DataOp
26562674
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)