Skip to content

Commit c39ae09

Browse files
author
git apple-llvm automerger
committed
Merge commit 'f4e7ba02cc7f' from llvm.org/main into next
2 parents 8b3ce8e + f4e7ba0 commit c39ae09

File tree

4 files changed

+205
-0
lines changed

4 files changed

+205
-0
lines changed

clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,38 @@ class OpenACCClauseCIREmitter final
392392
return clauseNotImplemented(clause);
393393
}
394394
}
395+
396+
void VisitWorkerClause(const OpenACCWorkerClause &clause) {
397+
if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
398+
if (clause.hasIntExpr())
399+
operation.addWorkerNumOperand(builder.getContext(),
400+
createIntExpr(clause.getIntExpr()),
401+
lastDeviceTypeValues);
402+
else
403+
operation.addEmptyWorker(builder.getContext(), lastDeviceTypeValues);
404+
405+
} else {
406+
// TODO: When we've implemented this for everything, switch this to an
407+
// unreachable. Combined constructs remain.
408+
return clauseNotImplemented(clause);
409+
}
410+
}
411+
412+
void VisitVectorClause(const OpenACCVectorClause &clause) {
413+
if constexpr (isOneOfTypes<OpTy, mlir::acc::LoopOp>) {
414+
if (clause.hasIntExpr())
415+
operation.addVectorOperand(builder.getContext(),
416+
createIntExpr(clause.getIntExpr()),
417+
lastDeviceTypeValues);
418+
else
419+
operation.addEmptyVector(builder.getContext(), lastDeviceTypeValues);
420+
421+
} else {
422+
// TODO: When we've implemented this for everything, switch this to an
423+
// unreachable. Combined constructs remain.
424+
return clauseNotImplemented(clause);
425+
}
426+
}
395427
};
396428

397429
template <typename OpTy>

clang/test/CIR/CodeGenOpenACC/loop.cpp

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,4 +193,134 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
193193
// CHECK: acc.yield
194194
// CHECK-NEXT: } loc
195195

196+
197+
#pragma acc kernels
198+
{
199+
200+
#pragma acc loop worker
201+
for(unsigned I = 0; I < N; ++I);
202+
// CHECK: acc.loop worker {
203+
// CHECK: acc.yield
204+
// CHECK-NEXT: } loc
205+
206+
#pragma acc loop worker(N)
207+
for(unsigned I = 0; I < N; ++I);
208+
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
209+
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
210+
// CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32) {
211+
// CHECK: acc.yield
212+
// CHECK-NEXT: } loc
213+
214+
#pragma acc loop worker device_type(nvidia, radeon) worker
215+
for(unsigned I = 0; I < N; ++I);
216+
// CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
217+
// CHECK: acc.yield
218+
// CHECK-NEXT: } loc
219+
220+
#pragma acc loop worker(N) device_type(nvidia, radeon) worker
221+
for(unsigned I = 0; I < N; ++I);
222+
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
223+
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
224+
// CHECK-NEXT: acc.loop worker([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
225+
// CHECK: acc.yield
226+
// CHECK-NEXT: } loc
227+
228+
#pragma acc loop worker device_type(nvidia, radeon) worker(N)
229+
for(unsigned I = 0; I < N; ++I);
230+
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
231+
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
232+
// CHECK-NEXT: acc.loop worker([#acc.device_type<none>], %[[N_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_CONV]] : si32 [#acc.device_type<radeon>]) {
233+
// CHECK: acc.yield
234+
// CHECK-NEXT: } loc
235+
236+
#pragma acc loop worker(N) device_type(nvidia, radeon) worker(N + 1)
237+
for(unsigned I = 0; I < N; ++I);
238+
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
239+
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
240+
// CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
241+
// CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
242+
// CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
243+
// CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
244+
// CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
245+
// CHECK: acc.yield
246+
// CHECK-NEXT: } loc
247+
248+
#pragma acc loop device_type(nvidia, radeon) worker(num:N + 1)
249+
for(unsigned I = 0; I < N; ++I);
250+
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
251+
// CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
252+
// CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
253+
// CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
254+
// CHECK-NEXT: acc.loop worker(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
255+
256+
#pragma acc loop vector
257+
for(unsigned I = 0; I < N; ++I);
258+
// CHECK: acc.loop vector {
259+
// CHECK: acc.yield
260+
// CHECK-NEXT: } loc
261+
262+
#pragma acc loop vector(N)
263+
for(unsigned I = 0; I < N; ++I);
264+
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
265+
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
266+
// CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32) {
267+
// CHECK: acc.yield
268+
// CHECK-NEXT: } loc
269+
270+
#pragma acc loop vector device_type(nvidia, radeon) vector
271+
for(unsigned I = 0; I < N; ++I);
272+
// CHECK-NEXT: acc.loop vector([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
273+
// CHECK: acc.yield
274+
// CHECK-NEXT: } loc
275+
276+
#pragma acc loop vector(N) device_type(nvidia, radeon) vector
277+
for(unsigned I = 0; I < N; ++I);
278+
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
279+
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
280+
// CHECK-NEXT: acc.loop vector([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
281+
// CHECK: acc.yield
282+
// CHECK-NEXT: } loc
283+
284+
#pragma acc loop vector(N) device_type(nvidia, radeon) vector(N + 1)
285+
for(unsigned I = 0; I < N; ++I);
286+
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
287+
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
288+
// CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
289+
// CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
290+
// CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
291+
// CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
292+
// CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
293+
// CHECK: acc.yield
294+
// CHECK-NEXT: } loc
295+
296+
#pragma acc loop device_type(nvidia, radeon) vector(length:N + 1)
297+
for(unsigned I = 0; I < N; ++I);
298+
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
299+
// CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
300+
// CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
301+
// CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
302+
// CHECK-NEXT: acc.loop vector(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
303+
// CHECK: acc.yield
304+
// CHECK-NEXT: } loc
305+
306+
#pragma acc loop worker vector device_type(nvidia) worker vector
307+
for(unsigned I = 0; I < N; ++I);
308+
// CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>]) vector([#acc.device_type<none>, #acc.device_type<nvidia>])
309+
// CHECK: acc.yield
310+
// CHECK-NEXT: } loc
311+
312+
#pragma acc loop worker(N) vector(N) device_type(nvidia) worker(N) vector(N)
313+
for(unsigned I = 0; I < N; ++I);
314+
// CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
315+
// CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
316+
// CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
317+
// CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32
318+
// CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
319+
// CHECK-NEXT: %[[N_CONV3:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD3]] : !s32i to si32
320+
// CHECK-NEXT: %[[N_LOAD4:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
321+
// CHECK-NEXT: %[[N_CONV4:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD4]] : !s32i to si32
322+
// CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_CONV3]] : si32 [#acc.device_type<nvidia>]) vector(%[[N_CONV2]] : si32, %[[N_CONV4]] : si32 [#acc.device_type<nvidia>]) {
323+
// CHECK: acc.yield
324+
// CHECK-NEXT: } loc
325+
}
196326
}

mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2216,6 +2216,21 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
22162216
// values should be integral constants, with the '*' represented as a '-1'.
22172217
void setTileForDeviceTypes(MLIRContext *, llvm::ArrayRef<DeviceType>,
22182218
mlir::ValueRange);
2219+
2220+
// Add a value to the 'vector' list with a current list of device_types.
2221+
void addVectorOperand(MLIRContext *, mlir::Value,
2222+
llvm::ArrayRef<DeviceType>);
2223+
// Add an empty value to the 'vector' list with a current list of
2224+
// device_types. This is for the case where there is no expression specified
2225+
// in a 'vector'.
2226+
void addEmptyVector(MLIRContext *, llvm::ArrayRef<DeviceType>);
2227+
// Add a value to the 'worker' list with a current list of device_types.
2228+
void addWorkerNumOperand(MLIRContext *, mlir::Value,
2229+
llvm::ArrayRef<DeviceType>);
2230+
// Add an empty value to the 'worker' list with a current list of
2231+
// device_types. This is for the case where there is no expression specified
2232+
// in a 'worker'.
2233+
void addEmptyWorker(MLIRContext *, llvm::ArrayRef<DeviceType>);
22192234
}];
22202235

22212236
let hasCustomAssemblyFormat = 1;

mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2720,6 +2720,34 @@ void acc::LoopOp::setTileForDeviceTypes(
27202720
setTileOperandsSegments(segments);
27212721
}
27222722

2723+
void acc::LoopOp::addVectorOperand(
2724+
MLIRContext *context, mlir::Value newValue,
2725+
llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
2726+
setVectorOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper(
2727+
context, getVectorOperandsDeviceTypeAttr(), effectiveDeviceTypes,
2728+
newValue, getVectorOperandsMutable()));
2729+
}
2730+
2731+
void acc::LoopOp::addEmptyVector(
2732+
MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
2733+
setVectorAttr(addDeviceTypeAffectedOperandHelper(context, getVectorAttr(),
2734+
effectiveDeviceTypes));
2735+
}
2736+
2737+
void acc::LoopOp::addWorkerNumOperand(
2738+
MLIRContext *context, mlir::Value newValue,
2739+
llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
2740+
setWorkerNumOperandsDeviceTypeAttr(addDeviceTypeAffectedOperandHelper(
2741+
context, getWorkerNumOperandsDeviceTypeAttr(), effectiveDeviceTypes,
2742+
newValue, getWorkerNumOperandsMutable()));
2743+
}
2744+
2745+
void acc::LoopOp::addEmptyWorker(
2746+
MLIRContext *context, llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
2747+
setWorkerAttr(addDeviceTypeAffectedOperandHelper(context, getWorkerAttr(),
2748+
effectiveDeviceTypes));
2749+
}
2750+
27232751
//===----------------------------------------------------------------------===//
27242752
// DataOp
27252753
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)