-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[OpenACC][CIR] Implement member exprs for 'copy' lowering #142998
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
These ended up not being too much of a change, it just requires that we properly emit a member expression,then use it in the varPtr. I also fixed up the 'name' field to be the expression print, as that was necessary to get this correct. Finally, I added a TON of tests to convince myself that I've got this correct, and hopefully the IR shows that.
@llvm/pr-subscribers-clangir Author: Erich Keane (erichkeane) ChangesThese ended up not being too much of a change, it just requires that we properly emit a member expression,then use it in the varPtr. I also fixed up the 'name' field to be the expression print, as that was necessary to get this correct. Finally, I added a TON of tests to convince myself that I've got this correct, and hopefully the IR shows that. Patch is 197.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142998.diff 5 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index 8b61d3fae3ad0..939ff4180dacb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -14,6 +14,8 @@
#include "CIRGenFunction.h"
+#include "clang/AST/ExprCXX.h"
+
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "llvm/ADT/TypeSwitch.h"
@@ -188,7 +190,7 @@ class OpenACCClauseCIREmitter final
struct DataOperandInfo {
mlir::Location beginLoc;
mlir::Value varValue;
- llvm::StringRef name;
+ std::string name;
llvm::SmallVector<mlir::Value> bounds;
};
@@ -226,6 +228,10 @@ class OpenACCClauseCIREmitter final
mlir::Location exprLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
llvm::SmallVector<mlir::Value> bounds;
+ std::string exprString;
+ llvm::raw_string_ostream OS(exprString);
+ e->printPretty(OS, nullptr, cgf.getContext().getPrintingPolicy());
+
// Assemble the list of bounds.
while (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) {
mlir::Location boundLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
@@ -267,20 +273,16 @@ class OpenACCClauseCIREmitter final
bounds.push_back(createBound(boundLoc, lowerBound, upperBound, extent));
}
- // TODO: OpenACC: if this is a member expr, emit the VarPtrPtr correctly.
- if (isa<MemberExpr>(curVarExpr)) {
- cgf.cgm.errorNYI(curVarExpr->getSourceRange(),
- "OpenACC Data clause member expr");
- return {exprLoc, {}, {}, std::move(bounds)};
- }
+ if (const auto *memExpr = dyn_cast<MemberExpr>(curVarExpr))
+ return {exprLoc, cgf.emitMemberExpr(memExpr).getPointer(), exprString,
+ std::move(bounds)};
// Sema has made sure that only 4 types of things can get here, array
// subscript, array section, member expr, or DRE to a var decl (or the
// former 3 wrapping a var-decl), so we should be able to assume this is
// right.
const auto *dre = cast<DeclRefExpr>(curVarExpr);
- const auto *vd = cast<VarDecl>(dre->getFoundDecl()->getCanonicalDecl());
- return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), vd->getName(),
+ return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), exprString,
std::move(bounds)};
}
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-copy.c b/clang/test/CIR/CodeGenOpenACC/combined-copy.c
index 50c0519f0f29d..28a7f78377da8 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-copy.c
+++ b/clang/test/CIR/CodeGenOpenACC/combined-copy.c
@@ -272,14 +272,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[3]"} loc
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[3]"} loc
#pragma acc serial loop copy(localArray[1:3])
for(int i = 0; i < 5; ++i);
@@ -290,14 +290,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[1:3]"} loc
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(serial) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[1:3]"} loc
#pragma acc kernels loop copy(localArray[:3])
for(int i = 0; i < 5; ++i);
@@ -307,14 +307,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[:3]"} loc
// CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(kernels) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[:3]"} loc
#pragma acc parallel loop copy(localArray[1:])
for(int i = 0; i < 5; ++i);
@@ -324,14 +324,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[1:]"} loc
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[1:]"} loc
#pragma acc serial loop copy(localArray[localVar1:localVar2])
for(int i = 0; i < 5; ++i);
@@ -342,14 +342,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[localVar1:localVar2]"} loc
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(serial) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[localVar1:localVar2]"} loc
#pragma acc kernels loop copy(localArray[:localVar2])
for(int i = 0; i < 5; ++i);
@@ -359,14 +359,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[:localVar2]"} loc
// CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(kernels) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[:localVar2]"} loc
#pragma acc parallel loop copy(localArray[localVar1:])
for(int i = 0; i < 5; ++i);
@@ -376,14 +376,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[localVar1:]"} loc
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[localVar1:]"} loc
#pragma acc serial loop copy(localPointer[3])
for(int i = 0; i < 5; ++i);
@@ -393,14 +393,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer[3]"} loc
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
// CHECK-NEXT: acc.loop combined(serial) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer[3]"} loc
#pragma acc kernels loop copy(localPointer[1:3])
for(int i = 0; i < 5; ++i);
@@ -411,14 +411,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer[1:3]"} loc
// CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
// CHECK-NEXT: acc.loop combined(kernels) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer[1:3]"} loc
#pragma acc parallel loop copy(localPointer[:3])
for(int i = 0; i < 5; ++i);
@@ -428,14 +428,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer[:3]"} loc
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
// CHECK-NEXT: acc.loop combined(parallel) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer[:3]"} loc
#pragma acc serial loop copy(localPointer[localVar1:localVar2])
for(int i = 0; i < 5; ++i);
@@ -446,14 +446,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer[localVar1:localVar2]"} loc
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
// CHECK-NEXT: acc.loop combined(serial) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {...
[truncated]
|
@llvm/pr-subscribers-clang Author: Erich Keane (erichkeane) ChangesThese ended up not being too much of a change, it just requires that we properly emit a member expression,then use it in the varPtr. I also fixed up the 'name' field to be the expression print, as that was necessary to get this correct. Finally, I added a TON of tests to convince myself that I've got this correct, and hopefully the IR shows that. Patch is 197.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142998.diff 5 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
index 8b61d3fae3ad0..939ff4180dacb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
@@ -14,6 +14,8 @@
#include "CIRGenFunction.h"
+#include "clang/AST/ExprCXX.h"
+
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "llvm/ADT/TypeSwitch.h"
@@ -188,7 +190,7 @@ class OpenACCClauseCIREmitter final
struct DataOperandInfo {
mlir::Location beginLoc;
mlir::Value varValue;
- llvm::StringRef name;
+ std::string name;
llvm::SmallVector<mlir::Value> bounds;
};
@@ -226,6 +228,10 @@ class OpenACCClauseCIREmitter final
mlir::Location exprLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
llvm::SmallVector<mlir::Value> bounds;
+ std::string exprString;
+ llvm::raw_string_ostream OS(exprString);
+ e->printPretty(OS, nullptr, cgf.getContext().getPrintingPolicy());
+
// Assemble the list of bounds.
while (isa<ArraySectionExpr, ArraySubscriptExpr>(curVarExpr)) {
mlir::Location boundLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc());
@@ -267,20 +273,16 @@ class OpenACCClauseCIREmitter final
bounds.push_back(createBound(boundLoc, lowerBound, upperBound, extent));
}
- // TODO: OpenACC: if this is a member expr, emit the VarPtrPtr correctly.
- if (isa<MemberExpr>(curVarExpr)) {
- cgf.cgm.errorNYI(curVarExpr->getSourceRange(),
- "OpenACC Data clause member expr");
- return {exprLoc, {}, {}, std::move(bounds)};
- }
+ if (const auto *memExpr = dyn_cast<MemberExpr>(curVarExpr))
+ return {exprLoc, cgf.emitMemberExpr(memExpr).getPointer(), exprString,
+ std::move(bounds)};
// Sema has made sure that only 4 types of things can get here, array
// subscript, array section, member expr, or DRE to a var decl (or the
// former 3 wrapping a var-decl), so we should be able to assume this is
// right.
const auto *dre = cast<DeclRefExpr>(curVarExpr);
- const auto *vd = cast<VarDecl>(dre->getFoundDecl()->getCanonicalDecl());
- return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), vd->getName(),
+ return {exprLoc, cgf.emitDeclRefLValue(dre).getPointer(), exprString,
std::move(bounds)};
}
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-copy.c b/clang/test/CIR/CodeGenOpenACC/combined-copy.c
index 50c0519f0f29d..28a7f78377da8 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-copy.c
+++ b/clang/test/CIR/CodeGenOpenACC/combined-copy.c
@@ -272,14 +272,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[3]"} loc
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[3]"} loc
#pragma acc serial loop copy(localArray[1:3])
for(int i = 0; i < 5; ++i);
@@ -290,14 +290,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[1:3]"} loc
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(serial) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[1:3]"} loc
#pragma acc kernels loop copy(localArray[:3])
for(int i = 0; i < 5; ++i);
@@ -307,14 +307,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[:3]"} loc
// CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(kernels) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[:3]"} loc
#pragma acc parallel loop copy(localArray[1:])
for(int i = 0; i < 5; ++i);
@@ -324,14 +324,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[1:]"} loc
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[1:]"} loc
#pragma acc serial loop copy(localArray[localVar1:localVar2])
for(int i = 0; i < 5; ++i);
@@ -342,14 +342,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[localVar1:localVar2]"} loc
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(serial) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[localVar1:localVar2]"} loc
#pragma acc kernels loop copy(localArray[:localVar2])
for(int i = 0; i < 5; ++i);
@@ -359,14 +359,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[:localVar2]"} loc
// CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(kernels) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[:localVar2]"} loc
#pragma acc parallel loop copy(localArray[localVar1:])
for(int i = 0; i < 5; ++i);
@@ -376,14 +376,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) upperbound(%[[FOUR_CONST]] : i64) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localArray[localVar1:]"} loc
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) {
// CHECK-NEXT: acc.loop combined(parallel) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALARRAY]] : !cir.ptr<!cir.array<!cir.float x 5>>) {dataClause = #acc<data_clause acc_copy>, name = "localArray[localVar1:]"} loc
#pragma acc serial loop copy(localPointer[3])
for(int i = 0; i < 5; ++i);
@@ -393,14 +393,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer[3]"} loc
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
// CHECK-NEXT: acc.loop combined(serial) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer[3]"} loc
#pragma acc kernels loop copy(localPointer[1:3])
for(int i = 0; i < 5; ++i);
@@ -411,14 +411,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer[1:3]"} loc
// CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
// CHECK-NEXT: acc.loop combined(kernels) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.terminator
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer[1:3]"} loc
#pragma acc parallel loop copy(localPointer[:3])
for(int i = 0; i < 5; ++i);
@@ -428,14 +428,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer[:3]"} loc
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
// CHECK-NEXT: acc.loop combined(parallel) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer[:3]"} loc
#pragma acc serial loop copy(localPointer[localVar1:localVar2])
for(int i = 0; i < 5; ++i);
@@ -446,14 +446,14 @@ void acc_compute(int parmVar) {
// CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc
- // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.ptr<!s16i>> {dataClause = #acc<data_clause acc_copy>, name = "localPointer[localVar1:localVar2]"} loc
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) {
// CHECK-NEXT: acc.loop combined(serial) {
// CHECK: acc.yield
// CHECK-NEXT: }
// CHECK-NEXT: acc.yield
// CHECK-NEXT: } loc
- // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {dataClause = #acc<data_clause acc_copy>, name = "localPointer"} loc
+ // CHECK-NEXT: acc.copyout accPtr(%[[COPYIN1]] : !cir.ptr<!cir.ptr<!s16i>>) bounds(%[[BOUNDS]]) to varPtr(%[[LOCALPTR]] : !cir.ptr<!cir.ptr<!s16i>>) {...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@@ -226,6 +228,10 @@ class OpenACCClauseCIREmitter final | |||
mlir::Location exprLoc = cgf.cgm.getLoc(curVarExpr->getBeginLoc()); | |||
llvm::SmallVector<mlir::Value> bounds; | |||
|
|||
std::string exprString; | |||
llvm::raw_string_ostream OS(exprString); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe?
llvm::raw_string_ostream OS(exprString); | |
llvm::raw_string_ostream os(exprString); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice job with the testing! I had a moment of panic seeing the size of this change, only to find that it's mostly test cases. I have a couple of questions, but it looks good.
Struct localStruct; | ||
// CHECK-NEXT: %[[LOCALSTRUCT:.*]] = cir.alloca !rec_StructTy, !cir.ptr<!rec_StructTy>, ["localStruct"] | ||
|
||
#pragma acc parallel loop copy(localStruct) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is a case like this possible?
typedef InnerTy {
int a;
int b;
} Inner;
typedef OuterTy {
Inner inner[4];
} Outer;
void copy_member_of_array_element_member() {
Outer outer;
#pragma acc parallel loop copy(out.inner[2].b)
...
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It does! Though it actually uses 'cir.ptr_stride' here, which I think is appropriate. I'll add that test.
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 | ||
// CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) | ||
// CHECK-NEXT: %[[GETARRAYMEMBER:.*]] = cir.get_member %[[LOCALSTRUCT]][1] {name = "arrayMember"} : !cir.ptr<!rec_StructTy> -> !cir.ptr<!cir.array<!s32i x 5>> | ||
// CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[GETARRAYMEMBER]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {dataClause = #acc<data_clause acc_copy>, name = "localStruct.arrayMember[2]"} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When I implemented the PointerLikeType
interface, I had a handler for a pointer that was the result of a cir.ptr_stride
operation, thinking we'd need that for array elements, but I see here that's being handled differently. Is there any way that a cir.ptr_stride
result code get to an acc.copy*
operation?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
None that I can think of... The OpenACC dialect defines these 'bounds', and the 'stride' of arrays/etc in C++ are always 1, based on type.
Honestly you're not wrong :) I wrote all of these tests in advance expecting this to be 6-7 different patches. Then while implementing the first bit, I found myself realizing that I'd already inadvertently already implemented the others so just completed the tests. Much of the testing is a little redundant, and likely is overkill, but it seems sensible to keep them around even so.
|
// CHECK-NEXT: %[[INNERDECAY:.*]] = cir.cast(array_to_ptrdecay, %[[GETINNER]] : !cir.ptr<!cir.array<!rec_InnerTy x 4>>), !cir.ptr<!rec_InnerTy> | ||
// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[INNERDECAY]] : !cir.ptr<!rec_InnerTy>, %[[TWO]] : !s32i), !cir.ptr<!rec_InnerTy> | ||
// CHECK-NEXT: %[[GETB:.*]] = cir.get_member %[[STRIDE]][1] {name = "b"} : !cir.ptr<!rec_InnerTy> -> !cir.ptr<!s32i> | ||
// CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[GETB]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copy>, name = "outer.inner[2].b"} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good. It's finding the member in the way I expected, and the PointerLikeType handling will work with this.
These ended up not being too much of a change, it just requires that we properly emit a member expression,then use it in the varPtr. I also fixed up the 'name' field to be the expression print, as that was necessary to get this correct. Finally, I added a TON of tests to convince myself that I've got this correct, and hopefully the IR shows that.
The patch llvm#142998 crossed in the air with llvm#142862. This resulted in 2 of the tests from the former to not have the inlined function emitted. This patch adds an additional function to force these to be emitted.
These ended up not being too much of a change, it just requires that we properly emit a member expression,then use it in the varPtr. I also fixed up the 'name' field to be the expression print, as that was necessary to get this correct. Finally, I added a TON of tests to convince myself that I've got this correct, and hopefully the IR shows that.
The patch llvm#142998 crossed in the air with llvm#142862. This resulted in 2 of the tests from the former to not have the inlined function emitted. This patch adds an additional function to force these to be emitted.
These ended up not being too much of a change, it just requires that we properly emit a member expression,then use it in the varPtr. I also fixed up the 'name' field to be the expression print, as that was necessary to get this correct. Finally, I added a TON of tests to convince myself that I've got this correct, and hopefully the IR shows that.
The patch llvm#142998 crossed in the air with llvm#142862. This resulted in 2 of the tests from the former to not have the inlined function emitted. This patch adds an additional function to force these to be emitted.
These ended up not being too much of a change, it just requires that we properly emit a member expression,then use it in the varPtr. I also fixed up the 'name' field to be the expression print, as that was necessary to get this correct.
Finally, I added a TON of tests to convince myself that I've got this correct, and hopefully the IR shows that.