Skip to content

Commit acc30a1

Browse files
committed
[OpenMP] Emit captured decls for target data if no devices were specified.
If use_device_ptr/use_device_addr clauses are used on a target data directive and no device was specified during compilation, only the host part should be emitted. However, it is still required to emit the captured decls for partially mapped data fields. Differential Revision: https://reviews.llvm.org/D144993
1 parent 37216b4 commit acc30a1

File tree

2 files changed

+83
-6
lines changed

2 files changed

+83
-6
lines changed

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7260,16 +7260,13 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
72607260
};
72617261
DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
72627262

7263-
auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
7264-
CodeGenFunction &CGF, PrePostActionTy &Action) {
7263+
auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
72657264
auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
72667265
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
72677266
};
72687267

72697268
// Codegen that selects whether to generate the privatization code or not.
7270-
auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
7271-
&InnermostCodeGen](CodeGenFunction &CGF,
7272-
PrePostActionTy &Action) {
7269+
auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
72737270
RegionCodeGenTy RCG(InnermostCodeGen);
72747271
PrivatizeDevicePointers = false;
72757272

@@ -7289,7 +7286,28 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
72897286
(void)PrivateScope.Privatize();
72907287
RCG(CGF);
72917288
} else {
7292-
OMPLexicalScope Scope(CGF, S, OMPD_unknown);
7289+
// If we don't have target devices, don't bother emitting the data
7290+
// mapping code.
7291+
std::optional<OpenMPDirectiveKind> CaptureRegion;
7292+
if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7293+
// Emit helper decls of the use_device_ptr/use_device_addr clauses.
7294+
for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7295+
for (const Expr *E : C->varlists()) {
7296+
const Decl *D = cast<DeclRefExpr>(E)->getDecl();
7297+
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
7298+
CGF.EmitVarDecl(*OED);
7299+
}
7300+
for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7301+
for (const Expr *E : C->varlists()) {
7302+
const Decl *D = getBaseDecl(E);
7303+
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
7304+
CGF.EmitVarDecl(*OED);
7305+
}
7306+
} else {
7307+
CaptureRegion = OMPD_unknown;
7308+
}
7309+
7310+
OMPLexicalScope Scope(CGF, S, CaptureRegion);
72937311
RCG(CGF);
72947312
}
72957313
};
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2+
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10 -x c++ -std=c++11 -emit-pch -o %t %s
4+
// RUN: %clang_cc1 -fopenmp -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
5+
6+
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -emit-llvm -o - %s | FileCheck --check-prefix SIMD-ONLY0 %s
7+
// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10 -x c++ -std=c++11 -emit-pch -o %t %s
8+
// RUN: %clang_cc1 -fopenmp-simd -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
9+
// expected-no-diagnostics
10+
11+
#ifndef HEADER
12+
#define HEADER
13+
14+
template <int T> class A { // Test fixture: a class template whose data member is named in a use_device_ptr clause.
15+
double *ptr = nullptr; // Member referenced by the use_device_ptr clause in foo().
16+
17+
public:
18+
void foo() {
19+
#pragma omp target data use_device_ptr(ptr)
20+
{ double *capture = ptr; } // Reads the member inside the target data region.
21+
}
22+
};
23+
24+
template class A<0>;
25+
#endif // HEADER
26+
// CHECK-LABEL: define {{[^@]+}}@_ZN1AILi0EE3fooEv
27+
// CHECK-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0:[0-9]+]] align 2 {
28+
// CHECK-NEXT: entry:
29+
// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
30+
// CHECK-NEXT: [[PTR:%.*]] = alloca ptr, align 8
31+
// CHECK-NEXT: [[CAPTURE:%.*]] = alloca ptr, align 8
32+
// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
33+
// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
34+
// CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[CLASS_A:%.*]], ptr [[THIS1]], i32 0, i32 0
35+
// CHECK-NEXT: store ptr [[PTR2]], ptr [[PTR]], align 8
36+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 8
37+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
38+
// CHECK-NEXT: store ptr [[TMP1]], ptr [[CAPTURE]], align 8
39+
// CHECK-NEXT: ret void
40+
//
41+
//
42+
// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1AILi0EE3fooEv
43+
// SIMD-ONLY0-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0:[0-9]+]] align 2 {
44+
// SIMD-ONLY0-NEXT: entry:
45+
// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
46+
// SIMD-ONLY0-NEXT: [[PTR:%.*]] = alloca ptr, align 8
47+
// SIMD-ONLY0-NEXT: [[TMP:%.*]] = alloca ptr, align 8
48+
// SIMD-ONLY0-NEXT: [[CAPTURE:%.*]] = alloca ptr, align 8
49+
// SIMD-ONLY0-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
50+
// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
51+
// SIMD-ONLY0-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[CLASS_A:%.*]], ptr [[THIS1]], i32 0, i32 0
52+
// SIMD-ONLY0-NEXT: store ptr [[PTR2]], ptr [[PTR]], align 8
53+
// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 8
54+
// SIMD-ONLY0-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8
55+
// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8
56+
// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
57+
// SIMD-ONLY0-NEXT: store ptr [[TMP2]], ptr [[CAPTURE]], align 8
58+
// SIMD-ONLY0-NEXT: ret void
59+
//

0 commit comments

Comments
 (0)