Skip to content

Commit 3f96fe6

Browse files
committed
[OPENMP] Support reduction clause on target-based directives.
OpenMP 5.0 added support for `reduction` clause in target-based directives. Patch adds this support to clang. llvm-svn: 320596
1 parent dde9325 commit 3f96fe6

File tree

6 files changed

+533
-56
lines changed

6 files changed

+533
-56
lines changed

clang/include/clang/Basic/OpenMPKinds.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,7 @@ OPENMP_TARGET_CLAUSE(depend)
454454
OPENMP_TARGET_CLAUSE(defaultmap)
455455
OPENMP_TARGET_CLAUSE(firstprivate)
456456
OPENMP_TARGET_CLAUSE(is_device_ptr)
457+
OPENMP_TARGET_CLAUSE(reduction)
457458

458459
// Clauses allowed for OpenMP directive 'target data'.
459460
OPENMP_TARGET_DATA_CLAUSE(if)

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6037,6 +6037,8 @@ class MappableExprsHandler {
60376037

60386038
/// \brief Set of all first private variables in the current directive.
60396039
llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
6040+
/// Set of all reduction variables in the current directive.
6041+
llvm::SmallPtrSet<const VarDecl *, 8> ReductionDecls;
60406042

60416043
/// Map between device pointer declarations and their expression components.
60426044
/// The key value for declarations in 'this' is null.
@@ -6429,6 +6431,12 @@ class MappableExprsHandler {
64296431
if (FirstPrivateDecls.count(Cap.getCapturedVar()))
64306432
return MappableExprsHandler::OMP_MAP_PRIVATE |
64316433
MappableExprsHandler::OMP_MAP_TO;
6434+
// Reduction variable will use only the 'private ptr' and 'map to_from'
6435+
// flag.
6436+
if (ReductionDecls.count(Cap.getCapturedVar())) {
6437+
return MappableExprsHandler::OMP_MAP_TO |
6438+
MappableExprsHandler::OMP_MAP_FROM;
6439+
}
64326440

64336441
// We didn't modify anything.
64346442
return CurrentModifiers;
@@ -6442,6 +6450,12 @@ class MappableExprsHandler {
64426450
for (const auto *D : C->varlists())
64436451
FirstPrivateDecls.insert(
64446452
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
6453+
for (const auto *C : Dir.getClausesOfKind<OMPReductionClause>()) {
6454+
for (const auto *D : C->varlists()) {
6455+
ReductionDecls.insert(
6456+
cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
6457+
}
6458+
}
64456459
// Extract device pointer clause information.
64466460
for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
64476461
for (auto L : C->component_lists())
@@ -6721,15 +6735,9 @@ class MappableExprsHandler {
67216735
// The default map type for a scalar/complex type is 'to' because by
67226736
// default the value doesn't have to be retrieved. For an aggregate
67236737
// type, the default is 'tofrom'.
6724-
CurMapTypes.push_back(ElementType->isAggregateType()
6725-
? (OMP_MAP_TO | OMP_MAP_FROM)
6726-
: OMP_MAP_TO);
6727-
6728-
// If we have a capture by reference we may need to add the private
6729-
// pointer flag if the base declaration shows in some first-private
6730-
// clause.
6731-
CurMapTypes.back() =
6732-
adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
6738+
CurMapTypes.emplace_back(adjustMapModifiersForPrivateClauses(
6739+
CI, ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
6740+
: OMP_MAP_TO));
67336741
}
67346742
// Every default map produces a single argument which is a target parameter.
67356743
CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

clang/lib/Sema/SemaOpenMP.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1279,9 +1279,13 @@ bool Sema::IsOpenMPCapturedByRef(ValueDecl *D, unsigned Level) {
12791279
// reference except if it is a pointer that is dereferenced somehow.
12801280
IsByRef = !(Ty->isPointerType() && IsVariableAssociatedWithSection);
12811281
} else {
1282-
// By default, all the data that has a scalar type is mapped by copy.
1283-
IsByRef = !Ty->isScalarType() ||
1284-
DSAStack->getDefaultDMAAtLevel(Level) == DMA_tofrom_scalar;
1282+
// By default, all the data that has a scalar type is mapped by copy
1283+
// (except for reduction variables).
1284+
IsByRef =
1285+
!Ty->isScalarType() ||
1286+
DSAStack->getDefaultDMAAtLevel(Level) == DMA_tofrom_scalar ||
1287+
DSAStack->hasExplicitDSA(
1288+
D, [](OpenMPClauseKind K) { return K == OMPC_reduction; }, Level);
12851289
}
12861290
}
12871291

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
// Only test codegen on target side, as private clause does not require any action on the host side
2+
// Test target codegen - host bc file has to be created first.
3+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
4+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
5+
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
6+
// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
7+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
8+
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
9+
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
10+
// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
11+
12+
// expected-no-diagnostics
13+
#ifndef HEADER
14+
#define HEADER
15+
16+
template<typename tx, typename ty>
17+
struct TT{
18+
tx X;
19+
ty Y;
20+
TT<tx, ty> operator*(const TT<tx, ty> &) { return *this; }
21+
};
22+
23+
// TCHECK: [[S1:%.+]] = type { double }
24+
25+
int foo(int n) {
26+
int a = 0;
27+
short aa = 0;
28+
float b[10];
29+
float bn[n];
30+
double c[5][10];
31+
double cn[5][n];
32+
TT<long long, char> d;
33+
34+
#pragma omp target reduction(*:a)
35+
{
36+
}
37+
38+
// TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}})
39+
// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
40+
// TCHECK: store {{.+}}, {{.+}} [[A]],
41+
// TCHECK: load i32*, i32** [[A]],
42+
// TCHECK: ret void
43+
44+
#pragma omp target reduction(+:a)
45+
{
46+
a = 1;
47+
}
48+
49+
// TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}} %{{.+}})
50+
// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
51+
// TCHECK: store {{.+}}, {{.+}} [[A]],
52+
// TCHECK: [[REF:%.+]] = load i32*, i32** [[A]],
53+
// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[REF]],
54+
// TCHECK: ret void
55+
56+
#pragma omp target reduction(-:a, aa)
57+
{
58+
a = 1;
59+
aa = 1;
60+
}
61+
62+
// TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}} [[A:%.+]], i16*{{.+}} [[AA:%.+]])
63+
// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
64+
// TCHECK: [[AA:%.+]] = alloca i{{[0-9]+}}*,
65+
// TCHECK: store {{.+}}, {{.+}} [[A]],
66+
// TCHECK: store {{.+}}, {{.+}} [[AA]],
67+
// TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]],
68+
// TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]],
69+
// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]],
70+
// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]],
71+
// TCHECK: ret void
72+
73+
return a;
74+
}
75+
76+
77+
template<typename tx>
78+
tx ftemplate(int n) {
79+
tx a = 0;
80+
short aa = 0;
81+
tx b[10];
82+
83+
#pragma omp target reduction(+:a,aa,b)
84+
{
85+
a = 1;
86+
aa = 1;
87+
b[2] = 1;
88+
}
89+
90+
return a;
91+
}
92+
93+
static
94+
int fstatic(int n) {
95+
int a = 0;
96+
short aa = 0;
97+
char aaa = 0;
98+
int b[10];
99+
100+
#pragma omp target reduction(-:a,aa,aaa,b)
101+
{
102+
a = 1;
103+
aa = 1;
104+
aaa = 1;
105+
b[2] = 1;
106+
}
107+
108+
return a;
109+
}
110+
111+
// TCHECK: define void @__omp_offloading_{{.+}}(i32*{{.+}}, i16*{{.+}}, i8*{{.+}}, [10 x i32]*{{.+}})
112+
// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
113+
// TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*,
114+
// TCHECK: [[A3:%.+]] = alloca i{{[0-9]+}}*,
115+
// TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*,
116+
// TCHECK: store {{.+}}, {{.+}} [[A]],
117+
// TCHECK: store {{.+}}, {{.+}} [[A2]],
118+
// TCHECK: store {{.+}}, {{.+}} [[A3]],
119+
// TCHECK: store {{.+}}, {{.+}} [[B]],
120+
// TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]],
121+
// TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]],
122+
// TCHECK: [[A3_REF:%.+]] = load i8*, i8** [[A3]],
123+
// TCHECK: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]],
124+
// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]],
125+
// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]],
126+
// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A3_REF]],
127+
// TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
128+
// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]],
129+
// TCHECK: ret void
130+
131+
struct S1 {
132+
double a;
133+
134+
int r1(int n){
135+
int b = n+1;
136+
short int c[2][n];
137+
138+
#pragma omp target reduction(max:b,c)
139+
{
140+
this->a = (double)b + 1.5;
141+
c[1][1] = ++a;
142+
}
143+
144+
return c[1][1] + (int)b;
145+
}
146+
147+
// TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i32*{{.+}}, i{{[0-9]+}} [[VLA:%.+]], i{{[0-9]+}} [[VLA1:%.+]], i16*{{.+}})
148+
// TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*,
149+
// TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}*,
150+
// TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}},
151+
// TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}},
152+
// TCHECK: [[C_ADDR:%.+]] = alloca i{{[0-9]+}}*,
153+
// TCHECK: store [[S1]]* [[TH]], [[S1]]** [[TH_ADDR]],
154+
// TCHECK: store i{{[0-9]+}}* {{.+}}, i{{[0-9]+}}** [[B_ADDR]],
155+
// TCHECK: store i{{[0-9]+}} [[VLA]], i{{[0-9]+}}* [[VLA_ADDR]],
156+
// TCHECK: store i{{[0-9]+}} [[VLA1]], i{{[0-9]+}}* [[VLA_ADDR2]],
157+
// TCHECK: store i{{[0-9]+}}* {{.+}}, i{{[0-9]+}}** [[C_ADDR]],
158+
// TCHECK: [[TH_ADDR_REF:%.+]] = load [[S1]]*, [[S1]]** [[TH_ADDR]],
159+
// TCHECK: [[B_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[B_ADDR]],
160+
// TCHECK: [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR]],
161+
// TCHECK: [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR2]],
162+
// TCHECK: [[C_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[C_ADDR]],
163+
164+
// this->a = (double)b + 1.5;
165+
// TCHECK: [[B_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[B_REF]],
166+
// TCHECK: [[B_CONV:%.+]] = sitofp i{{[0-9]+}} [[B_VAL]] to double
167+
// TCHECK: [[NEW_A_VAL:%.+]] = fadd double [[B_CONV]], 1.5{{.+}}+00
168+
// TCHECK: [[A_FIELD:%.+]] = getelementptr inbounds [[S1]], [[S1]]* [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
169+
// TCHECK: store double [[NEW_A_VAL]], double* [[A_FIELD]],
170+
171+
// c[1][1] = ++a;
172+
// TCHECK: [[A_FIELD4:%.+]] = getelementptr inbounds [[S1]], [[S1]]* [[TH_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
173+
// TCHECK: [[A_FIELD4_VAL:%.+]] = load double, double* [[A_FIELD4]],
174+
// TCHECK: [[A_FIELD_INC:%.+]] = fadd double [[A_FIELD4_VAL]], 1.0{{.+}}+00
175+
// TCHECK: store double [[A_FIELD_INC]], double* [[A_FIELD4]],
176+
// TCHECK: [[A_FIELD_INC_CONV:%.+]] = fptosi double [[A_FIELD_INC]] to i{{[0-9]+}}
177+
// TCHECK: [[C_IND:%.+]] = mul{{.+}} i{{[0-9]+}} 1, [[VLA_ADDR_REF2]]
178+
// TCHECK: [[C_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, i{{[0-9]+}}* [[C_REF]], i{{[0-9]+}} [[C_IND]]
179+
// TCHECK: [[C_1_1_REF:%.+]] = getelementptr inbounds i{{[0-9]+}}, i{{[0-9]+}}* [[C_1_REF]], i{{[0-9]+}} 1
180+
// TCHECK: store i{{[0-9]+}} [[A_FIELD_INC_CONV]], i{{[0-9]+}}* [[C_1_1_REF]],
181+
182+
// finish
183+
// TCHECK: ret void
184+
};
185+
186+
187+
int bar(int n){
188+
int a = 0;
189+
a += foo(n);
190+
S1 S;
191+
a += S.r1(n);
192+
a += fstatic(n);
193+
a += ftemplate<int>(n);
194+
195+
return a;
196+
}
197+
198+
// template
199+
// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}})
200+
// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*,
201+
// TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*,
202+
// TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*,
203+
// TCHECK: store {{.+}}, {{.+}} [[A]],
204+
// TCHECK: store {{.+}}, {{.+}} [[A2]],
205+
// TCHECK: store {{.+}}, {{.+}} [[B]],
206+
// TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]],
207+
// TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]],
208+
// TCHECK: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]],
209+
// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]],
210+
// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]],
211+
// TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
212+
// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]],
213+
// TCHECK: ret void
214+
215+
#endif

0 commit comments

Comments
 (0)