Skip to content

Commit 8d27be8

Browse files
committed
[OpenCL] Add global_device and global_host address spaces
This patch introduces 2 new address spaces in OpenCL: global_device and global_host, which are subsets of the global address space, so the address space scheme will look like:

```
generic->global->host
               ->device
       ->private
       ->local
constant
```

Justification: USM allocations may be associated with both host and device memory. We want to give users a way to tell the compiler the allocation type of a USM pointer for optimization purposes. (Link to the Unified Shared Memory extension: https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/USM/cl_intel_unified_shared_memory.asciidoc)

Before this patch, a USM pointer could only be in the opencl_global address space, hence a device backend couldn't tell whether a particular pointer points to host or device memory. On FPGAs at least, we can generate more efficient hardware code if the user tells us where the pointer can point - being able to distinguish between these types of pointers at compile time allows us to instantiate simpler load-store units to perform memory transactions.

Patch by Dmitry Sidorov.

Reviewed By: Anastasia

Differential Revision: https://reviews.llvm.org/D82174
1 parent 2c662f3 commit 8d27be8

22 files changed

+262
-29
lines changed

clang/include/clang/AST/Type.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,11 @@ class Qualifiers {
480480
// Otherwise in OpenCLC v2.0 s6.5.5: every address space except
481481
// for __constant can be used as __generic.
482482
(A == LangAS::opencl_generic && B != LangAS::opencl_constant) ||
483+
// We also define global_device and global_host address spaces,
484+
// to distinguish global pointers allocated on host from pointers
485+
// allocated on device, which are a subset of __global.
486+
(A == LangAS::opencl_global && (B == LangAS::opencl_global_device ||
487+
B == LangAS::opencl_global_host)) ||
483488
// Consider pointer size address spaces to be equivalent to default.
484489
((isPtrSizeAddressSpace(A) || A == LangAS::Default) &&
485490
(isPtrSizeAddressSpace(B) || B == LangAS::Default));

clang/include/clang/Basic/AddressSpaces.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ enum class LangAS : unsigned {
3636
opencl_constant,
3737
opencl_private,
3838
opencl_generic,
39+
opencl_global_device,
40+
opencl_global_host,
3941

4042
// CUDA specific address spaces.
4143
cuda_device,

clang/include/clang/Basic/Attr.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1178,6 +1178,16 @@ def OpenCLGlobalAddressSpace : TypeAttr {
11781178
let Documentation = [OpenCLAddressSpaceGlobalDocs];
11791179
}
11801180

1181+
def OpenCLGlobalDeviceAddressSpace : TypeAttr {
1182+
let Spellings = [Clang<"opencl_global_device">];
1183+
let Documentation = [OpenCLAddressSpaceGlobalExtDocs];
1184+
}
1185+
1186+
def OpenCLGlobalHostAddressSpace : TypeAttr {
1187+
let Spellings = [Clang<"opencl_global_host">];
1188+
let Documentation = [OpenCLAddressSpaceGlobalExtDocs];
1189+
}
1190+
11811191
def OpenCLLocalAddressSpace : TypeAttr {
11821192
let Spellings = [Keyword<"__local">, Keyword<"local">, Clang<"opencl_local">];
11831193
let Documentation = [OpenCLAddressSpaceLocalDocs];

clang/include/clang/Basic/AttrDocs.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3123,6 +3123,30 @@ scope) variables and static local variable as well.
31233123
}];
31243124
}
31253125

3126+
def OpenCLAddressSpaceGlobalExtDocs : Documentation {
3127+
let Category = DocOpenCLAddressSpaces;
3128+
let Heading = "[[clang::opencl_global_device]], [[clang::opencl_global_host]]";
3129+
let Content = [{
3130+
The ``global_device`` and ``global_host`` address space attributes specify that
3131+
an object is allocated in global memory on the device/host. It helps to
3132+
distinguish USM (Unified Shared Memory) pointers that access global device
3133+
memory from those that access global host memory. These new address spaces are
3134+
a subset of the ``__global/opencl_global`` address space, the full address space
3135+
set model for OpenCL 2.0 with the extension looks as follows:
3136+
generic->global->host
3137+
               ->device
3138+
       ->private
3139+
       ->local
3140+
constant
3141+
3142+
As ``global_device`` and ``global_host`` are a subset of
3143+
``__global/opencl_global`` address spaces, it is allowed to convert
3144+
``global_device`` and ``global_host`` address spaces to
3145+
``__global/opencl_global`` address spaces (following ISO/IEC TR 18037 5.1.3
3146+
"Address space nesting and rules for pointers").
3147+
}];
3148+
}
3149+
31263150
def OpenCLAddressSpaceLocalDocs : Documentation {
31273151
let Category = DocOpenCLAddressSpaces;
31283152
let Heading = "__local, local, [[clang::opencl_local]]";

clang/include/clang/Sema/ParsedAttr.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,6 +606,10 @@ class ParsedAttr final
606606
return LangAS::opencl_constant;
607607
case ParsedAttr::AT_OpenCLGlobalAddressSpace:
608608
return LangAS::opencl_global;
609+
case ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace:
610+
return LangAS::opencl_global_device;
611+
case ParsedAttr::AT_OpenCLGlobalHostAddressSpace:
612+
return LangAS::opencl_global_host;
609613
case ParsedAttr::AT_OpenCLLocalAddressSpace:
610614
return LangAS::opencl_local;
611615
case ParsedAttr::AT_OpenCLPrivateAddressSpace:

clang/lib/AST/ASTContext.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -919,18 +919,20 @@ static const LangASMap *getAddressSpaceMap(const TargetInfo &T,
919919
// The fake address space map must have a distinct entry for each
920920
// language-specific address space.
921921
static const unsigned FakeAddrSpaceMap[] = {
922-
0, // Default
923-
1, // opencl_global
924-
3, // opencl_local
925-
2, // opencl_constant
926-
0, // opencl_private
927-
4, // opencl_generic
928-
5, // cuda_device
929-
6, // cuda_constant
930-
7, // cuda_shared
931-
8, // ptr32_sptr
932-
9, // ptr32_uptr
933-
10 // ptr64
922+
0, // Default
923+
1, // opencl_global
924+
3, // opencl_local
925+
2, // opencl_constant
926+
0, // opencl_private
927+
4, // opencl_generic
928+
5, // opencl_global_device
929+
6, // opencl_global_host
930+
7, // cuda_device
931+
8, // cuda_constant
932+
9, // cuda_shared
933+
10, // ptr32_sptr
934+
11, // ptr32_uptr
935+
12 // ptr64
934936
};
935937
return &FakeAddrSpaceMap;
936938
} else {

clang/lib/AST/ItaniumMangle.cpp

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2388,16 +2388,39 @@ void CXXNameMangler::mangleQualifiers(Qualifiers Quals, const DependentAddressSp
23882388
switch (AS) {
23892389
default: llvm_unreachable("Not a language specific address space");
23902390
// <OpenCL-addrspace> ::= "CL" [ "global" | "local" | "constant" |
2391-
// "private"| "generic" ]
2392-
case LangAS::opencl_global: ASString = "CLglobal"; break;
2393-
case LangAS::opencl_local: ASString = "CLlocal"; break;
2394-
case LangAS::opencl_constant: ASString = "CLconstant"; break;
2395-
case LangAS::opencl_private: ASString = "CLprivate"; break;
2396-
case LangAS::opencl_generic: ASString = "CLgeneric"; break;
2391+
// "private"| "generic" | "device" |
2392+
// "host" ]
2393+
case LangAS::opencl_global:
2394+
ASString = "CLglobal";
2395+
break;
2396+
case LangAS::opencl_global_device:
2397+
ASString = "CLdevice";
2398+
break;
2399+
case LangAS::opencl_global_host:
2400+
ASString = "CLhost";
2401+
break;
2402+
case LangAS::opencl_local:
2403+
ASString = "CLlocal";
2404+
break;
2405+
case LangAS::opencl_constant:
2406+
ASString = "CLconstant";
2407+
break;
2408+
case LangAS::opencl_private:
2409+
ASString = "CLprivate";
2410+
break;
2411+
case LangAS::opencl_generic:
2412+
ASString = "CLgeneric";
2413+
break;
23972414
// <CUDA-addrspace> ::= "CU" [ "device" | "constant" | "shared" ]
2398-
case LangAS::cuda_device: ASString = "CUdevice"; break;
2399-
case LangAS::cuda_constant: ASString = "CUconstant"; break;
2400-
case LangAS::cuda_shared: ASString = "CUshared"; break;
2415+
case LangAS::cuda_device:
2416+
ASString = "CUdevice";
2417+
break;
2418+
case LangAS::cuda_constant:
2419+
ASString = "CUconstant";
2420+
break;
2421+
case LangAS::cuda_shared:
2422+
ASString = "CUshared";
2423+
break;
24012424
// <ptrsize-addrspace> ::= [ "ptr32_sptr" | "ptr32_uptr" | "ptr64" ]
24022425
case LangAS::ptr32_sptr:
24032426
ASString = "ptr32_sptr";

clang/lib/AST/MicrosoftMangle.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1798,7 +1798,7 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
17981798
// where:
17991799
// <language_addr_space> ::= <OpenCL-addrspace> | <CUDA-addrspace>
18001800
// <OpenCL-addrspace> ::= "CL" [ "global" | "local" | "constant" |
1801-
// "private"| "generic" ]
1801+
// "private"| "generic" | "device" | "host" ]
18021802
// <CUDA-addrspace> ::= "CU" [ "device" | "constant" | "shared" ]
18031803
// Note that the above were chosen to match the Itanium mangling for this.
18041804
//
@@ -1823,6 +1823,12 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
18231823
case LangAS::opencl_global:
18241824
Extra.mangleSourceName("_ASCLglobal");
18251825
break;
1826+
case LangAS::opencl_global_device:
1827+
Extra.mangleSourceName("_ASCLdevice");
1828+
break;
1829+
case LangAS::opencl_global_host:
1830+
Extra.mangleSourceName("_ASCLhost");
1831+
break;
18261832
case LangAS::opencl_local:
18271833
Extra.mangleSourceName("_ASCLlocal");
18281834
break;

clang/lib/AST/TypePrinter.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1564,6 +1564,8 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
15641564

15651565
case attr::OpenCLPrivateAddressSpace:
15661566
case attr::OpenCLGlobalAddressSpace:
1567+
case attr::OpenCLGlobalDeviceAddressSpace:
1568+
case attr::OpenCLGlobalHostAddressSpace:
15671569
case attr::OpenCLLocalAddressSpace:
15681570
case attr::OpenCLConstantAddressSpace:
15691571
case attr::OpenCLGenericAddressSpace:
@@ -1866,6 +1868,10 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) {
18661868
return "__constant";
18671869
case LangAS::opencl_generic:
18681870
return "__generic";
1871+
case LangAS::opencl_global_device:
1872+
return "__global_device";
1873+
case LangAS::opencl_global_host:
1874+
return "__global_host";
18691875
case LangAS::cuda_device:
18701876
return "__device__";
18711877
case LangAS::cuda_constant:

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
4646
Constant, // opencl_constant
4747
Private, // opencl_private
4848
Generic, // opencl_generic
49+
Global, // opencl_global_device
50+
Global, // opencl_global_host
4951
Global, // cuda_device
5052
Constant, // cuda_constant
5153
Local, // cuda_shared
@@ -61,6 +63,8 @@ const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
6163
Constant, // opencl_constant
6264
Private, // opencl_private
6365
Generic, // opencl_generic
66+
Global, // opencl_global_device
67+
Global, // opencl_global_host
6468
Global, // cuda_device
6569
Constant, // cuda_constant
6670
Local, // cuda_shared

clang/lib/Basic/Targets/NVPTX.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ static const unsigned NVPTXAddrSpaceMap[] = {
3030
0, // opencl_private
3131
// FIXME: generic has to be added to the target
3232
0, // opencl_generic
33+
1, // opencl_global_device
34+
1, // opencl_global_host
3335
1, // cuda_device
3436
4, // cuda_constant
3537
3, // cuda_shared

clang/lib/Basic/Targets/SPIR.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ static const unsigned SPIRAddrSpaceMap[] = {
2828
2, // opencl_constant
2929
0, // opencl_private
3030
4, // opencl_generic
31+
5, // opencl_global_device
32+
6, // opencl_global_host
3133
0, // cuda_device
3234
0, // cuda_constant
3335
0, // cuda_shared

clang/lib/Basic/Targets/TCE.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ static const unsigned TCEOpenCLAddrSpaceMap[] = {
3535
4, // opencl_local
3636
5, // opencl_constant
3737
0, // opencl_private
38+
1, // opencl_global_device
39+
1, // opencl_global_host
3840
// FIXME: generic has to be added to the target
3941
0, // opencl_generic
4042
0, // cuda_device

clang/lib/Basic/Targets/X86.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ static const unsigned X86AddrSpaceMap[] = {
3030
0, // opencl_constant
3131
0, // opencl_private
3232
0, // opencl_generic
33+
0, // opencl_global_device
34+
0, // opencl_global_host
3335
0, // cuda_device
3436
0, // cuda_constant
3537
0, // cuda_shared

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1324,10 +1324,18 @@ static void removeImageAccessQualifier(std::string& TyName) {
13241324
// (basically all single AS CPUs).
13251325
static unsigned ArgInfoAddressSpace(LangAS AS) {
13261326
switch (AS) {
1327-
case LangAS::opencl_global: return 1;
1328-
case LangAS::opencl_constant: return 2;
1329-
case LangAS::opencl_local: return 3;
1330-
case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs.
1327+
case LangAS::opencl_global:
1328+
return 1;
1329+
case LangAS::opencl_constant:
1330+
return 2;
1331+
case LangAS::opencl_local:
1332+
return 3;
1333+
case LangAS::opencl_generic:
1334+
return 4; // Not in SPIR 2.0 specs.
1335+
case LangAS::opencl_global_device:
1336+
return 5;
1337+
case LangAS::opencl_global_host:
1338+
return 6;
13311339
default:
13321340
return 0; // Assume private.
13331341
}
@@ -3792,6 +3800,8 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) {
37923800
if (LangOpts.OpenCL) {
37933801
AddrSpace = D ? D->getType().getAddressSpace() : LangAS::opencl_global;
37943802
assert(AddrSpace == LangAS::opencl_global ||
3803+
AddrSpace == LangAS::opencl_global_device ||
3804+
AddrSpace == LangAS::opencl_global_host ||
37953805
AddrSpace == LangAS::opencl_constant ||
37963806
AddrSpace == LangAS::opencl_local ||
37973807
AddrSpace >= LangAS::FirstTargetAddressSpace);

clang/lib/Sema/SemaType.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7968,6 +7968,8 @@ static bool isAddressSpaceKind(const ParsedAttr &attr) {
79687968
return attrKind == ParsedAttr::AT_AddressSpace ||
79697969
attrKind == ParsedAttr::AT_OpenCLPrivateAddressSpace ||
79707970
attrKind == ParsedAttr::AT_OpenCLGlobalAddressSpace ||
7971+
attrKind == ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace ||
7972+
attrKind == ParsedAttr::AT_OpenCLGlobalHostAddressSpace ||
79717973
attrKind == ParsedAttr::AT_OpenCLLocalAddressSpace ||
79727974
attrKind == ParsedAttr::AT_OpenCLConstantAddressSpace ||
79737975
attrKind == ParsedAttr::AT_OpenCLGenericAddressSpace;
@@ -8048,6 +8050,8 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
80488050
break;
80498051
case ParsedAttr::AT_OpenCLPrivateAddressSpace:
80508052
case ParsedAttr::AT_OpenCLGlobalAddressSpace:
8053+
case ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace:
8054+
case ParsedAttr::AT_OpenCLGlobalHostAddressSpace:
80518055
case ParsedAttr::AT_OpenCLLocalAddressSpace:
80528056
case ParsedAttr::AT_OpenCLConstantAddressSpace:
80538057
case ParsedAttr::AT_OpenCLGenericAddressSpace:

clang/test/AST/language_address_space_attribute.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,18 @@ void langas() {
1717
// CHECK: VarDecl {{.*}} z_global '__global int *'
1818
[[clang::opencl_global]] int *z_global;
1919

20+
// CHECK: VarDecl {{.*}} x_global_device '__global_device int *'
21+
__attribute__((opencl_global_device)) int *x_global_device;
22+
23+
// CHECK: VarDecl {{.*}} z_global_device '__global_device int *'
24+
[[clang::opencl_global_device]] int *z_global_device;
25+
26+
// CHECK: VarDecl {{.*}} x_global_host '__global_host int *'
27+
__attribute__((opencl_global_host)) int *x_global_host;
28+
29+
// CHECK: VarDecl {{.*}} z_global_host '__global_host int *'
30+
[[clang::opencl_global_host]] int *z_global_host;
31+
2032
// CHECK: VarDecl {{.*}} x_local '__local int *'
2133
__attribute__((opencl_local)) int *x_local;
2234

clang/test/CodeGenCXX/mangle-address-space.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ void ocl_f0(char __private *p) { }
4343

4444
struct ocl_OpaqueType;
4545
typedef ocl_OpaqueType __global * ocl_OpaqueTypePtr;
46+
typedef ocl_OpaqueType __attribute__((opencl_global_host)) * ocl_OpaqueTypePtrH;
47+
typedef ocl_OpaqueType
48+
__attribute__((opencl_global_device)) *
49+
ocl_OpaqueTypePtrD;
4650

4751
// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f0PU8CLglobal14ocl_OpaqueType
4852
// WINOCL-LABEL: define {{.*}}void @"?ocl_f0@@YAXPEAU?$_ASCLglobal@$$CAUocl_OpaqueType@@@__clang@@@Z"
@@ -61,4 +65,12 @@ __constant float *ocl_f1(char __generic const *p) { return 0;}
6165
// CHECKOCL-LABEL: define {{.*}}float* @_Z6ocl_f2PU9CLgenericKc
6266
// WINOCL-LABEL: define {{.*}}float* @"?ocl_f2@@YAPEAU?$_ASCLgeneric@$$CAM@__clang@@QEAU?$_ASCLgeneric@$$CBD@2@@Z"
6367
__generic float *ocl_f2(__generic char const * const p) { return 0;}
68+
69+
// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f3PU6CLhost14ocl_OpaqueType
70+
// WINOCL-LABEL: define {{.*}}void @"?ocl_f3@@YAXPEAU?$_ASCLhost@$$CAUocl_OpaqueType@@@__clang@@@Z"
71+
void ocl_f3(ocl_OpaqueTypePtrH) {}
72+
73+
// CHECKOCL-LABEL: define {{.*}}void @_Z6ocl_f4PU8CLdevice14ocl_OpaqueType
74+
// WINOCL-LABEL: define {{.*}}void @"?ocl_f4@@YAXPEAU?$_ASCLdevice@$$CAUocl_OpaqueType@@@__clang@@@Z"
75+
void ocl_f4(ocl_OpaqueTypePtrD) {}
6476
#endif

clang/test/CodeGenOpenCL/address-spaces-conversions.cl

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
// pointers to different address spaces
77

88
// CHECK: define void @test
9-
void test(global int *arg_glob, generic int *arg_gen) {
9+
void test(global int *arg_glob, generic int *arg_gen,
10+
__attribute__((opencl_global_device)) int *arg_device,
11+
__attribute__((opencl_global_host)) int *arg_host) {
1012
int var_priv;
1113
arg_gen = arg_glob; // implicit cast global -> generic
1214
// CHECK: %{{[0-9]+}} = addrspacecast i32 addrspace(1)* %{{[0-9]+}} to i32 addrspace(4)*
@@ -39,6 +41,30 @@ void test(global int *arg_glob, generic int *arg_gen) {
3941
// CHECK-NOT: bitcast
4042
// CHECK-NOFAKE: bitcast
4143
// CHECK-NOFAKE-NOT: addrspacecast
44+
45+
arg_glob = arg_device; // implicit cast
46+
// CHECK: addrspacecast
47+
// CHECK-NOFAKE-NOT: addrspacecast
48+
49+
arg_glob = arg_host; // implicit cast
50+
// CHECK: addrspacecast
51+
// CHECK-NOFAKE-NOT: addrspacecast
52+
53+
arg_glob = (global int *)arg_device; // explicit cast
54+
// CHECK: addrspacecast
55+
// CHECK-NOFAKE-NOT: addrspacecast
56+
57+
arg_glob = (global int *)arg_host; // explicit cast
58+
// CHECK: addrspacecast
59+
// CHECK-NOFAKE-NOT: addrspacecast
60+
61+
arg_device = (__attribute((opencl_global_device)) int *)arg_glob; // explicit cast
62+
// CHECK: addrspacecast
63+
// CHECK-NOFAKE-NOT: addrspacecast
64+
65+
arg_host = (__attribute((opencl_global_host)) int *)arg_glob; // explicit cast
66+
// CHECK: addrspacecast
67+
// CHECK-NOFAKE-NOT: addrspacecast
4268
}
4369

4470
// Test ternary operator.

0 commit comments

Comments
 (0)