Skip to content

Commit 3d21fa5

Browse files
fooishbartstellar
authored andcommitted
libclc: Make all built-ins overloadable
The SPIR spec states that all OpenCL built-in functions should be overloadable and mangled, to ensure consistency. Add the overload attribute to functions which were missing them: work dimensions, memory barriers and fences, and events. Reviewed By: tstellar, jenatali Differential Revision: https://reviews.llvm.org/D82078
1 parent 3a7051d commit 3d21fa5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+191
-173
lines changed

libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@
1515
CONST_AS uchar * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
1616
#endif
1717

18-
_CLC_DEF size_t get_global_size(uint dim)
19-
{
20-
CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
21-
if (dim < 3)
22-
return ptr[3 + dim];
23-
return 1;
18+
_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
19+
CONST_AS uint *ptr = (CONST_AS uint *)__dispatch_ptr();
20+
if (dim < 3)
21+
return ptr[3 + dim];
22+
return 1;
2423
}

libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,15 @@
1515
CONST_AS char * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
1616
#endif
1717

18-
_CLC_DEF size_t get_local_size(uint dim)
19-
{
20-
CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
21-
switch (dim) {
22-
case 0:
23-
return ptr[1] & 0xffffu;
24-
case 1:
25-
return ptr[1] >> 16;
26-
case 2:
27-
return ptr[2] & 0xffffu;
28-
}
29-
return 1;
18+
_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
19+
CONST_AS uint *ptr = (CONST_AS uint *)__dispatch_ptr();
20+
switch (dim) {
21+
case 0:
22+
return ptr[1] & 0xffffu;
23+
case 1:
24+
return ptr[1] >> 16;
25+
case 2:
26+
return ptr[2] & 0xffffu;
27+
}
28+
return 1;
3029
}

libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
#include <clc/clc.h>
33

4-
_CLC_DEF size_t get_num_groups(uint dim) {
4+
_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
55
size_t global_size = get_global_size(dim);
66
size_t local_size = get_local_size(dim);
77
size_t num_groups = global_size / local_size;

libclc/amdgcn/lib/mem_fence/fence.cl

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,21 @@ void __clc_amdgcn_s_waitcnt(unsigned flags);
1717
_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
1818
#endif
1919

20-
_CLC_DEF void mem_fence(cl_mem_fence_flags flags)
21-
{
22-
if (flags & CLK_GLOBAL_MEM_FENCE) {
23-
// scalar loads are counted with LGKM but we don't know whether
24-
// the compiler turned any loads to scalar
25-
__waitcnt(0);
26-
} else if (flags & CLK_LOCAL_MEM_FENCE)
27-
__waitcnt(0xff); // LGKM is [12:8]
20+
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
21+
if (flags & CLK_GLOBAL_MEM_FENCE) {
22+
// scalar loads are counted with LGKM but we don't know whether
23+
// the compiler turned any loads to scalar
24+
__waitcnt(0);
25+
} else if (flags & CLK_LOCAL_MEM_FENCE)
26+
__waitcnt(0xff); // LGKM is [12:8]
2827
}
2928
#undef __waitcnt
3029

3130
// We don't have separate mechanism for read and write fences
32-
_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags)
33-
{
34-
mem_fence(flags);
31+
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
32+
mem_fence(flags);
3533
}
3634

37-
_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags)
38-
{
39-
mem_fence(flags);
35+
_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
36+
mem_fence(flags);
4037
}
Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF void barrier(cl_mem_fence_flags flags)
4-
{
5-
mem_fence(flags);
6-
__builtin_amdgcn_s_barrier();
3+
_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
4+
mem_fence(flags);
5+
__builtin_amdgcn_s_barrier();
76
}

libclc/amdgcn/lib/workitem/get_global_offset.cl

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,9 @@
88
#define CONST_AS __attribute__((address_space(2)))
99
#endif
1010

11-
_CLC_DEF size_t get_global_offset(uint dim)
12-
{
13-
CONST_AS uint * ptr =
14-
(CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
15-
if (dim < 3)
16-
return ptr[dim + 1];
17-
return 0;
11+
_CLC_DEF _CLC_OVERLOAD size_t get_global_offset(uint dim) {
12+
CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
13+
if (dim < 3)
14+
return ptr[dim + 1];
15+
return 0;
1816
}

libclc/amdgcn/lib/workitem/get_global_size.cl

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@ uint __clc_amdgcn_get_global_size_x(void) __asm("llvm.r600.read.global.size.x");
44
uint __clc_amdgcn_get_global_size_y(void) __asm("llvm.r600.read.global.size.y");
55
uint __clc_amdgcn_get_global_size_z(void) __asm("llvm.r600.read.global.size.z");
66

7-
_CLC_DEF size_t get_global_size(uint dim)
8-
{
9-
switch (dim) {
10-
case 0: return __clc_amdgcn_get_global_size_x();
11-
case 1: return __clc_amdgcn_get_global_size_y();
12-
case 2: return __clc_amdgcn_get_global_size_z();
13-
default: return 1;
14-
}
7+
_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
8+
switch (dim) {
9+
case 0:
10+
return __clc_amdgcn_get_global_size_x();
11+
case 1:
12+
return __clc_amdgcn_get_global_size_y();
13+
case 2:
14+
return __clc_amdgcn_get_global_size_z();
15+
default:
16+
return 1;
17+
}
1518
}
Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF size_t get_group_id(uint dim)
4-
{
5-
switch(dim) {
6-
case 0: return __builtin_amdgcn_workgroup_id_x();
7-
case 1: return __builtin_amdgcn_workgroup_id_y();
8-
case 2: return __builtin_amdgcn_workgroup_id_z();
9-
default: return 1;
10-
}
3+
_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) {
4+
switch (dim) {
5+
case 0:
6+
return __builtin_amdgcn_workgroup_id_x();
7+
case 1:
8+
return __builtin_amdgcn_workgroup_id_y();
9+
case 2:
10+
return __builtin_amdgcn_workgroup_id_z();
11+
default:
12+
return 1;
13+
}
1114
}
Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF size_t get_local_id(uint dim)
4-
{
5-
switch(dim) {
6-
case 0: return __builtin_amdgcn_workitem_id_x();
7-
case 1: return __builtin_amdgcn_workitem_id_y();
8-
case 2: return __builtin_amdgcn_workitem_id_z();
9-
default: return 1;
10-
}
3+
_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) {
4+
switch (dim) {
5+
case 0:
6+
return __builtin_amdgcn_workitem_id_x();
7+
case 1:
8+
return __builtin_amdgcn_workitem_id_y();
9+
case 2:
10+
return __builtin_amdgcn_workitem_id_z();
11+
default:
12+
return 1;
13+
}
1114
}

libclc/amdgcn/lib/workitem/get_local_size.cl

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@ uint __clc_amdgcn_get_local_size_x(void) __asm("llvm.r600.read.local.size.x");
44
uint __clc_amdgcn_get_local_size_y(void) __asm("llvm.r600.read.local.size.y");
55
uint __clc_amdgcn_get_local_size_z(void) __asm("llvm.r600.read.local.size.z");
66

7-
_CLC_DEF size_t get_local_size(uint dim)
8-
{
9-
switch (dim) {
10-
case 0: return __clc_amdgcn_get_local_size_x();
11-
case 1: return __clc_amdgcn_get_local_size_y();
12-
case 2: return __clc_amdgcn_get_local_size_z();
13-
default: return 1;
14-
}
7+
_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
8+
switch (dim) {
9+
case 0:
10+
return __clc_amdgcn_get_local_size_x();
11+
case 1:
12+
return __clc_amdgcn_get_local_size_y();
13+
case 2:
14+
return __clc_amdgcn_get_local_size_z();
15+
default:
16+
return 1;
17+
}
1518
}

libclc/amdgcn/lib/workitem/get_num_groups.cl

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@ uint __clc_amdgcn_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x");
44
uint __clc_amdgcn_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y");
55
uint __clc_amdgcn_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z");
66

7-
_CLC_DEF size_t get_num_groups(uint dim)
8-
{
9-
switch (dim) {
10-
case 0: return __clc_amdgcn_get_num_groups_x();
11-
case 1: return __clc_amdgcn_get_num_groups_y();
12-
case 2: return __clc_amdgcn_get_num_groups_z();
13-
default: return 1;
14-
}
7+
_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
8+
switch (dim) {
9+
case 0:
10+
return __clc_amdgcn_get_num_groups_x();
11+
case 1:
12+
return __clc_amdgcn_get_num_groups_y();
13+
case 2:
14+
return __clc_amdgcn_get_num_groups_z();
15+
default:
16+
return 1;
17+
}
1518
}

libclc/amdgcn/lib/workitem/get_work_dim.cl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@
88
#define CONST_AS __attribute__((address_space(2)))
99
#endif
1010

11-
_CLC_DEF uint get_work_dim(void)
12-
{
13-
CONST_AS uint * ptr =
14-
(CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
15-
return ptr[0];
11+
_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) {
12+
CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
13+
return ptr[0];
1614
}
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
void wait_group_events(int num_events, event_t *event_list);
1+
_CLC_DECL _CLC_OVERLOAD void wait_group_events(int num_events,
2+
event_t *event_list);
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
_CLC_DECL void mem_fence(cl_mem_fence_flags flags);
2-
_CLC_DECL void read_mem_fence(cl_mem_fence_flags flags);
3-
_CLC_DECL void write_mem_fence(cl_mem_fence_flags flags);
1+
_CLC_DECL _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags);
2+
_CLC_DECL _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags);
3+
_CLC_DECL _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
_CLC_DECL void barrier(cl_mem_fence_flags flags);
1+
_CLC_DECL _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
_CLC_DECL size_t get_global_id(uint dim);
1+
_CLC_DECL _CLC_OVERLOAD size_t get_global_id(uint dim);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
_CLC_DECL size_t get_global_offset(uint dim);
1+
_CLC_DECL _CLC_OVERLOAD size_t get_global_offset(uint dim);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
_CLC_DECL size_t get_global_size(uint dim);
1+
_CLC_DECL _CLC_OVERLOAD size_t get_global_size(uint dim);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
_CLC_DECL size_t get_group_id(uint dim);
1+
_CLC_DECL _CLC_OVERLOAD size_t get_group_id(uint dim);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
_CLC_DECL size_t get_local_id(uint dim);
1+
_CLC_DECL _CLC_OVERLOAD size_t get_local_id(uint dim);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
_CLC_DECL size_t get_local_size(uint dim);
1+
_CLC_DECL _CLC_OVERLOAD size_t get_local_size(uint dim);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
_CLC_DECL size_t get_num_groups(uint dim);
1+
_CLC_DECL _CLC_OVERLOAD size_t get_num_groups(uint dim);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
_CLC_DECL uint get_work_dim(void);
1+
_CLC_DECL _CLC_OVERLOAD uint get_work_dim(void);
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF void wait_group_events(int num_events, event_t *event_list) {
3+
_CLC_DEF _CLC_OVERLOAD void wait_group_events(int num_events,
4+
event_t *event_list) {
45
barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
56
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF size_t get_global_id(uint dim) {
3+
_CLC_DEF _CLC_OVERLOAD size_t get_global_id(uint dim) {
44
return get_group_id(dim) * get_local_size(dim) + get_local_id(dim) + get_global_offset(dim);
55
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF size_t get_global_size(uint dim) {
3+
_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
44
return get_num_groups(dim)*get_local_size(dim);
55
}
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF void mem_fence(cl_mem_fence_flags flags) {
4-
if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE))
5-
__nvvm_membar_cta();
3+
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
4+
if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE))
5+
__nvvm_membar_cta();
66
}
77

88
// We do not have separate mechanism for read and write fences.
9-
_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags) {
9+
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
1010
mem_fence(flags);
1111
}
1212

13-
_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags) {
13+
_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
1414
mem_fence(flags);
1515
}
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF void barrier(cl_mem_fence_flags flags) {
3+
_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
44
__syncthreads();
55
}
6-
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF size_t get_global_id(uint dim) {
3+
_CLC_DEF _CLC_OVERLOAD size_t get_global_id(uint dim) {
44
return get_group_id(dim) * get_local_size(dim) + get_local_id(dim);
55
}

libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF size_t get_group_id(uint dim) {
3+
_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) {
44
switch (dim) {
55
case 0: return __nvvm_read_ptx_sreg_ctaid_x();
66
case 1: return __nvvm_read_ptx_sreg_ctaid_y();

libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF size_t get_local_id(uint dim) {
3+
_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) {
44
switch (dim) {
55
case 0: return __nvvm_read_ptx_sreg_tid_x();
66
case 1: return __nvvm_read_ptx_sreg_tid_y();

libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF size_t get_local_size(uint dim) {
3+
_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
44
switch (dim) {
55
case 0: return __nvvm_read_ptx_sreg_ntid_x();
66
case 1: return __nvvm_read_ptx_sreg_ntid_y();

libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#include <clc/clc.h>
22

3-
_CLC_DEF size_t get_num_groups(uint dim) {
3+
_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
44
switch (dim) {
55
case 0: return __nvvm_read_ptx_sreg_nctaid_x();
66
case 1: return __nvvm_read_ptx_sreg_nctaid_y();

libclc/r600/lib/synchronization/barrier.cl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22

33
_CLC_DEF void __clc_r600_barrier(void) __asm("llvm.r600.group.barrier");
44

5-
_CLC_DEF void barrier(uint flags)
6-
{
5+
_CLC_DEF _CLC_OVERLOAD void barrier(uint flags) {
76
// We should call mem_fence here, but that is not implemented for r600 yet
87
__clc_r600_barrier();
98
}

0 commit comments

Comments
 (0)