Skip to content

Commit 54e3c5e

Browse files
committed
manually mangled __spirv_ocl_frexp, __spirv_ocl_modf, __spirv_ocl_sincos for amd backend
1 parent 34d64cf commit 54e3c5e

File tree

5 files changed

+346
-206
lines changed

5 files changed

+346
-206
lines changed

libclc/amdgcn-amdhsa/libspirv/math/frexp.cl

Lines changed: 79 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,81 +6,104 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include "mangle_common.cl"
910
#include <spirv/spirv.h>
1011
#include <utils.h>
1112

1213
double __ocml_frexp_f64(double, int *);
1314
float __ocml_frexp_f32(float, int *);
1415

15-
_CLC_OVERLOAD _CLC_DEF float __clc_spirv_ocl_frexp(float x, private int *ep) {
16-
return __ocml_frexp_f32(x, ep);
17-
}
16+
// Builds the manually mangled symbol name of one __spirv_ocl_frexp overload:
// the fixed prefix _Z17__spirv_ocl_frexp followed by the mangled encoding of
// the first argument (IN) and of the pointer argument (OUT), e.g.
// FUNCNAME(f, Pi) as used further down in this file.
// NOTE(review): __CLC_XCONCAT is defined outside this file; presumably it
// expands its arguments before token-pasting them -- confirm.
#define FUNCNAME(IN, OUT) \
17+
__CLC_XCONCAT(__CLC_XCONCAT(_Z17__spirv_ocl_frexp, IN), OUT)
18+
// Pastes together the token Dv<N>_<T>, i.e. the mangled spelling used here for
// an N-element vector whose element type has the mangled code T
// (VEC_TYPE(f, 2) -> Dv2_f).
#define VEC_TYPE(T, N) __CLC_XCONCAT(__CLC_XCONCAT(__CLC_XCONCAT(Dv, N), _), T)
19+
// Mangled name of the N-wide vector overload. The first argument is encoded as
// VEC_TYPE(MANGLED_IN_TYPE, N); the second as MANGLED_PTR (the pointer /
// address-space prefix such as P or PU3AS1) pasted in front of
// VEC_TYPE(MANGLED_OUT_TYPE, N).
#define VEC_FUNCNAME(N, MANGLED_IN_TYPE, MANGLED_PTR, MANGLED_OUT_TYPE) \
20+
FUNCNAME(VEC_TYPE(MANGLED_IN_TYPE, N), \
21+
__CLC_XCONCAT(MANGLED_PTR, VEC_TYPE(MANGLED_OUT_TYPE, N)))
22+
23+
// Defines one scalar frexp entry point under the manually mangled name
// FUNCNAME(MANGLED_ARG1_TYPE, MANGLED_ARG2_TYPE).
//   ADDRSPACE          numeric address space applied to the int output pointer
//   BUILTIN            function called as BUILTIN(x, int *) to do the work
//   ARG1_TYPE          type of x and of the returned value
//   MANGLED_ARG1_TYPE  mangled code of ARG1_TYPE
//   MANGLED_ARG2_TYPE  mangled code of the pointer parameter
// BUILTIN writes its int result into a function-local temporary, which is then
// stored through ptr; BUILTIN therefore never receives ptr itself.
// NOTE(review): presumably this indirection exists because the builtins
// declared at the top of the file take an unqualified int * -- confirm.
#define MANUALLY_MANGLED_FREXP_IMPL(ADDRSPACE, BUILTIN, ARG1_TYPE, \
24+
MANGLED_ARG1_TYPE, MANGLED_ARG2_TYPE) \
25+
_CLC_DEF ARG1_TYPE FUNCNAME(MANGLED_ARG1_TYPE, MANGLED_ARG2_TYPE)( \
26+
ARG1_TYPE x, __attribute((address_space(ADDRSPACE))) int *ptr) { \
27+
int stack_iptr; \
28+
ARG1_TYPE ret = BUILTIN(x, &stack_iptr); \
29+
*ptr = stack_iptr; \
30+
return ret; \
31+
}
32+
33+
// Emits the four scalar frexp definitions for one floating-point type, one per
// output-pointer address space: 0 (mangled Pi), 1 (PU3AS1i), 3 (PU3AS3i) and
// 5 (PU3AS5i). The FNAME_GENERIC / FNAME_GLOBAL / FNAME_LOCAL / FNAME_PRIVATE
// helpers later in this file are paired with these same four numbers, in that
// order.
#define __CLC_FREXP(BUILTIN, ARG_TYPE, MANGLED_ARG1_TYPE) \
34+
MANUALLY_MANGLED_FREXP_IMPL(0, BUILTIN, ARG_TYPE, MANGLED_ARG1_TYPE, Pi) \
35+
MANUALLY_MANGLED_FREXP_IMPL(1, BUILTIN, ARG_TYPE, MANGLED_ARG1_TYPE, \
36+
PU3AS1i) \
37+
MANUALLY_MANGLED_FREXP_IMPL(3, BUILTIN, ARG_TYPE, MANGLED_ARG1_TYPE, \
38+
PU3AS3i) \
39+
MANUALLY_MANGLED_FREXP_IMPL(5, BUILTIN, ARG_TYPE, MANGLED_ARG1_TYPE, PU3AS5i)
40+
41+
// float overloads. The FNAME_* helpers give the mangled name of the N-wide
// float vector variant for each output-pointer encoding (P, PU3AS1, PU3AS3,
// PU3AS5). __CLC_FREXP emits the scalar definitions on top of
// __ocml_frexp_f32, then MANUALLY_MANGLED_V_V_VP_VECTORIZE emits the vector
// definitions that forward to the scalar function of the matching address
// space. The helpers are #undef'd afterwards so the next type can redefine
// them.
#define FNAME_GENERIC(N) VEC_FUNCNAME(N, f, P, i)
42+
#define FNAME_GLOBAL(N) VEC_FUNCNAME(N, f, PU3AS1, i)
43+
#define FNAME_LOCAL(N) VEC_FUNCNAME(N, f, PU3AS3, i)
44+
#define FNAME_PRIVATE(N) VEC_FUNCNAME(N, f, PU3AS5, i)
45+
46+
__CLC_FREXP(__ocml_frexp_f32, float, f)
47+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(f, Pi), FNAME_GENERIC, float, 0, int)
48+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(f, PU3AS1i), FNAME_GLOBAL, float, 1,
49+
int)
50+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(f, PU3AS3i), FNAME_LOCAL, float, 3,
51+
int)
52+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(f, PU3AS5i), FNAME_PRIVATE, float, 5,
53+
int)
54+
55+
#undef FNAME_GENERIC
56+
#undef FNAME_GLOBAL
57+
#undef FNAME_LOCAL
58+
#undef FNAME_PRIVATE
1859

1960
#ifdef cl_khr_fp64
2061

2162
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
2263

23-
_CLC_OVERLOAD _CLC_DEF double __clc_spirv_ocl_frexp(double x, private int *ep) {
24-
return __ocml_frexp_f64(x, ep);
25-
}
64+
// double overloads (this section sits inside the cl_khr_fp64 guard). Same
// layout as the float section: FNAME_* name the N-wide double vector variants
// per output-pointer encoding, __CLC_FREXP emits the scalar definitions on top
// of __ocml_frexp_f64, and MANUALLY_MANGLED_V_V_VP_VECTORIZE emits the vector
// definitions for address spaces 0, 1, 3 and 5.
#define FNAME_GENERIC(N) VEC_FUNCNAME(N, d, P, i)
65+
#define FNAME_GLOBAL(N) VEC_FUNCNAME(N, d, PU3AS1, i)
66+
#define FNAME_LOCAL(N) VEC_FUNCNAME(N, d, PU3AS3, i)
67+
#define FNAME_PRIVATE(N) VEC_FUNCNAME(N, d, PU3AS5, i)
68+
69+
__CLC_FREXP(__ocml_frexp_f64, double, d)
70+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(d, Pi), FNAME_GENERIC, double, 0,
71+
int)
72+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(d, PU3AS1i), FNAME_GLOBAL, double, 1,
73+
int)
74+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(d, PU3AS3i), FNAME_LOCAL, double, 3,
75+
int)
76+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(d, PU3AS5i), FNAME_PRIVATE, double,
77+
5, int)
78+
79+
#undef FNAME_GENERIC
80+
#undef FNAME_GLOBAL
81+
#undef FNAME_LOCAL
82+
#undef FNAME_PRIVATE
2683

2784
#endif
2885

2986
#ifdef cl_khr_fp16
3087

3188
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
3289

33-
_CLC_OVERLOAD _CLC_DEF half __clc_spirv_ocl_frexp(half x, private int *ep) {
34-
float t = x;
35-
return __ocml_frexp_f32(t, ep);
36-
}
90+
// half overloads (this section sits inside the cl_khr_fp16 guard): mangled
// names of the N-wide half vector variants, one per output-pointer encoding.
// Dh is the mangled element code passed for half.
#define FNAME_GENERIC(N) VEC_FUNCNAME(N, Dh, P, i)
91+
#define FNAME_GLOBAL(N) VEC_FUNCNAME(N, Dh, PU3AS1, i)
92+
#define FNAME_LOCAL(N) VEC_FUNCNAME(N, Dh, PU3AS3, i)
93+
#define FNAME_PRIVATE(N) VEC_FUNCNAME(N, Dh, PU3AS5, i)
3794

38-
#endif
39-
40-
#define __CLC_ADDRESS_SPACE private
41-
#define __CLC_GENTYPE float
42-
#include <frexp.inc>
43-
#undef __CLC_GENTYPE
44-
#ifdef cl_khr_fp64
45-
#define __CLC_GENTYPE double
46-
#include <frexp.inc>
47-
#undef __CLC_GENTYPE
48-
#endif
49-
#ifdef cl_khr_fp16
50-
#define __CLC_GENTYPE half
51-
#include <frexp.inc>
52-
#undef __CLC_GENTYPE
53-
#endif
54-
#undef __CLC_ADDRESS_SPACE
95+
// Scalar and vector half definitions. The scalar half variants call the
// single-precision builtin __ocml_frexp_f32: the half argument is passed where
// a float is expected and the float result is stored back into a half, both
// via implicit conversion. The vector variants forward to the scalar function
// of the matching address space (0, 1, 3, 5).
__CLC_FREXP(__ocml_frexp_f32, half, Dh)
96+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(Dh, Pi), FNAME_GENERIC, half, 0, int)
97+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(Dh, PU3AS1i), FNAME_GLOBAL, half, 1,
98+
int)
99+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(Dh, PU3AS3i), FNAME_LOCAL, half, 3,
100+
int)
101+
MANUALLY_MANGLED_V_V_VP_VECTORIZE(FUNCNAME(Dh, PU3AS5i), FNAME_PRIVATE, half, 5,
102+
int)
55103

56-
#define __CLC_ADDRESS_SPACE global
57-
#define __CLC_GENTYPE float
58-
#include <frexp.inc>
59-
#undef __CLC_GENTYPE
60-
#ifdef cl_khr_fp64
61-
#define __CLC_GENTYPE double
62-
#include <frexp.inc>
63-
#undef __CLC_GENTYPE
64-
#endif
65-
#ifdef cl_khr_fp16
66-
#define __CLC_GENTYPE half
67-
#include <frexp.inc>
68-
#undef __CLC_GENTYPE
69-
#endif
70-
#undef __CLC_ADDRESS_SPACE
104+
// Drop the per-type name helpers once the half instantiations are done.
#undef FNAME_GENERIC
105+
#undef FNAME_GLOBAL
106+
#undef FNAME_LOCAL
107+
#undef FNAME_PRIVATE
71108

72-
#define __CLC_ADDRESS_SPACE local
73-
#define __CLC_GENTYPE float
74-
#include <frexp.inc>
75-
#undef __CLC_GENTYPE
76-
#ifdef cl_khr_fp64
77-
#define __CLC_GENTYPE double
78-
#include <frexp.inc>
79-
#undef __CLC_GENTYPE
80-
#endif
81-
#ifdef cl_khr_fp16
82-
#define __CLC_GENTYPE half
83-
#include <frexp.inc>
84-
#undef __CLC_GENTYPE
85109
#endif
86-
#undef __CLC_ADDRESS_SPACE

libclc/amdgcn-amdhsa/libspirv/math/frexp.inc

Lines changed: 0 additions & 64 deletions
This file was deleted.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#ifndef __MANGLE_COMMON
2+
#define __MANGLE_COMMON
3+
4+
// Defines the 2-, 3-, 4-, 8- and 16-wide vector versions of a function that
// takes a value and an output pointer and returns a value.
//   SCALAR_NAME     scalar function called per component by widths 2 and 3
//   FUNCTION_MACRO  macro mapping a width N to the name of the N-wide function
//   ARG1_TYPE       element type of x and of the returned vector
//   ADDR_SPACE      numeric address space of the output pointer y
//   ARG2_TYPE       element type of the vector y points to
// Widths 2 and 3 call SCALAR_NAME once per component of x, passing y
// reinterpreted as a pointer to ARG2_TYPE and advanced by the component index,
// so the outputs land in consecutive scalar slots starting at y.
// Widths 4, 8 and 16 split x into x.lo / x.hi and call the half-width
// function on each half; the pointer for the high half is y advanced by N/2
// scalar elements (2, 4 and 8 respectively) before being cast back to the
// half-width vector pointer type.
#define MANUALLY_MANGLED_V_V_VP_VECTORIZE(SCALAR_NAME, FUNCTION_MACRO, \
5+
ARG1_TYPE, ADDR_SPACE, ARG2_TYPE) \
6+
_CLC_DEF ARG1_TYPE##2 FUNCTION_MACRO(2)( \
7+
ARG1_TYPE##2 x, \
8+
__attribute__((address_space(ADDR_SPACE))) ARG2_TYPE##2 * y) { \
9+
return (ARG1_TYPE##2)( \
10+
SCALAR_NAME( \
11+
x.x, (__attribute__((address_space(ADDR_SPACE))) ARG2_TYPE *)y), \
12+
SCALAR_NAME( \
13+
x.y, \
14+
(__attribute__((address_space(ADDR_SPACE))) ARG2_TYPE *)y + 1)); \
15+
} \
16+
\
17+
_CLC_DEF ARG1_TYPE##3 FUNCTION_MACRO(3)( \
18+
ARG1_TYPE##3 x, \
19+
__attribute__((address_space(ADDR_SPACE))) ARG2_TYPE##3 * y) { \
20+
return (ARG1_TYPE##3)( \
21+
SCALAR_NAME( \
22+
x.x, (__attribute__((address_space(ADDR_SPACE))) ARG2_TYPE *)y), \
23+
SCALAR_NAME( \
24+
x.y, \
25+
(__attribute__((address_space(ADDR_SPACE))) ARG2_TYPE *)y + 1), \
26+
SCALAR_NAME( \
27+
x.z, \
28+
(__attribute__((address_space(ADDR_SPACE))) ARG2_TYPE *)y + 2)); \
29+
} \
30+
\
31+
_CLC_DEF ARG1_TYPE##4 FUNCTION_MACRO(4)( \
32+
ARG1_TYPE##4 x, \
33+
__attribute__((address_space(ADDR_SPACE))) ARG2_TYPE##4 * y) { \
34+
return (ARG1_TYPE##4)( \
35+
FUNCTION_MACRO(2)(x.lo, (__attribute__((address_space(ADDR_SPACE))) \
36+
ARG2_TYPE##2 *)y), \
37+
FUNCTION_MACRO(2)( \
38+
x.hi, (__attribute__((address_space(ADDR_SPACE))) \
39+
ARG2_TYPE##2 *)((__attribute__((address_space(ADDR_SPACE))) \
40+
ARG2_TYPE *)y + \
41+
2))); \
42+
} \
43+
\
44+
_CLC_DEF ARG1_TYPE##8 FUNCTION_MACRO(8)( \
45+
ARG1_TYPE##8 x, \
46+
__attribute__((address_space(ADDR_SPACE))) ARG2_TYPE##8 * y) { \
47+
return (ARG1_TYPE##8)( \
48+
FUNCTION_MACRO(4)(x.lo, (__attribute__((address_space(ADDR_SPACE))) \
49+
ARG2_TYPE##4 *)y), \
50+
FUNCTION_MACRO(4)( \
51+
x.hi, (__attribute__((address_space(ADDR_SPACE))) \
52+
ARG2_TYPE##4 *)((__attribute__((address_space(ADDR_SPACE))) \
53+
ARG2_TYPE *)y + \
54+
4))); \
55+
} \
56+
\
57+
_CLC_DEF ARG1_TYPE##16 FUNCTION_MACRO(16)( \
58+
ARG1_TYPE##16 x, \
59+
__attribute__((address_space(ADDR_SPACE))) ARG2_TYPE##16 * y) { \
60+
return (ARG1_TYPE##16)( \
61+
FUNCTION_MACRO(8)(x.lo, (__attribute__((address_space(ADDR_SPACE))) \
62+
ARG2_TYPE##8 *)y), \
63+
FUNCTION_MACRO(8)( \
64+
x.hi, (__attribute__((address_space(ADDR_SPACE))) \
65+
ARG2_TYPE##8 *)((__attribute__((address_space(ADDR_SPACE))) \
66+
ARG2_TYPE *)y + \
67+
8))); \
68+
}
69+
70+
#endif // !__MANGLE_COMMON

0 commit comments

Comments
 (0)