2
2
#include <clc/integer/definitions.h>
3
3
#include <clc/internal/clc.h>
4
4
5
// __CLC_CONVERT_TY(X, TY): expands to __builtin_convertvector(X, TY), i.e. it
// forwards the value X and the destination type TY to the compiler builtin.
// NOTE(review): the space between the macro name and '(' in this listing looks
// like an extraction artifact; a function-like macro needs '(' directly after
// the name - confirm against the real file.
+ // TODO: Replace with __clc_convert_<type> when available
6
+ #define __CLC_CONVERT_TY (X , TY ) __builtin_convertvector(X, TY)
7
+
8
// __CLC_MUL_HI_VEC_IMPL(BGENTYPE, GENTYPE, GENSIZE): defines one overload of
// __clc_mul_hi taking and returning GENTYPE.
//
// The generated body:
//   1. converts both operands to BGENTYPE with __CLC_CONVERT_TY,
//   2. multiplies them in BGENTYPE,
//   3. shifts the product right by GENSIZE (cast to BGENTYPE),
//   4. converts the shifted value back to GENTYPE and returns it.
//
// Parameters:
//   BGENTYPE - type the multiplication is carried out in.
//   GENTYPE  - operand and result type of the generated overload.
//   GENSIZE  - right-shift amount applied to the product.
//
// NOTE(review): presumably BGENTYPE is the vector type whose elements are
// twice as wide as GENTYPE's and GENSIZE is the bit width of a GENTYPE
// element, so the shift keeps the high half of each product - confirm
// against the instantiations in __CLC_MUL_HI_TYPES.
+ #define __CLC_MUL_HI_VEC_IMPL (BGENTYPE , GENTYPE , GENSIZE ) \
9
+ _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \
10
+ BGENTYPE large_x = __CLC_CONVERT_TY(x, BGENTYPE); \
11
+ BGENTYPE large_y = __CLC_CONVERT_TY(y, BGENTYPE); \
12
+ BGENTYPE large_mul_hi = (large_x * large_y) >> (BGENTYPE)GENSIZE; \
13
+ return __CLC_CONVERT_TY(large_mul_hi, GENTYPE); \
14
+ }
15
+
5
16
// For all types EXCEPT long, which is implemented separately
6
17
#define __CLC_MUL_HI_IMPL (BGENTYPE , GENTYPE , GENSIZE ) \
7
18
_CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \
14
25
// (a+b) * (c+d) where a and c are the high-order parts of x and y respectively
15
26
// and b and d are the low-order parts of x and y.
16
27
// Thinking back to algebra, we use FOIL to do the work.
17
-
18
28
_CLC_OVERLOAD _CLC_DEF long __clc_mul_hi (long x , long y ) {
19
29
long f , o , i ;
20
30
ulong l ;
@@ -92,7 +102,11 @@ _CLC_OVERLOAD _CLC_DEF ulong __clc_mul_hi(ulong x, ulong y) {
92
102
93
103
// __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS): emits every __clc_mul_hi overload
// for one element type.
//
// It expands __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) once, then expands
// __CLC_MUL_HI_VEC_IMPL for the types formed by pasting the suffixes 2, 3, 4,
// 8 and 16 onto BTYPE and TYPE, passing BITS through unchanged.
//
// The line marked '-' is the former __CLC_MUL_HI_VEC(TYPE) expansion that this
// change removes; the lines marked '+' are its replacement.
#define __CLC_MUL_HI_DEC_IMPL (BTYPE , TYPE , BITS ) \
94
104
__CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS) \
95
- __CLC_MUL_HI_VEC(TYPE)
105
+ __CLC_MUL_HI_VEC_IMPL(BTYPE##2, TYPE##2, BITS) \
106
+ __CLC_MUL_HI_VEC_IMPL(BTYPE##3, TYPE##3, BITS) \
107
+ __CLC_MUL_HI_VEC_IMPL(BTYPE##4, TYPE##4, BITS) \
108
+ __CLC_MUL_HI_VEC_IMPL(BTYPE##8, TYPE##8, BITS) \
109
+ __CLC_MUL_HI_VEC_IMPL(BTYPE##16, TYPE##16, BITS)
96
110
97
111
#define __CLC_MUL_HI_TYPES () \
98
112
__CLC_MUL_HI_DEC_IMPL(short, char, 8) \
@@ -110,4 +124,5 @@ __CLC_MUL_HI_TYPES()
110
124
// Undefine the helper macros once the overloads have been emitted.
// The change drops the #undef of __CLC_B32 and adds #undefs for the two new
// helpers, __CLC_MUL_HI_VEC_IMPL and __CLC_CONVERT_TY.
// NOTE(review): __CLC_MUL_HI_VEC is still undefined here although the only
// visible use of it is removed by this change; the #undef is harmless but
// looks stale - confirm whether the macro is still defined anywhere.
#undef __CLC_MUL_HI_DEC_IMPL
111
125
#undef __CLC_MUL_HI_IMPL
112
126
#undef __CLC_MUL_HI_VEC
113
- #undef __CLC_B32
127
+ #undef __CLC_MUL_HI_VEC_IMPL
128
+ #undef __CLC_CONVERT_TY
0 commit comments