Skip to content

Commit 1db30b6

Browse files
committed
[libclc] Add (fast) normalize to CLC; add half overloads
For simplicity the half overloads just call into the float versions of the builtin. Note that in the move some floating-point constants were combined. The vector2 versions of normalize used slightly different constants to the vector3 and vector4 versions of the same builtin. For float it was 0x1.0p-65 vs 0x1.0p-66 and for double 0x1.0p-513 vs 0x1.0p-514. I wasn't sure if this was necessary so this commit replaces the vector2 versions of the constants with the vector3/vector4 ones. The OpenCL-CTS seems okay with it. If this is incorrect then it's not very difficult to split them back out again.
1 parent 2fb6ff4 commit 1db30b6

File tree

10 files changed

+182
-151
lines changed

10 files changed

+182
-151
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
10+
#define __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
11+
12+
#define __FLOAT_ONLY
13+
#define __CLC_GEOMETRIC_RET_GENTYPE
14+
#define __CLC_FUNCTION __clc_fast_normalize
15+
#define __CLC_BODY <clc/geometric/unary_decl.inc>
16+
#include <clc/math/gentype.inc>
17+
18+
#undef __CLC_FUNCTION
19+
#undef __CLC_GEOMETRIC_RET_GENTYPE
20+
#undef __FLOAT_ONLY
21+
22+
#endif // __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_GEOMETRIC_CLC_NORMALIZE_H__
10+
#define __CLC_GEOMETRIC_CLC_NORMALIZE_H__
11+
12+
#define __CLC_GEOMETRIC_RET_GENTYPE
13+
#define __CLC_FUNCTION __clc_normalize
14+
#define __CLC_BODY <clc/geometric/unary_decl.inc>
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_FUNCTION
18+
#undef __CLC_GEOMETRIC_RET_GENTYPE
19+
20+
#endif // __CLC_GEOMETRIC_CLC_NORMALIZE_H__

libclc/clc/lib/generic/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ geometric/clc_distance.cl
77
geometric/clc_dot.cl
88
geometric/clc_fast_distance.cl
99
geometric/clc_fast_length.cl
10+
geometric/clc_fast_normalize.cl
1011
geometric/clc_length.cl
12+
geometric/clc_normalize.cl
1113
integer/clc_abs.cl
1214
integer/clc_abs_diff.cl
1315
integer/clc_add_sat.cl
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/geometric/clc_normalize.h>
10+
#include <clc/geometric/clc_dot.h>
11+
#include <clc/math/clc_half_rsqrt.h>
12+
13+
#define __FLOAT_ONLY
14+
#define __CLC_BODY <clc_fast_normalize.inc>
15+
#include <clc/math/gentype.inc>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_VECSIZE_OR_1 == 1
10+
11+
// Scalar overload: there is no cheaper path for a single component, so the
// fast variant forwards to the full-precision __clc_normalize.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) {
  __CLC_GENTYPE normalized = __clc_normalize(p);
  return normalized;
}
14+
15+
#elif (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 3 || \
16+
__CLC_VECSIZE_OR_1 == 4)
17+
18+
// Vector overload (2, 3 or 4 components): scales p by the half-precision
// reciprocal square root of its squared length. A vector whose squared length
// compares equal to zero is returned unchanged.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) {
  const __CLC_SCALAR_GENTYPE len_sq = __clc_dot(p, p);

  // Guard the zero-length case so we never multiply by rsqrt(0).
  if (len_sq == 0.0f) {
    return p;
  }

  return p * __clc_half_rsqrt(len_sq);
}
22+
23+
#endif
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/common/clc_sign.h>
11+
#include <clc/float/definitions.h>
12+
#include <clc/geometric/clc_dot.h>
13+
#include <clc/geometric/clc_normalize.h>
14+
#include <clc/internal/clc.h>
15+
#include <clc/math/clc_copysign.h>
16+
#include <clc/math/clc_rsqrt.h>
17+
#include <clc/relational/clc_all.h>
18+
#include <clc/relational/clc_isinf.h>
19+
#include <clc/relational/clc_select.h>
20+
21+
#define __CLC_BODY <clc_normalize.inc>
22+
#include <clc/math/gentype.inc>
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 || \
10+
__CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4)
11+
12+
// Until we have a native FP16 implementation, go via FP32
13+
#if __CLC_FPSIZE == 16
14+
15+
// Half-precision overload: converts the argument with __CLC_CONVERT_FLOATN,
// normalizes it through the corresponding __clc_normalize overload, and
// converts the result back with __CLC_CONVERT_GENTYPE.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
  __CLC_GENTYPE narrowed =
      __CLC_CONVERT_GENTYPE(__clc_normalize(__CLC_CONVERT_FLOATN(p)));
  return narrowed;
}
18+
19+
// Scalar normalize
20+
#elif defined(__CLC_SCALAR)
21+
22+
// Scalar overload: normalizing a single component reduces to taking its sign,
// so this forwards to __clc_sign.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
  __CLC_GENTYPE unit = __clc_sign(p);
  return unit;
}
25+
26+
// Vector normalize
27+
#else
28+
29+
#if __CLC_FPSIZE == 32
30+
#define MAX_SQRT 0x1.0p+86F
31+
#define MIN_SQRT 0x1.0p-65F
32+
#elif __CLC_FPSIZE == 64
33+
#define MAX_SQRT 0x1.0p+563
34+
#define MIN_SQRT 0x1.0p-513
35+
#else
36+
#error "Invalid FP size"
37+
#endif
38+
39+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
40+
if (__clc_all(p == __CLC_FP_LIT(0.0))) {
41+
return p;
42+
}
43+
44+
__CLC_SCALAR_GENTYPE l2 = __clc_dot(p, p);
45+
46+
if (l2 < FLT_MIN) {
47+
p *= MAX_SQRT;
48+
l2 = __clc_dot(p, p);
49+
} else if (l2 == INFINITY) {
50+
p *= MIN_SQRT;
51+
l2 = __clc_dot(p, p);
52+
if (l2 == INFINITY) {
53+
p = __clc_copysign(__clc_select((__CLC_GENTYPE)__CLC_FP_LIT(0.0),
54+
(__CLC_GENTYPE)__CLC_FP_LIT(1.0),
55+
__clc_isinf(p)),
56+
p);
57+
l2 = __clc_dot(p, p);
58+
}
59+
}
60+
return p * __clc_rsqrt(l2);
61+
}
62+
63+
#undef MIN_SQRT
64+
#undef MAX_SQRT
65+
66+
#endif
67+
68+
#endif

libclc/opencl/lib/generic/geometric/fast_normalize.cl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <clc/opencl/clc.h>
10+
#include <clc/geometric/clc_fast_normalize.h>
1011

11-
_CLC_OVERLOAD _CLC_DEF float fast_normalize(float p) { return normalize(p); }
12-
13-
#define __CLC_BODY <fast_normalize.inc>
12+
#define FUNCTION fast_normalize
1413
#define __FLOAT_ONLY
14+
#define __CLC_GEOMETRIC_RET_GENTYPE
15+
#define __CLC_BODY <clc/geometric/unary_def.inc>
16+
1517
#include <clc/math/gentype.inc>
16-
#undef __FLOAT_ONLY

libclc/opencl/lib/generic/geometric/fast_normalize.inc

Lines changed: 0 additions & 19 deletions
This file was deleted.

libclc/opencl/lib/generic/geometric/normalize.cl

Lines changed: 5 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -6,134 +6,11 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include <clc/geometric/clc_normalize.h>
910
#include <clc/opencl/clc.h>
1011

11-
_CLC_OVERLOAD _CLC_DEF float normalize(float p) { return sign(p); }
12+
#define FUNCTION normalize
13+
#define __CLC_GEOMETRIC_RET_GENTYPE
14+
#define __CLC_BODY <clc/geometric/unary_def.inc>
1215

13-
_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) {
14-
if (all(p == (float2)0.0F))
15-
return p;
16-
17-
float l2 = dot(p, p);
18-
19-
if (l2 < FLT_MIN) {
20-
p *= 0x1.0p+86F;
21-
l2 = dot(p, p);
22-
} else if (l2 == INFINITY) {
23-
p *= 0x1.0p-65f;
24-
l2 = dot(p, p);
25-
if (l2 == INFINITY) {
26-
p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p);
27-
l2 = dot(p, p);
28-
}
29-
}
30-
return p * rsqrt(l2);
31-
}
32-
33-
_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) {
34-
if (all(p == (float3)0.0F))
35-
return p;
36-
37-
float l2 = dot(p, p);
38-
39-
if (l2 < FLT_MIN) {
40-
p *= 0x1.0p+86F;
41-
l2 = dot(p, p);
42-
} else if (l2 == INFINITY) {
43-
p *= 0x1.0p-66f;
44-
l2 = dot(p, p);
45-
if (l2 == INFINITY) {
46-
p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p);
47-
l2 = dot(p, p);
48-
}
49-
}
50-
return p * rsqrt(l2);
51-
}
52-
53-
_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) {
54-
if (all(p == (float4)0.0F))
55-
return p;
56-
57-
float l2 = dot(p, p);
58-
59-
if (l2 < FLT_MIN) {
60-
p *= 0x1.0p+86F;
61-
l2 = dot(p, p);
62-
} else if (l2 == INFINITY) {
63-
p *= 0x1.0p-66f;
64-
l2 = dot(p, p);
65-
if (l2 == INFINITY) {
66-
p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p);
67-
l2 = dot(p, p);
68-
}
69-
}
70-
return p * rsqrt(l2);
71-
}
72-
73-
#ifdef cl_khr_fp64
74-
75-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
76-
77-
_CLC_OVERLOAD _CLC_DEF double normalize(double p) { return sign(p); }
78-
79-
_CLC_OVERLOAD _CLC_DEF double2 normalize(double2 p) {
80-
if (all(p == (double2)0.0))
81-
return p;
82-
83-
double l2 = dot(p, p);
84-
85-
if (l2 < DBL_MIN) {
86-
p *= 0x1.0p+563;
87-
l2 = dot(p, p);
88-
} else if (l2 == INFINITY) {
89-
p *= 0x1.0p-513;
90-
l2 = dot(p, p);
91-
if (l2 == INFINITY) {
92-
p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p);
93-
l2 = dot(p, p);
94-
}
95-
}
96-
return p * rsqrt(l2);
97-
}
98-
99-
_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) {
100-
if (all(p == (double3)0.0))
101-
return p;
102-
103-
double l2 = dot(p, p);
104-
105-
if (l2 < DBL_MIN) {
106-
p *= 0x1.0p+563;
107-
l2 = dot(p, p);
108-
} else if (l2 == INFINITY) {
109-
p *= 0x1.0p-514;
110-
l2 = dot(p, p);
111-
if (l2 == INFINITY) {
112-
p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p);
113-
l2 = dot(p, p);
114-
}
115-
}
116-
return p * rsqrt(l2);
117-
}
118-
119-
_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) {
120-
if (all(p == (double4)0.0))
121-
return p;
122-
123-
double l2 = dot(p, p);
124-
125-
if (l2 < DBL_MIN) {
126-
p *= 0x1.0p+563;
127-
l2 = dot(p, p);
128-
} else if (l2 == INFINITY) {
129-
p *= 0x1.0p-514;
130-
l2 = dot(p, p);
131-
if (l2 == INFINITY) {
132-
p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p);
133-
l2 = dot(p, p);
134-
}
135-
}
136-
return p * rsqrt(l2);
137-
}
138-
139-
#endif
16+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)