Skip to content

Commit c55c1f2

Browse files
committed
[libclc] Add (fast) normalize to CLC; add half overloads
For simplicity, the half overloads just call into the float versions of the builtin. Note that in the move some floating-point constants were combined. The vector2 versions of normalize used slightly different constants from the vector3 and vector4 versions of the same builtin. For float it was 0x1.0p-65 vs 0x1.0p-66, and for double 0x1.0p-513 vs 0x1.0p-514. I wasn't sure if this was necessary, so this commit replaces the vector2 versions of the constants with the vector3/vector4 ones. The OpenCL-CTS seems okay with it. If this is incorrect, then it's not very difficult to split them back out again.
1 parent 7a4af40 commit c55c1f2

File tree

10 files changed

+182
-157
lines changed

10 files changed

+182
-157
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
10+
#define __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
11+
12+
// Macro set-up for the __clc_fast_normalize declarations: names the function
// via __CLC_FUNCTION, points __CLC_BODY at <clc/geometric/unary_decl.inc>, and
// includes <clc/math/gentype.inc>; the helper macros are undefined afterwards.
// NOTE(review): __FLOAT_ONLY presumably restricts the expansion to float
// types, and __CLC_GEOMETRIC_RET_GENTYPE presumably selects a gentype return
// type -- confirm against gentype.inc / unary_decl.inc.
#define __FLOAT_ONLY
13+
#define __CLC_GEOMETRIC_RET_GENTYPE
14+
#define __CLC_FUNCTION __clc_fast_normalize
15+
#define __CLC_BODY <clc/geometric/unary_decl.inc>
16+
#include <clc/math/gentype.inc>
17+
18+
#undef __CLC_FUNCTION
19+
#undef __CLC_GEOMETRIC_RET_GENTYPE
20+
#undef __FLOAT_ONLY
21+
22+
#endif // __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_GEOMETRIC_CLC_NORMALIZE_H__
10+
#define __CLC_GEOMETRIC_CLC_NORMALIZE_H__
11+
12+
// Macro set-up for the __clc_normalize declarations: names the function via
// __CLC_FUNCTION, points __CLC_BODY at <clc/geometric/unary_decl.inc>, and
// includes <clc/math/gentype.inc>; the helper macros are undefined afterwards.
// NOTE(review): __CLC_GEOMETRIC_RET_GENTYPE presumably selects a gentype
// return type -- confirm against unary_decl.inc.
#define __CLC_GEOMETRIC_RET_GENTYPE
13+
#define __CLC_FUNCTION __clc_normalize
14+
#define __CLC_BODY <clc/geometric/unary_decl.inc>
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_FUNCTION
18+
#undef __CLC_GEOMETRIC_RET_GENTYPE
19+
20+
#endif // __CLC_GEOMETRIC_CLC_NORMALIZE_H__

libclc/clc/lib/generic/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ geometric/clc_distance.cl
77
geometric/clc_dot.cl
88
geometric/clc_fast_distance.cl
99
geometric/clc_fast_length.cl
10+
geometric/clc_fast_normalize.cl
1011
geometric/clc_length.cl
12+
geometric/clc_normalize.cl
1113
integer/clc_abs.cl
1214
integer/clc_abs_diff.cl
1315
integer/clc_add_sat.cl
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/geometric/clc_normalize.h>
10+
#include <clc/geometric/clc_dot.h>
11+
#include <clc/math/clc_half_rsqrt.h>
12+
13+
// Points __CLC_BODY at clc_fast_normalize.inc and includes gentype.inc.
// NOTE(review): gentype.inc presumably includes __CLC_BODY once per type, and
// __FLOAT_ONLY presumably limits that to float types -- confirm.
#define __FLOAT_ONLY
14+
#define __CLC_BODY <clc_fast_normalize.inc>
15+
#include <clc/math/gentype.inc>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_VECSIZE_OR_1 == 1
10+
11+
// Scalar fast_normalize: forwards to __clc_normalize, so the scalar case has
// no separate fast path.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) {
  const __CLC_GENTYPE normalized = __clc_normalize(p);
  return normalized;
}
14+
15+
#elif (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 3 || \
16+
__CLC_VECSIZE_OR_1 == 4)
17+
18+
// Vector fast_normalize: scales p by __clc_half_rsqrt of its squared length
// (the dot product of p with itself).
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) {
  const __CLC_SCALAR_GENTYPE len_sq = __clc_dot(p, p);

  // A squared length of exactly zero returns the input unchanged instead of
  // multiplying by the reciprocal square root of zero.
  if (len_sq == 0.0f) {
    return p;
  }

  return p * __clc_half_rsqrt(len_sq);
}
22+
23+
#endif
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/common/clc_sign.h>
11+
#include <clc/float/definitions.h>
12+
#include <clc/geometric/clc_dot.h>
13+
#include <clc/geometric/clc_normalize.h>
14+
#include <clc/internal/clc.h>
15+
#include <clc/math/clc_copysign.h>
16+
#include <clc/math/clc_rsqrt.h>
17+
#include <clc/relational/clc_all.h>
18+
#include <clc/relational/clc_isinf.h>
19+
#include <clc/relational/clc_select.h>
20+
21+
// Points __CLC_BODY at clc_normalize.inc and includes gentype.inc.
// NOTE(review): gentype.inc presumably includes __CLC_BODY once per
// floating-point type and vector width -- confirm.
#define __CLC_BODY <clc_normalize.inc>
22+
#include <clc/math/gentype.inc>
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 || \
10+
__CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4)
11+
12+
// Until we have a native FP16 implementation, go via FP32
13+
#if __CLC_FPSIZE == 16
14+
15+
// FP16 overload: converts the argument with __CLC_CONVERT_FLOATN, normalizes
// through the float overload, and converts the result back to the half type.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
  const __CLC_GENTYPE narrowed =
      __CLC_CONVERT_GENTYPE(__clc_normalize(__CLC_CONVERT_FLOATN(p)));
  return narrowed;
}
18+
19+
// Scalar normalize
20+
#elif defined(__CLC_SCALAR)
21+
22+
// Scalar overload: the result is whatever __clc_sign returns for p.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
  const __CLC_GENTYPE unit = __clc_sign(p);
  return unit;
}
25+
26+
// Vector normalize
27+
#else
28+
29+
#if __CLC_FPSIZE == 32
30+
#define MAX_SQRT 0x1.0p+86F
31+
#define MIN_SQRT 0x1.0p-65F
32+
#elif __CLC_FPSIZE == 64
33+
#define MAX_SQRT 0x1.0p+563
34+
#define MIN_SQRT 0x1.0p-513
35+
#else
36+
#error "Invalid FP size"
37+
#endif
38+
39+
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
40+
if (__clc_all(p == __CLC_FP_LIT(0.0))) {
41+
return p;
42+
}
43+
44+
__CLC_SCALAR_GENTYPE l2 = __clc_dot(p, p);
45+
46+
if (l2 < FLT_MIN) {
47+
p *= MAX_SQRT;
48+
l2 = __clc_dot(p, p);
49+
} else if (l2 == INFINITY) {
50+
p *= MIN_SQRT;
51+
l2 = __clc_dot(p, p);
52+
if (l2 == INFINITY) {
53+
p = __clc_copysign(__clc_select((__CLC_GENTYPE)__CLC_FP_LIT(0.0),
54+
(__CLC_GENTYPE)__CLC_FP_LIT(1.0),
55+
__clc_isinf(p)),
56+
p);
57+
l2 = __clc_dot(p, p);
58+
}
59+
}
60+
return p * __clc_rsqrt(l2);
61+
}
62+
63+
#undef MIN_SQRT
64+
#undef MAX_SQRT
65+
66+
#endif
67+
68+
#endif

libclc/generic/lib/geometric/fast_normalize.cl

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <clc/clc.h>
10+
#include <clc/geometric/clc_fast_normalize.h>
1011

11-
_CLC_OVERLOAD _CLC_DEF float fast_normalize(float p) {
12-
return normalize(p);
13-
}
14-
15-
#define __CLC_BODY <fast_normalize.inc>
12+
// Macro set-up for the public fast_normalize builtin: FUNCTION names it and
// __CLC_BODY points at <clc/geometric/unary_def.inc>, which is then pulled in
// through <clc/math/gentype.inc>.
// NOTE(review): unary_def.inc presumably defines FUNCTION as a wrapper around
// the matching __clc_ implementation, for float types only because of
// __FLOAT_ONLY -- confirm.
#define FUNCTION fast_normalize
1613
#define __FLOAT_ONLY
14+
#define __CLC_GEOMETRIC_RET_GENTYPE
15+
#define __CLC_BODY <clc/geometric/unary_def.inc>
16+
1717
#include <clc/math/gentype.inc>
18-
#undef __FLOAT_ONLY

libclc/generic/lib/geometric/fast_normalize.inc

Lines changed: 0 additions & 19 deletions
This file was deleted.

libclc/generic/lib/geometric/normalize.cl

Lines changed: 5 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -7,137 +7,10 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <clc/clc.h>
10+
#include <clc/geometric/clc_normalize.h>
1011

11-
_CLC_OVERLOAD _CLC_DEF float normalize(float p) {
12-
return sign(p);
13-
}
12+
// Macro set-up for the public normalize builtin: FUNCTION names it and
// __CLC_BODY points at <clc/geometric/unary_def.inc>.
// NOTE(review): unary_def.inc presumably defines FUNCTION as a wrapper around
// the matching __clc_ implementation -- confirm.
#define FUNCTION normalize
13+
#define __CLC_GEOMETRIC_RET_GENTYPE
14+
#define __CLC_BODY <clc/geometric/unary_def.inc>
1415

15-
_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) {
16-
if (all(p == (float2)0.0F))
17-
return p;
18-
19-
float l2 = dot(p, p);
20-
21-
if (l2 < FLT_MIN) {
22-
p *= 0x1.0p+86F;
23-
l2 = dot(p, p);
24-
} else if (l2 == INFINITY) {
25-
p *= 0x1.0p-65f;
26-
l2 = dot(p, p);
27-
if (l2 == INFINITY) {
28-
p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p);
29-
l2 = dot(p, p);
30-
}
31-
}
32-
return p * rsqrt(l2);
33-
}
34-
35-
_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) {
36-
if (all(p == (float3)0.0F))
37-
return p;
38-
39-
float l2 = dot(p, p);
40-
41-
if (l2 < FLT_MIN) {
42-
p *= 0x1.0p+86F;
43-
l2 = dot(p, p);
44-
} else if (l2 == INFINITY) {
45-
p *= 0x1.0p-66f;
46-
l2 = dot(p, p);
47-
if (l2 == INFINITY) {
48-
p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p);
49-
l2 = dot(p, p);
50-
}
51-
}
52-
return p * rsqrt(l2);
53-
}
54-
55-
_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) {
56-
if (all(p == (float4)0.0F))
57-
return p;
58-
59-
float l2 = dot(p, p);
60-
61-
if (l2 < FLT_MIN) {
62-
p *= 0x1.0p+86F;
63-
l2 = dot(p, p);
64-
} else if (l2 == INFINITY) {
65-
p *= 0x1.0p-66f;
66-
l2 = dot(p, p);
67-
if (l2 == INFINITY) {
68-
p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p);
69-
l2 = dot(p, p);
70-
}
71-
}
72-
return p * rsqrt(l2);
73-
}
74-
75-
#ifdef cl_khr_fp64
76-
77-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
78-
79-
_CLC_OVERLOAD _CLC_DEF double normalize(double p) {
80-
return sign(p);
81-
}
82-
83-
_CLC_OVERLOAD _CLC_DEF double2 normalize(double2 p) {
84-
if (all(p == (double2)0.0))
85-
return p;
86-
87-
double l2 = dot(p, p);
88-
89-
if (l2 < DBL_MIN) {
90-
p *= 0x1.0p+563;
91-
l2 = dot(p, p);
92-
} else if (l2 == INFINITY) {
93-
p *= 0x1.0p-513;
94-
l2 = dot(p, p);
95-
if (l2 == INFINITY) {
96-
p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p);
97-
l2 = dot(p, p);
98-
}
99-
}
100-
return p * rsqrt(l2);
101-
}
102-
103-
_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) {
104-
if (all(p == (double3)0.0))
105-
return p;
106-
107-
double l2 = dot(p, p);
108-
109-
if (l2 < DBL_MIN) {
110-
p *= 0x1.0p+563;
111-
l2 = dot(p, p);
112-
} else if (l2 == INFINITY) {
113-
p *= 0x1.0p-514;
114-
l2 = dot(p, p);
115-
if (l2 == INFINITY) {
116-
p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p);
117-
l2 = dot(p, p);
118-
}
119-
}
120-
return p * rsqrt(l2);
121-
}
122-
123-
_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) {
124-
if (all(p == (double4)0.0))
125-
return p;
126-
127-
double l2 = dot(p, p);
128-
129-
if (l2 < DBL_MIN) {
130-
p *= 0x1.0p+563;
131-
l2 = dot(p, p);
132-
} else if (l2 == INFINITY) {
133-
p *= 0x1.0p-514;
134-
l2 = dot(p, p);
135-
if (l2 == INFINITY) {
136-
p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p);
137-
l2 = dot(p, p);
138-
}
139-
}
140-
return p * rsqrt(l2);
141-
}
142-
143-
#endif
16+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)