Skip to content

Commit 8c3019e

Browse files
authored
[libclc] Add (fast) normalize to CLC; add half overloads (#139759)
For simplicity the half overloads just call into the float versions of the builtin. Otherwise there are no codegen changes to any target.
1 parent b6414b5 commit 8c3019e

File tree

10 files changed

+198
-151
lines changed

10 files changed

+198
-151
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Include guard for the __clc_fast_normalize declarations.
#ifndef __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
10+
#define __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
11+
12+
// Configuration for the <clc/math/gentype.inc> include below: the function
// name to declare (__CLC_FUNCTION) and the template to expand (__CLC_BODY).
// NOTE(review): __FLOAT_ONLY presumably limits the expansion to float types
// and __CLC_GEOMETRIC_RET_GENTYPE presumably selects a gentype return type;
// confirm against gentype.inc and unary_decl.inc.
#define __FLOAT_ONLY
13+
#define __CLC_GEOMETRIC_RET_GENTYPE
14+
#define __CLC_FUNCTION __clc_fast_normalize
15+
#define __CLC_BODY <clc/geometric/unary_decl.inc>
16+
#include <clc/math/gentype.inc>
17+
18+
// Undefine the configuration macros set above so they do not remain defined
// for code that includes this header.
#undef __CLC_FUNCTION
19+
#undef __CLC_GEOMETRIC_RET_GENTYPE
20+
#undef __FLOAT_ONLY
21+
22+
#endif // __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Include guard for the __clc_normalize declarations.
#ifndef __CLC_GEOMETRIC_CLC_NORMALIZE_H__
10+
#define __CLC_GEOMETRIC_CLC_NORMALIZE_H__
11+
12+
// Configuration for the <clc/math/gentype.inc> include below: the function
// name to declare (__CLC_FUNCTION) and the template to expand (__CLC_BODY).
// NOTE(review): __CLC_GEOMETRIC_RET_GENTYPE presumably selects a gentype
// return type; confirm against unary_decl.inc.
#define __CLC_GEOMETRIC_RET_GENTYPE
13+
#define __CLC_FUNCTION __clc_normalize
14+
#define __CLC_BODY <clc/geometric/unary_decl.inc>
15+
#include <clc/math/gentype.inc>
16+
17+
// Undefine the configuration macros set above so they do not remain defined
// for code that includes this header.
#undef __CLC_FUNCTION
18+
#undef __CLC_GEOMETRIC_RET_GENTYPE
19+
20+
#endif // __CLC_GEOMETRIC_CLC_NORMALIZE_H__

libclc/clc/lib/generic/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ geometric/clc_distance.cl
99
geometric/clc_dot.cl
1010
geometric/clc_fast_distance.cl
1111
geometric/clc_fast_length.cl
12+
geometric/clc_fast_normalize.cl
1213
geometric/clc_length.cl
14+
geometric/clc_normalize.cl
1315
integer/clc_abs.cl
1416
integer/clc_abs_diff.cl
1517
integer/clc_add_sat.cl
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/geometric/clc_dot.h>
10+
#include <clc/geometric/clc_normalize.h>
11+
#include <clc/math/clc_half_rsqrt.h>
12+
13+
// Expand clc_fast_normalize.inc through <clc/math/gentype.inc>; that .inc file
// holds the __clc_fast_normalize definitions.
// NOTE(review): __FLOAT_ONLY presumably restricts the expansion to float
// types; confirm against gentype.inc.
#define __FLOAT_ONLY
14+
#define __CLC_BODY <clc_fast_normalize.inc>
15+
#include <clc/math/gentype.inc>
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_VECSIZE_OR_1 == 1
10+
11+
// Scalar overload (selected when __CLC_VECSIZE_OR_1 == 1): there is no separate
// fast path, the call is forwarded unchanged to __clc_normalize.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) {
12+
return __clc_normalize(p);
13+
}
14+
15+
#elif (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 3 || \
16+
__CLC_VECSIZE_OR_1 == 4)
17+
18+
// Vector overload (selected for 2, 3 or 4 components): returns p multiplied by
// __clc_half_rsqrt(__clc_dot(p, p)).
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) {
19+
// l2 is the dot product of p with itself.
__CLC_SCALAR_GENTYPE l2 = __clc_dot(p, p);
20+
// When l2 compares equal to zero, p is returned unchanged instead of being
// scaled by the reciprocal square root of zero.
return l2 == 0.0f ? p : p * __clc_half_rsqrt(l2);
21+
}
22+
23+
#endif
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/common/clc_sign.h>
11+
#include <clc/float/definitions.h>
12+
#include <clc/geometric/clc_dot.h>
13+
#include <clc/geometric/clc_normalize.h>
14+
#include <clc/internal/clc.h>
15+
#include <clc/math/clc_copysign.h>
16+
#include <clc/math/clc_rsqrt.h>
17+
#include <clc/relational/clc_all.h>
18+
#include <clc/relational/clc_isinf.h>
19+
#include <clc/relational/clc_select.h>
20+
21+
// Expand clc_normalize.inc through <clc/math/gentype.inc>; that .inc file
// holds the __clc_normalize definitions.
#define __CLC_BODY <clc_normalize.inc>
22+
#include <clc/math/gentype.inc>
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 || \
11+
__CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4)
12+
13+
// Until we have a native FP16 implementation, go via FP32
14+
#if __CLC_FPSIZE == 16
15+
16+
// Overload used when __CLC_FPSIZE == 16: converts the argument with
// __CLC_CONVERT_FLOATN, calls the __clc_normalize overload for the converted
// type, and converts the result back with __CLC_CONVERT_GENTYPE.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
17+
return __CLC_CONVERT_GENTYPE(__clc_normalize(__CLC_CONVERT_FLOATN(p)));
18+
}
19+
20+
// Scalar normalize
21+
#elif defined(__CLC_SCALAR)
22+
23+
// Scalar overload (selected when __CLC_SCALAR is defined and the type is not
// FP16): the result is simply __clc_sign(p).
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
24+
return __clc_sign(p);
25+
}
26+
27+
// Vector normalize
28+
#else
29+
30+
// Per-precision constants for the vector __clc_normalize that follows:
//   MIN_VAL  - threshold; a dot product below it makes the function rescale p.
//   MAX_SQRT - factor p is multiplied by when the dot product is below MIN_VAL.
//   MIN_SQRT - factor p is multiplied by when the dot product is INFINITY.
// MIN_SQRT is one power of two larger for 2-component vectors than for the
// other vector sizes. Any FP size other than 32 or 64 is a compile-time error.
#if __CLC_FPSIZE == 32
31+
32+
#define MIN_VAL FLT_MIN
33+
#define MAX_SQRT 0x1.0p+86F
34+
#if __CLC_VECSIZE_OR_1 == 2
35+
#define MIN_SQRT 0x1.0p-65F
36+
#else
37+
#define MIN_SQRT 0x1.0p-66F
38+
#endif
39+
40+
#elif __CLC_FPSIZE == 64
41+
42+
#define MIN_VAL DBL_MIN
43+
#define MAX_SQRT 0x1.0p+563
44+
#if __CLC_VECSIZE_OR_1 == 2
45+
#define MIN_SQRT 0x1.0p-513
46+
#else
47+
#define MIN_SQRT 0x1.0p-514
48+
#endif
49+
50+
#else
51+
#error "Invalid FP size"
52+
#endif
53+
54+
// Vector overload for 32- and 64-bit floating-point types with 2, 3 or 4
// components. Returns p * __clc_rsqrt(__clc_dot(p, p)), first rescaling p when
// the dot product is below MIN_VAL or evaluates to INFINITY. A vector whose
// components all compare equal to zero is returned unchanged.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
55+
// Every component compares equal to zero: return the input as-is.
if (__clc_all(p == __CLC_FP_LIT(0.0))) {
56+
return p;
57+
}
58+
59+
// l2 is the dot product of p with itself.
__CLC_SCALAR_GENTYPE l2 = __clc_dot(p, p);
60+
61+
// Dot product is below MIN_VAL: scale p up by MAX_SQRT and recompute it.
if (l2 < MIN_VAL) {
62+
p *= MAX_SQRT;
63+
l2 = __clc_dot(p, p);
64+
} else if (l2 == INFINITY) {
65+
// Dot product evaluated to INFINITY: scale p down by MIN_SQRT and recompute.
p *= MIN_SQRT;
66+
l2 = __clc_dot(p, p);
67+
// Still INFINITY after scaling down: rebuild p from the __clc_isinf(p) mask
// and re-apply the signs of the original components with __clc_copysign.
// NOTE(review): assumes __clc_select(a, b, mask) yields b where the mask is
// set, i.e. 1.0 for infinite components and 0.0 for the rest - confirm.
if (l2 == INFINITY) {
68+
p = __clc_copysign(__clc_select((__CLC_GENTYPE)__CLC_FP_LIT(0.0),
69+
(__CLC_GENTYPE)__CLC_FP_LIT(1.0),
70+
__clc_isinf(p)),
71+
p);
72+
l2 = __clc_dot(p, p);
73+
}
74+
}
75+
// Scale the (possibly rescaled) vector by the reciprocal square root of l2.
return p * __clc_rsqrt(l2);
76+
}
77+
78+
#undef MIN_VAL
79+
#undef MIN_SQRT
80+
#undef MAX_SQRT
81+
82+
#endif
83+
84+
#endif

libclc/opencl/lib/generic/geometric/fast_normalize.cl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include <clc/geometric/clc_fast_normalize.h>
910
#include <clc/opencl/clc.h>
1011

11-
_CLC_OVERLOAD _CLC_DEF float fast_normalize(float p) { return normalize(p); }
12-
13-
#define __CLC_BODY <fast_normalize.inc>
12+
#define FUNCTION fast_normalize
1413
#define __FLOAT_ONLY
14+
#define __CLC_GEOMETRIC_RET_GENTYPE
15+
#define __CLC_BODY <clc/geometric/unary_def.inc>
16+
1517
#include <clc/math/gentype.inc>
16-
#undef __FLOAT_ONLY

libclc/opencl/lib/generic/geometric/fast_normalize.inc

Lines changed: 0 additions & 19 deletions
This file was deleted.

libclc/opencl/lib/generic/geometric/normalize.cl

Lines changed: 5 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -6,134 +6,11 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include <clc/geometric/clc_normalize.h>
910
#include <clc/opencl/clc.h>
1011

11-
_CLC_OVERLOAD _CLC_DEF float normalize(float p) { return sign(p); }
12+
#define FUNCTION normalize
13+
#define __CLC_GEOMETRIC_RET_GENTYPE
14+
#define __CLC_BODY <clc/geometric/unary_def.inc>
1215

13-
_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) {
14-
if (all(p == (float2)0.0F))
15-
return p;
16-
17-
float l2 = dot(p, p);
18-
19-
if (l2 < FLT_MIN) {
20-
p *= 0x1.0p+86F;
21-
l2 = dot(p, p);
22-
} else if (l2 == INFINITY) {
23-
p *= 0x1.0p-65f;
24-
l2 = dot(p, p);
25-
if (l2 == INFINITY) {
26-
p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p);
27-
l2 = dot(p, p);
28-
}
29-
}
30-
return p * rsqrt(l2);
31-
}
32-
33-
_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) {
34-
if (all(p == (float3)0.0F))
35-
return p;
36-
37-
float l2 = dot(p, p);
38-
39-
if (l2 < FLT_MIN) {
40-
p *= 0x1.0p+86F;
41-
l2 = dot(p, p);
42-
} else if (l2 == INFINITY) {
43-
p *= 0x1.0p-66f;
44-
l2 = dot(p, p);
45-
if (l2 == INFINITY) {
46-
p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p);
47-
l2 = dot(p, p);
48-
}
49-
}
50-
return p * rsqrt(l2);
51-
}
52-
53-
_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) {
54-
if (all(p == (float4)0.0F))
55-
return p;
56-
57-
float l2 = dot(p, p);
58-
59-
if (l2 < FLT_MIN) {
60-
p *= 0x1.0p+86F;
61-
l2 = dot(p, p);
62-
} else if (l2 == INFINITY) {
63-
p *= 0x1.0p-66f;
64-
l2 = dot(p, p);
65-
if (l2 == INFINITY) {
66-
p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p);
67-
l2 = dot(p, p);
68-
}
69-
}
70-
return p * rsqrt(l2);
71-
}
72-
73-
#ifdef cl_khr_fp64
74-
75-
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
76-
77-
_CLC_OVERLOAD _CLC_DEF double normalize(double p) { return sign(p); }
78-
79-
_CLC_OVERLOAD _CLC_DEF double2 normalize(double2 p) {
80-
if (all(p == (double2)0.0))
81-
return p;
82-
83-
double l2 = dot(p, p);
84-
85-
if (l2 < DBL_MIN) {
86-
p *= 0x1.0p+563;
87-
l2 = dot(p, p);
88-
} else if (l2 == INFINITY) {
89-
p *= 0x1.0p-513;
90-
l2 = dot(p, p);
91-
if (l2 == INFINITY) {
92-
p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p);
93-
l2 = dot(p, p);
94-
}
95-
}
96-
return p * rsqrt(l2);
97-
}
98-
99-
_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) {
100-
if (all(p == (double3)0.0))
101-
return p;
102-
103-
double l2 = dot(p, p);
104-
105-
if (l2 < DBL_MIN) {
106-
p *= 0x1.0p+563;
107-
l2 = dot(p, p);
108-
} else if (l2 == INFINITY) {
109-
p *= 0x1.0p-514;
110-
l2 = dot(p, p);
111-
if (l2 == INFINITY) {
112-
p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p);
113-
l2 = dot(p, p);
114-
}
115-
}
116-
return p * rsqrt(l2);
117-
}
118-
119-
_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) {
120-
if (all(p == (double4)0.0))
121-
return p;
122-
123-
double l2 = dot(p, p);
124-
125-
if (l2 < DBL_MIN) {
126-
p *= 0x1.0p+563;
127-
l2 = dot(p, p);
128-
} else if (l2 == INFINITY) {
129-
p *= 0x1.0p-514;
130-
l2 = dot(p, p);
131-
if (l2 == INFINITY) {
132-
p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p);
133-
l2 = dot(p, p);
134-
}
135-
}
136-
return p * rsqrt(l2);
137-
}
138-
139-
#endif
16+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)