Skip to content

Commit 7a4af40

Browse files
authored
[libclc] Move cross to CLC library; add missing half overloads (#139713)
The half overloads are trivially identical to the float and double ones. It didn't seem worth using 'gentype' for the OpenCL layer or the CLC declarations, so they're just written out explicitly. Using 'gentype' does, however, help avoid less trivial repetition in the CLC implementation.
1 parent eaa45dc commit 7a4af40

File tree

5 files changed

+88
-8
lines changed

5 files changed

+88
-8
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Declares the __clc_cross overloads. Each overload takes two vectors of the
// same type (3 or 4 components) and returns a vector of that type. The float
// overloads are always declared; the double and half overloads are declared
// only when cl_khr_fp64 / cl_khr_fp16 are defined, respectively.
#ifndef __CLC_GEOMETRIC_CLC_CROSS_H__
10+
#define __CLC_GEOMETRIC_CLC_CROSS_H__
11+
12+
// Single-precision overloads (unconditional).
_CLC_OVERLOAD _CLC_DECL float3 __clc_cross(float3 p0, float3 p1);
13+
_CLC_OVERLOAD _CLC_DECL float4 __clc_cross(float4 p0, float4 p1);
14+
15+
// Double-precision overloads, guarded by the cl_khr_fp64 extension macro.
#ifdef cl_khr_fp64
16+
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
17+
18+
_CLC_OVERLOAD _CLC_DECL double3 __clc_cross(double3 p0, double3 p1);
19+
_CLC_OVERLOAD _CLC_DECL double4 __clc_cross(double4 p0, double4 p1);
20+
21+
#endif
22+
23+
// Half-precision overloads, guarded by the cl_khr_fp16 extension macro.
#ifdef cl_khr_fp16
24+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
25+
26+
_CLC_OVERLOAD _CLC_DECL half3 __clc_cross(half3 p0, half3 p1);
27+
_CLC_OVERLOAD _CLC_DECL half4 __clc_cross(half4 p0, half4 p1);
28+
29+
#endif
30+
31+
#endif // __CLC_GEOMETRIC_CLC_CROSS_H__

libclc/clc/lib/generic/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ common/clc_degrees.cl
22
common/clc_radians.cl
33
common/clc_sign.cl
44
common/clc_smoothstep.cl
5+
geometric/clc_cross.cl
56
geometric/clc_distance.cl
67
geometric/clc_dot.cl
78
geometric/clc_fast_distance.cl
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Translation unit for __clc_cross: points __CLC_BODY at clc_cross.inc and
// then includes clc/math/gentype.inc.
// NOTE(review): presumably gentype.inc re-includes __CLC_BODY once per
// floating-point gentype, with __CLC_GENTYPE and __CLC_VECSIZE_OR_1 set for
// each; confirm against gentype.inc, which is not shown here.
#include <clc/internal/clc.h>
10+
11+
#define __CLC_BODY <clc_cross.inc>
12+
#include <clc/math/gentype.inc>
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#if __CLC_VECSIZE_OR_1 == 3
10+
11+
// Three-component __clc_cross, compiled only when __CLC_VECSIZE_OR_1 == 3.
// Returns the cross product of p0 and p1 as a __CLC_GENTYPE:
//   x = p0.y * p1.z - p0.z * p1.y
//   y = p0.z * p1.x - p0.x * p1.z
//   z = p0.x * p1.y - p0.y * p1.x
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cross(__CLC_GENTYPE p0,
12+
__CLC_GENTYPE p1) {
13+
return (__CLC_GENTYPE)(p0.y * p1.z - p0.z * p1.y, p0.z * p1.x - p0.x * p1.z,
14+
p0.x * p1.y - p0.y * p1.x);
15+
}
16+
17+
#elif __CLC_VECSIZE_OR_1 == 4
18+
19+
// Four-component __clc_cross, compiled only when __CLC_VECSIZE_OR_1 == 4.
// The x, y and z results are the cross product of the x/y/z components of p0
// and p1; the w components of the inputs are not read and the w component of
// the result is the literal 0.0F.
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cross(__CLC_GENTYPE p0,
20+
__CLC_GENTYPE p1) {
21+
return (__CLC_GENTYPE)(p0.y * p1.z - p0.z * p1.y, p0.z * p1.x - p0.x * p1.z,
22+
// The single-precision literal fills w for every __CLC_GENTYPE.
p0.x * p1.y - p0.y * p1.x, 0.0F);
23+
}
24+
25+
#endif

libclc/generic/lib/geometric/cross.cl

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,38 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <clc/clc.h>
10+
#include <clc/geometric/clc_cross.h>
1011

1112
// OpenCL cross() for float3. In this diff hunk the '-' lines are the removed
// inline cross-product formula and the '+' line is its replacement, which
// forwards both operands to __clc_cross.
_CLC_OVERLOAD _CLC_DEF float3 cross(float3 p0, float3 p1) {
12-
return (float3)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z,
13-
p0.x*p1.y - p0.y*p1.x);
13+
return __clc_cross(p0, p1);
1414
}
1515

1616
// OpenCL cross() for float4. The removed ('-') lines computed the x/y/z cross
// product inline and set w to 0.f; the added ('+') line forwards both
// operands to __clc_cross instead.
_CLC_OVERLOAD _CLC_DEF float4 cross(float4 p0, float4 p1) {
17-
return (float4)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z,
18-
p0.x*p1.y - p0.y*p1.x, 0.f);
17+
return __clc_cross(p0, p1);
1918
}
2019

2120
#ifdef cl_khr_fp64
2221
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
2322

2423
// OpenCL cross() for double3 (inside the cl_khr_fp64 guard). The removed
// ('-') lines are the old inline formula; the added ('+') line forwards both
// operands to __clc_cross.
_CLC_OVERLOAD _CLC_DEF double3 cross(double3 p0, double3 p1) {
25-
return (double3)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z,
26-
p0.x*p1.y - p0.y*p1.x);
24+
return __clc_cross(p0, p1);
2725
}
2826

2927
// OpenCL cross() for double4 (inside the cl_khr_fp64 guard). The removed
// ('-') lines computed the x/y/z cross product inline and set w from the
// literal 0.f; the added ('+') line forwards both operands to __clc_cross.
_CLC_OVERLOAD _CLC_DEF double4 cross(double4 p0, double4 p1) {
30-
return (double4)(p0.y*p1.z - p0.z*p1.y, p0.z*p1.x - p0.x*p1.z,
31-
p0.x*p1.y - p0.y*p1.x, 0.f);
28+
return __clc_cross(p0, p1);
3229
}
30+
31+
#endif
32+
33+
#ifdef cl_khr_fp16
34+
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
35+
36+
// OpenCL cross() for half3, newly added by this change (all lines are '+')
// inside the cl_khr_fp16 guard. Forwards both operands to __clc_cross.
_CLC_OVERLOAD _CLC_DEF half3 cross(half3 p0, half3 p1) {
37+
return __clc_cross(p0, p1);
38+
}
39+
40+
// OpenCL cross() for half4, newly added by this change (all lines are '+')
// inside the cl_khr_fp16 guard. Forwards both operands to __clc_cross.
_CLC_OVERLOAD _CLC_DEF half4 cross(half4 p0, half4 p1) {
41+
return __clc_cross(p0, p1);
42+
}
43+
3344
#endif

0 commit comments

Comments
 (0)