Skip to content

Commit 12cdf43

Browse files
authored
[libclc] Move (add|sub)_sat to CLC; optimize (#124903)
Using the `__builtin_elementwise_(add|sub)_sat` functions allows us to directly optimize to the desired intrinsic, and avoid scalarization for vector types.
1 parent ecc71de commit 12cdf43

File tree

11 files changed

+56
-129
lines changed

11 files changed

+56
-129
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#ifndef __CLC_INTEGER_CLC_ADD_SAT_H__
2+
#define __CLC_INTEGER_CLC_ADD_SAT_H__
3+
4+
#define __CLC_FUNCTION __clc_add_sat
5+
#define __CLC_BODY <clc/shared/binary_decl.inc>
6+
7+
#include <clc/integer/gentype.inc>
8+
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
11+
12+
#endif // __CLC_INTEGER_CLC_ADD_SAT_H__
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#ifndef __CLC_INTEGER_CLC_SUB_SAT_H__
2+
#define __CLC_INTEGER_CLC_SUB_SAT_H__
3+
4+
#define __CLC_FUNCTION __clc_sub_sat
5+
#define __CLC_BODY <clc/shared/binary_decl.inc>
6+
7+
#include <clc/integer/gentype.inc>
8+
9+
#undef __CLC_BODY
10+
#undef __CLC_FUNCTION
11+
12+
#endif // __CLC_INTEGER_CLC_SUB_SAT_H__

libclc/clc/lib/clspv/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
../generic/integer/clc_add_sat.cl
2+
../generic/integer/clc_sub_sat.cl
13
../generic/math/clc_ceil.cl
24
../generic/math/clc_copysign.cl
35
../generic/math/clc_fabs.cl

libclc/clc/lib/generic/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ common/clc_smoothstep.cl
44
geometric/clc_dot.cl
55
integer/clc_abs.cl
66
integer/clc_abs_diff.cl
7+
integer/clc_add_sat.cl
8+
integer/clc_sub_sat.cl
79
math/clc_ceil.cl
810
math/clc_copysign.cl
911
math/clc_fabs.cl
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#include <clc/internal/clc.h>
2+
3+
#define FUNCTION __clc_add_sat
4+
#define __CLC_FUNCTION(x) __builtin_elementwise_add_sat
5+
#define __CLC_BODY <clc/shared/binary_def.inc>
6+
7+
#include <clc/integer/gentype.inc>
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#include <clc/internal/clc.h>
2+
3+
#define FUNCTION __clc_sub_sat
4+
#define __CLC_FUNCTION(x) __builtin_elementwise_sub_sat
5+
#define __CLC_BODY <clc/shared/binary_def.inc>
6+
7+
#include <clc/integer/gentype.inc>

libclc/clc/lib/spirv/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
../generic/common/clc_radians.cl
33
../generic/common/clc_smoothstep.cl
44
../generic/geometric/clc_dot.cl
5+
../generic/integer/clc_add_sat.cl
6+
../generic/integer/clc_sub_sat.cl
57
../generic/math/clc_ceil.cl
68
../generic/math/clc_copysign.cl
79
../generic/math/clc_fabs.cl

libclc/clc/lib/spirv64/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
../generic/common/clc_radians.cl
33
../generic/common/clc_smoothstep.cl
44
../generic/geometric/clc_dot.cl
5+
../generic/integer/clc_add_sat.cl
6+
../generic/integer/clc_sub_sat.cl
57
../generic/math/clc_ceil.cl
68
../generic/math/clc_copysign.cl
79
../generic/math/clc_fabs.cl

libclc/generic/lib/integer/add_sat.cl

Lines changed: 4 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,7 @@
11
#include <clc/clc.h>
2-
#include <clc/clcmacro.h>
2+
#include <clc/integer/clc_add_sat.h>
33

4-
// From add_sat.ll
5-
_CLC_DECL char __clc_add_sat_s8(char, char);
6-
_CLC_DECL uchar __clc_add_sat_u8(uchar, uchar);
7-
_CLC_DECL short __clc_add_sat_s16(short, short);
8-
_CLC_DECL ushort __clc_add_sat_u16(ushort, ushort);
9-
_CLC_DECL int __clc_add_sat_s32(int, int);
10-
_CLC_DECL uint __clc_add_sat_u32(uint, uint);
11-
_CLC_DECL long __clc_add_sat_s64(long, long);
12-
_CLC_DECL ulong __clc_add_sat_u64(ulong, ulong);
4+
#define FUNCTION add_sat
5+
#define __CLC_BODY <clc/shared/binary_def.inc>
136

14-
_CLC_OVERLOAD _CLC_DEF char add_sat(char x, char y) {
15-
short r = x + y;
16-
return convert_char_sat(r);
17-
}
18-
19-
_CLC_OVERLOAD _CLC_DEF uchar add_sat(uchar x, uchar y) {
20-
ushort r = x + y;
21-
return convert_uchar_sat(r);
22-
}
23-
24-
_CLC_OVERLOAD _CLC_DEF short add_sat(short x, short y) {
25-
int r = x + y;
26-
return convert_short_sat(r);
27-
}
28-
29-
_CLC_OVERLOAD _CLC_DEF ushort add_sat(ushort x, ushort y) {
30-
uint r = x + y;
31-
return convert_ushort_sat(r);
32-
}
33-
34-
_CLC_OVERLOAD _CLC_DEF int add_sat(int x, int y) {
35-
int r;
36-
if (__builtin_sadd_overflow(x, y, &r))
37-
// The overflow can only occur if both are pos or both are neg,
38-
// thus we only need to check one operand
39-
return x > 0 ? INT_MAX : INT_MIN;
40-
return r;
41-
}
42-
43-
_CLC_OVERLOAD _CLC_DEF uint add_sat(uint x, uint y) {
44-
uint r;
45-
if (__builtin_uadd_overflow(x, y, &r))
46-
return UINT_MAX;
47-
return r;
48-
}
49-
50-
_CLC_OVERLOAD _CLC_DEF long add_sat(long x, long y) {
51-
long r;
52-
if (__builtin_saddl_overflow(x, y, &r))
53-
// The overflow can only occur if both are pos or both are neg,
54-
// thus we only need to check one operand
55-
return x > 0 ? LONG_MAX : LONG_MIN;
56-
return r;
57-
}
58-
59-
_CLC_OVERLOAD _CLC_DEF ulong add_sat(ulong x, ulong y) {
60-
ulong r;
61-
if (__builtin_uaddl_overflow(x, y, &r))
62-
return ULONG_MAX;
63-
return r;
64-
}
65-
66-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, add_sat, char, char)
67-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, add_sat, uchar, uchar)
68-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, add_sat, short, short)
69-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, add_sat, ushort, ushort)
70-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, add_sat, int, int)
71-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, add_sat, uint, uint)
72-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, add_sat, long, long)
73-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, add_sat, ulong, ulong)
7+
#include <clc/integer/gentype.inc>

libclc/generic/lib/integer/sub_sat.cl

Lines changed: 4 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,7 @@
11
#include <clc/clc.h>
2-
#include <clc/clcmacro.h>
2+
#include <clc/integer/clc_sub_sat.h>
33

4-
_CLC_OVERLOAD _CLC_DEF char sub_sat(char x, char y) {
5-
short r = x - y;
6-
return convert_char_sat(r);
7-
}
4+
#define FUNCTION sub_sat
5+
#define __CLC_BODY <clc/shared/binary_def.inc>
86

9-
_CLC_OVERLOAD _CLC_DEF uchar sub_sat(uchar x, uchar y) {
10-
short r = x - y;
11-
return convert_uchar_sat(r);
12-
}
13-
14-
_CLC_OVERLOAD _CLC_DEF short sub_sat(short x, short y) {
15-
int r = x - y;
16-
return convert_short_sat(r);
17-
}
18-
19-
_CLC_OVERLOAD _CLC_DEF ushort sub_sat(ushort x, ushort y) {
20-
int r = x - y;
21-
return convert_ushort_sat(r);
22-
}
23-
24-
_CLC_OVERLOAD _CLC_DEF int sub_sat(int x, int y) {
25-
int r;
26-
if (__builtin_ssub_overflow(x, y, &r))
27-
// The overflow can only occur in the direction of the first operand
28-
return x > 0 ? INT_MAX : INT_MIN;
29-
return r;
30-
}
31-
32-
_CLC_OVERLOAD _CLC_DEF uint sub_sat(uint x, uint y) {
33-
uint r;
34-
if (__builtin_usub_overflow(x, y, &r))
35-
return 0;
36-
return r;
37-
}
38-
39-
_CLC_OVERLOAD _CLC_DEF long sub_sat(long x, long y) {
40-
long r;
41-
if (__builtin_ssubl_overflow(x, y, &r))
42-
// The overflow can only occur in the direction of the first operand
43-
return x > 0 ? LONG_MAX : LONG_MIN;
44-
return r;
45-
}
46-
47-
_CLC_OVERLOAD _CLC_DEF ulong sub_sat(ulong x, ulong y) {
48-
ulong r;
49-
if (__builtin_usubl_overflow(x, y, &r))
50-
return 0;
51-
return r;
52-
}
53-
54-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, sub_sat, char, char)
55-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, sub_sat, uchar, uchar)
56-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, sub_sat, short, short)
57-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, sub_sat, ushort, ushort)
58-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, sub_sat, int, int)
59-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, sub_sat, uint, uint)
60-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, sub_sat, long, long)
61-
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, sub_sat, ulong, ulong)
7+
#include <clc/integer/gentype.inc>

libclc/generic/lib/math/clc_ldexp.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#include <clc/clc.h>
2424
#include <clc/clcmacro.h>
25+
#include <clc/integer/clc_add_sat.h>
2526
#include <clc/math/clc_subnormal_config.h>
2627
#include <clc/math/math.h>
2728
#include <clc/relational/clc_isinf.h>
@@ -37,7 +38,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
3738
int e = (i >> 23) & 0xff;
3839
int m = i & 0x007fffff;
3940
int s = i & 0x80000000;
40-
int v = add_sat(e, n);
41+
int v = __clc_add_sat(e, n);
4142
v = __clc_clamp(v, 0, 0xff);
4243
int mr = e == 0 | v == 0 | v == 0xff ? 0 : m;
4344
int c = e == 0xff;

0 commit comments

Comments
 (0)