Skip to content

Commit 2c41a8e

Browse files
authored
[HLSL] Fix bug in new clamp overloads (llvm#131928)
In some cases involving floats, clang would behave differently than DXC when the newly introduced clamp overloads were used. To ensure the same behavior as DXC, require that for mixed scalar/vector overloads the type of the scalar matches the element type of the vector.
1 parent cb493d2 commit 2c41a8e

File tree

4 files changed

+16
-53
lines changed

4 files changed

+16
-53
lines changed

clang/lib/Headers/hlsl/hlsl_compat_overloads.h

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -20,39 +20,23 @@ namespace hlsl {
2020
// clamp builtins overloads
2121
//===----------------------------------------------------------------------===//
2222

23-
template <typename T, typename R, typename U, uint N>
24-
constexpr __detail::enable_if_t<
25-
__detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>>
26-
clamp(vector<T, N> p0, vector<R, N> p1, U p2) {
27-
return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2);
28-
}
29-
template <typename T, typename R, typename U, uint N>
30-
constexpr __detail::enable_if_t<
31-
__detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>>
32-
clamp(vector<T, N> p0, U p1, vector<R, N> p2) {
33-
return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2);
23+
template <typename T, uint N>
24+
constexpr __detail::enable_if_t<(N > 1 && N <= 4), vector<T, N>>
25+
clamp(vector<T, N> p0, vector<T, N> p1, T p2) {
26+
return clamp(p0, p1, (vector<T, N>)p2);
3427
}
35-
template <typename T, typename U, typename V, uint N>
36-
constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value &&
37-
__detail::is_arithmetic<V>::Value &&
38-
(N > 1 && N <= 4),
39-
vector<T, N>>
40-
clamp(vector<T, N> p0, U p1, V p2) {
41-
return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2);
28+
29+
template <typename T, uint N>
30+
constexpr __detail::enable_if_t<(N > 1 && N <= 4), vector<T, N>>
31+
clamp(vector<T, N> p0, T p1, vector<T, N> p2) {
32+
return clamp(p0, (vector<T, N>)p1, p2);
4233
}
43-
template <typename T, typename R, typename S, uint N>
34+
35+
template <typename T, uint N>
4436
constexpr __detail::enable_if_t<(N > 1 && N <= 4), vector<T, N>>
45-
clamp(vector<T, N> p0, vector<R, N> p1, vector<S, N> p2) {
37+
clamp(vector<T, N> p0, T p1, T p2) {
4638
return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2);
4739
}
48-
template <typename U, typename V, typename W>
49-
constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value &&
50-
__detail::is_arithmetic<V>::Value &&
51-
__detail::is_arithmetic<W>::Value,
52-
U>
53-
clamp(U p0, V p1, W p2) {
54-
return clamp(p0, (U)p1, (U)p2);
55-
}
5640

5741
} // namespace hlsl
5842
#endif // _HLSL_COMPAT_OVERLOADS_H_

clang/test/CodeGenHLSL/builtins/clamp.hlsl

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -174,22 +174,6 @@ double4 test_clamp_double4_mismatch(double4 p0, double p1) { return clamp(p0, p0
174174
// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64
175175
double4 test_clamp_double4_mismatch2(double4 p0, double p1) { return clamp(p0, p1,p0); }
176176

177-
// CHECK: define [[FNATTRS]] <2 x i32> {{.*}}_overloads1
178-
// CHECK: call <2 x i32> @llvm.[[TARGET]].sclamp.v2i32
179-
int2 test_overloads1(int2 p0, float2 p1, uint p2) { return clamp(p0, p1, p2); }
180-
181-
// CHECK: define [[FNATTRS]] [[FFNATTRS]] <2 x float> {{.*}}test_overloads2
182-
// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].nclamp.v2f32
183-
float2 test_overloads2(float2 p0, uint p1, int2 p2) { return clamp(p0, p1, p2); }
184-
185177
// CHECK: define [[FNATTRS]] <3 x i32> {{.*}}test_overloads3
186178
// CHECK: call <3 x i32> @llvm.[[TARGET]].uclamp.v3i32
187-
uint3 test_overloads3(uint3 p0, int p1, float p2) { return clamp(p0, p1, p2); }
188-
189-
// CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x double> {{.*}}test_overloads4
190-
// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64
191-
double4 test_overloads4(double4 p0, float4 p1, int4 p2) { return clamp(p0, p1, p2); }
192-
193-
// CHECK: define [[FNATTRS]] i32 {{.*}}test_overloads5
194-
// CHECK: call i32 @llvm.[[TARGET]].sclamp.i32(
195-
int test_overloads5(int p0, uint p1, double p2) { return clamp(p0, p1, p2); }
179+
uint3 test_overloads3(uint3 p0, uint p1, uint p2) { return clamp(p0, p1, p2); }
Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
// RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1 | FileCheck %s -DTEST_TYPE=half
2-
// RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1 | FileCheck %s -DTEST_TYPE=half3
32
// RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1 | FileCheck %s -DTEST_TYPE=int16_t
4-
// RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1 | FileCheck %s -DTEST_TYPE=int16_t3
53
// RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1 | FileCheck %s -DTEST_TYPE=uint16_t
6-
// RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1 | FileCheck %s -DTEST_TYPE=uint16_t3
74

85
// check we error on 16 bit type if shader model is too old
96
// CHECK: '-enable-16bit-types' option requires target HLSL Version >= 2018 and shader model >= 6.2, but HLSL Version is 'hlsl202x' and shader model is '6.0'
10-
TEST_TYPE test_half_error(TEST_TYPE p0, int p1) {
7+
vector<TEST_TYPE,3> test_half_error(vector<TEST_TYPE,3> p0, TEST_TYPE p1) {
118
return clamp(p0, p1, p1);
129
}

clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,14 @@ float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) {
5959
// expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}}
6060
}
6161

62-
// allowed by the overloads in hlsl_compat_overloads.h
63-
// support for this overload might be removed in a future version of hlsl
6462
float test_clamp_scalar_mismatch(float p0, half p1) {
6563
return clamp(p1, p0, p1);
64+
// expected-error@-1 {{call to 'clamp' is ambiguous}}
6665
}
6766

68-
// allowed by the overloads in hlsl_compat_overloads.h
69-
// support for this overload might be removed in a future version of hlsl
7067
float2 test_clamp_element_type_mismatch(half2 p0, float2 p1) {
7168
return clamp(p1, p0, p1);
69+
// expected-error@-1 {{call to 'clamp' is ambiguous}}
7270
}
7371

7472
float2 test_builtin_clamp_float2_splat(float p0, float2 p1) {

0 commit comments

Comments
 (0)