[HLSL] Fix bug in new clamp overloads (llvm#131928)

spall · web-flow · commit 2c41a8e6d398 · 2025-03-19T08:47:56.000-07:00
In some cases using the newly introduced clamp overloads, when floats
were involved, clang would behave differently than DXC.
To ensure the same behavior as DXC, require that for mix scalar/vector
overloads the type of the scalar matches the type of the vector.
diff --git a/clang/lib/Headers/hlsl/hlsl_compat_overloads.h b/clang/lib/Headers/hlsl/hlsl_compat_overloads.h
@@ -20,39 +20,23 @@ namespace hlsl {
 // clamp builtins overloads
 //===----------------------------------------------------------------------===//
 
-template <typename T, typename R, typename U, uint N>
-constexpr __detail::enable_if_t<
-    __detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>>
-clamp(vector<T, N> p0, vector<R, N> p1, U p2) {
-  return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2);
-}
-template <typename T, typename R, typename U, uint N>
-constexpr __detail::enable_if_t<
-    __detail::is_arithmetic<U>::Value && (N > 1 && N <= 4), vector<T, N>>
-clamp(vector<T, N> p0, U p1, vector<R, N> p2) {
-  return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2);
+template <typename T, uint N>
+constexpr __detail::enable_if_t<(N > 1 && N <= 4), vector<T, N>>
+clamp(vector<T, N> p0, vector<T, N> p1, T p2) {
+  return clamp(p0, p1, (vector<T, N>)p2);
 }
-template <typename T, typename U, typename V, uint N>
-constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value &&
-                                    __detail::is_arithmetic<V>::Value &&
-                                    (N > 1 && N <= 4),
-                                vector<T, N>>
-clamp(vector<T, N> p0, U p1, V p2) {
-  return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2);
+
+template <typename T, uint N>
+constexpr __detail::enable_if_t<(N > 1 && N <= 4), vector<T, N>>
+clamp(vector<T, N> p0, T p1, vector<T, N> p2) {
+  return clamp(p0, (vector<T, N>)p1, p2);
 }
-template <typename T, typename R, typename S, uint N>
+
+template <typename T, uint N>
 constexpr __detail::enable_if_t<(N > 1 && N <= 4), vector<T, N>>
-clamp(vector<T, N> p0, vector<R, N> p1, vector<S, N> p2) {
+clamp(vector<T, N> p0, T p1, T p2) {
   return clamp(p0, (vector<T, N>)p1, (vector<T, N>)p2);
 }
-template <typename U, typename V, typename W>
-constexpr __detail::enable_if_t<__detail::is_arithmetic<U>::Value &&
-                                    __detail::is_arithmetic<V>::Value &&
-                                    __detail::is_arithmetic<W>::Value,
-                                U>
-clamp(U p0, V p1, W p2) {
-  return clamp(p0, (U)p1, (U)p2);
-}
 
 } // namespace hlsl
 #endif // _HLSL_COMPAT_OVERLOADS_H_
diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
@@ -174,22 +174,6 @@ double4 test_clamp_double4_mismatch(double4 p0, double p1) { return clamp(p0, p0
 // CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64
 double4 test_clamp_double4_mismatch2(double4 p0, double p1) { return clamp(p0, p1,p0); }
 
-// CHECK: define [[FNATTRS]] <2 x i32> {{.*}}_overloads1
-// CHECK: call <2 x i32> @llvm.[[TARGET]].sclamp.v2i32
-int2 test_overloads1(int2 p0, float2 p1, uint p2) { return clamp(p0, p1, p2); }
-
-// CHECK: define [[FNATTRS]] [[FFNATTRS]] <2 x float> {{.*}}test_overloads2
-// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.[[TARGET]].nclamp.v2f32
-float2 test_overloads2(float2 p0, uint p1, int2 p2) { return clamp(p0, p1, p2); }
-
 // CHECK: define [[FNATTRS]] <3 x i32> {{.*}}test_overloads3
 // CHECK: call <3 x i32> @llvm.[[TARGET]].uclamp.v3i32
-uint3 test_overloads3(uint3 p0, int p1, float p2) { return clamp(p0, p1, p2); }
-
-// CHECK: define [[FNATTRS]] [[FFNATTRS]] <4 x double> {{.*}}test_overloads4
-// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.[[TARGET]].nclamp.v4f64
-double4 test_overloads4(double4 p0, float4 p1, int4 p2) { return clamp(p0, p1, p2); }
-
-// CHECK: define [[FNATTRS]] i32 {{.*}}test_overloads5
-// CHECK: call i32 @llvm.[[TARGET]].sclamp.i32(
-int test_overloads5(int p0, uint p1, double p2) { return clamp(p0, p1, p2); }
+uint3 test_overloads3(uint3 p0, uint p1, uint p2) { return clamp(p0, p1, p2); }
diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors-16bit.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors-16bit.hlsl
@@ -1,12 +1,9 @@
 // RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1  | FileCheck %s -DTEST_TYPE=half
-// RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1  | FileCheck %s -DTEST_TYPE=half3
 // RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1  | FileCheck %s -DTEST_TYPE=int16_t
-// RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1  | FileCheck %s -DTEST_TYPE=int16_t3
 // RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1  | FileCheck %s -DTEST_TYPE=uint16_t
-// RUN: not %clang_dxc -enable-16bit-types -T cs_6_0 -HV 202x %s 2>&1  | FileCheck %s -DTEST_TYPE=uint16_t3
 
 // check we error on 16 bit type if shader model is too old
 // CHECK: '-enable-16bit-types' option requires target HLSL Version >= 2018 and shader model >= 6.2, but HLSL Version is 'hlsl202x' and shader model is '6.0'
-TEST_TYPE test_half_error(TEST_TYPE p0, int p1) {
+vector<TEST_TYPE,3> test_half_error(vector<TEST_TYPE,3> p0, TEST_TYPE p1) {
   return clamp(p0, p1, p1);
 }
diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl
@@ -59,16 +59,14 @@ float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) {
   // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}}
 }
 
-// allowed by the overloads in hlsl_compat_overloads.h
-// support for this overload might be removed in a future version of hlsl
 float test_clamp_scalar_mismatch(float p0, half p1) {
   return clamp(p1, p0, p1);
+  // expected-error@-1 {{call to 'clamp' is ambiguous}}
 }
 
-// allowed by the overloads in hlsl_compat_overloads.h
-// support for this overload might be removed in a future version of hlsl
 float2 test_clamp_element_type_mismatch(half2 p0, float2 p1) {
   return clamp(p1, p0, p1);
+  // expected-error@-1 {{call to 'clamp' is ambiguous}}
 }
 
 float2 test_builtin_clamp_float2_splat(float p0, float2 p1) {

Original file line number	Diff line number	Diff line change
`@@ -59,16 +59,14 @@ float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) {`
`59`	`59`	`// expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}}`
`60`	`60`	`}`
`61`	`61`
`62`		`-// allowed by the overloads in hlsl_compat_overloads.h`
`63`		`-// support for this overload might be removed in a future version of hlsl`
`64`	`62`	`float test_clamp_scalar_mismatch(float p0, half p1) {`
`65`	`63`	`return clamp(p1, p0, p1);`
	`64`	`+ // expected-error@-1 {{call to 'clamp' is ambiguous}}`
`66`	`65`	`}`
`67`	`66`
`68`		`-// allowed by the overloads in hlsl_compat_overloads.h`
`69`		`-// support for this overload might be removed in a future version of hlsl`
`70`	`67`	`float2 test_clamp_element_type_mismatch(half2 p0, float2 p1) {`
`71`	`68`	`return clamp(p1, p0, p1);`
	`69`	`+ // expected-error@-1 {{call to 'clamp' is ambiguous}}`
`72`	`70`	`}`
`73`	`71`
`74`	`72`	`float2 test_builtin_clamp_float2_splat(float p0, float2 p1) {`