-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[HLSL] Move length support out of the DirectX Backend #121611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-directx @llvm/pr-subscribers-clang Author: Farzon Lotfi (farzonl) ChangesPatch is 29.73 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121611.diff 14 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index b5b47ae2746011..ab6e92e35dda5d 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4852,12 +4852,6 @@ def HLSLIsinf : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}
-def HLSLLength : LangBuiltin<"HLSL_LANG"> {
- let Spellings = ["__builtin_hlsl_length"];
- let Attributes = [NoThrow, Const];
- let Prototype = "void(...)";
-}
-
def HLSLLerp : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_lerp"];
let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4d4b7428abd505..c4097f8df13de9 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19290,31 +19290,6 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(),
ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");
}
- case Builtin::BI__builtin_hlsl_length: {
- Value *X = EmitScalarExpr(E->getArg(0));
-
- assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
- "length operand must have a float representation");
- // if the operand is a scalar, we can use the fabs llvm intrinsic directly
- if (!E->getArg(0)->getType()->isVectorType())
- return EmitFAbs(*this, X);
-
- return Builder.CreateIntrinsic(
- /*ReturnType=*/X->getType()->getScalarType(),
- CGM.getHLSLRuntime().getLengthIntrinsic(), ArrayRef<Value *>{X},
- nullptr, "hlsl.length");
- }
- case Builtin::BI__builtin_hlsl_normalize: {
- Value *X = EmitScalarExpr(E->getArg(0));
-
- assert(E->getArg(0)->getType()->hasFloatingRepresentation() &&
- "normalize operand must have a float representation");
-
- return Builder.CreateIntrinsic(
- /*ReturnType=*/X->getType(),
- CGM.getHLSLRuntime().getNormalizeIntrinsic(), ArrayRef<Value *>{X},
- nullptr, "hlsl.normalize");
- }
case Builtin::BI__builtin_hlsl_elementwise_degrees: {
Value *X = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index edb87f9d5efdf9..7f0af941b198b1 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -77,7 +77,6 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(Cross, cross)
GENERATE_HLSL_INTRINSIC_FUNCTION(Degrees, degrees)
GENERATE_HLSL_INTRINSIC_FUNCTION(Frac, frac)
- GENERATE_HLSL_INTRINSIC_FUNCTION(Length, length)
GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 8d5fd941331531..ac4710e9f821bd 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -13,6 +13,14 @@ namespace hlsl {
namespace __detail {
+template <typename T, typename U> struct is_same {
+ static const bool value = false;
+};
+
+template <typename T> struct is_same<T, T> {
+ static const bool value = true;
+};
+
template <bool B, typename T> struct enable_if {};
template <typename T> struct enable_if<true, T> {
@@ -33,6 +41,23 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
return __builtin_bit_cast(U, F);
}
+template <typename T>
+constexpr enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+length_impl(T X) {
+ return __builtin_elementwise_abs(X);
+}
+
+template <typename T, int N>
+enable_if_t<is_same<float, T>::value || is_same<half, T>::value, T>
+length_vec_impl(vector<T, N> X) {
+ vector<T, N> XSquared = X * X;
+ T XSquaredSum = XSquared[0];
+ [unroll]
+ for (int i = 1; i < N; ++i)
+ XSquaredSum += XSquared[i];
+ return __builtin_elementwise_sqrt(XSquaredSum);
+}
+
} // namespace __detail
} // namespace hlsl
#endif //_HLSL_HLSL_DETAILS_H_
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index b745997f1d5a2b..f51cd28bb49236 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1297,27 +1297,18 @@ float4 lerp(float4, float4, float4);
///
/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...).
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half2);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half3);
-_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-half length(half4);
+const inline half length(half X) { return __detail::length_impl(X); }
+const inline float length(float X) { return __detail::length_impl(X); }
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
-float length(float4);
+template <int N>
+const inline half length(vector<half, N> X) {
+ return __detail::length_vec_impl(X);
+}
+
+template <int N>
+const inline float length(vector<float, N> X) {
+ return __detail::length_vec_impl(X);
+}
//===----------------------------------------------------------------------===//
// log builtins
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 600c800029fd05..64b6fe4cd5eb41 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2100,24 +2100,6 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
- case Builtin::BI__builtin_hlsl_length: {
- if (CheckFloatOrHalfRepresentations(&SemaRef, TheCall))
- return true;
- if (SemaRef.checkArgCount(TheCall, 1))
- return true;
-
- ExprResult A = TheCall->getArg(0);
- QualType ArgTyA = A.get()->getType();
- QualType RetTy;
-
- if (auto *VTy = ArgTyA->getAs<VectorType>())
- RetTy = VTy->getElementType();
- else
- RetTy = TheCall->getArg(0)->getType();
-
- TheCall->setType(RetTy);
- break;
- }
case Builtin::BI__builtin_hlsl_mad: {
if (SemaRef.checkArgCount(TheCall, 3))
return true;
diff --git a/clang/test/CodeGenHLSL/builtins/length.hlsl b/clang/test/CodeGenHLSL/builtins/length.hlsl
index 1c23b0df04df98..40ff1303d2ea47 100644
--- a/clang/test/CodeGenHLSL/builtins/length.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/length.hlsl
@@ -1,73 +1,151 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
-// RUN: --check-prefixes=CHECK,NATIVE_HALF
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: call half @llvm.fabs.f16(half
-// NO_HALF: call float @llvm.fabs.f32(float
-// NATIVE_HALF: ret half
-// NO_HALF: ret float
-half test_length_half(half p0)
-{
- return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half2(half2 p0)
-{
- return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half3(half3 p0)
-{
- return length(p0);
-}
-// NATIVE_HALF: define noundef half @
-// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16
-// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32(
-// NATIVE_HALF: ret half %hlsl.length
-// NO_HALF: ret float %hlsl.length
-half test_length_half4(half4 p0)
-{
- return length(p0);
-}
-
-// CHECK: define noundef float @
-// CHECK: call float @llvm.fabs.f32(float
-// CHECK: ret float
-float test_length_float(float p0)
-{
- return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32(
-// CHECK: ret float %hlsl.length
-float test_length_float2(float2 p0)
-{
- return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32(
-// CHECK: ret float %hlsl.length
-float test_length_float3(float3 p0)
-{
- return length(p0);
-}
-// CHECK: define noundef float @
-// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32(
-// CHECK: ret float %hlsl.length
-float test_length_float4(float4 p0)
-{
- return length(p0);
-}
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -O2 -o - | FileCheck %s
+
+// CHECK-LABEL: define noundef half @_Z16test_length_halfDh(
+// CHECK-SAME: half noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef half @llvm.fabs.f16(half [[P0]])
+// CHECK-NEXT: ret half [[ELT_ABS_I]]
+//
+half test_length_half(half p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half2Dv2_Dh(
+// CHECK-SAME: <2 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x half> [[P0]], [[P0]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 0
+// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x half> [[MUL_I]], i64 1
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd half [[VECEXT1_I]], [[VECEXT_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I]])
+// CHECK-NEXT: ret half [[TMP0]]
+//
+half test_length_half2(half2 p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half3Dv3_Dh(
+// CHECK-SAME: <3 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x half> [[P0]], [[P0]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 0
+// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x half> [[MUL_I]], i64 1
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd half [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x half> [[MUL_I]], i64 2
+// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd half [[ADD_I]], [[VECEXT1_I_1]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I_1]])
+// CHECK-NEXT: ret half [[TMP0]]
+//
+half test_length_half3(half3 p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef half @_Z17test_length_half4Dv4_Dh(
+// CHECK-SAME: <4 x half> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x half> [[P0]], [[P0]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 0
+// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x half> [[MUL_I]], i64 1
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd half [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x half> [[MUL_I]], i64 2
+// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd half [[ADD_I]], [[VECEXT1_I_1]]
+// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x half> [[MUL_I]], i64 3
+// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd half [[ADD_I_1]], [[VECEXT1_I_2]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef half @llvm.sqrt.f16(half [[ADD_I_2]])
+// CHECK-NEXT: ret half [[TMP0]]
+//
+half test_length_half4(half4 p0)
+{
+ return length(p0);
+}
+
+
+// CHECK-LABEL: define noundef float @_Z17test_length_floatf(
+// CHECK-SAME: float noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ELT_ABS_I:%.*]] = tail call noundef float @llvm.fabs.f32(float [[P0]])
+// CHECK-NEXT: ret float [[ELT_ABS_I]]
+//
+float test_length_float(float p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float2Dv2_f(
+// CHECK-SAME: <2 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[P0]], [[P0]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 0
+// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x float> [[MUL_I]], i64 1
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[VECEXT1_I]], [[VECEXT_I]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I]])
+// CHECK-NEXT: ret float [[TMP0]]
+//
+float test_length_float2(float2 p0)
+{
+ return length(p0);
+}
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float3Dv3_f(
+// CHECK-SAME: <3 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <3 x float> [[P0]], [[P0]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 0
+// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <3 x float> [[MUL_I]], i64 1
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <3 x float> [[MUL_I]], i64 2
+// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd float [[ADD_I]], [[VECEXT1_I_1]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I_1]])
+// CHECK-NEXT: ret float [[TMP0]]
+//
+float test_length_float3(float3 p0)
+{
+ return length(p0);
+}
+
+
+// CHECK-LABEL: define noundef float @_Z18test_length_float4Dv4_f(
+// CHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
+// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
+// CHECK-NEXT: [[ADD_I_1:%.*]] = fadd float [[ADD_I]], [[VECEXT1_I_1]]
+// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
+// CHECK-NEXT: [[ADD_I_2:%.*]] = fadd float [[ADD_I_1]], [[VECEXT1_I_2]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I_2]])
+// CHECK-NEXT: ret float [[TMP0]]
+//
+float test_length_float4(float4 p0)
+{
+ return length(p0);
+}
+
+
+// CHECK-LABEL: define noundef float @_Z26test_length_float4_extractDv4_f(
+// CHECK-SAME: <4 x float> noundef [[P0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[P0]], [[P0]]
+// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 0
+// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <4 x float> [[MUL_I]], i64 1
+// CHECK-NEXT: [[VECEXT1_I_1:%.*]] = extractelement <4 x float> [[MUL_I]], i64 2
+// CHECK-NEXT: [[VECEXT1_I_2:%.*]] = extractelement <4 x float> [[MUL_I]], i64 3
+// CHECK-NEXT: [[ADD_I12:%.*]] = fadd float [[VECEXT_I]], [[VECEXT1_I]]
+// CHECK-NEXT: [[ADD_I12_1:%.*]] = fadd float [[ADD_I12]], [[VECEXT1_I_1]]
+// CHECK-NEXT: [[ADD_I12_2:%.*]] = fadd float [[ADD_I12_1]], [[VECEXT1_I_2]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef float @llvm.sqrt.f32(float [[ADD_I12_2]])
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[TMP0]]
+// CHECK-NEXT: ret float [[ADD]]
+//
+float test_length_float4_extract(float4 p0)
+{
+ return length(p0) + length(p0);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
index 281faada6f5e94..a191f0419fbbaf 100644
--- a/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/length-errors.hlsl
@@ -1,32 +1,53 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
-
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
void test_too_few_arg()
{
- return __builtin_hlsl_length();
- // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+ return length();
+ // expected-error@-1 {{no matching function for call to 'length'}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but no arguments were provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but no arguments were provided}}
}
void test_too_many_arg(float2 p0)
{
- return __builtin_hlsl_length(p0, p0);
- // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+ return length(p0, p0);
+ // expected-error@-1 {{no matching function for call to 'length'}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'X', but 2 arguments were provided}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'X', but 2 arguments were provided}}
+}
+
+float double_to_float_type(double p0) {
+ return length(p0);
+ // expected-error@-1 {{call to 'length' is ambiguous}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
}
-bool builtin_bool_to_float_type_promotion(bool p1)
+
+float bool_to_float_type_promotion(bool p1)
{
- return __builtin_hlsl_length(p1);
- // expected-error@-1 {passing 'bool' to parameter of incompatible type 'float'}}
+ return length(p1);
+ // expected-error@-1 {{call to 'length' is ambiguous}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
}
-bool builtin_length_int_to_float_promotion(int p1)
+float length_int_to_float_promotion(int p1)
{
- return __builtin_hlsl_length(p1);
- // expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
+ return length(p1);
+ // expected-error@-1 {{call to 'length' is ambiguous}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
}
-bool2 builtin_length_int2_to_float2_promotion(int2 p1)
+float2 length_int2_to_float2_promotion(int2 p1)
{
- return __builtin_hlsl_length(p1);
- // expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
+ return length(p1);
+ // expected-error@-1 {{call to 'length' is ambiguous}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirec...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
clang/lib/Headers/hlsl/hlsl_detail.h
Outdated
vector<T, N> XSquared = X * X; | ||
T XSquaredSum = XSquared[0]; | ||
[unroll] | ||
for (int i = 1; i < N; ++i) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because of the loop this can't be a constexpr. Made this change because its unlikely we will be able to use the vec reduce add builtin.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is because HLSL is only C++11 and not C++14, right? We could always implement this with template recursion if we really want it to be constexpr, but in all likelihood waiting until we bring HLSL up to C++14 eventually before we make this constexpr is fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm fine worry about this later. Should we have a ticket? The way we have done HLSL language modes in clang/include/clang/Basic/LangStandards.def
makes it seem like 202x and older language modes will always be C++11? Is that the wrong way of thinking about it? Will there be a day we change all language modes to C++14?
// | ||
float test_length_float4_extract(float4 p0) | ||
{ | ||
return length(p0) + length(p0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding this case because made me realize that we need -O2
so that we don't have duplicate extracts and adds when calling length twice. The vec_reduce_add implementation used -O1
and didn't have this problem so I think this might be a side effect of not being able to do a constexpr.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In general I dislike tests that depend on optimizations, I'd prefer that Clang verify the unoptimized code gen and we rely on LLVM's testing to ensure that it doesn't make the program illegal during optimization. Having clang validate optimized IR opens the door to us getting the right answer for the wrong reasons.
74ad2ec
to
d0423ed
Compare
template <typename T, typename U> struct is_same { | ||
static const bool value = false; | ||
}; | ||
|
||
template <typename T> struct is_same<T, T> { | ||
static const bool value = true; | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These seem fine, but we should make sure we're being aware of how much of standard C++ we're reimplementing in this header as we do more of this stuff.
length_vec_impl(vector<T, N> X) { | ||
vector<T, N> XSquared = X * X; | ||
T XSquaredSum = XSquared[0]; | ||
[unroll] for (int i = 1; i < N; ++i) XSquaredSum += XSquared[i]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
clang-format doesn't seem to handle [unroll]
, but this should probably be one of:
[unroll]
for (int i = 1; i < N; ++i)
XSquaredSum += XSquared[i];
or
[unroll] for (int i = 1; i < N; ++i)
XSquaredSum += XSquared[i];
The latter is closer to what clang-format does with [[unroll]]
instead of [unroll]
to trick it into seeing that as an attribute.
(aside: I'm not sure if we have a bug for clang-format for this yet, we should check)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't see one can file one.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we require descriptions and not just titles.
length_vec_impl(vector<T, N> X) { | ||
vector<T, N> XSquared = X * X; | ||
T XSquaredSum = XSquared[0]; | ||
[unroll] for (int i = 1; i < N; ++i) XSquaredSum += XSquared[i]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is equivalent to a dot product. Might we consolidate optimizing the same operation by making us of that?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
an earlier version essentially did sqrt(dot(x,x));
I was told not to do that because the implementation in DXC should be considered the spec compliant implementation.
// RUN: --check-prefixes=CHECK,NATIVE_HALF | ||
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ | ||
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ | ||
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would you explain why you removed the NO_HALF variant?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I switched to have this file updated via update_cc_test_checks.py
instead of manually written to save time writing tests. Keeping the NO_HALF variant was resulting in a lot of check duplication with very little benefit. We have plenty of other tests that check that a float will become a half without the fnative-half-type
flag. I think we should probably update all the the tests to get ride of NO_HALF and just have one test that checks this flag is working as intended.
return length(p1); | ||
// expected-error@-1 {{call to 'length' is ambiguous}} | ||
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} | ||
// expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function}} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doubles, integers and bools are all allowed by DXC. Removing that behavior would be a regression and I don't know why we would want to. If it's not implemented yet, that's one thing, but testing for the failure suggests intent.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was no int/bool/double length
intrinsic in hlsl_intrinsics.h
and this test was already an error case. All that has changed here is instead of explicit sema checks for the builtin we are depending on HLSL overload resolution rules for function arguments. Thats partly because the builtin __builtin_hlsl_length
doesn't exist anymore.
Thanks for the description. I would find some justification useful even if it just repeats the reasoning we've established for all such changes. |
d0423ed
to
24ab6e9
Compare
@pow2clk added a justification. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't love the way you re-wrote clang/test/CodeGenHLSL/builtins/length.hlsl. I'm not a huge fan of the update_cc_test_checks
workflow, and (as I commented elsewhere) I really don't like tests that depend on optimizations.
I'm okay with removing the NO_HALF
side of the checks (I agree we had a lot of redundant test coverage there), and there's nothing unique in this change for half support.
24ab6e9
to
92ca8f4
Compare
92ca8f4
to
a2ba98f
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/15275 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/14363 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/12510 Here is the relevant piece of the build log for the reference
|
This reverts commit a6b7181. Breaks Clang :: CodeGenHLSL/builtins/length.hlsl, see #121611 (comment)
## Changes - Delete DirectX length intrinsic - Delete HLSL length lang builtin - Implement length algorithm entirely in the header. ## History - In the past if an HLSL intrinsic lowered to either a spirv op code or a DXIL opcode we represented it with intrinsics ## Why we are moving away? - To make HLSL apis more portable the team decided that it makes sense for some intrinsics to be defined only in the header. - Since there tends to be more SPIRV opcodes than DXIL opcodes the plan is to support SPIRV opcodes either with target specific builtins or via pattern matching.
@nico I don't see this failure locally and I am on a M3 mac it isn't clear how the ir could be different right now. Monitoring the reapply here: #122337. Will try building on a few other targets to try and sus out what is going wrong. Thanks for reverting this for me. |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/16596 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/15850 Here is the relevant piece of the build log for the reference
|
…end (#121611)" This reverts commit a6b7181. Breaks Clang :: CodeGenHLSL/builtins/length.hlsl, see llvm/llvm-project#121611 (comment)
… (#122337) ## Changes - Delete DirectX length intrinsic - Delete HLSL length lang builtin - Implement length algorithm entirely in the header. ## History - In the past if an HLSL intrinsic lowered to either a spirv op code or a DXIL opcode we represented it with intrinsics ## Why we are moving away? - To make HLSL apis more portable the team decided that it makes sense for some intrinsics to be defined only in the header. - Since there tends to be more SPIRV opcodes than DXIL opcodes the plan is to support SPIRV opcodes either with target specific builtins or via pattern matching.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/19253 Here is the relevant piece of the build log for the reference
|
…21611)" This reverts commit a6b7181. Breaks Clang :: CodeGenHLSL/builtins/length.hlsl, see llvm#121611 (comment)
…21611) (llvm#122337) ## Changes - Delete DirectX length intrinsic - Delete HLSL length lang builtin - Implement length algorithm entirely in the header. ## History - In the past if an HLSL intrinsic lowered to either a spirv op code or a DXIL opcode we represented it with intrinsics ## Why we are moving away? - To make HLSL apis more portable the team decided that it makes sense for some intrinsics to be defined only in the header. - Since there tends to be more SPIRV opcodes than DXIL opcodes the plan is to support SPIRV opcodes either with target specific builtins or via pattern matching.
Changes
History
Why we are moving away?