-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Add length builtins and length HLSL function to DirectX Backend #101256
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
7027cf2
fc20777
08110b7
2fa4ffd
fa06012
46dec9d
0af5ca8
8857963
9ac2968
2f7f10d
b620bab
3eab70e
d04a061
dc3a89d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -908,6 +908,38 @@ float3 lerp(float3, float3, float3); | |
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_lerp) | ||
float4 lerp(float4, float4, float4); | ||
|
||
//===----------------------------------------------------------------------===// | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The PR title looks like it may be misnamed: since this is also adding the HLSL builtin functions, it is doing more than just adding it to the backend. |
||
// length builtins | ||
//===----------------------------------------------------------------------===// | ||
|
||
/// \fn T length(T x) | ||
/// \brief Returns the length of the specified floating-point vector. | ||
/// \param x [in] The vector of floats, or a scalar float. | ||
/// | ||
/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + …). | ||
|
||
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) | ||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) | ||
half length(half); | ||
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) | ||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) | ||
half length(half2); | ||
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) | ||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) | ||
half length(half3); | ||
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2) | ||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) | ||
half length(half4); | ||
|
||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) | ||
float length(float); | ||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) | ||
float length(float2); | ||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) | ||
float length(float3); | ||
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_length) | ||
float length(float4); | ||
|
||
//===----------------------------------------------------------------------===// | ||
// log builtins | ||
//===----------------------------------------------------------------------===// | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ | ||
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ | ||
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ | ||
// RUN: --check-prefixes=CHECK,NATIVE_HALF | ||
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ | ||
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ | ||
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF | ||
|
||
// NATIVE_HALF: define noundef half @ | ||
// NATIVE_HALF: call half @llvm.fabs.f16(half | ||
// NO_HALF: call float @llvm.fabs.f32(float | ||
// NATIVE_HALF: ret half | ||
// NO_HALF: ret float | ||
half test_length_half(half p0) | ||
{ | ||
return length(p0); | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
// NATIVE_HALF: define noundef half @ | ||
// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v2f16 | ||
// NO_HALF: %hlsl.length = call float @llvm.dx.length.v2f32( | ||
// NATIVE_HALF: ret half %hlsl.length | ||
// NO_HALF: ret float %hlsl.length | ||
half test_length_half2(half2 p0) | ||
{ | ||
return length(p0); | ||
} | ||
// NATIVE_HALF: define noundef half @ | ||
// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v3f16 | ||
// NO_HALF: %hlsl.length = call float @llvm.dx.length.v3f32( | ||
// NATIVE_HALF: ret half %hlsl.length | ||
// NO_HALF: ret float %hlsl.length | ||
half test_length_half3(half3 p0) | ||
{ | ||
return length(p0); | ||
} | ||
// NATIVE_HALF: define noundef half @ | ||
// NATIVE_HALF: %hlsl.length = call half @llvm.dx.length.v4f16 | ||
// NO_HALF: %hlsl.length = call float @llvm.dx.length.v4f32( | ||
// NATIVE_HALF: ret half %hlsl.length | ||
// NO_HALF: ret float %hlsl.length | ||
half test_length_half4(half4 p0) | ||
{ | ||
return length(p0); | ||
} | ||
|
||
// CHECK: define noundef float @ | ||
// CHECK: call float @llvm.fabs.f32(float | ||
// CHECK: ret float | ||
float test_length_float(float p0) | ||
{ | ||
return length(p0); | ||
} | ||
// CHECK: define noundef float @ | ||
// CHECK: %hlsl.length = call float @llvm.dx.length.v2f32( | ||
// CHECK: ret float %hlsl.length | ||
float test_length_float2(float2 p0) | ||
{ | ||
return length(p0); | ||
} | ||
// CHECK: define noundef float @ | ||
// CHECK: %hlsl.length = call float @llvm.dx.length.v3f32( | ||
// CHECK: ret float %hlsl.length | ||
float test_length_float3(float3 p0) | ||
{ | ||
return length(p0); | ||
} | ||
// CHECK: define noundef float @ | ||
// CHECK: %hlsl.length = call float @llvm.dx.length.v4f32( | ||
// CHECK: ret float %hlsl.length | ||
float test_length_float4(float4 p0) | ||
{ | ||
return length(p0); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected | ||
|
||
void test_too_few_arg() | ||
{ | ||
return __builtin_hlsl_elementwise_length(); | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}} | ||
} | ||
|
||
void test_too_many_arg(float2 p0) | ||
{ | ||
return __builtin_hlsl_elementwise_length(p0, p0); | ||
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}} | ||
} | ||
|
||
bool builtin_bool_to_float_type_promotion(bool p1) | ||
{ | ||
return __builtin_hlsl_elementwise_length(p1); | ||
// expected-error@-1 {{passing 'bool' to parameter of incompatible type 'float'}} | ||
} | ||
|
||
bool builtin_length_int_to_float_promotion(int p1) | ||
{ | ||
return __builtin_hlsl_elementwise_length(p1); | ||
// expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}} | ||
} | ||
|
||
bool2 builtin_length_int2_to_float2_promotion(int2 p1) | ||
{ | ||
return __builtin_hlsl_elementwise_length(p1); | ||
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,5 +63,6 @@ let TargetPrefix = "spv" in { | |
def int_spv_frac : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; | ||
def int_spv_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], | ||
[IntrNoMem, IntrWillReturn] >; | ||
def int_spv_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty]>; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought this change didn't include the SPIRV parts of this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. He has to define this because of |
||
def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,7 @@ static bool isIntrinsicExpansion(Function &F) { | |
case Intrinsic::dx_clamp: | ||
case Intrinsic::dx_uclamp: | ||
case Intrinsic::dx_lerp: | ||
case Intrinsic::dx_length: | ||
case Intrinsic::dx_sdot: | ||
case Intrinsic::dx_udot: | ||
return true; | ||
|
@@ -157,6 +158,35 @@ static bool expandAnyIntrinsic(CallInst *Orig) { | |
return true; | ||
} | ||
|
||
/// Expand a call to the dx.length intrinsic into scalar IR that computes
/// sqrt(x[0] * x[0] + x[1] * x[1] + ...).
///
/// \param Orig the dx.length call. All of its uses are replaced with the
///        computed value and the call is erased from its parent.
/// \returns true; the call is always expanded.
static bool expandLengthIntrinsic(CallInst *Orig) {
  Value *X = Orig->getOperand(0);
  // Emit the expansion immediately before the call being replaced.
  IRBuilder<> Builder(Orig);
  Type *Ty = X->getType();
  Type *EltTy = Ty->getScalarType();

  // Though dx.length does work on scalar type, we can optimize it to just emit
  // fabs, in CGBuiltin.cpp. We shouldn't see a scalar type here because
  // CGBuiltin.cpp should have emitted a fabs call.
  //
  // Validate the operand type before touching it: the dyn_cast yields null for
  // a non-vector operand, so the assert must come before any use of XVec and
  // before any extractelement is emitted.
  auto *XVec = dyn_cast<FixedVectorType>(Ty);
  assert(XVec && XVec->getNumElements() > 1 &&
         "dx.length requires a vector of length 2 or more");
  unsigned Size = XVec->getNumElements();

  // Accumulate the sum of squares one element at a time. The explicit
  // uint64_t cast on the literal 0 selects the index overload rather than
  // the Value* overload.
  Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0);
  Value *Sum = Builder.CreateFMul(Elt, Elt);
  for (unsigned I = 1; I < Size; ++I) {
    Elt = Builder.CreateExtractElement(X, I);
    Value *Mul = Builder.CreateFMul(Elt, Elt);
    Sum = Builder.CreateFAdd(Sum, Mul);
  }
  Value *Result = Builder.CreateIntrinsic(
      EltTy, Intrinsic::sqrt, ArrayRef<Value *>{Sum}, nullptr, "elt.sqrt");

  Orig->replaceAllUsesWith(Result);
  Orig->eraseFromParent();
  return true;
}
|
||
static bool expandLerpIntrinsic(CallInst *Orig) { | ||
Value *X = Orig->getOperand(0); | ||
Value *Y = Orig->getOperand(1); | ||
|
@@ -280,6 +310,8 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { | |
return expandClampIntrinsic(Orig, F.getIntrinsicID()); | ||
case Intrinsic::dx_lerp: | ||
return expandLerpIntrinsic(Orig); | ||
case Intrinsic::dx_length: | ||
return expandLengthIntrinsic(Orig); | ||
case Intrinsic::dx_sdot: | ||
case Intrinsic::dx_udot: | ||
return expandIntegerDot(Orig, F.getIntrinsicID()); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s --check-prefixes=CHECK,EXPCHECK | ||
; RUN: opt -S -dxil-op-lower < %s | FileCheck %s --check-prefixes=CHECK,DOPCHECK | ||
|
||
; ModuleID = 'D:\llvm-project\clang\test\CodeGenHLSL\builtins\length.hlsl' | ||
source_filename = "D:\\llvm-project\\clang\\test\\CodeGenHLSL\\builtins\\length.hlsl" | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" | ||
target triple = "dxilv1.3-pc-shadermodel6.3-library" | ||
|
||
; Function Attrs: convergent noinline nounwind optnone | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
define noundef float @"?test_length_half@@YA$halff@$halff@@Z"(float noundef %p0) #0 { | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
entry: | ||
%p0.addr = alloca float, align 4 | ||
store float %p0, ptr %p0.addr, align 4 | ||
%0 = load float, ptr %p0.addr, align 4 | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
; EXPCHECK: call float @llvm.fabs.f32(float %{{.*}}) | ||
; DOPCHECK: call float @dx.op.unary.f32(i32 6, float %{{.*}}) | ||
%1 = call float @llvm.fabs.f32(float %0) #3 | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
ret float %1 | ||
} | ||
|
||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) | ||
declare float @llvm.fabs.f32(float) #1 | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
; Function Attrs: convergent noinline nounwind optnone | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
define noundef float @"?test_length_half2@@YA$halff@T?$__vector@$halff@$01@__clang@@@Z"(<2 x float> noundef %p0) #0 { | ||
entry: | ||
%p0.addr = alloca <2 x float>, align 8 | ||
store <2 x float> %p0, ptr %p0.addr, align 8 | ||
%0 = load <2 x float>, ptr %p0.addr, align 8 | ||
|
||
; CHECK: extractelement <2 x float> %{{.*}}, i64 0 | ||
; CHECK: fmul float %{{.*}}, %{{.*}} | ||
; CHECK: extractelement <2 x float> %{{.*}}, i64 1 | ||
; CHECK: fmul float %{{.*}}, %{{.*}} | ||
; CHECK: fadd float %{{.*}}, %{{.*}} | ||
; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) | ||
; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) | ||
|
||
%hlsl.length = call float @llvm.dx.length.v2f32(<2 x float> %0) | ||
ret float %hlsl.length | ||
} | ||
|
||
; Function Attrs: nocallback nofree nosync nounwind willreturn | ||
declare float @llvm.dx.length.v2f32(<2 x float>) #2 | ||
|
||
; Function Attrs: convergent noinline nounwind optnone | ||
define noundef float @"?test_length_half3@@YA$halff@T?$__vector@$halff@$02@__clang@@@Z"(<3 x float> noundef %p0) #0 { | ||
entry: | ||
%p0.addr = alloca <3 x float>, align 16 | ||
store <3 x float> %p0, ptr %p0.addr, align 16 | ||
%0 = load <3 x float>, ptr %p0.addr, align 16 | ||
|
||
; CHECK: extractelement <3 x float> %{{.*}}, i64 0 | ||
; CHECK: fmul float %{{.*}}, %{{.*}} | ||
; CHECK: extractelement <3 x float> %{{.*}}, i64 1 | ||
; CHECK: fmul float %{{.*}}, %{{.*}} | ||
; CHECK: fadd float %{{.*}}, %{{.*}} | ||
; CHECK: extractelement <3 x float> %{{.*}}, i64 2 | ||
; CHECK: fmul float %{{.*}}, %{{.*}} | ||
; CHECK: fadd float %{{.*}}, %{{.*}} | ||
; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) | ||
; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) | ||
|
||
%hlsl.length = call float @llvm.dx.length.v3f32(<3 x float> %0) | ||
ret float %hlsl.length | ||
} | ||
|
||
; Function Attrs: nocallback nofree nosync nounwind willreturn | ||
declare float @llvm.dx.length.v3f32(<3 x float>) #2 | ||
|
||
; Function Attrs: convergent noinline nounwind optnone | ||
define noundef float @"?test_length_half4@@YA$halff@T?$__vector@$halff@$03@__clang@@@Z"(<4 x float> noundef %p0) #0 { | ||
entry: | ||
%p0.addr = alloca <4 x float>, align 16 | ||
store <4 x float> %p0, ptr %p0.addr, align 16 | ||
%0 = load <4 x float>, ptr %p0.addr, align 16 | ||
|
||
; CHECK: extractelement <4 x float> %{{.*}}, i64 0 | ||
; CHECK: fmul float %{{.*}}, %{{.*}} | ||
; CHECK: extractelement <4 x float> %{{.*}}, i64 1 | ||
; CHECK: fmul float %{{.*}}, %{{.*}} | ||
; CHECK: fadd float %{{.*}}, %{{.*}} | ||
; CHECK: extractelement <4 x float> %{{.*}}, i64 2 | ||
; CHECK: fmul float %{{.*}}, %{{.*}} | ||
; CHECK: fadd float %{{.*}}, %{{.*}} | ||
; CHECK: extractelement <4 x float> %{{.*}}, i64 3 | ||
; CHECK: fmul float %{{.*}}, %{{.*}} | ||
; CHECK: fadd float %{{.*}}, %{{.*}} | ||
; EXPCHECK: call float @llvm.sqrt.f32(float %{{.*}}) | ||
; DOPCHECK: call float @dx.op.unary.f32(i32 24, float %{{.*}}) | ||
|
||
%hlsl.length = call float @llvm.dx.length.v4f32(<4 x float> %0) | ||
ret float %hlsl.length | ||
} | ||
|
||
attributes #0 = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } | ||
attributes #2 = { nocallback nofree nosync nounwind willreturn } | ||
attributes #3 = { memory(none) } | ||
|
||
!llvm.module.flags = !{!0, !1} | ||
!llvm.ident = !{!2} | ||
|
||
!0 = !{i32 1, !"wchar_size", i32 4} | ||
!1 = !{i32 4, !"dx.disable_optimizations", i32 1} | ||
!2 = !{!"clang version 20.0.0git ([email protected]:bob80905/llvm-project.git 2fa4ffdc63e699e2b0e3c44e5dfb95284dbc5f6b)"} | ||
farzonl marked this conversation as resolved.
Show resolved
Hide resolved
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should be "__builtin_hlsl_length", not "__builtin_hlsl_elementwise_length". The word "elementwise" means we do an operation to each element, whereas this is an operation on the whole vector.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with Justin. This shouldn't be an elementwise builtin; see `lerp`, `dot`, and `mad`. For operations that work on vectors and generate scalar results, we don't add the `elementwise` naming convention. Reserve `elementwise` for cases where the function takes multiple inputs, applies the same operation to each input, and then returns the same number of outputs.