Skip to content

[HLSL] implement the rcp intrinsic #83857

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4584,6 +4584,12 @@ def HLSLMad : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}

// HLSL reciprocal builtin, spelled __builtin_hlsl_elementwise_rcp and only
// available under HLSL_LANG. The prototype is the variadic "void(...)", so the
// declaration itself does not constrain the argument count or types.
// NOTE(review): argument validation presumably lives in Sema's HLSL builtin
// checking — confirm against SemaChecking.cpp.
def HLSLRcp : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_rcp"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}

// Builtins for XRay.
def XRayCustomEvent : Builtin {
let Spellings = ["__xray_customevent"];
Expand Down
18 changes: 13 additions & 5 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17966,7 +17966,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
case Builtin::BI__builtin_hlsl_elementwise_any: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
return Builder.CreateIntrinsic(
/*ReturnType*/ llvm::Type::getInt1Ty(getLLVMContext()),
/*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
Intrinsic::dx_any, ArrayRef<Value *>{Op0}, nullptr, "dx.any");
}
case Builtin::BI__builtin_hlsl_dot: {
Expand Down Expand Up @@ -18002,7 +18002,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
"Dot product requires vectors to be of the same size.");

return Builder.CreateIntrinsic(
/*ReturnType*/ T0->getScalarType(), Intrinsic::dx_dot,
/*ReturnType=*/T0->getScalarType(), Intrinsic::dx_dot,
ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");
} break;
case Builtin::BI__builtin_hlsl_lerp: {
Expand Down Expand Up @@ -18039,15 +18039,15 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
XVecTy->getElementType() == SVecTy->getElementType() &&
"Lerp requires float vectors to be of the same type.");
return Builder.CreateIntrinsic(
/*ReturnType*/ Xty, Intrinsic::dx_lerp, ArrayRef<Value *>{X, Y, S},
/*ReturnType=*/Xty, Intrinsic::dx_lerp, ArrayRef<Value *>{X, Y, S},
nullptr, "dx.lerp");
}
case Builtin::BI__builtin_hlsl_elementwise_frac: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
llvm_unreachable("frac operand must have a float representation");
return Builder.CreateIntrinsic(
/*ReturnType*/ Op0->getType(), Intrinsic::dx_frac,
/*ReturnType=*/Op0->getType(), Intrinsic::dx_frac,
ArrayRef<Value *>{Op0}, nullptr, "dx.frac");
}
case Builtin::BI__builtin_hlsl_mad: {
Expand All @@ -18066,9 +18066,17 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
}
assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());
return Builder.CreateIntrinsic(
/*ReturnType*/ M->getType(), Intrinsic::dx_umad,
/*ReturnType=*/M->getType(), Intrinsic::dx_umad,
ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");
}
// rcp: lower the single operand to a call of the dx_rcp intrinsic, named
// "dx.rcp" in the emitted IR.
case Builtin::BI__builtin_hlsl_elementwise_rcp: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
// A non-floating operand is treated as unreachable here rather than
// diagnosed; presumably Sema has already rejected such calls — TODO confirm.
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
llvm_unreachable("rcp operand must have a float representation");
// The intrinsic's return type is taken from the emitted operand, so the
// result always has the same LLVM type as the input.
return Builder.CreateIntrinsic(
/*ReturnType=*/Op0->getType(), Intrinsic::dx_rcp,
ArrayRef<Value *>{Op0}, nullptr, "dx.rcp");
}
}
return nullptr;
}
Expand Down
41 changes: 41 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,47 @@ uint64_t3 reversebits(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_bitreverse)
uint64_t4 reversebits(uint64_t4);

//===----------------------------------------------------------------------===//
// rcp builtins
//===----------------------------------------------------------------------===//

/// \fn T rcp(T x)
/// \brief Calculates a fast, approximate, per-component reciprocal, i.e.
/// 1 / \a x.
/// \param x The specified input value.
///
/// The return value is the reciprocal of the \a x parameter.

// half overloads: each one carries the _HLSL_16BIT_AVAILABILITY(shadermodel,
// 6.2) annotation in addition to the builtin alias.
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
half rcp(half);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
half2 rcp(half2);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
half3 rcp(half3);
_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
half4 rcp(half4);

// float overloads: aliased to the builtin with no availability annotation.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
float rcp(float);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
float2 rcp(float2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
float3 rcp(float3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
float4 rcp(float4);

// double overloads: aliased to the same builtin as the float overloads.
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
double rcp(double);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
double2 rcp(double2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
double3 rcp(double3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_rcp)
double4 rcp(double4);

//===----------------------------------------------------------------------===//
// round builtins
//===----------------------------------------------------------------------===//
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5285,6 +5285,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
case Builtin::BI__builtin_hlsl_elementwise_rcp:
case Builtin::BI__builtin_hlsl_elementwise_frac: {
if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
return true;
Expand Down
53 changes: 53 additions & 0 deletions clang/test/CodeGenHLSL/builtins/rcp.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Checks that every rcp() overload lowers to a call of the matching
// llvm.dx.rcp overload. The file is compiled twice: once with
// -fnative-half-type (half stays half) and once without (half is emitted as
// float).

// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,NATIVE_HALF
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF

// half overloads: the expected element type depends on the run configuration.

// NATIVE_HALF: define noundef half @
// NATIVE_HALF: %dx.rcp = call half @llvm.dx.rcp.f16(
// NATIVE_HALF: ret half %dx.rcp
// NO_HALF: define noundef float @"?test_rcp_half@@YA$halff@$halff@@Z"(
// NO_HALF: %dx.rcp = call float @llvm.dx.rcp.f32(
// NO_HALF: ret float %dx.rcp
half test_rcp_half(half p0) { return rcp(p0); }
// NATIVE_HALF: define noundef <2 x half> @
// NATIVE_HALF: %dx.rcp = call <2 x half> @llvm.dx.rcp.v2f16
// NATIVE_HALF: ret <2 x half> %dx.rcp
// NO_HALF: define noundef <2 x float> @
// NO_HALF: %dx.rcp = call <2 x float> @llvm.dx.rcp.v2f32(
// NO_HALF: ret <2 x float> %dx.rcp
half2 test_rcp_half2(half2 p0) { return rcp(p0); }
// NATIVE_HALF: define noundef <3 x half> @
// NATIVE_HALF: %dx.rcp = call <3 x half> @llvm.dx.rcp.v3f16
// NATIVE_HALF: ret <3 x half> %dx.rcp
// NO_HALF: define noundef <3 x float> @
// NO_HALF: %dx.rcp = call <3 x float> @llvm.dx.rcp.v3f32(
// NO_HALF: ret <3 x float> %dx.rcp
half3 test_rcp_half3(half3 p0) { return rcp(p0); }
// NATIVE_HALF: define noundef <4 x half> @
// NATIVE_HALF: %dx.rcp = call <4 x half> @llvm.dx.rcp.v4f16
// NATIVE_HALF: ret <4 x half> %dx.rcp
// NO_HALF: define noundef <4 x float> @
// NO_HALF: %dx.rcp = call <4 x float> @llvm.dx.rcp.v4f32(
// NO_HALF: ret <4 x float> %dx.rcp
half4 test_rcp_half4(half4 p0) { return rcp(p0); }

// float overloads: identical output in both run configurations.

// CHECK: define noundef float @
// CHECK: %dx.rcp = call float @llvm.dx.rcp.f32(
// CHECK: ret float %dx.rcp
float test_rcp_float(float p0) { return rcp(p0); }
// CHECK: define noundef <2 x float> @
// CHECK: %dx.rcp = call <2 x float> @llvm.dx.rcp.v2f32
// CHECK: ret <2 x float> %dx.rcp
float2 test_rcp_float2(float2 p0) { return rcp(p0); }
// CHECK: define noundef <3 x float> @
// CHECK: %dx.rcp = call <3 x float> @llvm.dx.rcp.v3f32
// CHECK: ret <3 x float> %dx.rcp
float3 test_rcp_float3(float3 p0) { return rcp(p0); }
// CHECK: define noundef <4 x float> @
// CHECK: %dx.rcp = call <4 x float> @llvm.dx.rcp.v4f32
// CHECK: ret <4 x float> %dx.rcp
float4 test_rcp_float4(float4 p0) { return rcp(p0); }

// double overloads: declared in hlsl_intrinsics.h alongside the half and float
// ones, so they get the same coverage.

// CHECK: define noundef double @
// CHECK: %dx.rcp = call double @llvm.dx.rcp.f64(
// CHECK: ret double %dx.rcp
double test_rcp_double(double p0) { return rcp(p0); }
// CHECK: define noundef <2 x double> @
// CHECK: %dx.rcp = call <2 x double> @llvm.dx.rcp.v2f64
// CHECK: ret <2 x double> %dx.rcp
double2 test_rcp_double2(double2 p0) { return rcp(p0); }
// CHECK: define noundef <3 x double> @
// CHECK: %dx.rcp = call <3 x double> @llvm.dx.rcp.v3f64
// CHECK: ret <3 x double> %dx.rcp
double3 test_rcp_double3(double3 p0) { return rcp(p0); }
// CHECK: define noundef <4 x double> @
// CHECK: %dx.rcp = call <4 x double> @llvm.dx.rcp.v4f64
// CHECK: ret <4 x double> %dx.rcp
double4 test_rcp_double4(double4 p0) { return rcp(p0); }
27 changes: 27 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/rcp-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@

// Negative tests for __builtin_hlsl_elementwise_rcp: each function below makes
// one invalid call and pins the diagnostic it must produce.
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected

// Calling the builtin with no arguments is rejected.
float test_too_few_arg() {
return __builtin_hlsl_elementwise_rcp();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
}

// Calling the builtin with two arguments is rejected.
float2 test_too_many_arg(float2 p0) {
return __builtin_hlsl_elementwise_rcp(p0, p0);
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
}

// A bool argument is not accepted as an element type.
float builtin_bool_to_float_type_promotion(bool p1) {
return __builtin_hlsl_elementwise_rcp(p1);
// expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'bool')}}
}

// A scalar int argument is not implicitly converted to float.
float builtin_rcp_int_to_float_promotion(int p1) {
return __builtin_hlsl_elementwise_rcp(p1);
// expected-error@-1 {{passing 'int' to parameter of incompatible type 'float'}}
}

// An int vector argument is not implicitly converted to a float vector.
float2 builtin_rcp_int2_to_float2_promotion(int2 p1) {
return __builtin_hlsl_elementwise_rcp(p1);
// expected-error@-1 {{passing 'int2' (aka 'vector<int, 2>') to parameter of incompatible type '__attribute__((__vector_size__(2 * sizeof(float)))) float' (vector of 2 'float' values)}}
}
5 changes: 3 additions & 2 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def int_dx_lerp :
[llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>,LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
[IntrNoMem, IntrWillReturn] >;

def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
// rcp: overloaded on any floating-point type; the single operand must have the
// same type as the result.
def int_dx_rcp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
}