Skip to content

Commit 62af641

Browse files
committed
implement firstbithigh hlsl builtin
1 parent d9c95ef commit 62af641

File tree

16 files changed

+488
-0
lines changed

16 files changed

+488
-0
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4791,6 +4791,12 @@ def HLSLDotProduct : LangBuiltin<"HLSL_LANG"> {
47914791
let Prototype = "void(...)";
47924792
}
47934793

4794+
// Builtin behind the `firstbithigh` overloads that hlsl_intrinsics.h aliases
// to `__builtin_hlsl_elementwise_firstbithigh`. The variadic "void(...)"
// prototype carries no parameter or result types; the argument is checked
// (exactly one integer scalar or vector) by the matching case in
// SemaHLSL::CheckBuiltinFunctionCall.
def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
4795+
let Spellings = ["__builtin_hlsl_elementwise_firstbithigh"];
4796+
let Attributes = [NoThrow, Const];
4797+
let Prototype = "void(...)";
4798+
}
4799+
47944800
def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
47954801
let Spellings = ["__builtin_hlsl_elementwise_frac"];
47964802
let Attributes = [NoThrow, Const];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18639,6 +18639,14 @@ static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
1863918639
return RT.getUDotIntrinsic();
1864018640
}
1864118641

18642+
/// Pick the target-specific `firstbithigh` intrinsic for the operand type
/// \p QT.
///
/// \param RT HLSL runtime that supplies the per-target intrinsic IDs.
/// \param QT Type of the builtin's argument (scalar or vector).
/// \returns the signed variant when \p QT has a signed integer
/// representation, otherwise the unsigned variant.
///
/// The helper is only used inside this translation unit, so it is given
/// internal linkage like the neighbouring getDotProductIntrinsic helper.
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
  if (QT->hasSignedIntegerRepresentation())
    return RT.getFirstBitSHighIntrinsic();

  // Everything that is not signed is lowered through the unsigned intrinsic.
  return RT.getFirstBitUHighIntrinsic();
}
18649+
1864218650
Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1864318651
const CallExpr *E,
1864418652
ReturnValueSlot ReturnValue) {
@@ -18728,6 +18736,15 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1872818736
getDotProductIntrinsic(CGM.getHLSLRuntime(), VecTy0->getElementType()),
1872918737
ArrayRef<Value *>{Op0, Op1}, nullptr, "hlsl.dot");
1873018738
} break;
18739+
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
18740+
18741+
Value *X = EmitScalarExpr(E->getArg(0));
18742+
18743+
return Builder.CreateIntrinsic(
18744+
/*ReturnType=*/X->getType(),
18745+
getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
18746+
ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
18747+
}
1873118748
case Builtin::BI__builtin_hlsl_lerp: {
1873218749
Value *X = EmitScalarExpr(E->getArg(0));
1873318750
Value *Y = EmitScalarExpr(E->getArg(1));

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ class CGHLSLRuntime {
9191
GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
9292
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
9393
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
94+
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
95+
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
9496

9597
//===----------------------------------------------------------------------===//
9698
// End of reserved area for HLSL intrinsic getters.

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -958,6 +958,78 @@ float3 exp2(float3);
958958
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
959959
float4 exp2(float4);
960960

961+
//===----------------------------------------------------------------------===//
962+
// firstbithigh builtins
963+
//===----------------------------------------------------------------------===//
964+
965+
/// \fn T firstbithigh(T Val)
966+
/// \brief Returns the location of the first set bit starting from the highest
967+
/// order bit and working downward, per component.
968+
/// \param Val the input value.
969+
970+
#ifdef __HLSL_ENABLE_16_BIT
971+
_HLSL_AVAILABILITY(shadermodel, 6.2)
972+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
973+
int16_t firstbithigh(int16_t);
974+
_HLSL_AVAILABILITY(shadermodel, 6.2)
975+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
976+
int16_t2 firstbithigh(int16_t2);
977+
_HLSL_AVAILABILITY(shadermodel, 6.2)
978+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
979+
int16_t3 firstbithigh(int16_t3);
980+
_HLSL_AVAILABILITY(shadermodel, 6.2)
981+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
982+
int16_t4 firstbithigh(int16_t4);
983+
_HLSL_AVAILABILITY(shadermodel, 6.2)
984+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
985+
uint16_t firstbithigh(uint16_t);
986+
_HLSL_AVAILABILITY(shadermodel, 6.2)
987+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
988+
uint16_t2 firstbithigh(uint16_t2);
989+
_HLSL_AVAILABILITY(shadermodel, 6.2)
990+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
991+
uint16_t3 firstbithigh(uint16_t3);
992+
_HLSL_AVAILABILITY(shadermodel, 6.2)
993+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
994+
uint16_t4 firstbithigh(uint16_t4);
995+
#endif
996+
997+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
998+
int firstbithigh(int);
999+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1000+
int2 firstbithigh(int2);
1001+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1002+
int3 firstbithigh(int3);
1003+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1004+
int4 firstbithigh(int4);
1005+
1006+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1007+
uint firstbithigh(uint);
1008+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1009+
uint2 firstbithigh(uint2);
1010+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1011+
uint3 firstbithigh(uint3);
1012+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1013+
uint4 firstbithigh(uint4);
1014+
1015+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1016+
int64_t firstbithigh(int64_t);
1017+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1018+
int64_t2 firstbithigh(int64_t2);
1019+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1020+
int64_t3 firstbithigh(int64_t3);
1021+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1022+
int64_t4 firstbithigh(int64_t4);
1023+
1024+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1025+
uint64_t firstbithigh(uint64_t);
1026+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1027+
uint64_t2 firstbithigh(uint64_t2);
1028+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1029+
uint64_t3 firstbithigh(uint64_t3);
1030+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1031+
uint64_t4 firstbithigh(uint64_t4);
1032+
9611033
//===----------------------------------------------------------------------===//
9621034
// floor builtins
9631035
//===----------------------------------------------------------------------===//

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1890,6 +1890,24 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
18901890
return true;
18911891
break;
18921892
}
1893+
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
1894+
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
1895+
return true;
1896+
1897+
const Expr *Arg = TheCall->getArg(0);
1898+
QualType ArgTy = Arg->getType();
1899+
QualType EltTy = ArgTy;
1900+
1901+
if (auto *VecTy = EltTy->getAs<VectorType>())
1902+
EltTy = VecTy->getElementType();
1903+
1904+
if (!EltTy->isIntegerType()) {
1905+
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
1906+
<< 1 << /* integer ty */ 6 << ArgTy;
1907+
return true;
1908+
}
1909+
break;
1910+
}
18931911
case Builtin::BI__builtin_hlsl_select: {
18941912
if (SemaRef.checkArgCount(TheCall, 3))
18951913
return true;
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
3+
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
4+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
5+
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
6+
// RUN: -emit-llvm -disable-llvm-passes \
7+
// RUN: -o - | FileCheck %s -DTARGET=spv
8+
9+
#ifdef __HLSL_ENABLE_16_BIT
10+
// CHECK-LABEL: test_firstbithigh_ushort
11+
// CHECK: call i16 @llvm.[[TARGET]].firstbituhigh.i16
12+
// Scalar uint16_t operand; the preceding FileCheck lines expect the unsigned
// i16 intrinsic call.
// NOTE(review): this function returns `int`, whereas the other tests in this
// file return the same type as their operand (e.g. the vector variants return
// uint16_tN) -- confirm whether `uint16_t` was intended here.
int test_firstbithigh_ushort(uint16_t p0) {
13+
return firstbithigh(p0);
14+
}
15+
16+
// CHECK-LABEL: test_firstbithigh_ushort2
17+
// CHECK: call <2 x i16> @llvm.[[TARGET]].firstbituhigh.v2i16
18+
uint16_t2 test_firstbithigh_ushort2(uint16_t2 p0) {
19+
return firstbithigh(p0);
20+
}
21+
22+
// CHECK-LABEL: test_firstbithigh_ushort3
23+
// CHECK: call <3 x i16> @llvm.[[TARGET]].firstbituhigh.v3i16
24+
uint16_t3 test_firstbithigh_ushort3(uint16_t3 p0) {
25+
return firstbithigh(p0);
26+
}
27+
28+
// CHECK-LABEL: test_firstbithigh_ushort4
29+
// CHECK: call <4 x i16> @llvm.[[TARGET]].firstbituhigh.v4i16
30+
uint16_t4 test_firstbithigh_ushort4(uint16_t4 p0) {
31+
return firstbithigh(p0);
32+
}
33+
34+
// CHECK-LABEL: test_firstbithigh_short
35+
// CHECK: call i16 @llvm.[[TARGET]].firstbitshigh.i16
36+
int16_t test_firstbithigh_short(int16_t p0) {
37+
return firstbithigh(p0);
38+
}
39+
40+
// CHECK-LABEL: test_firstbithigh_short2
41+
// CHECK: call <2 x i16> @llvm.[[TARGET]].firstbitshigh.v2i16
42+
int16_t2 test_firstbithigh_short2(int16_t2 p0) {
43+
return firstbithigh(p0);
44+
}
45+
46+
// CHECK-LABEL: test_firstbithigh_short3
47+
// CHECK: call <3 x i16> @llvm.[[TARGET]].firstbitshigh.v3i16
48+
int16_t3 test_firstbithigh_short3(int16_t3 p0) {
49+
return firstbithigh(p0);
50+
}
51+
52+
// CHECK-LABEL: test_firstbithigh_short4
53+
// CHECK: call <4 x i16> @llvm.[[TARGET]].firstbitshigh.v4i16
54+
int16_t4 test_firstbithigh_short4(int16_t4 p0) {
55+
return firstbithigh(p0);
56+
}
57+
#endif // __HLSL_ENABLE_16_BIT
58+
59+
// CHECK-LABEL: test_firstbithigh_uint
60+
// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32
61+
uint test_firstbithigh_uint(uint p0) {
62+
return firstbithigh(p0);
63+
}
64+
65+
// CHECK-LABEL: test_firstbithigh_uint2
66+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
67+
uint2 test_firstbithigh_uint2(uint2 p0) {
68+
return firstbithigh(p0);
69+
}
70+
71+
// CHECK-LABEL: test_firstbithigh_uint3
72+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
73+
uint3 test_firstbithigh_uint3(uint3 p0) {
74+
return firstbithigh(p0);
75+
}
76+
77+
// CHECK-LABEL: test_firstbithigh_uint4
78+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
79+
uint4 test_firstbithigh_uint4(uint4 p0) {
80+
return firstbithigh(p0);
81+
}
82+
83+
// CHECK-LABEL: test_firstbithigh_ulong
84+
// CHECK: call i64 @llvm.[[TARGET]].firstbituhigh.i64
85+
uint64_t test_firstbithigh_ulong(uint64_t p0) {
86+
return firstbithigh(p0);
87+
}
88+
89+
// CHECK-LABEL: test_firstbithigh_ulong2
90+
// CHECK: call <2 x i64> @llvm.[[TARGET]].firstbituhigh.v2i64
91+
uint64_t2 test_firstbithigh_ulong2(uint64_t2 p0) {
92+
return firstbithigh(p0);
93+
}
94+
95+
// CHECK-LABEL: test_firstbithigh_ulong3
96+
// CHECK: call <3 x i64> @llvm.[[TARGET]].firstbituhigh.v3i64
97+
uint64_t3 test_firstbithigh_ulong3(uint64_t3 p0) {
98+
return firstbithigh(p0);
99+
}
100+
101+
// CHECK-LABEL: test_firstbithigh_ulong4
102+
// CHECK: call <4 x i64> @llvm.[[TARGET]].firstbituhigh.v4i64
103+
uint64_t4 test_firstbithigh_ulong4(uint64_t4 p0) {
104+
return firstbithigh(p0);
105+
}
106+
107+
// CHECK-LABEL: test_firstbithigh_int
108+
// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32
109+
int test_firstbithigh_int(int p0) {
110+
return firstbithigh(p0);
111+
}
112+
113+
// CHECK-LABEL: test_firstbithigh_int2
114+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
115+
int2 test_firstbithigh_int2(int2 p0) {
116+
return firstbithigh(p0);
117+
}
118+
119+
// CHECK-LABEL: test_firstbithigh_int3
120+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
121+
int3 test_firstbithigh_int3(int3 p0) {
122+
return firstbithigh(p0);
123+
}
124+
125+
// CHECK-LABEL: test_firstbithigh_int4
126+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
127+
int4 test_firstbithigh_int4(int4 p0) {
128+
return firstbithigh(p0);
129+
}
130+
131+
// CHECK-LABEL: test_firstbithigh_long
132+
// CHECK: call i64 @llvm.[[TARGET]].firstbitshigh.i64
133+
int64_t test_firstbithigh_long(int64_t p0) {
134+
return firstbithigh(p0);
135+
}
136+
137+
// CHECK-LABEL: test_firstbithigh_long2
138+
// CHECK: call <2 x i64> @llvm.[[TARGET]].firstbitshigh.v2i64
139+
int64_t2 test_firstbithigh_long2(int64_t2 p0) {
140+
return firstbithigh(p0);
141+
}
142+
143+
// CHECK-LABEL: test_firstbithigh_long3
144+
// CHECK: call <3 x i64> @llvm.[[TARGET]].firstbitshigh.v3i64
145+
int64_t3 test_firstbithigh_long3(int64_t3 p0) {
146+
return firstbithigh(p0);
147+
}
148+
149+
// CHECK-LABEL: test_firstbithigh_long4
150+
// CHECK: call <4 x i64> @llvm.[[TARGET]].firstbitshigh.v4i64
151+
int64_t4 test_firstbithigh_long4(int64_t4 p0) {
152+
return firstbithigh(p0);
153+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
2+
3+
int test_too_few_arg() {
4+
return firstbithigh();
5+
// expected-error@-1 {{no matching function for call to 'firstbithigh'}}
6+
}
7+
8+
int test_too_many_arg(int p0) {
9+
return firstbithigh(p0, p0);
10+
// expected-error@-1 {{no matching function for call to 'firstbithigh'}}
11+
}
12+
13+
double test_int_builtin(double p0) {
14+
return firstbithigh(p0);
15+
// expected-error@-1 {{call to 'firstbithigh' is ambiguous}}
16+
}
17+
18+
double2 test_int_builtin_2(double2 p0) {
19+
return __builtin_hlsl_elementwise_firstbithigh(p0);
20+
// expected-error@-1 {{1st argument must be a vector of integers
21+
// (was 'double2' (aka 'vector<double, 2>'))}}
22+
}
23+
24+
float test_int_builtin_3(float p0) {
25+
return __builtin_hlsl_elementwise_firstbithigh(p0);
26+
// expected-error@-1 {{1st argument must be a vector of integers
27+
// (was 'float')}}
28+
}

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,4 +90,6 @@ def int_dx_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0
9090
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
9191
def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
9292
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
93+
// Unsigned firstbithigh for the DirectX target. Overloaded on any integer
// type; the result type is the same as the operand type (LLVMMatchType<0>),
// and the intrinsic does not access memory.
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
94+
// Signed counterpart of int_dx_firstbituhigh, with an identical signature.
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
9395
}

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,6 @@ let TargetPrefix = "spv" in {
9797
[llvm_any_ty],
9898
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
9999
[IntrNoMem]>;
100+
// Unsigned firstbithigh for the SPIR-V target. Overloaded on any integer
// type; the result type is the same as the operand type (LLVMMatchType<0>),
// and the intrinsic does not access memory.
def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
101+
// Signed counterpart of int_spv_firstbituhigh, with an identical signature.
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
100102
}

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,30 @@ def CBits : DXILOp<31, unary> {
564564
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
565565
}
566566

567+
// DXIL opcode 33 (unary): unsigned "first bit high". Mapped from the
// int_dx_firstbituhigh intrinsic, overloaded on i16/i32/i64, valid in all
// shader stages from DXIL 1.0, and marked ReadNone.
// NOTE(review): `result = OverloadTy` ties the result type to the operand
// type. The DXIL operation table appears to give this op an i32 result for
// every overload -- confirm that OverloadTy is intended here.
def FBH : DXILOp<33, unary> {
568+
let Doc = "Returns the location of the first set bit starting from "
569+
"the highest order bit and working downward.";
570+
let LLVMIntrinsic = int_dx_firstbituhigh;
571+
let arguments = [OverloadTy];
572+
let result = OverloadTy;
573+
let overloads =
574+
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
575+
let stages = [Stages<DXIL1_0, [all_stages]>];
576+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
577+
}
578+
579+
// DXIL opcode 34 (unary): signed "first bit high". Mapped from the
// int_dx_firstbitshigh intrinsic, overloaded on i16/i32/i64, valid in all
// shader stages from DXIL 1.0, and marked ReadNone.
// NOTE(review): `result = OverloadTy` ties the result type to the operand
// type. The DXIL operation table appears to give this op an i32 result for
// every overload -- confirm that OverloadTy is intended here.
def FBSH : DXILOp<34, unary> {
580+
let Doc = "Returns the location of the first set bit from "
581+
"the highest order bit based on the sign.";
582+
let LLVMIntrinsic = int_dx_firstbitshigh;
583+
let arguments = [OverloadTy];
584+
let result = OverloadTy;
585+
let overloads =
586+
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
587+
let stages = [Stages<DXIL1_0, [all_stages]>];
588+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
589+
}
590+
567591
def FMax : DXILOp<35, binary> {
568592
let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
569593
let LLVMIntrinsic = int_maxnum;

llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
2828
switch (ID) {
2929
case Intrinsic::dx_frac:
3030
case Intrinsic::dx_rsqrt:
31+
case Intrinsic::dx_firstbituhigh:
32+
case Intrinsic::dx_firstbitshigh:
3133
return true;
3234
default:
3335
return false;

0 commit comments

Comments
 (0)