Skip to content

Commit fb90733

Browse files
authored
[HLSL] implement elementwise firstbithigh hlsl builtin (#111082)
Implements the elementwise firstbithigh HLSL builtin. Implements the firstbituhigh intrinsic for SPIR-V and DirectX, which handles unsigned integers. Implements the firstbitshigh intrinsic for SPIR-V and DirectX, which handles signed integers. Fixes #113486. Closes #99115.
1 parent 7585e2f commit fb90733

File tree

17 files changed

+752
-1
lines changed

17 files changed

+752
-1
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4798,6 +4798,12 @@ def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> {
47984798
let Prototype = "int(unsigned int, unsigned int, int)";
47994799
}
48004800

4801+
// Builtin that backs the HLSL firstbithigh() overloads. The variadic
// "void(...)" prototype is only a placeholder: SemaHLSL checks the single
// argument and sets the call's result type itself.
def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
4802+
let Spellings = ["__builtin_hlsl_elementwise_firstbithigh"];
4803+
let Attributes = [NoThrow, Const];
4804+
let Prototype = "void(...)";
4805+
}
4806+
48014807
def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
48024808
let Spellings = ["__builtin_hlsl_elementwise_frac"];
48034809
let Attributes = [NoThrow, Const];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18773,6 +18773,15 @@ static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
1877318773
return RT.getUDotIntrinsic();
1877418774
}
1877518775

18776+
// Picks the target intrinsic used to lower firstbithigh for an argument of
// type QT: the signed variant when QT has a signed integer representation,
// otherwise the unsigned variant. Callers must pass an integer scalar or
// vector type; anything else trips the assertion below.
//
// Declared static because this helper is only used within this file, matching
// getDotProductIntrinsic above.
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
18777+
if (QT->hasSignedIntegerRepresentation()) {
18778+
return RT.getFirstBitSHighIntrinsic();
18779+
}
18780+
18781+
assert(QT->hasUnsignedIntegerRepresentation());
18782+
return RT.getFirstBitUHighIntrinsic();
18783+
}
18784+
1877618785
Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1877718786
const CallExpr *E,
1877818787
ReturnValueSlot ReturnValue) {
@@ -18872,6 +18881,15 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1887218881
/*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
1887318882
"hlsl.dot4add.i8packed");
1887418883
}
18884+
// firstbithigh: emit a single call to the signed or unsigned target intrinsic.
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
18885+
18886+
Value *X = EmitScalarExpr(E->getArg(0));
18887+
18888+
// The return type comes from the call expression, not from the operand; the
// intrinsic is chosen from the signedness of the argument's source type.
return Builder.CreateIntrinsic(
18889+
/*ReturnType=*/ConvertType(E->getType()),
18890+
getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
18891+
ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
18892+
}
1887518893
case Builtin::BI__builtin_hlsl_lerp: {
1887618894
Value *X = EmitScalarExpr(E->getArg(0));
1887718895
Value *Y = EmitScalarExpr(E->getArg(1));

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ class CGHLSLRuntime {
9292
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed)
9393
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
9494
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
95+
// Intrinsic accessors for firstbithigh, unsigned and signed variants; CodeGen
// calls them as getFirstBitUHighIntrinsic() / getFirstBitSHighIntrinsic().
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
96+
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
9597

9698
GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding)
9799

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,78 @@ float3 exp2(float3);
10081008
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
10091009
float4 exp2(float4);
10101010

1011+
//===----------------------------------------------------------------------===//
1012+
// firstbithigh builtins
1013+
//===----------------------------------------------------------------------===//
1014+
1015+
/// \fn T firstbithigh(T Val)
1016+
/// \brief Returns the location of the first set bit starting from the highest
1017+
/// order bit and working downward, per component.
1018+
/// \param Val the input value.
1019+
1020+
#ifdef __HLSL_ENABLE_16_BIT
1021+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1022+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1023+
uint firstbithigh(int16_t);
1024+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1025+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1026+
uint2 firstbithigh(int16_t2);
1027+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1028+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1029+
uint3 firstbithigh(int16_t3);
1030+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1031+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1032+
uint4 firstbithigh(int16_t4);
1033+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1034+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1035+
uint firstbithigh(uint16_t);
1036+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1037+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1038+
uint2 firstbithigh(uint16_t2);
1039+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1040+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1041+
uint3 firstbithigh(uint16_t3);
1042+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1043+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1044+
uint4 firstbithigh(uint16_t4);
1045+
#endif
1046+
1047+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1048+
uint firstbithigh(int);
1049+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1050+
uint2 firstbithigh(int2);
1051+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1052+
uint3 firstbithigh(int3);
1053+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1054+
uint4 firstbithigh(int4);
1055+
1056+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1057+
uint firstbithigh(uint);
1058+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1059+
uint2 firstbithigh(uint2);
1060+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1061+
uint3 firstbithigh(uint3);
1062+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1063+
uint4 firstbithigh(uint4);
1064+
1065+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1066+
uint firstbithigh(int64_t);
1067+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1068+
uint2 firstbithigh(int64_t2);
1069+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1070+
uint3 firstbithigh(int64_t3);
1071+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1072+
uint4 firstbithigh(int64_t4);
1073+
1074+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1075+
uint firstbithigh(uint64_t);
1076+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1077+
uint2 firstbithigh(uint64_t2);
1078+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1079+
uint3 firstbithigh(uint64_t3);
1080+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
1081+
uint4 firstbithigh(uint64_t4);
1082+
10111083
//===----------------------------------------------------------------------===//
10121084
// floor builtins
10131085
//===----------------------------------------------------------------------===//

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1947,6 +1947,31 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
19471947
return true;
19481948
break;
19491949
}
1950+
// firstbithigh: one integer scalar or vector argument; the result is unsigned
// int, or a vector of unsigned int with the argument's element count.
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
1951+
// Shared one-argument elementwise preparation; a true result means failure.
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
1952+
return true;
1953+
1954+
const Expr *Arg = TheCall->getArg(0);
1955+
QualType ArgTy = Arg->getType();
1956+
QualType EltTy = ArgTy;
1957+
1958+
// Scalar result type; replaced by a vector type below for vector arguments.
QualType ResTy = SemaRef.Context.UnsignedIntTy;
1959+
1960+
// For a vector argument, check the element type and build a result vector
// with the same element count and vector kind.
if (auto *VecTy = EltTy->getAs<VectorType>()) {
1961+
EltTy = VecTy->getElementType();
1962+
ResTy = SemaRef.Context.getVectorType(ResTy, VecTy->getNumElements(),
1963+
VecTy->getVectorKind());
1964+
}
1965+
1966+
// Reject any argument whose (element) type is not an integer type.
if (!EltTy->isIntegerType()) {
1967+
Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
1968+
<< 1 << /* integer ty */ 6 << ArgTy;
1969+
return true;
1970+
}
1971+
1972+
// Override the placeholder result type taken from the builtin's prototype.
TheCall->setType(ResTy);
1973+
break;
1974+
}
19501975
case Builtin::BI__builtin_hlsl_select: {
19511976
if (SemaRef.checkArgCount(TheCall, 3))
19521977
return true;
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
3+
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
4+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
5+
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
6+
// RUN: -emit-llvm -disable-llvm-passes \
7+
// RUN: -o - | FileCheck %s -DTARGET=spv
8+
9+
#ifdef __HLSL_ENABLE_16_BIT
10+
// CHECK-LABEL: test_firstbithigh_ushort
11+
// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16
12+
uint test_firstbithigh_ushort(uint16_t p0) {
13+
return firstbithigh(p0);
14+
}
15+
16+
// CHECK-LABEL: test_firstbithigh_ushort2
17+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
18+
uint2 test_firstbithigh_ushort2(uint16_t2 p0) {
19+
return firstbithigh(p0);
20+
}
21+
22+
// CHECK-LABEL: test_firstbithigh_ushort3
23+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
24+
uint3 test_firstbithigh_ushort3(uint16_t3 p0) {
25+
return firstbithigh(p0);
26+
}
27+
28+
// CHECK-LABEL: test_firstbithigh_ushort4
29+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
30+
uint4 test_firstbithigh_ushort4(uint16_t4 p0) {
31+
return firstbithigh(p0);
32+
}
33+
34+
// CHECK-LABEL: test_firstbithigh_short
35+
// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16
36+
uint test_firstbithigh_short(int16_t p0) {
37+
return firstbithigh(p0);
38+
}
39+
40+
// CHECK-LABEL: test_firstbithigh_short2
41+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
42+
uint2 test_firstbithigh_short2(int16_t2 p0) {
43+
return firstbithigh(p0);
44+
}
45+
46+
// CHECK-LABEL: test_firstbithigh_short3
47+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
48+
uint3 test_firstbithigh_short3(int16_t3 p0) {
49+
return firstbithigh(p0);
50+
}
51+
52+
// CHECK-LABEL: test_firstbithigh_short4
53+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
54+
uint4 test_firstbithigh_short4(int16_t4 p0) {
55+
return firstbithigh(p0);
56+
}
57+
#endif // __HLSL_ENABLE_16_BIT
58+
59+
// CHECK-LABEL: test_firstbithigh_uint
60+
// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32
61+
uint test_firstbithigh_uint(uint p0) {
62+
return firstbithigh(p0);
63+
}
64+
65+
// CHECK-LABEL: test_firstbithigh_uint2
66+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
67+
uint2 test_firstbithigh_uint2(uint2 p0) {
68+
return firstbithigh(p0);
69+
}
70+
71+
// CHECK-LABEL: test_firstbithigh_uint3
72+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
73+
uint3 test_firstbithigh_uint3(uint3 p0) {
74+
return firstbithigh(p0);
75+
}
76+
77+
// CHECK-LABEL: test_firstbithigh_uint4
78+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
79+
uint4 test_firstbithigh_uint4(uint4 p0) {
80+
return firstbithigh(p0);
81+
}
82+
83+
// CHECK-LABEL: test_firstbithigh_ulong
84+
// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64
85+
uint test_firstbithigh_ulong(uint64_t p0) {
86+
return firstbithigh(p0);
87+
}
88+
89+
// CHECK-LABEL: test_firstbithigh_ulong2
90+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
91+
uint2 test_firstbithigh_ulong2(uint64_t2 p0) {
92+
return firstbithigh(p0);
93+
}
94+
95+
// CHECK-LABEL: test_firstbithigh_ulong3
96+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
97+
uint3 test_firstbithigh_ulong3(uint64_t3 p0) {
98+
return firstbithigh(p0);
99+
}
100+
101+
// CHECK-LABEL: test_firstbithigh_ulong4
102+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
103+
uint4 test_firstbithigh_ulong4(uint64_t4 p0) {
104+
return firstbithigh(p0);
105+
}
106+
107+
// CHECK-LABEL: test_firstbithigh_int
108+
// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32
109+
uint test_firstbithigh_int(int p0) {
110+
return firstbithigh(p0);
111+
}
112+
113+
// CHECK-LABEL: test_firstbithigh_int2
114+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
115+
uint2 test_firstbithigh_int2(int2 p0) {
116+
return firstbithigh(p0);
117+
}
118+
119+
// CHECK-LABEL: test_firstbithigh_int3
120+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
121+
uint3 test_firstbithigh_int3(int3 p0) {
122+
return firstbithigh(p0);
123+
}
124+
125+
// CHECK-LABEL: test_firstbithigh_int4
126+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
127+
uint4 test_firstbithigh_int4(int4 p0) {
128+
return firstbithigh(p0);
129+
}
130+
131+
// CHECK-LABEL: test_firstbithigh_long
132+
// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64
133+
uint test_firstbithigh_long(int64_t p0) {
134+
return firstbithigh(p0);
135+
}
136+
137+
// CHECK-LABEL: test_firstbithigh_long2
138+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
139+
uint2 test_firstbithigh_long2(int64_t2 p0) {
140+
return firstbithigh(p0);
141+
}
142+
143+
// CHECK-LABEL: test_firstbithigh_long3
144+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
145+
uint3 test_firstbithigh_long3(int64_t3 p0) {
146+
return firstbithigh(p0);
147+
}
148+
149+
// CHECK-LABEL: test_firstbithigh_long4
150+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
151+
uint4 test_firstbithigh_long4(int64_t4 p0) {
152+
return firstbithigh(p0);
153+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
2+
3+
int test_too_few_arg() {
4+
return firstbithigh();
5+
// expected-error@-1 {{no matching function for call to 'firstbithigh'}}
6+
}
7+
8+
int test_too_many_arg(int p0) {
9+
return firstbithigh(p0, p0);
10+
// expected-error@-1 {{no matching function for call to 'firstbithigh'}}
11+
}
12+
13+
double test_int_builtin(double p0) {
14+
return firstbithigh(p0);
15+
// expected-error@-1 {{call to 'firstbithigh' is ambiguous}}
16+
}
17+
18+
double2 test_int_builtin_2(double2 p0) {
19+
return __builtin_hlsl_elementwise_firstbithigh(p0);
20+
// expected-error@-1 {{1st argument must be a vector of integers
21+
// (was 'double2' (aka 'vector<double, 2>'))}}
22+
}
23+
24+
float test_int_builtin_3(float p0) {
25+
return __builtin_hlsl_elementwise_firstbithigh(p0);
26+
// expected-error@-1 {{1st argument must be a vector of integers
27+
// (was 'float')}}
28+
}

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,4 +93,6 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L
9393
def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
9494
[LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
9595
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
96+
// firstbithigh, unsigned (u) and signed (s) variants: overloaded on any
// integer scalar or vector operand; the result is i32, or a vector of i32 with
// the operand's element count.
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
97+
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
9698
}

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,6 @@ let TargetPrefix = "spv" in {
9999
[llvm_any_ty],
100100
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
101101
[IntrNoMem]>;
102+
// firstbithigh, unsigned (u) and signed (s) variants: overloaded on any
// integer scalar or vector operand; the result is i32, or a vector of i32 with
// the operand's element count.
def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
102+
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
102104
}

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,30 @@ def CountBits : DXILOp<31, unaryBits> {
564564
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
565565
}
566566

567+
// DXIL opcode 33: lowering target for int_dx_firstbituhigh (the unsigned
// variant). The operand is overloaded on Int16Ty/Int32Ty/Int64Ty; the result
// is always Int32Ty.
def FirstbitHi : DXILOp<33, unaryBits> {
568+
let Doc = "Returns the location of the first set bit starting from "
569+
"the highest order bit and working downward.";
570+
let LLVMIntrinsic = int_dx_firstbituhigh;
571+
let arguments = [OverloadTy];
572+
let result = Int32Ty;
573+
let overloads =
574+
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
575+
let stages = [Stages<DXIL1_0, [all_stages]>];
576+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
577+
}
578+
579+
// DXIL opcode 34: lowering target for int_dx_firstbitshigh (the signed
// variant). The operand is overloaded on Int16Ty/Int32Ty/Int64Ty; the result
// is always Int32Ty.
def FirstbitSHi : DXILOp<34, unaryBits> {
580+
let Doc = "Returns the location of the first set bit from "
581+
"the highest order bit based on the sign.";
582+
let LLVMIntrinsic = int_dx_firstbitshigh;
583+
let arguments = [OverloadTy];
584+
let result = Int32Ty;
585+
let overloads =
586+
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
587+
let stages = [Stages<DXIL1_0, [all_stages]>];
588+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
589+
}
590+
567591
def FMax : DXILOp<35, binary> {
568592
let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
569593
let LLVMIntrinsic = int_maxnum;

0 commit comments

Comments
 (0)