Skip to content

Commit eeb8649

Browse files
committed
[HLSL] Implement elementwise firstbitlow builtin
1 parent 39913ae commit eeb8649

File tree

16 files changed

+617
-6
lines changed

16 files changed

+617
-6
lines changed

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4822,6 +4822,12 @@ def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
48224822
let Prototype = "void(...)";
48234823
}
48244824

4825+
def HLSLFirstBitLow : LangBuiltin<"HLSL_LANG"> {
4826+
let Spellings = ["__builtin_hlsl_elementwise_firstbitlow"];
4827+
let Attributes = [NoThrow, Const];
4828+
let Prototype = "void(...)";
4829+
}
4830+
48254831
def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
48264832
let Spellings = ["__builtin_hlsl_elementwise_frac"];
48274833
let Attributes = [NoThrow, Const];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19119,14 +19119,21 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
1911919119
"hlsl.dot4add.u8packed");
1912019120
}
1912119121
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
19122-
1912319122
Value *X = EmitScalarExpr(E->getArg(0));
1912419123

1912519124
return Builder.CreateIntrinsic(
1912619125
/*ReturnType=*/ConvertType(E->getType()),
1912719126
getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
1912819127
ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
1912919128
}
19129+
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
19130+
Value *X = EmitScalarExpr(E->getArg(0));
19131+
19132+
return Builder.CreateIntrinsic(
19133+
/*ReturnType=*/ConvertType(E->getType()),
19134+
CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
19135+
nullptr, "hlsl.firstbitlow");
19136+
}
1913019137
case Builtin::BI__builtin_hlsl_lerp: {
1913119138
Value *X = EmitScalarExpr(E->getArg(0));
1913219139
Value *Y = EmitScalarExpr(E->getArg(1));

clang/lib/CodeGen/CGHLSLRuntime.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ class CGHLSLRuntime {
9797
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
9898
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
9999
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
100+
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitLow, firstbitlow)
100101
GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
101102
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
102103
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,6 +1103,78 @@ uint3 firstbithigh(uint64_t3);
11031103
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
11041104
uint4 firstbithigh(uint64_t4);
11051105

1106+
//===----------------------------------------------------------------------===//
1107+
// firstbitlow builtins
1108+
//===----------------------------------------------------------------------===//
1109+
1110+
/// \fn T firstbitlow(T Val)
1111+
/// \brief Returns the location of the first set bit starting from the lowest
1112+
/// order bit and working upward, per component.
1113+
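/// \param Val the input value.
/// \return the bit location as a uint (not as T), or as a uint vector with one
/// result per component when Val is a vector.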
1114+
1115+
#ifdef __HLSL_ENABLE_16_BIT
1116+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1117+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1118+
uint firstbitlow(int16_t);
1119+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1120+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1121+
uint2 firstbitlow(int16_t2);
1122+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1123+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1124+
uint3 firstbitlow(int16_t3);
1125+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1126+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1127+
uint4 firstbitlow(int16_t4);
1128+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1129+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1130+
uint firstbitlow(uint16_t);
1131+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1132+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1133+
uint2 firstbitlow(uint16_t2);
1134+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1135+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1136+
uint3 firstbitlow(uint16_t3);
1137+
_HLSL_AVAILABILITY(shadermodel, 6.2)
1138+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1139+
uint4 firstbitlow(uint16_t4);
1140+
#endif
1141+
1142+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1143+
uint firstbitlow(int);
1144+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1145+
uint2 firstbitlow(int2);
1146+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1147+
uint3 firstbitlow(int3);
1148+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1149+
uint4 firstbitlow(int4);
1150+
1151+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1152+
uint firstbitlow(uint);
1153+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1154+
uint2 firstbitlow(uint2);
1155+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1156+
uint3 firstbitlow(uint3);
1157+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1158+
uint4 firstbitlow(uint4);
1159+
1160+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1161+
uint firstbitlow(int64_t);
1162+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1163+
uint2 firstbitlow(int64_t2);
1164+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1165+
uint3 firstbitlow(int64_t3);
1166+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1167+
uint4 firstbitlow(int64_t4);
1168+
1169+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1170+
uint firstbitlow(uint64_t);
1171+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1172+
uint2 firstbitlow(uint64_t2);
1173+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1174+
uint3 firstbitlow(uint64_t3);
1175+
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
1176+
uint4 firstbitlow(uint64_t4);
1177+
11061178
//===----------------------------------------------------------------------===//
11071179
// floor builtins
11081180
//===----------------------------------------------------------------------===//

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1945,7 +1945,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
19451945
return true;
19461946
break;
19471947
}
1948-
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
1948+
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh:
1949+
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
19491950
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
19501951
return true;
19511952

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
2+
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
3+
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
4+
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
5+
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
6+
// RUN: -emit-llvm -disable-llvm-passes \
7+
// RUN: -o - | FileCheck %s -DTARGET=spv
8+
9+
#ifdef __HLSL_ENABLE_16_BIT
10+
// CHECK-LABEL: test_firstbitlow_ushort
11+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
12+
uint test_firstbitlow_ushort(uint16_t p0) {
13+
return firstbitlow(p0);
14+
}
15+
16+
// CHECK-LABEL: test_firstbitlow_ushort2
17+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
18+
uint2 test_firstbitlow_ushort2(uint16_t2 p0) {
19+
return firstbitlow(p0);
20+
}
21+
22+
// CHECK-LABEL: test_firstbitlow_ushort3
23+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
24+
uint3 test_firstbitlow_ushort3(uint16_t3 p0) {
25+
return firstbitlow(p0);
26+
}
27+
28+
// CHECK-LABEL: test_firstbitlow_ushort4
29+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
30+
uint4 test_firstbitlow_ushort4(uint16_t4 p0) {
31+
return firstbitlow(p0);
32+
}
33+
34+
// CHECK-LABEL: test_firstbitlow_short
35+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
36+
uint test_firstbitlow_short(int16_t p0) {
37+
return firstbitlow(p0);
38+
}
39+
40+
// CHECK-LABEL: test_firstbitlow_short2
41+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
42+
uint2 test_firstbitlow_short2(int16_t2 p0) {
43+
return firstbitlow(p0);
44+
}
45+
46+
// CHECK-LABEL: test_firstbitlow_short3
47+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
48+
uint3 test_firstbitlow_short3(int16_t3 p0) {
49+
return firstbitlow(p0);
50+
}
51+
52+
// CHECK-LABEL: test_firstbitlow_short4
53+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
54+
uint4 test_firstbitlow_short4(int16_t4 p0) {
55+
return firstbitlow(p0);
56+
}
57+
#endif // __HLSL_ENABLE_16_BIT
58+
59+
// CHECK-LABEL: test_firstbitlow_uint
60+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
61+
uint test_firstbitlow_uint(uint p0) {
62+
return firstbitlow(p0);
63+
}
64+
65+
// CHECK-LABEL: test_firstbitlow_uint2
66+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
67+
uint2 test_firstbitlow_uint2(uint2 p0) {
68+
return firstbitlow(p0);
69+
}
70+
71+
// CHECK-LABEL: test_firstbitlow_uint3
72+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
73+
uint3 test_firstbitlow_uint3(uint3 p0) {
74+
return firstbitlow(p0);
75+
}
76+
77+
// CHECK-LABEL: test_firstbitlow_uint4
78+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
79+
uint4 test_firstbitlow_uint4(uint4 p0) {
80+
return firstbitlow(p0);
81+
}
82+
83+
// CHECK-LABEL: test_firstbitlow_ulong
84+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
85+
uint test_firstbitlow_ulong(uint64_t p0) {
86+
return firstbitlow(p0);
87+
}
88+
89+
// CHECK-LABEL: test_firstbitlow_ulong2
90+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
91+
uint2 test_firstbitlow_ulong2(uint64_t2 p0) {
92+
return firstbitlow(p0);
93+
}
94+
95+
// CHECK-LABEL: test_firstbitlow_ulong3
96+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
97+
uint3 test_firstbitlow_ulong3(uint64_t3 p0) {
98+
return firstbitlow(p0);
99+
}
100+
101+
// CHECK-LABEL: test_firstbitlow_ulong4
102+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
103+
uint4 test_firstbitlow_ulong4(uint64_t4 p0) {
104+
return firstbitlow(p0);
105+
}
106+
107+
// CHECK-LABEL: test_firstbitlow_int
108+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
109+
uint test_firstbitlow_int(int p0) {
110+
return firstbitlow(p0);
111+
}
112+
113+
// CHECK-LABEL: test_firstbitlow_int2
114+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
115+
uint2 test_firstbitlow_int2(int2 p0) {
116+
return firstbitlow(p0);
117+
}
118+
119+
// CHECK-LABEL: test_firstbitlow_int3
120+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
121+
uint3 test_firstbitlow_int3(int3 p0) {
122+
return firstbitlow(p0);
123+
}
124+
125+
// CHECK-LABEL: test_firstbitlow_int4
126+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
127+
uint4 test_firstbitlow_int4(int4 p0) {
128+
return firstbitlow(p0);
129+
}
130+
131+
// CHECK-LABEL: test_firstbitlow_long
132+
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
133+
uint test_firstbitlow_long(int64_t p0) {
134+
return firstbitlow(p0);
135+
}
136+
137+
// CHECK-LABEL: test_firstbitlow_long2
138+
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
139+
uint2 test_firstbitlow_long2(int64_t2 p0) {
140+
return firstbitlow(p0);
141+
}
142+
143+
// CHECK-LABEL: test_firstbitlow_long3
144+
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
145+
uint3 test_firstbitlow_long3(int64_t3 p0) {
146+
return firstbitlow(p0);
147+
}
148+
149+
// CHECK-LABEL: test_firstbitlow_long4
150+
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
151+
uint4 test_firstbitlow_long4(int64_t4 p0) {
152+
return firstbitlow(p0);
153+
}

clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@ double test_int_builtin(double p0) {
1717

1818
double2 test_int_builtin_2(double2 p0) {
1919
return __builtin_hlsl_elementwise_firstbithigh(p0);
20-
// expected-error@-1 {{1st argument must be a vector of integers
21-
// (was 'double2' (aka 'vector<double, 2>'))}}
20+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
2221
}
2322

2423
float test_int_builtin_3(float p0) {
2524
return __builtin_hlsl_elementwise_firstbithigh(p0);
26-
// expected-error@-1 {{1st argument must be a vector of integers
27-
// (was 'float')}}
25+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
2826
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
2+
3+
int test_too_few_arg() {
4+
return firstbitlow();
5+
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
6+
}
7+
8+
int test_too_many_arg(int p0) {
9+
return firstbitlow(p0, p0);
10+
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
11+
}
12+
13+
double test_int_builtin(double p0) {
14+
return firstbitlow(p0);
15+
// expected-error@-1 {{call to 'firstbitlow' is ambiguous}}
16+
}
17+
18+
double2 test_int_builtin_2(double2 p0) {
19+
return __builtin_hlsl_elementwise_firstbitlow(p0);
20+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
21+
}
22+
23+
float test_int_builtin_3(float p0) {
24+
return __builtin_hlsl_elementwise_firstbitlow(p0);
25+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
26+
}

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,6 @@ def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>
106106
def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
107107
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
108108
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
109+
// TODO: check this against the spec
110+
def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
109111
}

llvm/include/llvm/IR/IntrinsicsSPIRV.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ let TargetPrefix = "spv" in {
108108
[IntrNoMem]>;
109109
def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
110110
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
111+
def int_spv_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
111112

112113
// Read a value from the image buffer. It does not translate directly to a
113114
// single OpImageRead because the result type is not necessarily a 4 element

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,19 @@ def CountBits : DXILOp<31, unaryBits> {
564564
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
565565
}
566566

567+
def FirstbitLo : DXILOp<32, unaryBits> {
568+
let Doc = "Returns the location of the first set bit starting from "
569+
"the lowest order bit and working upward.";
570+
let LLVMIntrinsic = int_dx_firstbitlow;
571+
let arguments = [OverloadTy];
572+
let result = Int32Ty;
573+
let overloads =
574+
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
575+
let stages = [Stages<DXIL1_0, [all_stages]>];
576+
// TODO: check these
577+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
578+
}
579+
567580
def FirstbitHi : DXILOp<33, unaryBits> {
568581
let Doc = "Returns the location of the first set bit starting from "
569582
"the highest order bit and working downward.";

llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
4242
case Intrinsic::dx_splitdouble:
4343
case Intrinsic::dx_firstbituhigh:
4444
case Intrinsic::dx_firstbitshigh:
45+
case Intrinsic::dx_firstbitlow:
4546
return true;
4647
default:
4748
return false;

0 commit comments

Comments
 (0)