Skip to content

[HLSL] Implement elementwise popcount #108121

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/docs/LanguageExtensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
T __builtin_elementwise_log(T x) return the natural logarithm of x floating point types
T __builtin_elementwise_log2(T x) return the base 2 logarithm of x floating point types
T __builtin_elementwise_log10(T x) return the base 10 logarithm of x floating point types
T __builtin_elementwise_popcount(T x) return the number of 1 bits in x integer types
T __builtin_elementwise_pow(T x, T y) return x raised to the power of y floating point types
T __builtin_elementwise_bitreverse(T x) return the integer represented after reversing the bits of x integer types
T __builtin_elementwise_exp(T x) returns the base-e exponential, e^x, of the specified value floating point types
Expand Down
1 change: 1 addition & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ C++ Language Changes

- Accept C++26 user-defined ``static_assert`` messages in C++11 as an extension.

- Add ``__builtin_elementwise_popcount`` builtin for integer types only.

C++2c Feature Support
^^^^^^^^^^^^^^^^^^^^^
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -1322,6 +1322,12 @@ def ElementwiseLog10 : Builtin {
let Prototype = "void(...)";
}

// Element-wise population count: for an integer scalar or vector operand,
// yields the number of 1 bits in each element.
def ElementwisePopcount : Builtin {
let Spellings = ["__builtin_elementwise_popcount"];
// CustomTypeChecking: the variadic placeholder prototype below is not used to
// check arguments; Sema validates the call for this builtin itself.
let Attributes = [NoThrow, Const, CustomTypeChecking];
let Prototype = "void(...)";
}

def ElementwisePow : Builtin {
let Spellings = ["__builtin_elementwise_pow"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3834,6 +3834,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_elementwise_floor:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::floor, "elt.floor"));
case Builtin::BI__builtin_elementwise_popcount:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
case Builtin::BI__builtin_elementwise_roundeven:
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
*this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));
Expand Down
71 changes: 71 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,77 @@ float3 cosh(float3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh)
float4 cosh(float4);

//===----------------------------------------------------------------------===//
// count bits builtins
//===----------------------------------------------------------------------===//

/// \fn T countbits(T Val)
/// \brief Return the number of bits (per component) set in the input integer.
/// \param Val The input value.
///
/// Overloads are declared for signed and unsigned 16-, 32- and 64-bit integer
/// scalars and for their 2-, 3- and 4-component vectors. Each overload returns
/// the same type as its argument and is an alias of
/// __builtin_elementwise_popcount.

// 16-bit overloads: declared only when __HLSL_ENABLE_16_BIT is defined, and
// marked as available from shader model 6.2.
#ifdef __HLSL_ENABLE_16_BIT
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int16_t countbits(int16_t);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int16_t2 countbits(int16_t2);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int16_t3 countbits(int16_t3);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int16_t4 countbits(int16_t4);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint16_t countbits(uint16_t);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint16_t2 countbits(uint16_t2);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint16_t3 countbits(uint16_t3);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint16_t4 countbits(uint16_t4);
#endif

// Signed 32-bit overloads.
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int countbits(int);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int2 countbits(int2);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int3 countbits(int3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int4 countbits(int4);

// Unsigned 32-bit overloads.
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint countbits(uint);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint2 countbits(uint2);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint3 countbits(uint3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint4 countbits(uint4);

// Signed 64-bit overloads.
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int64_t countbits(int64_t);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int64_t2 countbits(int64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int64_t3 countbits(int64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
int64_t4 countbits(int64_t4);

// Unsigned 64-bit overloads.
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint64_t countbits(uint64_t);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint64_t2 countbits(uint64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint64_t3 countbits(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
uint64_t4 countbits(uint64_t4);

//===----------------------------------------------------------------------===//
// dot product builtins
//===----------------------------------------------------------------------===//
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Sema/SemaChecking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2795,7 +2795,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
if (BuiltinElementwiseMath(TheCall))
return ExprError();
break;

case Builtin::BI__builtin_elementwise_popcount:
case Builtin::BI__builtin_elementwise_bitreverse: {
if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
return ExprError();
Expand Down
37 changes: 37 additions & 0 deletions clang/test/CodeGen/builtins-elementwise-math.c
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,43 @@ void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2,
vf2 = __builtin_elementwise_log2(vf1);
}

// Verifies that __builtin_elementwise_popcount is emitted as a call to
// llvm.ctpop on the operand's type for scalar, vector, _BitInt, address-space
// qualified, constant and promoted-short operands.
// `b` and `int_as_one` are not declared in this function; they come from
// elsewhere in the test file.
void test_builtin_elementwise_popcount(si8 vi1, si8 vi2,
long long int i1, long long int i2, short si,
_BitInt(31) bi1, _BitInt(31) bi2) {


// 64-bit scalar operand.
// CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
// CHECK-NEXT: call i64 @llvm.ctpop.i64(i64 [[I1]])
i2 = __builtin_elementwise_popcount(i1);

// Vector operand (8 x i16).
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
// CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[VI1]])
vi2 = __builtin_elementwise_popcount(vi1);

// Const-qualified vector operand.
// CHECK: [[CVI2:%.+]] = load <8 x i16>, ptr %cvi2, align 16
// CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[CVI2]])
const si8 cvi2 = vi2;
vi2 = __builtin_elementwise_popcount(cvi2);

// _BitInt(31) operand: loaded as i32, truncated to i31, then counted as i31.
// CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
// CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
// CHECK-NEXT: call i31 @llvm.ctpop.i31(i31 [[LOADEDV]])
bi2 = __builtin_elementwise_popcount(bi1);

// Operand read from a global in address space 1.
// CHECK: [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
// CHECK-NEXT: call i32 @llvm.ctpop.i32(i32 [[IA1]])
b = __builtin_elementwise_popcount(int_as_one);

// Integer constant operand.
// CHECK: call i32 @llvm.ctpop.i32(i32 -10)
b = __builtin_elementwise_popcount(-10);

// short operand: sign-extended to i32 before the call, result truncated back
// to i16 for the store.
// CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2
// CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
// CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.ctpop.i32(i32 [[SI_EXT]])
// CHECK-NEXT: = trunc i32 [[RES]] to i16
si = __builtin_elementwise_popcount(si);
}

void test_builtin_elementwise_pow(float f1, float f2, double d1, double d2,
float4 vf1, float4 vf2) {

Expand Down
80 changes: 80 additions & 0 deletions clang/test/CodeGenHLSL/builtins/countbits.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s

// Unsigned 16-bit overloads (scalar and 2-, 3-, 4-component vectors). Each
// call is expected to become an llvm.ctpop call on the matching i16 type.
// NOTE(review): FileCheck reads the directives below from the file text, so
// they still apply if __HLSL_ENABLE_16_BIT is undefined - presumably
// -fnative-half-type on the RUN line guarantees the macro is defined; confirm.
#ifdef __HLSL_ENABLE_16_BIT
// CHECK-LABEL: test_countbits_ushort
// CHECK: call i16 @llvm.ctpop.i16
uint16_t test_countbits_ushort(uint16_t p0)
{
return countbits(p0);
}
// CHECK-LABEL: test_countbits_ushort2
// CHECK: call <2 x i16> @llvm.ctpop.v2i16
uint16_t2 test_countbits_ushort2(uint16_t2 p0)
{
return countbits(p0);
}
// CHECK-LABEL: test_countbits_ushort3
// CHECK: call <3 x i16> @llvm.ctpop.v3i16
uint16_t3 test_countbits_ushort3(uint16_t3 p0)
{
return countbits(p0);
}
// CHECK-LABEL: test_countbits_ushort4
// CHECK: call <4 x i16> @llvm.ctpop.v4i16
uint16_t4 test_countbits_ushort4(uint16_t4 p0)
{
return countbits(p0);
}
#endif

// Unsigned 32-bit overloads (scalar and 2-, 3-, 4-component vectors). Each
// call is expected to become an llvm.ctpop call on the matching i32 type.
// NOTE(review): the scalar test below returns 'int' while every sibling test
// returns the same unsigned type it takes - looks unintentional; confirm and
// consider returning 'uint' for consistency.
// CHECK-LABEL: test_countbits_uint
// CHECK: call i32 @llvm.ctpop.i32
int test_countbits_uint(uint p0)
{
return countbits(p0);
}
// CHECK-LABEL: test_countbits_uint2
// CHECK: call <2 x i32> @llvm.ctpop.v2i32
uint2 test_countbits_uint2(uint2 p0)
{
return countbits(p0);
}
// CHECK-LABEL: test_countbits_uint3
// CHECK: call <3 x i32> @llvm.ctpop.v3i32
uint3 test_countbits_uint3(uint3 p0)
{
return countbits(p0);
}
// CHECK-LABEL: test_countbits_uint4
// CHECK: call <4 x i32> @llvm.ctpop.v4i32
uint4 test_countbits_uint4(uint4 p0)
{
return countbits(p0);
}

// Unsigned 64-bit overloads (scalar and 2-, 3-, 4-component vectors). Each
// call is expected to become an llvm.ctpop call on the matching i64 type.
// Only the unsigned overloads are exercised in this file.
// CHECK-LABEL: test_countbits_long
// CHECK: call i64 @llvm.ctpop.i64
uint64_t test_countbits_long(uint64_t p0)
{
return countbits(p0);
}
// CHECK-LABEL: test_countbits_long2
// CHECK: call <2 x i64> @llvm.ctpop.v2i64
uint64_t2 test_countbits_long2(uint64_t2 p0)
{
return countbits(p0);
}
// CHECK-LABEL: test_countbits_long3
// CHECK: call <3 x i64> @llvm.ctpop.v3i64
uint64_t3 test_countbits_long3(uint64_t3 p0)
{
return countbits(p0);
}
// CHECK-LABEL: test_countbits_long4
// CHECK: call <4 x i64> @llvm.ctpop.v4i64
uint64_t4 test_countbits_long4(uint64_t4 p0)
{
return countbits(p0);
}
33 changes: 33 additions & 0 deletions clang/test/Sema/builtins-elementwise-math.c
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,39 @@ void test_builtin_elementwise_log2(int i, float f, double d, float4 v, int3 iv,
// expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
}

// Diagnostics for __builtin_elementwise_popcount: wrong argument count,
// non-integer operands, and result types that do not match the operand type.
// Each expected-error directive refers to the statement on the line directly
// above it, so keep every directive immediately after its statement.
void test_builtin_elementwise_popcount(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {

// The result has the operand's type ('int' here), so it cannot initialize a struct.
struct Foo s = __builtin_elementwise_popcount(i);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}

// Exactly one argument is required: zero arguments.
i = __builtin_elementwise_popcount();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}

// Floating-point scalar operand is rejected.
i = __builtin_elementwise_popcount(f);
// expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}

// Exactly one argument is required: two arguments.
i = __builtin_elementwise_popcount(f, f);
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}

// Double operand is rejected.
u = __builtin_elementwise_popcount(d);
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}

// Floating-point vector operand is rejected.
v = __builtin_elementwise_popcount(v);
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}

// The result keeps the operand's element count: int3 cannot initialize int2.
int2 i2 = __builtin_elementwise_popcount(iv);
// expected-error@-1 {{initializing 'int2' (vector of 2 'int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}}

// ...and an int2 result cannot be assigned to int3.
iv = __builtin_elementwise_popcount(i2);
// expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'int2' (vector of 2 'int' values)}}

// The result keeps the operand's signedness: int3 cannot initialize unsigned3.
unsigned3 u3 = __builtin_elementwise_popcount(iv);
// expected-error@-1 {{initializing 'unsigned3' (vector of 3 'unsigned int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}}

// ...and an unsigned3 result cannot be assigned to int3.
iv = __builtin_elementwise_popcount(u3);
// expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'unsigned3' (vector of 3 'unsigned int' values)}}
}

void test_builtin_elementwise_pow(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
i = __builtin_elementwise_pow(p, d);
// expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}
Expand Down
28 changes: 28 additions & 0 deletions clang/test/Sema/countbits-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// RUN: %clang_cc1 -finclude-default-header \
// RUN:   -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type \
// RUN:   -emit-llvm-only -disable-llvm-passes -verify

// Negative tests for countbits and for the builtin it aliases,
// __builtin_elementwise_popcount: floating-point operands must be rejected.
//
// Every RUN continuation line needs its own "RUN:" prefix and a trailing
// backslash on the line before it, and every -verify directive must sit on a
// single line; otherwise lit/-verify silently drop the wrapped parts.
//
// NOTE(review): this file overlaps with
// clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl - confirm whether both
// copies are meant to be kept.

double2 test_int_builtin(double2 p0) {
  return __builtin_elementwise_popcount(p0);
  // expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
}

float test_ambiguous(float p0) {
  return countbits(p0);
  // expected-error@-1 {{call to 'countbits' is ambiguous}}
  // One note is emitted per ambiguous candidate overload in the header; match
  // "one or more" so the test does not depend on the exact overload count.
  // expected-note@hlsl/hlsl_intrinsics.h:* 1+ {{candidate function}}
}

float test_float_builtin(float p0) {
  return __builtin_elementwise_popcount(p0);
  // expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
}
8 changes: 8 additions & 0 deletions clang/test/SemaCXX/builtins-elementwise-math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,3 +269,11 @@ void test_builtin_elementwise_bitreverse() {
static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(a))>::value);
static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(b))>::value);
}

// The type of a __builtin_elementwise_popcount call must not be
// const-qualified, whether or not the operand itself is const.
void test_builtin_elementwise_popcount() {
  int mutable_operand = 1;
  const int const_operand = 2;
  static_assert(!is_const<decltype(__builtin_elementwise_popcount(mutable_operand))>::value);
  static_assert(!is_const<decltype(__builtin_elementwise_popcount(const_operand))>::value);
}

21 changes: 21 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// RUN: %clang_cc1 -finclude-default-header \
// RUN:   -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type \
// RUN:   -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected

// Negative tests: countbits and __builtin_elementwise_popcount must reject
// floating-point operands. Each RUN continuation line carries its own "RUN:"
// prefix, and each -verify directive is kept on a single line so that its
// {{...}} pattern is complete.

// A double argument matches no countbits overload exactly, so the call is
// ambiguous. The candidate notes are not listed; -verify-ignore-unexpected
// lets them pass unchecked.
double test_int_builtin(double p0) {
  return countbits(p0);
  // expected-error@-1 {{call to 'countbits' is ambiguous}}
}

// Calling the builtin directly with a floating-point vector is rejected.
double2 test_int_builtin_2(double2 p0) {
  return __builtin_elementwise_popcount(p0);
  // expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
}

// Calling the builtin directly with a floating-point scalar is rejected.
double test_int_builtin_3(float p0) {
  return __builtin_elementwise_popcount(p0);
  // expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
}
11 changes: 11 additions & 0 deletions llvm/lib/Target/DirectX/DXIL.td
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,17 @@ def Rbits : DXILOp<30, unary> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

// DXIL operation with opcode 31 in the `unary` op class; selected for the
// llvm.ctpop intrinsic (population count).
def CBits : DXILOp<31, unary> {
let Doc = "Returns the number of 1 bits in the specified value.";
let LLVMIntrinsic = int_ctpop;
// Single operand; the result has the same overload type as the operand.
let arguments = [OverloadTy];
let result = OverloadTy;
// Only 16-, 32- and 64-bit integer overloads are declared, from DXIL 1.0.
let overloads =
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

def FMax : DXILOp<35, binary> {
let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
let LLVMIntrinsic = int_maxnum;
Expand Down
Loading
Loading