Skip to content

Commit 365886d

Browse files
committed
implement elementwise popcount to implement countbits
1 parent 109cd11 commit 365886d

File tree

13 files changed

+229
-1
lines changed

13 files changed

+229
-1
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
667667
T __builtin_elementwise_log(T x) return the natural logarithm of x floating point types
668668
T __builtin_elementwise_log2(T x) return the base 2 logarithm of x floating point types
669669
T __builtin_elementwise_log10(T x) return the base 10 logarithm of x floating point types
670+
T __builtin_elementwise_popcount(T x) return the number of 1 bits in x integer types
670671
T __builtin_elementwise_pow(T x, T y) return x raised to the power of y floating point types
671672
T __builtin_elementwise_bitreverse(T x) return the integer represented after reversing the bits of x integer types
672673
T __builtin_elementwise_exp(T x) returns the base-e exponential, e^x, of the specified value floating point types

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1322,6 +1322,12 @@ def ElementwiseLog10 : Builtin {
13221322
let Prototype = "void(...)";
13231323
}
13241324

1325+
def ElementwisePopcount : Builtin {
1326+
let Spellings = ["__builtin_elementwise_popcount"];
1327+
let Attributes = [NoThrow, Const, CustomTypeChecking];
1328+
let Prototype = "void(...)";
1329+
}
1330+
13251331
def ElementwisePow : Builtin {
13261332
let Spellings = ["__builtin_elementwise_pow"];
13271333
let Attributes = [NoThrow, Const, CustomTypeChecking];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3834,6 +3834,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
38343834
case Builtin::BI__builtin_elementwise_floor:
38353835
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
38363836
*this, E, llvm::Intrinsic::floor, "elt.floor"));
3837+
case Builtin::BI__builtin_elementwise_popcount:
3838+
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
3839+
*this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
38373840
case Builtin::BI__builtin_elementwise_roundeven:
38383841
return RValue::get(emitBuiltinWithOneOverloadedType<1>(
38393842
*this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));

clang/lib/Headers/hlsl/hlsl_intrinsics.h

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,77 @@ float3 cosh(float3);
614614
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh)
615615
float4 cosh(float4);
616616

617+
//===----------------------------------------------------------------------===//
618+
// count bits builtins
619+
//===----------------------------------------------------------------------===//
620+
621+
/// \fn T countbits(T Val)
622+
/// \brief Return the number of bits (per component) set in the input integer.
623+
/// \param Val The input value.
624+
625+
#ifdef __HLSL_ENABLE_16_BIT
626+
_HLSL_AVAILABILITY(shadermodel, 6.2)
627+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
628+
int16_t countbits(int16_t);
629+
_HLSL_AVAILABILITY(shadermodel, 6.2)
630+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
631+
int16_t2 countbits(int16_t2);
632+
_HLSL_AVAILABILITY(shadermodel, 6.2)
633+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
634+
int16_t3 countbits(int16_t3);
635+
_HLSL_AVAILABILITY(shadermodel, 6.2)
636+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
637+
int16_t4 countbits(int16_t4);
638+
_HLSL_AVAILABILITY(shadermodel, 6.2)
639+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
640+
uint16_t countbits(uint16_t);
641+
_HLSL_AVAILABILITY(shadermodel, 6.2)
642+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
643+
uint16_t2 countbits(uint16_t2);
644+
_HLSL_AVAILABILITY(shadermodel, 6.2)
645+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
646+
uint16_t3 countbits(uint16_t3);
647+
_HLSL_AVAILABILITY(shadermodel, 6.2)
648+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
649+
uint16_t4 countbits(uint16_t4);
650+
#endif
651+
652+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
653+
int countbits(int);
654+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
655+
int2 countbits(int2);
656+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
657+
int3 countbits(int3);
658+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
659+
int4 countbits(int4);
660+
661+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
662+
uint countbits(uint);
663+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
664+
uint2 countbits(uint2);
665+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
666+
uint3 countbits(uint3);
667+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
668+
uint4 countbits(uint4);
669+
670+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
671+
int64_t countbits(int64_t);
672+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
673+
int64_t2 countbits(int64_t2);
674+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
675+
int64_t3 countbits(int64_t3);
676+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
677+
int64_t4 countbits(int64_t4);
678+
679+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
680+
uint64_t countbits(uint64_t);
681+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
682+
uint64_t2 countbits(uint64_t2);
683+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
684+
uint64_t3 countbits(uint64_t3);
685+
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
686+
uint64_t4 countbits(uint64_t4);
687+
617688
//===----------------------------------------------------------------------===//
618689
// dot product builtins
619690
//===----------------------------------------------------------------------===//

clang/lib/Sema/SemaChecking.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2795,7 +2795,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
27952795
if (BuiltinElementwiseMath(TheCall))
27962796
return ExprError();
27972797
break;
2798-
2798+
case Builtin::BI__builtin_elementwise_popcount:
27992799
case Builtin::BI__builtin_elementwise_bitreverse: {
28002800
if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
28012801
return ExprError();

clang/lib/Sema/SemaHLSL.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,6 +1522,14 @@ bool CheckUnsignedIntRepresentation(Sema *S, CallExpr *TheCall) {
15221522
checkAllUnsignedTypes);
15231523
}
15241524

1525+
// Validates the arguments of TheCall by delegating to
// CheckArgsTypesAreCorrect with a predicate that returns true for any type
// that does NOT have an integer representation; S->Context.IntTy is passed as
// the type argument.
// NOTE(review): presumably a true result from the predicate marks the argument
// as invalid and IntTy is the type reported as expected — confirm against
// CheckArgsTypesAreCorrect, which is not visible here.
bool CheckIntRepresentation(Sema *S, CallExpr *TheCall) {
1526+
auto checkAllIntTypes = [](clang::QualType PassedType) -> bool {
1527+
return !PassedType->hasIntegerRepresentation();
1528+
};
1529+
return CheckArgsTypesAreCorrect(S, TheCall, S->Context.IntTy,
1530+
checkAllIntTypes);
1531+
}
1532+
15251533
void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
15261534
QualType ReturnType) {
15271535
auto *VecTyA = TheCall->getArg(0)->getType()->getAs<VectorType>();

clang/test/CodeGen/builtins-elementwise-math.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,43 @@ void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2,
570570
vf2 = __builtin_elementwise_log2(vf1);
571571
}
572572

573+
void test_builtin_elementwise_popcount(si8 vi1, si8 vi2,
574+
long long int i1, long long int i2, short si,
575+
_BitInt(31) bi1, _BitInt(31) bi2) {
576+
577+
578+
// CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8
579+
// CHECK-NEXT: call i64 @llvm.ctpop.i64(i64 [[I1]])
580+
i2 = __builtin_elementwise_popcount(i1);
581+
582+
// CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
583+
// CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[VI1]])
584+
vi2 = __builtin_elementwise_popcount(vi1);
585+
586+
// CHECK: [[CVI2:%.+]] = load <8 x i16>, ptr %cvi2, align 16
587+
// CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[CVI2]])
588+
const si8 cvi2 = vi2;
589+
vi2 = __builtin_elementwise_popcount(cvi2);
590+
591+
// CHECK: [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
592+
// CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
593+
// CHECK-NEXT: call i31 @llvm.ctpop.i31(i31 [[LOADEDV]])
594+
bi2 = __builtin_elementwise_popcount(bi1);
595+
596+
// CHECK: [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
597+
// CHECK-NEXT: call i32 @llvm.ctpop.i32(i32 [[IA1]])
598+
b = __builtin_elementwise_popcount(int_as_one);
599+
600+
// CHECK: call i32 @llvm.ctpop.i32(i32 -10)
601+
b = __builtin_elementwise_popcount(-10);
602+
603+
// CHECK: [[SI:%.+]] = load i16, ptr %si.addr, align 2
604+
// CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
605+
// CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.ctpop.i32(i32 [[SI_EXT]])
606+
// CHECK-NEXT: = trunc i32 [[RES]] to i16
607+
si = __builtin_elementwise_popcount(si);
608+
}
609+
573610
void test_builtin_elementwise_pow(float f1, float f2, double d1, double d2,
574611
float4 vf1, float4 vf2) {
575612

clang/test/Sema/builtins-elementwise-math.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,27 @@ void test_builtin_elementwise_log2(int i, float f, double d, float4 v, int3 iv,
505505
// expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
506506
}
507507

508+
void test_builtin_elementwise_popcount(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
509+
510+
struct Foo s = __builtin_elementwise_popcount(i);
511+
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
512+
513+
i = __builtin_elementwise_popcount();
514+
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
515+
516+
i = __builtin_elementwise_popcount(f);
517+
// expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
518+
519+
i = __builtin_elementwise_popcount(f, f);
520+
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
521+
522+
u = __builtin_elementwise_popcount(d);
523+
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
524+
525+
v = __builtin_elementwise_popcount(v);
526+
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
527+
}
528+
508529
void test_builtin_elementwise_pow(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
509530
i = __builtin_elementwise_pow(p, d);
510531
// expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}

clang/test/SemaCXX/builtins-elementwise-math.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,11 @@ void test_builtin_elementwise_bitreverse() {
269269
static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(a))>::value);
270270
static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(b))>::value);
271271
}
272+
273+
void test_builtin_elementwise_popcount() {
274+
const int a = 2;
275+
int b = 1;
276+
static_assert(!is_const<decltype(__builtin_elementwise_popcount(a))>::value);
277+
static_assert(!is_const<decltype(__builtin_elementwise_popcount(b))>::value);
278+
}
279+

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,17 @@ def Rbits : DXILOp<30, unary> {
550550
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
551551
}
552552

553+
def CBits : DXILOp<31, unary> {
554+
let Doc = "Returns the number of 1 bits in the specified value.";
555+
let LLVMIntrinsic = int_ctpop;
556+
let arguments = [OverloadTy];
557+
let result = OverloadTy;
558+
let overloads =
559+
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
560+
let stages = [Stages<DXIL1_0, [all_stages]>];
561+
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
562+
}
563+
553564
def FMax : DXILOp<35, binary> {
554565
let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
555566
let LLVMIntrinsic = int_maxnum;
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
2+
3+
; Make sure DXIL operation function calls for countbits are generated for the i16, i32, and i64 integer types.
4+
5+
; Function Attrs: nounwind
6+
define noundef i16 @test_countbits_short(i16 noundef %a) {
7+
entry:
8+
; CHECK:call i16 @dx.op.unary.i16(i32 31, i16 %{{.*}})
9+
%elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
10+
ret i16 %elt.ctpop
11+
}
12+
13+
; Function Attrs: nounwind
14+
define noundef i32 @test_countbits_int(i32 noundef %a) {
15+
entry:
16+
; CHECK:call i32 @dx.op.unary.i32(i32 31, i32 %{{.*}})
17+
%elt.ctpop = call i32 @llvm.ctpop.i32(i32 %a)
18+
ret i32 %elt.ctpop
19+
}
20+
21+
; Function Attrs: nounwind
22+
define noundef i64 @test_countbits_long(i64 noundef %a) {
23+
entry:
24+
; CHECK:call i64 @dx.op.unary.i64(i32 31, i64 %{{.*}})
25+
%elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a)
26+
ret i64 %elt.ctpop
27+
}
28+
29+
declare i16 @llvm.ctpop.i16(i16)
30+
declare i32 @llvm.ctpop.i32(i32)
31+
declare i64 @llvm.ctpop.i64(i64)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
2+
3+
; DXIL operation ctpop does not support double overload type
4+
; CHECK: invalid intrinsic signature
5+
6+
define noundef double @countbits_double(double noundef %a) {
7+
entry:
8+
%elt.ctpop = call double @llvm.ctpop.f64(double %a)
9+
ret double %elt.ctpop
10+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
2+
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
3+
4+
; CHECK: OpMemoryModel Logical GLSL450
5+
6+
; Verify that a 32-bit llvm.ctpop call is selected as OpBitCount.
define noundef i32 @countbits_i32(i32 noundef %a) {
7+
entry:
8+
; CHECK: %[[#]] = OpBitCount %[[#]] %[[#]]
9+
%elt.ctpop = call i32 @llvm.ctpop.i32(i32 %a)
10+
ret i32 %elt.ctpop
11+
}
12+
13+
define noundef i16 @countbits_i16(i16 noundef %a) {
14+
entry:
15+
; CHECK: %[[#]] = OpBitCount %[[#]] %[[#]]
16+
%elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
17+
ret i16 %elt.ctpop
18+
}
19+
20+
declare i16 @llvm.ctpop.i16(i16)
21+
declare i32 @llvm.ctpop.i32(i32)

0 commit comments

Comments
 (0)