llvm · spall · Sep 18, 2024 · Sep 6, 2024 · Sep 11, 2024 · Sep 17, 2024
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
@@ -667,6 +667,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
  T __builtin_elementwise_log(T x)            return the natural logarithm of x                                floating point types
  T __builtin_elementwise_log2(T x)           return the base 2 logarithm of x                                 floating point types
  T __builtin_elementwise_log10(T x)          return the base 10 logarithm of x                                floating point types
+ T __builtin_elementwise_popcount(T x)       return the number of 1 bits in x                                 integer types
  T __builtin_elementwise_pow(T x, T y)       return x raised to the power of y                                floating point types
  T __builtin_elementwise_bitreverse(T x)     return the integer represented after reversing the bits of x     integer types
  T __builtin_elementwise_exp(T x)            returns the base-e exponential, e^x, of the specified value      floating point types

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
@@ -114,6 +114,7 @@ C++ Language Changes
 
 - Accept C++26 user-defined ``static_assert`` messages in C++11 as an extension.
 
+- Add ``__builtin_elementwise_popcount`` builtin for integer types only.
 
 C++2c Feature Support
 ^^^^^^^^^^^^^^^^^^^^^

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
@@ -1322,6 +1322,13 @@ def ElementwiseLog10 : Builtin {
   let Prototype = "void(...)";
 }
 
+// Element-wise population count; argument types are validated by Sema.
+def ElementwisePopcount : Builtin {
+  let Spellings = ["__builtin_elementwise_popcount"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def ElementwisePow : Builtin {
   let Spellings = ["__builtin_elementwise_pow"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3834,6 +3834,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_floor:
     return RValue::get(emitBuiltinWithOneOverloadedType<1>(
         *this, E, llvm::Intrinsic::floor, "elt.floor"));
+  case Builtin::BI__builtin_elementwise_popcount:
+    return RValue::get(emitBuiltinWithOneOverloadedType<1>(
+        *this, E, llvm::Intrinsic::ctpop, "elt.ctpop"));
   case Builtin::BI__builtin_elementwise_roundeven:
     return RValue::get(emitBuiltinWithOneOverloadedType<1>(
         *this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));

diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -650,6 +650,77 @@ float3 cosh(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_cosh)
 float4 cosh(float4);
 
+//===----------------------------------------------------------------------===//
+// count bits builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T countbits(T Val)
+/// \brief Returns the number of bits set to 1 in each component of \a Val.
+/// \param Val The input integer scalar or vector.
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int16_t countbits(int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int16_t2 countbits(int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int16_t3 countbits(int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int16_t4 countbits(int16_t4);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint16_t countbits(uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint16_t2 countbits(uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint16_t3 countbits(uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint16_t4 countbits(uint16_t4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int countbits(int);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int2 countbits(int2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int3 countbits(int3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int4 countbits(int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint countbits(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint2 countbits(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint3 countbits(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint4 countbits(uint4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int64_t countbits(int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int64_t2 countbits(int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int64_t3 countbits(int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+int64_t4 countbits(int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint64_t countbits(uint64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint64_t2 countbits(uint64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint64_t3 countbits(uint64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
+uint64_t4 countbits(uint64_t4);
+
 //===----------------------------------------------------------------------===//
 // dot product builtins
 //===----------------------------------------------------------------------===//

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
@@ -2795,7 +2795,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     if (BuiltinElementwiseMath(TheCall))
       return ExprError();
     break;
-
+  case Builtin::BI__builtin_elementwise_popcount:
   case Builtin::BI__builtin_elementwise_bitreverse: {
     if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
       return ExprError();

diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -570,6 +570,43 @@ void test_builtin_elementwise_log2(float f1, float f2, double d1, double d2,
   vf2 = __builtin_elementwise_log2(vf1);
 }
 
+void test_builtin_elementwise_popcount(si8 vi1, si8 vi2, long long int i1,
+                                       long long int i2, short si,
+                                       _BitInt(31) bi1, _BitInt(31) bi2) {
+
+  // CHECK:      [[I1:%.+]] = load i64, ptr %i1.addr, align 8
+  // CHECK-NEXT: call i64 @llvm.ctpop.i64(i64 [[I1]])
+  i2 = __builtin_elementwise_popcount(i1);
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr, align 16
+  // CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[VI1]])
+  vi2 = __builtin_elementwise_popcount(vi1);
+
+  // CHECK:      [[CVI2:%.+]] = load <8 x i16>, ptr %cvi2, align 16
+  // CHECK-NEXT: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[CVI2]])
+  const si8 cvi2 = vi2;
+  vi2 = __builtin_elementwise_popcount(cvi2);
+
+  // CHECK:      [[BI1:%.+]] = load i32, ptr %bi1.addr, align 4
+  // CHECK-NEXT: [[LOADEDV:%.+]] = trunc i32 [[BI1]] to i31
+  // CHECK-NEXT: call i31 @llvm.ctpop.i31(i31 [[LOADEDV]])
+  bi2 = __builtin_elementwise_popcount(bi1);
+
+  // CHECK:      [[IA1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4
+  // CHECK-NEXT: call i32 @llvm.ctpop.i32(i32 [[IA1]])
+  b = __builtin_elementwise_popcount(int_as_one);
+
+  // CHECK:   call i32 @llvm.ctpop.i32(i32 -10)
+  b = __builtin_elementwise_popcount(-10);
+
+  // A short operand is promoted to int, so the i32 result is truncated back.
+  // CHECK:      [[SI:%.+]] = load i16, ptr %si.addr, align 2
+  // CHECK-NEXT: [[SI_EXT:%.+]] = sext i16 [[SI]] to i32
+  // CHECK-NEXT: [[RES:%.+]] = call i32 @llvm.ctpop.i32(i32 [[SI_EXT]])
+  // CHECK-NEXT: = trunc i32 [[RES]] to i16
+  si = __builtin_elementwise_popcount(si);
+}
+
 void test_builtin_elementwise_pow(float f1, float f2, double d1, double d2,
                                       float4 vf1, float4 vf2) {
 

diff --git a/clang/test/CodeGenHLSL/builtins/countbits.hlsl b/clang/test/CodeGenHLSL/builtins/countbits.hlsl
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
+
+#ifdef __HLSL_ENABLE_16_BIT
+// CHECK-LABEL: test_countbits_ushort
+// CHECK: call i16 @llvm.ctpop.i16
+uint16_t test_countbits_ushort(uint16_t p0)
+{
+  return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_ushort2
+// CHECK: call <2 x i16> @llvm.ctpop.v2i16
+uint16_t2 test_countbits_ushort2(uint16_t2 p0)
+{
+  return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_ushort3
+// CHECK: call <3 x i16> @llvm.ctpop.v3i16
+uint16_t3 test_countbits_ushort3(uint16_t3 p0)
+{
+  return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_ushort4
+// CHECK: call <4 x i16> @llvm.ctpop.v4i16
+uint16_t4 test_countbits_ushort4(uint16_t4 p0)
+{
+  return countbits(p0);
+}
+#endif
+
+// CHECK-LABEL: test_countbits_uint
+// CHECK: call i32 @llvm.ctpop.i32
+uint test_countbits_uint(uint p0)
+{
+  return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_uint2
+// CHECK: call <2 x i32> @llvm.ctpop.v2i32
+uint2 test_countbits_uint2(uint2 p0)
+{
+  return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_uint3
+// CHECK: call <3 x i32> @llvm.ctpop.v3i32
+uint3 test_countbits_uint3(uint3 p0)
+{
+  return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_uint4
+// CHECK: call <4 x i32> @llvm.ctpop.v4i32
+uint4 test_countbits_uint4(uint4 p0)
+{
+  return countbits(p0);
+}
+
+// CHECK-LABEL: test_countbits_long
+// CHECK: call i64 @llvm.ctpop.i64
+uint64_t test_countbits_long(uint64_t p0)
+{
+  return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_long2
+// CHECK: call <2 x i64> @llvm.ctpop.v2i64
+uint64_t2 test_countbits_long2(uint64_t2 p0)
+{
+  return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_long3
+// CHECK: call <3 x i64> @llvm.ctpop.v3i64
+uint64_t3 test_countbits_long3(uint64_t3 p0)
+{
+  return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_long4
+// CHECK: call <4 x i64> @llvm.ctpop.v4i64
+uint64_t4 test_countbits_long4(uint64_t4 p0)
+{
+  return countbits(p0);
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
@@ -505,6 +505,39 @@ void test_builtin_elementwise_log2(int i, float f, double d, float4 v, int3 iv,
   // expected-error@-1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
 }
 
+void test_builtin_elementwise_popcount(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+  // Only integer scalars and vectors of integers are accepted.
+  struct Foo foo = __builtin_elementwise_popcount(i);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_elementwise_popcount();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_elementwise_popcount(f);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
+
+  i = __builtin_elementwise_popcount(f, f);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  u = __builtin_elementwise_popcount(d);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
+
+  v = __builtin_elementwise_popcount(v);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
+
+  int2 iv2 = __builtin_elementwise_popcount(iv);
+  // expected-error@-1 {{initializing 'int2' (vector of 2 'int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}}
+
+  iv = __builtin_elementwise_popcount(iv2);
+  // expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'int2' (vector of 2 'int' values)}}
+
+  unsigned3 uv3 = __builtin_elementwise_popcount(iv);
+  // expected-error@-1 {{initializing 'unsigned3' (vector of 3 'unsigned int' values) with an expression of incompatible type 'int3' (vector of 3 'int' values)}}
+
+  iv = __builtin_elementwise_popcount(uv3);
+  // expected-error@-1 {{assigning to 'int3' (vector of 3 'int' values) from incompatible type 'unsigned3' (vector of 3 'unsigned int' values)}}
+}
+
 void test_builtin_elementwise_pow(int i, short s, double d, float4 v, int3 iv, unsigned3 uv, int *p) {
   i = __builtin_elementwise_pow(p, d);
   // expected-error@-1 {{arguments are of different types ('int *' vs 'double')}}

diff --git a/clang/test/Sema/countbits-errors.hlsl b/clang/test/Sema/countbits-errors.hlsl
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -finclude-default-header \
+// RUN:   -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type \
+// RUN:   -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected=note
+
+// countbits and the builtin it aliases must reject floating-point operands.
+
+double2 test_int_builtin(double2 p0) {
+  return __builtin_elementwise_popcount(p0);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
+}
+
+float test_ambiguous(float p0) {
+  return countbits(p0);
+  // expected-error@-1 {{call to 'countbits' is ambiguous}}
+}
+
+float test_float_builtin(float p0) {
+  return __builtin_elementwise_popcount(p0);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
+}
diff --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -269,3 +269,11 @@ void test_builtin_elementwise_bitreverse() {
   static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(a))>::value);
   static_assert(!is_const<decltype(__builtin_elementwise_bitreverse(b))>::value);  
 }
+
+void test_builtin_elementwise_popcount() {
+  const int constOperand = 2;
+  int mutableOperand = 1;
+  static_assert(!is_const<decltype(__builtin_elementwise_popcount(constOperand))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_popcount(mutableOperand))>::value);
+}
+
diff --git a/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -finclude-default-header \
+// RUN:   -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type \
+// RUN:   -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
+
+// Floating-point operands must be diagnosed by countbits and by the builtin.
+
+double test_int_builtin(double p0) {
+  return countbits(p0);
+  // expected-error@-1 {{call to 'countbits' is ambiguous}}
+}
+
+double2 test_int_builtin_2(double2 p0) {
+  return __builtin_elementwise_popcount(p0);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
+}
+
+double test_int_builtin_3(float p0) {
+  return __builtin_elementwise_popcount(p0);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'float')}}
+}
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
@@ -553,6 +553,17 @@ def Rbits :  DXILOp<30, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
+// DXIL opcode 31: bit count, mapped from the llvm.ctpop intrinsic.
+def CBits :  DXILOp<31, unary> {
+  let Doc = "Returns the number of 1 bits in the specified value.";
+  let LLVMIntrinsic = int_ctpop;
+  let arguments = [OverloadTy];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
 def FMax :  DXILOp<35, binary> {
   let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
   let LLVMIntrinsic = int_maxnum;