Skip to content

[clang][x86] Add initial constexpr support for VPOPCNTDQ intrinsics #118017

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions clang/lib/Headers/avx512vpopcntdqintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,14 @@
__target__("avx512vpopcntdq,evex512"), \
__min_vector_width__(512)))

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#endif

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_popcnt_epi64(__m512i __A) {
return (__m512i)__builtin_elementwise_popcount((__v8du)__A);
}

Expand All @@ -36,7 +43,8 @@ _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) {
static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_popcnt_epi32(__m512i __A) {
return (__m512i)__builtin_elementwise_popcount((__v16su)__A);
}

Expand Down
16 changes: 12 additions & 4 deletions clang/lib/Headers/avx512vpopcntdqvlintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,15 @@
__target__("avx512vpopcntdq,avx512vl,no-evex512"), \
__min_vector_width__(256)))

static __inline__ __m128i __DEFAULT_FN_ATTRS128
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_popcnt_epi64(__m128i __A) {
return (__m128i)__builtin_elementwise_popcount((__v2du)__A);
}
Expand All @@ -41,7 +49,7 @@ _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_popcnt_epi32(__m128i __A) {
return (__m128i)__builtin_elementwise_popcount((__v4su)__A);
}
Expand All @@ -57,7 +65,7 @@ _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_popcnt_epi64(__m256i __A) {
return (__m256i)__builtin_elementwise_popcount((__v4du)__A);
}
Expand All @@ -73,7 +81,7 @@ _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_popcnt_epi32(__m256i __A) {
return (__m256i)__builtin_elementwise_popcount((__v8su)__A);
}
Expand Down
8 changes: 8 additions & 0 deletions clang/test/CodeGen/X86/avx512vpopcntdq-builtins.c
Original file line number Diff line number Diff line change
@@ -1,35 +1,43 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>
#include "builtin_test_helpers.h"

__m512i test_mm512_popcnt_epi64(__m512i __A) {
// CHECK-LABEL: @test_mm512_popcnt_epi64
// CHECK: @llvm.ctpop.v8i64
return _mm512_popcnt_epi64(__A);
}
TEST_CONSTEXPR(match_v8di(_mm512_popcnt_epi64((__m512i)(__v8di){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 31, 30, 1, 0, 24, 1, 25));

__m512i test_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_popcnt_epi64
// CHECK: @llvm.ctpop.v8i64
// CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_popcnt_epi64(__W, __U, __A);
}

__m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi64
// CHECK: @llvm.ctpop.v8i64
// CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_popcnt_epi64(__U, __A);
}

__m512i test_mm512_popcnt_epi32(__m512i __A) {
// CHECK-LABEL: @test_mm512_popcnt_epi32
// CHECK: @llvm.ctpop.v16i32
return _mm512_popcnt_epi32(__A);
}
TEST_CONSTEXPR(match_v16si(_mm512_popcnt_epi32((__m512i)(__v16si){+5, -3, -10, +8, 0, -256, +256, -128, +3, +9, +15, +33, +63, +129, +511, +1025}), 2, 31, 30, 1, 0, 24, 1, 25, 2, 2, 4, 2, 6, 2, 9, 2));

__m512i test_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_popcnt_epi32
// CHECK: @llvm.ctpop.v16i32
// CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_popcnt_epi32(__W, __U, __A);
}

__m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi32
// CHECK: @llvm.ctpop.v16i32
Expand Down
15 changes: 15 additions & 0 deletions clang/test/CodeGen/X86/avx512vpopcntdqvl-builtins.c
Original file line number Diff line number Diff line change
@@ -1,35 +1,43 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>
#include "builtin_test_helpers.h"

__m128i test_mm_popcnt_epi64(__m128i __A) {
// CHECK-LABEL: @test_mm_popcnt_epi64
// CHECK: @llvm.ctpop.v2i64
return _mm_popcnt_epi64(__A);
}
TEST_CONSTEXPR(match_v2di(_mm_popcnt_epi64((__m128i)(__v2di){+5, -3}), 2, 63));

__m128i test_mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_popcnt_epi64
// CHECK: @llvm.ctpop.v2i64
// CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_mask_popcnt_epi64(__W, __U, __A);
}

__m128i test_mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_popcnt_epi64
// CHECK: @llvm.ctpop.v2i64
// CHECK: select <2 x i1> %{{.+}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm_maskz_popcnt_epi64(__U, __A);
}

__m128i test_mm_popcnt_epi32(__m128i __A) {
// CHECK-LABEL: @test_mm_popcnt_epi32
// CHECK: @llvm.ctpop.v4i32
return _mm_popcnt_epi32(__A);
}
TEST_CONSTEXPR(match_v4si(_mm_popcnt_epi32((__m128i)(__v4si){+5, -3, -10, +8}), 2, 31, 30, 1));

__m128i test_mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_mask_popcnt_epi32
// CHECK: @llvm.ctpop.v4i32
// CHECK: select <4 x i1> %{{.+}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm_mask_popcnt_epi32(__W, __U, __A);
}

__m128i test_mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: @test_mm_maskz_popcnt_epi32
// CHECK: @llvm.ctpop.v4i32
Expand All @@ -42,29 +50,36 @@ __m256i test_mm256_popcnt_epi64(__m256i __A) {
// CHECK: @llvm.ctpop.v4i64
return _mm256_popcnt_epi64(__A);
}
TEST_CONSTEXPR(match_v4di(_mm256_popcnt_epi64((__m256i)(__v4di){+5, -3, -10, +8}), 2, 63, 62, 1));

__m256i test_mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_popcnt_epi64
// CHECK: @llvm.ctpop.v4i64
// CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_popcnt_epi64(__W, __U, __A);
}

__m256i test_mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_popcnt_epi64
// CHECK: @llvm.ctpop.v4i64
// CHECK: select <4 x i1> %{{.+}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_popcnt_epi64(__U, __A);
}

__m256i test_mm256_popcnt_epi32(__m256i __A) {
// CHECK-LABEL: @test_mm256_popcnt_epi32
// CHECK: @llvm.ctpop.v8i32
return _mm256_popcnt_epi32(__A);
}
TEST_CONSTEXPR(match_v8si(_mm256_popcnt_epi32((__m256i)(__v8si){+5, -3, -10, +8, 0, -256, +256, -128}), 2, 31, 30, 1, 0, 24, 1, 25));

__m256i test_mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_popcnt_epi32
// CHECK: @llvm.ctpop.v8i32
// CHECK: select <8 x i1> %{{.+}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_popcnt_epi32(__W, __U, __A);
}

__m256i test_mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_popcnt_epi32
// CHECK: @llvm.ctpop.v8i32
Expand Down
20 changes: 20 additions & 0 deletions clang/test/CodeGen/X86/builtin_test_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,16 @@ constexpr bool match_m256i(__m256i _v, unsigned long long a, unsigned long long
return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
}

constexpr bool match_v4di(__m256i _v, long long a, long long b, long long c, long long d) {
__v4di v = (__v4di)_v;
return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
}

constexpr bool match_v8si(__m256i _v, int a, int b, int c, int d, int e, int f, int g, int h) {
__v8si v = (__v8si)_v;
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
}

constexpr bool match_m512(__m512 v, float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l, float m, float n, float o, float p) {
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
}
Expand All @@ -86,6 +96,16 @@ constexpr bool match_m512i(__m512i _v, unsigned long long a, unsigned long long
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
}

constexpr bool match_v8di(__m512i _v, long long a, long long b, long long c, long long d, long long e, long long f, long long g, long long h) {
__v8di v = (__v8di)_v;
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
}

constexpr bool match_v16si(__m512i _v, int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, int l, int m, int n, int o, int p) {
__v16si v = (__v16si)_v;
return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
}

#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)

#else
Expand Down