Skip to content

[clang][powerpc][wasm][systemz][x86] Replace target vector popcount intrinsics with __builtin_elementwise_popcount #109160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 commit merged on Sep 19, 2024
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions clang/include/clang/Basic/BuiltinsPPC.def
Original file line number Diff line number Diff line change
Expand Up @@ -541,12 +541,6 @@ TARGET_BUILTIN(__builtin_altivec_vprtybw, "V4UiV4Ui", "", "power9-vector")
TARGET_BUILTIN(__builtin_altivec_vprtybd, "V2ULLiV2ULLi", "", "power9-vector")
TARGET_BUILTIN(__builtin_altivec_vprtybq, "V1ULLLiV1ULLLi", "", "power9-vector")

// Vector population count built-ins
TARGET_BUILTIN(__builtin_altivec_vpopcntb, "V16UcV16Uc", "", "power8-vector")
TARGET_BUILTIN(__builtin_altivec_vpopcnth, "V8UsV8Us", "", "power8-vector")
TARGET_BUILTIN(__builtin_altivec_vpopcntw, "V4UiV4Ui", "", "power8-vector")
TARGET_BUILTIN(__builtin_altivec_vpopcntd, "V2ULLiV2ULLi", "", "power8-vector")

// Absolute difference built-ins
TARGET_BUILTIN(__builtin_altivec_vabsdub, "V16UcV16UcV16Uc", "",
"power9-vector")
Expand Down
4 changes: 0 additions & 4 deletions clang/include/clang/Basic/BuiltinsSystemZ.def
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,6 @@ TARGET_BUILTIN(__builtin_s390_vmof, "V2SLLiV4SiV4Si", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vmlob, "V8UsV16UcV16Uc", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vmloh, "V4UiV8UsV8Us", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vmlof, "V2ULLiV4UiV4Ui", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vpopctb, "V16UcV16Uc", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vpopcth, "V8UsV8Us", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vpopctf, "V4UiV4Ui", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vpopctg, "V2ULLiV2ULLi", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vsq, "SLLLiSLLLiSLLLi", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vsbcbiq, "ULLLiULLLiULLLiULLLi", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vsbiq, "ULLLiULLLiULLLiULLLi", "nc", "vector")
Expand Down
2 changes: 0 additions & 2 deletions clang/include/clang/Basic/BuiltinsWebAssembly.def
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,6 @@ TARGET_BUILTIN(__builtin_wasm_max_u_i32x4, "V4UiV4UiV4Ui", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_avgr_u_i8x16, "V16UcV16UcV16Uc", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_avgr_u_i16x8, "V8UsV8UsV8Us", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_popcnt_i8x16, "V16ScV16Sc", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_q15mulr_sat_s_i16x8, "V8sV8sV8s", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_extadd_pairwise_i8x16_s_i16x8, "V8sV16Sc", "nc", "simd128")
Expand Down
14 changes: 0 additions & 14 deletions clang/include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -866,20 +866,6 @@ TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512, "V16iV16i", "ncV:512:", "avx512c
TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "ncV:512:", "avx512cd,evex512")
TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8OiV8Oi", "ncV:512:", "avx512cd,evex512")

TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "ncV:128:", "avx512vpopcntdq,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2OiV2Oi", "ncV:128:", "avx512vpopcntdq,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "ncV:256:", "avx512vpopcntdq,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4OiV4Oi", "ncV:256:", "avx512vpopcntdq,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "ncV:512:", "avx512vpopcntdq,evex512")
TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8OiV8Oi", "ncV:512:", "avx512vpopcntdq,evex512")

TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "ncV:128:", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "ncV:128:", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "ncV:256:", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "ncV:256:", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "ncV:512:", "avx512bitalg,evex512")
TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "ncV:512:", "avx512bitalg,evex512")

TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "ncV:128:", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "ncV:256:", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "UOiV64cV64cUOi", "ncV:512:", "avx512bitalg,evex512")
Expand Down
41 changes: 0 additions & 41 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14807,22 +14807,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storesd128_mask:
return EmitX86MaskedStore(*this, Ops, Align(1));

case X86::BI__builtin_ia32_vpopcntb_128:
case X86::BI__builtin_ia32_vpopcntd_128:
case X86::BI__builtin_ia32_vpopcntq_128:
case X86::BI__builtin_ia32_vpopcntw_128:
case X86::BI__builtin_ia32_vpopcntb_256:
case X86::BI__builtin_ia32_vpopcntd_256:
case X86::BI__builtin_ia32_vpopcntq_256:
case X86::BI__builtin_ia32_vpopcntw_256:
case X86::BI__builtin_ia32_vpopcntb_512:
case X86::BI__builtin_ia32_vpopcntd_512:
case X86::BI__builtin_ia32_vpopcntq_512:
case X86::BI__builtin_ia32_vpopcntw_512: {
llvm::Type *ResultType = ConvertType(E->getType());
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
return Builder.CreateCall(F, Ops);
}
case X86::BI__builtin_ia32_cvtmask2b128:
case X86::BI__builtin_ia32_cvtmask2b256:
case X86::BI__builtin_ia32_cvtmask2b512:
Expand Down Expand Up @@ -17624,15 +17608,6 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
return Builder.CreateBitCast(
Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
}
case PPC::BI__builtin_altivec_vpopcntb:
case PPC::BI__builtin_altivec_vpopcnth:
case PPC::BI__builtin_altivec_vpopcntw:
case PPC::BI__builtin_altivec_vpopcntd: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
return Builder.CreateCall(F, X);
}
case PPC::BI__builtin_altivec_vadduqm:
case PPC::BI__builtin_altivec_vsubuqm: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Expand Down Expand Up @@ -19702,16 +19677,6 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
// be represented via standard LLVM IR, which is preferable to enable common
// LLVM optimizations.

case SystemZ::BI__builtin_s390_vpopctb:
case SystemZ::BI__builtin_s390_vpopcth:
case SystemZ::BI__builtin_s390_vpopctf:
case SystemZ::BI__builtin_s390_vpopctg: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
return Builder.CreateCall(F, X);
}

case SystemZ::BI__builtin_s390_vclzb:
case SystemZ::BI__builtin_s390_vclzh:
case SystemZ::BI__builtin_s390_vclzf:
Expand Down Expand Up @@ -21613,12 +21578,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
return Builder.CreateCall(Callee, {LHS, RHS});
}
case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
Value *Vec = EmitScalarExpr(E->getArg(0));
Function *Callee =
CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
return Builder.CreateCall(Callee, {Vec});
}
case WebAssembly::BI__builtin_wasm_any_true_v128:
case WebAssembly::BI__builtin_wasm_all_true_i8x16:
case WebAssembly::BI__builtin_wasm_all_true_i16x8:
Expand Down
16 changes: 8 additions & 8 deletions clang/lib/Headers/altivec.h
Original file line number Diff line number Diff line change
Expand Up @@ -2502,37 +2502,37 @@ vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) {

static __inline__ vector unsigned char __ATTRS_o_ai
vec_popcnt(vector signed char __a) {
return (vector unsigned char)__builtin_altivec_vpopcntb(
return (vector unsigned char)__builtin_elementwise_popcount(
(vector unsigned char)__a);
}
static __inline__ vector unsigned char __ATTRS_o_ai
vec_popcnt(vector unsigned char __a) {
return __builtin_altivec_vpopcntb(__a);
return __builtin_elementwise_popcount(__a);
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_popcnt(vector signed short __a) {
return (vector unsigned short)__builtin_altivec_vpopcnth(
return (vector unsigned short)__builtin_elementwise_popcount(
(vector unsigned short)__a);
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_popcnt(vector unsigned short __a) {
return __builtin_altivec_vpopcnth(__a);
return __builtin_elementwise_popcount(__a);
}
static __inline__ vector unsigned int __ATTRS_o_ai
vec_popcnt(vector signed int __a) {
return __builtin_altivec_vpopcntw((vector unsigned int)__a);
return __builtin_elementwise_popcount((vector unsigned int)__a);
}
static __inline__ vector unsigned int __ATTRS_o_ai
vec_popcnt(vector unsigned int __a) {
return __builtin_altivec_vpopcntw(__a);
return __builtin_elementwise_popcount(__a);
}
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_popcnt(vector signed long long __a) {
return __builtin_altivec_vpopcntd((vector unsigned long long)__a);
return __builtin_elementwise_popcount((vector unsigned long long)__a);
}
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_popcnt(vector unsigned long long __a) {
return __builtin_altivec_vpopcntd(__a);
return __builtin_elementwise_popcount(__a);
}

#define vec_vclz vec_cntlz
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Headers/avx512bitalgintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi16(__m512i __A)
{
return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A);
return (__m512i)__builtin_elementwise_popcount((__v32hu)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
Expand All @@ -45,7 +45,7 @@ _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi8(__m512i __A)
{
return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A);
return (__m512i)__builtin_elementwise_popcount((__v64qu)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/Headers/avx512vlbitalgintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi16(__m256i __A)
{
return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A);
return (__m256i)__builtin_elementwise_popcount((__v16hu)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
Expand All @@ -49,7 +49,7 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi16(__m128i __A)
{
return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
return (__m128i)__builtin_elementwise_popcount((__v8hu)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
Expand All @@ -71,7 +71,7 @@ _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi8(__m256i __A)
{
return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A);
return (__m256i)__builtin_elementwise_popcount((__v32qu)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
Expand All @@ -93,7 +93,7 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi8(__m128i __A)
{
return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
return (__m128i)__builtin_elementwise_popcount((__v16qu)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Headers/avx512vpopcntdqintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
__min_vector_width__(512)))

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
return (__m512i)__builtin_elementwise_popcount((__v8du)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
Expand All @@ -37,7 +37,7 @@ _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
}

static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) {
return (__m512i)__builtin_ia32_vpopcntd_512((__v16si)__A);
return (__m512i)__builtin_elementwise_popcount((__v16su)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/Headers/avx512vpopcntdqvlintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi64(__m128i __A) {
return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A);
return (__m128i)__builtin_elementwise_popcount((__v2du)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
Expand All @@ -43,7 +43,7 @@ _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi32(__m128i __A) {
return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A);
return (__m128i)__builtin_elementwise_popcount((__v4su)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
Expand All @@ -59,7 +59,7 @@ _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi64(__m256i __A) {
return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A);
return (__m256i)__builtin_elementwise_popcount((__v4du)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
Expand All @@ -75,7 +75,7 @@ _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi32(__m256i __A) {
return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A);
return (__m256i)__builtin_elementwise_popcount((__v8su)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
Expand Down
16 changes: 8 additions & 8 deletions clang/lib/Headers/vecintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -6477,42 +6477,42 @@ vec_cnttz(__vector unsigned long long __a) {

static inline __ATTRS_o_ai __vector unsigned char
vec_popcnt(__vector signed char __a) {
return __builtin_s390_vpopctb((__vector unsigned char)__a);
return __builtin_elementwise_popcount((__vector unsigned char)__a);
}

static inline __ATTRS_o_ai __vector unsigned char
vec_popcnt(__vector unsigned char __a) {
return __builtin_s390_vpopctb(__a);
return __builtin_elementwise_popcount(__a);
}

static inline __ATTRS_o_ai __vector unsigned short
vec_popcnt(__vector signed short __a) {
return __builtin_s390_vpopcth((__vector unsigned short)__a);
return __builtin_elementwise_popcount((__vector unsigned short)__a);
}

static inline __ATTRS_o_ai __vector unsigned short
vec_popcnt(__vector unsigned short __a) {
return __builtin_s390_vpopcth(__a);
return __builtin_elementwise_popcount(__a);
}

static inline __ATTRS_o_ai __vector unsigned int
vec_popcnt(__vector signed int __a) {
return __builtin_s390_vpopctf((__vector unsigned int)__a);
return __builtin_elementwise_popcount((__vector unsigned int)__a);
}

static inline __ATTRS_o_ai __vector unsigned int
vec_popcnt(__vector unsigned int __a) {
return __builtin_s390_vpopctf(__a);
return __builtin_elementwise_popcount(__a);
}

static inline __ATTRS_o_ai __vector unsigned long long
vec_popcnt(__vector signed long long __a) {
return __builtin_s390_vpopctg((__vector unsigned long long)__a);
return __builtin_elementwise_popcount((__vector unsigned long long)__a);
}

static inline __ATTRS_o_ai __vector unsigned long long
vec_popcnt(__vector unsigned long long __a) {
return __builtin_s390_vpopctg(__a);
return __builtin_elementwise_popcount(__a);
}

/*-- vec_rl -----------------------------------------------------------------*/
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Headers/wasm_simd128.h
Original file line number Diff line number Diff line change
Expand Up @@ -957,7 +957,7 @@ static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i8x16_bitmask(v128_t __a) {
}

static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_popcnt(v128_t __a) {
return (v128_t)__builtin_wasm_popcnt_i8x16((__i8x16)__a);
return (v128_t)__builtin_elementwise_popcount((__i8x16)__a);
}

static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t __a,
Expand Down
9 changes: 0 additions & 9 deletions clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -301,15 +301,6 @@ void test_integer(void) {
vul = __builtin_s390_vmlof(vui, vui);
// CHECK: call <2 x i64> @llvm.s390.vmlof(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})

vuc = __builtin_s390_vpopctb(vuc);
// CHECK: call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %{{.*}})
vus = __builtin_s390_vpopcth(vus);
// CHECK: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %{{.*}})
vui = __builtin_s390_vpopctf(vui);
// CHECK: call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %{{.*}})
vul = __builtin_s390_vpopctg(vul);
// CHECK: call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %{{.*}})

si128 = __builtin_s390_vsq(si128, si128);
// CHECK: call i128 @llvm.s390.vsq(i128 %{{.*}}, i128 %{{.*}})
ui128 = __builtin_s390_vsbiq(ui128, ui128, ui128);
Expand Down
8 changes: 4 additions & 4 deletions clang/test/CodeGen/X86/avx512bitalg-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ __m512i test_mm512_popcnt_epi16(__m512i __A) {
__m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_popcnt_epi16
// CHECK: @llvm.ctpop.v32i16
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_popcnt_epi16(__A, __U, __B);
}
__m512i test_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi16
// CHECK: @llvm.ctpop.v32i16
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
// CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_popcnt_epi16(__U, __B);
}

Expand All @@ -30,13 +30,13 @@ __m512i test_mm512_popcnt_epi8(__m512i __A) {
__m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_popcnt_epi8
// CHECK: @llvm.ctpop.v64i8
// CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
// CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_popcnt_epi8(__A, __U, __B);
}
__m512i test_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi8
// CHECK: @llvm.ctpop.v64i8
// CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
// CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_popcnt_epi8(__U, __B);
}

Expand Down
Loading
Loading