-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang][powerpc][wasm][systemz][x86] Replace target vector popcount intrinsics with __builtin_elementwise_popcount #109160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
aee207d
to
dcbe496
Compare
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-backend-powerpc Author: Simon Pilgrim (RKSimon) ChangesNow that we have the C++ Patch is 29.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/109160.diff 18 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 261e91b06b7104..161df386f00f03 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -541,12 +541,6 @@ TARGET_BUILTIN(__builtin_altivec_vprtybw, "V4UiV4Ui", "", "power9-vector")
TARGET_BUILTIN(__builtin_altivec_vprtybd, "V2ULLiV2ULLi", "", "power9-vector")
TARGET_BUILTIN(__builtin_altivec_vprtybq, "V1ULLLiV1ULLLi", "", "power9-vector")
-// Vector population count built-ins
-TARGET_BUILTIN(__builtin_altivec_vpopcntb, "V16UcV16Uc", "", "power8-vector")
-TARGET_BUILTIN(__builtin_altivec_vpopcnth, "V8UsV8Us", "", "power8-vector")
-TARGET_BUILTIN(__builtin_altivec_vpopcntw, "V4UiV4Ui", "", "power8-vector")
-TARGET_BUILTIN(__builtin_altivec_vpopcntd, "V2ULLiV2ULLi", "", "power8-vector")
-
// Absolute difference built-ins
TARGET_BUILTIN(__builtin_altivec_vabsdub, "V16UcV16UcV16Uc", "",
"power9-vector")
diff --git a/clang/include/clang/Basic/BuiltinsSystemZ.def b/clang/include/clang/Basic/BuiltinsSystemZ.def
index f0c0ebfa622a43..c564dd9e486bc6 100644
--- a/clang/include/clang/Basic/BuiltinsSystemZ.def
+++ b/clang/include/clang/Basic/BuiltinsSystemZ.def
@@ -157,10 +157,6 @@ TARGET_BUILTIN(__builtin_s390_vmof, "V2SLLiV4SiV4Si", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vmlob, "V8UsV16UcV16Uc", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vmloh, "V4UiV8UsV8Us", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vmlof, "V2ULLiV4UiV4Ui", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vpopctb, "V16UcV16Uc", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vpopcth, "V8UsV8Us", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vpopctf, "V4UiV4Ui", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_vpopctg, "V2ULLiV2ULLi", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vsq, "SLLLiSLLLiSLLLi", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vsbcbiq, "ULLLiULLLiULLLiULLLi", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vsbiq, "ULLLiULLLiULLLiULLLi", "nc", "vector")
diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
index ad73f031922a0b..be9ba7599fe5af 100644
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -99,8 +99,6 @@ TARGET_BUILTIN(__builtin_wasm_max_u_i32x4, "V4UiV4UiV4Ui", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_avgr_u_i8x16, "V16UcV16UcV16Uc", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_avgr_u_i16x8, "V8UsV8UsV8Us", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_popcnt_i8x16, "V16ScV16Sc", "nc", "simd128")
-
TARGET_BUILTIN(__builtin_wasm_q15mulr_sat_s_i16x8, "V8sV8sV8s", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_extadd_pairwise_i8x16_s_i16x8, "V8sV16Sc", "nc", "simd128")
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 3f47e34cc9098c..25c656a530b158 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -866,20 +866,6 @@ TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512, "V16iV16i", "ncV:512:", "avx512c
TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "ncV:512:", "avx512cd,evex512")
TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8OiV8Oi", "ncV:512:", "avx512cd,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "ncV:128:", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2OiV2Oi", "ncV:128:", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "ncV:256:", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4OiV4Oi", "ncV:256:", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "ncV:512:", "avx512vpopcntdq,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8OiV8Oi", "ncV:512:", "avx512vpopcntdq,evex512")
-
-TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "ncV:128:", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "ncV:128:", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "ncV:256:", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "ncV:256:", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "ncV:512:", "avx512bitalg,evex512")
-TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "ncV:512:", "avx512bitalg,evex512")
-
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "ncV:128:", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "ncV:256:", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "UOiV64cV64cUOi", "ncV:512:", "avx512bitalg,evex512")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 7e18aafcdd4b8a..e2711f1ba70239 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14807,22 +14807,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storesd128_mask:
return EmitX86MaskedStore(*this, Ops, Align(1));
- case X86::BI__builtin_ia32_vpopcntb_128:
- case X86::BI__builtin_ia32_vpopcntd_128:
- case X86::BI__builtin_ia32_vpopcntq_128:
- case X86::BI__builtin_ia32_vpopcntw_128:
- case X86::BI__builtin_ia32_vpopcntb_256:
- case X86::BI__builtin_ia32_vpopcntd_256:
- case X86::BI__builtin_ia32_vpopcntq_256:
- case X86::BI__builtin_ia32_vpopcntw_256:
- case X86::BI__builtin_ia32_vpopcntb_512:
- case X86::BI__builtin_ia32_vpopcntd_512:
- case X86::BI__builtin_ia32_vpopcntq_512:
- case X86::BI__builtin_ia32_vpopcntw_512: {
- llvm::Type *ResultType = ConvertType(E->getType());
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
- return Builder.CreateCall(F, Ops);
- }
case X86::BI__builtin_ia32_cvtmask2b128:
case X86::BI__builtin_ia32_cvtmask2b256:
case X86::BI__builtin_ia32_cvtmask2b512:
@@ -17624,15 +17608,6 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
return Builder.CreateBitCast(
Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
}
- case PPC::BI__builtin_altivec_vpopcntb:
- case PPC::BI__builtin_altivec_vpopcnth:
- case PPC::BI__builtin_altivec_vpopcntw:
- case PPC::BI__builtin_altivec_vpopcntd: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
- return Builder.CreateCall(F, X);
- }
case PPC::BI__builtin_altivec_vadduqm:
case PPC::BI__builtin_altivec_vsubuqm: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
@@ -19702,16 +19677,6 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
// be represented via standard LLVM IR, which is preferable to enable common
// LLVM optimizations.
- case SystemZ::BI__builtin_s390_vpopctb:
- case SystemZ::BI__builtin_s390_vpopcth:
- case SystemZ::BI__builtin_s390_vpopctf:
- case SystemZ::BI__builtin_s390_vpopctg: {
- llvm::Type *ResultType = ConvertType(E->getType());
- Value *X = EmitScalarExpr(E->getArg(0));
- Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
- return Builder.CreateCall(F, X);
- }
-
case SystemZ::BI__builtin_s390_vclzb:
case SystemZ::BI__builtin_s390_vclzh:
case SystemZ::BI__builtin_s390_vclzf:
@@ -21613,12 +21578,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);
return Builder.CreateCall(Callee, {LHS, RHS});
}
- case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {
- Value *Vec = EmitScalarExpr(E->getArg(0));
- Function *Callee =
- CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType()));
- return Builder.CreateCall(Callee, {Vec});
- }
case WebAssembly::BI__builtin_wasm_any_true_v128:
case WebAssembly::BI__builtin_wasm_all_true_i8x16:
case WebAssembly::BI__builtin_wasm_all_true_i16x8:
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index 4971631c50f412..8da65055012f1d 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -2502,37 +2502,37 @@ vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) {
static __inline__ vector unsigned char __ATTRS_o_ai
vec_popcnt(vector signed char __a) {
- return (vector unsigned char)__builtin_altivec_vpopcntb(
+ return (vector unsigned char)__builtin_elementwise_popcount(
(vector unsigned char)__a);
}
static __inline__ vector unsigned char __ATTRS_o_ai
vec_popcnt(vector unsigned char __a) {
- return __builtin_altivec_vpopcntb(__a);
+ return __builtin_elementwise_popcount(__a);
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_popcnt(vector signed short __a) {
- return (vector unsigned short)__builtin_altivec_vpopcnth(
+ return (vector unsigned short)__builtin_elementwise_popcount(
(vector unsigned short)__a);
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_popcnt(vector unsigned short __a) {
- return __builtin_altivec_vpopcnth(__a);
+ return __builtin_elementwise_popcount(__a);
}
static __inline__ vector unsigned int __ATTRS_o_ai
vec_popcnt(vector signed int __a) {
- return __builtin_altivec_vpopcntw((vector unsigned int)__a);
+ return __builtin_elementwise_popcount((vector unsigned int)__a);
}
static __inline__ vector unsigned int __ATTRS_o_ai
vec_popcnt(vector unsigned int __a) {
- return __builtin_altivec_vpopcntw(__a);
+ return __builtin_elementwise_popcount(__a);
}
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_popcnt(vector signed long long __a) {
- return __builtin_altivec_vpopcntd((vector unsigned long long)__a);
+ return __builtin_elementwise_popcount((vector unsigned long long)__a);
}
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_popcnt(vector unsigned long long __a) {
- return __builtin_altivec_vpopcntd(__a);
+ return __builtin_elementwise_popcount(__a);
}
#define vec_vclz vec_cntlz
diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h
index bad265ceb7db23..4fceb26a3108b8 100644
--- a/clang/lib/Headers/avx512bitalgintrin.h
+++ b/clang/lib/Headers/avx512bitalgintrin.h
@@ -23,7 +23,7 @@
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi16(__m512i __A)
{
- return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A);
+ return (__m512i)__builtin_elementwise_popcount((__v32hi)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -45,7 +45,7 @@ _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi8(__m512i __A)
{
- return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A);
+ return (__m512i)__builtin_elementwise_popcount((__v64qi)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h
index 377e3a5ea57132..2484ef7119a324 100644
--- a/clang/lib/Headers/avx512vlbitalgintrin.h
+++ b/clang/lib/Headers/avx512vlbitalgintrin.h
@@ -27,7 +27,7 @@
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi16(__m256i __A)
{
- return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A);
+ return (__m256i)__builtin_elementwise_popcount((__v16hi)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -49,7 +49,7 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi16(__m128i __A)
{
- return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
+ return (__m128i)__builtin_elementwise_popcount((__v8hi)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -71,7 +71,7 @@ _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi8(__m256i __A)
{
- return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A);
+ return (__m256i)__builtin_elementwise_popcount((__v32qi)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -93,7 +93,7 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi8(__m128i __A)
{
- return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
+ return (__m128i)__builtin_elementwise_popcount((__v16qi)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
diff --git a/clang/lib/Headers/avx512vpopcntdqintrin.h b/clang/lib/Headers/avx512vpopcntdqintrin.h
index e73e7e4f713137..68dd76ffcb0aa2 100644
--- a/clang/lib/Headers/avx512vpopcntdqintrin.h
+++ b/clang/lib/Headers/avx512vpopcntdqintrin.h
@@ -22,7 +22,7 @@
__min_vector_width__(512)))
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
- return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
+ return (__m512i)__builtin_elementwise_popcount((__v8di)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -37,7 +37,7 @@ _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) {
- return (__m512i)__builtin_ia32_vpopcntd_512((__v16si)__A);
+ return (__m512i)__builtin_elementwise_popcount((__v16si)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avx512vpopcntdqvlintrin.h b/clang/lib/Headers/avx512vpopcntdqvlintrin.h
index b2df2e84d3ed23..ee4b1b919fe4b4 100644
--- a/clang/lib/Headers/avx512vpopcntdqvlintrin.h
+++ b/clang/lib/Headers/avx512vpopcntdqvlintrin.h
@@ -27,7 +27,7 @@
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi64(__m128i __A) {
- return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A);
+ return (__m128i)__builtin_elementwise_popcount((__v2di)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -43,7 +43,7 @@ _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_popcnt_epi32(__m128i __A) {
- return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A);
+ return (__m128i)__builtin_elementwise_popcount((__v4si)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -59,7 +59,7 @@ _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi64(__m256i __A) {
- return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A);
+ return (__m256i)__builtin_elementwise_popcount((__v4di)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -75,7 +75,7 @@ _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_popcnt_epi32(__m256i __A) {
- return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A);
+ return (__m256i)__builtin_elementwise_popcount((__v8si)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h
index 1f51e32c0d136d..c1225d127a875f 100644
--- a/clang/lib/Headers/vecintrin.h
+++ b/clang/lib/Headers/vecintrin.h
@@ -6477,42 +6477,42 @@ vec_cnttz(__vector unsigned long long __a) {
static inline __ATTRS_o_ai __vector unsigned char
vec_popcnt(__vector signed char __a) {
- return __builtin_s390_vpopctb((__vector unsigned char)__a);
+ return __builtin_elementwise_popcount((__vector unsigned char)__a);
}
static inline __ATTRS_o_ai __vector unsigned char
vec_popcnt(__vector unsigned char __a) {
- return __builtin_s390_vpopctb(__a);
+ return __builtin_elementwise_popcount(__a);
}
static inline __ATTRS_o_ai __vector unsigned short
vec_popcnt(__vector signed short __a) {
- return __builtin_s390_vpopcth((__vector unsigned short)__a);
+ return __builtin_elementwise_popcount((__vector unsigned short)__a);
}
static inline __ATTRS_o_ai __vector unsigned short
vec_popcnt(__vector unsigned short __a) {
- return __builtin_s390_vpopcth(__a);
+ return __builtin_elementwise_popcount(__a);
}
static inline __ATTRS_o_ai __vector unsigned int
vec_popcnt(__vector signed int __a) {
- return __builtin_s390_vpopctf((__vector unsigned int)__a);
+ return __builtin_elementwise_popcount((__vector unsigned int)__a);
}
static inline __ATTRS_o_ai __vector unsigned int
vec_popcnt(__vector unsigned int __a) {
- return __builtin_s390_vpopctf(__a);
+ return __builtin_elementwise_popcount(__a);
}
static inline __ATTRS_o_ai __vector unsigned long long
vec_popcnt(__vector signed long long __a) {
- return __builtin_s390_vpopctg((__vector unsigned long long)__a);
+ return __builtin_elementwise_popcount((__vector unsigned long long)__a);
}
static inline __ATTRS_o_ai __vector unsigned long long
vec_popcnt(__vector unsigned long long __a) {
- return __builtin_s390_vpopctg(__a);
+ return __builtin_elementwise_popcount(__a);
}
/*-- vec_rl -----------------------------------------------------------------*/
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
index 14e36e85da8efa..22f0e27ccf756e 100644
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -957,7 +957,7 @@ static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i8x16_bitmask(v128_t __a) {
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_popcnt(v128_t __a) {
- return (v128_t)__builtin_wasm_popcnt_i8x16((__i8x16)__a);
+ return (v128_t)__builtin_elementwise_popcount((__i8x16)__a);
}
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t __a,
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
index 31b8cd11ea79f6..58e452ae05c407 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
@@ -301,15 +301,6 @@ void test_integer(void) {
vul = __builtin_s390_vmlof(vui, vui);
// CHECK: call <2 x i64> @llvm.s390.vmlof(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
- vuc = __builtin_s390_vpopctb(vuc);
- // CHECK: call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %{{.*}})
- vus = __builtin_s390_vpopcth(vus);
- // CHECK: call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %{{.*}})
- vui = __builtin_s390_vpopctf(vui);
- // CHECK: call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %{{.*}})
- vul = __builtin_s390_vpopctg(vul);
- // CHECK: call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %{{.*}})
-
si128 = __builtin_s390_vsq(si128, si128);
// CHECK: call i128 @llvm.s390.vsq(i128 %{{.*}}, i128 %{{.*}})
ui128 = __builtin_s390_vsbiq(ui128, ui128, ui128);
diff --git a/clang/test/CodeGen/X86/avx512bitalg-builtins.c b/clang/test/CodeGen/X86/avx512bitalg-builtins.c
index e5df0a089b9c81..c80fb5e708c556 100644
--- a/clang/test/CodeGen/X86/avx512bitalg-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bitalg-builtins.c
@@ -11,13 +11,13 @@ __m512i test_mm512_popcnt_epi16(__m512i __A) {
__m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_popcnt_epi16
// CHECK: @llvm.ctpop.v32i16
- // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
+ // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_popcnt_epi16(__A, __U, __B);
}
__m512i test_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi16
// CHECK: @llvm.ctpop.v32i16
- // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}}
+ // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_popcnt_epi16(__U, __B);
}
@@ -30,13 +30,13 @@ __m512i test_mm512_popcnt_epi8(__m512i __A) {
__m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_popcnt_epi8
// CHECK: @llvm.ctpop.v64i8
- // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+ // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_popcnt_epi8(__A, __U, __B);
}
__m512i test_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_popcnt_epi8
// CHECK: @llvm.ctpop.v64i8
- // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}}
+ // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_popcn...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
WebAssembly changes LGTM.
@@ -23,7 +23,7 @@ | |||
static __inline__ __m512i __DEFAULT_FN_ATTRS | |||
_mm512_popcnt_epi16(__m512i __A) | |||
{ | |||
return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A); | |||
return (__m512i)__builtin_elementwise_popcount((__v32hi)__A); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hi or hu? The same below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've updated all the x86 cases to use unsigned for consistency - __builtin_elementwise_popcount
doesn't really care, although it expects src/dst types to match (although x86 casts back to _mXXXi types anyway).
…ntrinsics with __builtin_elementwise_popcount All of these intrinsics already immediately map to Intrinsic::ctpop so use the generic intrinsic for simplicity
dcbe496
to
3b9c6da
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
SystemZ changes LGTM. |
@nemanjai Are you OK with the PPC changes? |
Officially we don't guarantee target intrinsics at all - but I can add a release note if it will help? |
hmm, I meant IBM OpenXLC compiler product doc, many builtins are listed there, not the community clang/llvm docs. |
…ntrinsics with __builtin_elementwise_popcount (llvm#109160) Now that we have the C/C++ `__builtin_elementwise_popcount` intrinsic (llvm#108121) - remove custom target intrinsics that just immediately map to Intrinsic::ctpop and use the generic intrinsic directly.
Now that we have the C/C++
__builtin_elementwise_popcount
intrinsic (#108121) - remove custom target intrinsics that just immediately map to Intrinsic::ctpop and use the generic intrinsic directly.