[X86][AVX10.2] Replace nepbh with bf16 to match with others, NFCI #134240

phoebewang · 2025-04-03T12:31:06Z

No description provided.

llvmbot · 2025-04-03T12:31:44Z

@llvm/pr-subscribers-clang-codegen

@llvm/pr-subscribers-clang

Author: Phoebe Wang (phoebewang)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/134240.diff

4 Files Affected:

(modified) clang/include/clang/Basic/BuiltinsX86.td (+3-3)
(modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (+3-3)
(modified) clang/lib/Headers/avx10_2_512bf16intrin.h (+8-8)
(modified) clang/lib/Headers/avx10_2bf16intrin.h (+16-16)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cc4249acbfee9..67cbbfdec7aaf 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -5365,13 +5365,13 @@ let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<
 
 let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
   def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
-  def vfmaddnepbh512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
+  def vfmaddbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vfmaddnepbh256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>)">;
+  def vfmaddbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vfmaddnepbh128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vfmaddbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
 }
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index dcb0365862a1e..836808619aad0 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -1053,9 +1053,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vfmaddph512_mask:
   case X86::BI__builtin_ia32_vfmaddph512_maskz:
   case X86::BI__builtin_ia32_vfmaddph512_mask3:
-  case X86::BI__builtin_ia32_vfmaddnepbh128:
-  case X86::BI__builtin_ia32_vfmaddnepbh256:
-  case X86::BI__builtin_ia32_vfmaddnepbh512:
+  case X86::BI__builtin_ia32_vfmaddbf16128:
+  case X86::BI__builtin_ia32_vfmaddbf16256:
+  case X86::BI__builtin_ia32_vfmaddbf16512:
   case X86::BI__builtin_ia32_vfmaddps512_mask:
   case X86::BI__builtin_ia32_vfmaddps512_maskz:
   case X86::BI__builtin_ia32_vfmaddps512_mask3:
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
index ce43ecbcfe047..75290d22ef259 100644
--- a/clang/lib/Headers/avx10_2_512bf16intrin.h
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -441,8 +441,8 @@ _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
-                                                 (__v32bf)__C);
+  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B,
+                                                (__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
@@ -469,8 +469,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh(
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
-                                                 -(__v32bf)__C);
+  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B,
+                                                -(__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
@@ -497,8 +497,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh(
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
-                                                 (__v32bf)__C);
+  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B,
+                                                (__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh(
@@ -527,8 +527,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh(
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
-                                                 -(__v32bf)__C);
+  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B,
+                                                -(__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh(
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
index 199cc13ff7a1c..66797ae00fe4f 100644
--- a/clang/lib/Headers/avx10_2bf16intrin.h
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -852,8 +852,8 @@ _mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) {
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B,
-                                                 (__v16bf)__C);
+  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B,
+                                                (__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
@@ -880,8 +880,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh(
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B,
-                                                 -(__v16bf)__C);
+  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B,
+                                                -(__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
@@ -908,8 +908,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh(
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B,
-                                                 (__v16bf)__C);
+  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B,
+                                                (__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pbh(
@@ -938,8 +938,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh(
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B,
-                                                 -(__v16bf)__C);
+  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B,
+                                                -(__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pbh(
@@ -969,8 +969,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh(
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A,
                                                                __m128bh __B,
                                                                __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B,
-                                                 (__v8bf)__C);
+  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B,
+                                                (__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -997,8 +997,8 @@ _mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A,
                                                                __m128bh __B,
                                                                __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B,
-                                                 -(__v8bf)__C);
+  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B,
+                                                -(__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -1025,8 +1025,8 @@ _mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A,
                                                                 __m128bh __B,
                                                                 __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B,
-                                                 (__v8bf)__C);
+  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B,
+                                                (__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -1053,8 +1053,8 @@ _mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A,
                                                                 __m128bh __B,
                                                                 __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B,
-                                                 -(__v8bf)__C);
+  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B,
+                                                -(__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128

llvmbot · 2025-04-03T12:31:44Z

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/134240.diff

4 Files Affected:

(modified) clang/include/clang/Basic/BuiltinsX86.td (+3-3)
(modified) clang/lib/CodeGen/TargetBuiltins/X86.cpp (+3-3)
(modified) clang/lib/Headers/avx10_2_512bf16intrin.h (+8-8)
(modified) clang/lib/Headers/avx10_2bf16intrin.h (+16-16)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index cc4249acbfee9..67cbbfdec7aaf 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -5365,13 +5365,13 @@ let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<
 
 let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
   def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
-  def vfmaddnepbh512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
+  def vfmaddbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
-  def vfmaddnepbh256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>)">;
+  def vfmaddbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>)">;
 }
 
 let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
-  def vfmaddnepbh128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
+  def vfmaddbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
 }
diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
index dcb0365862a1e..836808619aad0 100644
--- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp
@@ -1053,9 +1053,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_vfmaddph512_mask:
   case X86::BI__builtin_ia32_vfmaddph512_maskz:
   case X86::BI__builtin_ia32_vfmaddph512_mask3:
-  case X86::BI__builtin_ia32_vfmaddnepbh128:
-  case X86::BI__builtin_ia32_vfmaddnepbh256:
-  case X86::BI__builtin_ia32_vfmaddnepbh512:
+  case X86::BI__builtin_ia32_vfmaddbf16128:
+  case X86::BI__builtin_ia32_vfmaddbf16256:
+  case X86::BI__builtin_ia32_vfmaddbf16512:
   case X86::BI__builtin_ia32_vfmaddps512_mask:
   case X86::BI__builtin_ia32_vfmaddps512_maskz:
   case X86::BI__builtin_ia32_vfmaddps512_mask3:
diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h
index ce43ecbcfe047..75290d22ef259 100644
--- a/clang/lib/Headers/avx10_2_512bf16intrin.h
+++ b/clang/lib/Headers/avx10_2_512bf16intrin.h
@@ -441,8 +441,8 @@ _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
-                                                 (__v32bf)__C);
+  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B,
+                                                (__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
@@ -469,8 +469,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh(
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
-                                                 -(__v32bf)__C);
+  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, (__v32bf)__B,
+                                                -(__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
@@ -497,8 +497,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh(
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
-                                                 (__v32bf)__C);
+  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B,
+                                                (__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh(
@@ -527,8 +527,8 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh(
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
 _mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
-  return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
-                                                 -(__v32bf)__C);
+  return (__m512bh)__builtin_ia32_vfmaddbf16512((__v32bf)__A, -(__v32bf)__B,
+                                                -(__v32bf)__C);
 }
 
 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh(
diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h
index 199cc13ff7a1c..66797ae00fe4f 100644
--- a/clang/lib/Headers/avx10_2bf16intrin.h
+++ b/clang/lib/Headers/avx10_2bf16intrin.h
@@ -852,8 +852,8 @@ _mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) {
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B,
-                                                 (__v16bf)__C);
+  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B,
+                                                (__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
@@ -880,8 +880,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh(
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B,
-                                                 -(__v16bf)__C);
+  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, (__v16bf)__B,
+                                                -(__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
@@ -908,8 +908,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh(
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B,
-                                                 (__v16bf)__C);
+  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B,
+                                                (__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pbh(
@@ -938,8 +938,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh(
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256
 _mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) {
-  return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B,
-                                                 -(__v16bf)__C);
+  return (__m256bh)__builtin_ia32_vfmaddbf16256((__v16bf)__A, -(__v16bf)__B,
+                                                -(__v16bf)__C);
 }
 
 static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pbh(
@@ -969,8 +969,8 @@ static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh(
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A,
                                                                __m128bh __B,
                                                                __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B,
-                                                 (__v8bf)__C);
+  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B,
+                                                (__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -997,8 +997,8 @@ _mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A,
                                                                __m128bh __B,
                                                                __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B,
-                                                 -(__v8bf)__C);
+  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, (__v8bf)__B,
+                                                -(__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -1025,8 +1025,8 @@ _mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A,
                                                                 __m128bh __B,
                                                                 __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B,
-                                                 (__v8bf)__C);
+  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B,
+                                                (__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128
@@ -1053,8 +1053,8 @@ _mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) {
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A,
                                                                 __m128bh __B,
                                                                 __m128bh __C) {
-  return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B,
-                                                 -(__v8bf)__C);
+  return (__m128bh)__builtin_ia32_vfmaddbf16128((__v8bf)__A, -(__v8bf)__B,
+                                                -(__v8bf)__C);
 }
 
 static __inline__ __m128bh __DEFAULT_FN_ATTRS128

RKSimon

are we missing clang codegen tests for these?

phoebewang · 2025-04-03T13:39:36Z

are we missing clang codegen tests for these?

No, we don't generate these intrinsics. They will be transformed to llvm.fma ones.

RKSimon

LGTM - cheers

e-kud

LGTM. Looks like the last nep.

llvm-ci · 2025-04-04T04:26:58Z

LLVM Buildbot has detected a new failure on builder clang-aarch64-sve-vla running on linaro-g3-04 while building clang at step 7 "ninja check 1".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/17/builds/7014

Here is the relevant piece of the build log for the reference

Step 7 (ninja check 1) failure: stage 1 checked (failure)
...
llvm-lit: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/llvm/utils/lit/lit/llvm/config.py:520: note: using ld64.lld: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/bin/ld64.lld
llvm-lit: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/llvm/utils/lit/lit/llvm/config.py:520: note: using wasm-ld: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/bin/wasm-ld
llvm-lit: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/llvm/utils/lit/lit/discovery.py:276: warning: input '/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/interception/Unit' contained no tests
llvm-lit: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/llvm/utils/lit/lit/discovery.py:276: warning: input '/home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/Unit' contained no tests
llvm-lit: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/llvm/utils/lit/lit/llvm/config.py:520: note: using ld.lld: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/bin/ld.lld
llvm-lit: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/llvm/utils/lit/lit/llvm/config.py:520: note: using lld-link: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/bin/lld-link
llvm-lit: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/llvm/utils/lit/lit/llvm/config.py:520: note: using ld64.lld: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/bin/ld64.lld
llvm-lit: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/llvm/llvm/utils/lit/lit/llvm/config.py:520: note: using wasm-ld: /home/tcwg-buildbot/worker/clang-aarch64-sve-vla/stage1/bin/wasm-ld
-- Testing: 97642 tests, 64 workers --
UNRESOLVED: Flang :: Driver/slp-vectorize.ll (1 of 97642)
******************** TEST 'Flang :: Driver/slp-vectorize.ll' FAILED ********************
Test has no 'RUN:' line
********************
PASS: libFuzzer-aarch64-default-Linux :: deep-recursion.test (2 of 97642)
PASS: Clang :: Analysis/PR24184.cpp (3 of 97642)
PASS: Clangd :: protocol.test (4 of 97642)
PASS: libFuzzer-aarch64-default-Linux :: value-profile-mem.test (5 of 97642)
PASS: Clang :: CodeGenCXX/dllimport-members.cpp (6 of 97642)
PASS: Clang :: CodeGen/X86/mmx-builtins.c (7 of 97642)
PASS: LLVM :: CodeGen/X86/vector-interleaved-load-i8-stride-8.ll (8 of 97642)
PASS: Clang :: OpenMP/target_teams_codegen_registration.cpp (9 of 97642)
PASS: Clang :: OpenMP/target_parallel_codegen_registration.cpp (10 of 97642)
PASS: Clang :: Driver/x86-target-features.c (11 of 97642)
PASS: Clang :: CodeGen/X86/avx2-builtins.c (12 of 97642)
PASS: LLVM :: CodeGen/ARM/build-attributes.ll (13 of 97642)
PASS: ThreadSanitizer-aarch64 :: lots_of_threads.c (14 of 97642)
PASS: Clang :: Analysis/runtime-regression.c (15 of 97642)
PASS: Clang :: Headers/arm-neon-header.c (16 of 97642)
PASS: ThreadSanitizer-aarch64 :: signal_thread.cpp (17 of 97642)
PASS: libFuzzer-aarch64-default-Linux :: large.test (18 of 97642)
PASS: libFuzzer-aarch64-default-Linux :: swap-cmp.test (19 of 97642)
PASS: libFuzzer-aarch64-default-Linux :: msan.test (20 of 97642)
PASS: Clang :: Driver/linux-ld.c (21 of 97642)
PASS: Clang :: CodeGen/X86/rot-intrinsics.c (22 of 97642)
PASS: Clang :: OpenMP/target_teams_distribute_simd_codegen_registration.cpp (23 of 97642)
PASS: Clang :: CodeGen/X86/sse2-builtins.c (24 of 97642)
PASS: Clang :: CodeGen/X86/avx-builtins.c (25 of 97642)
PASS: SanitizerCommon-ubsan-aarch64-Linux :: Linux/signal_segv_handler.cpp (26 of 97642)
PASS: Clang :: Preprocessor/predefined-arch-macros.c (27 of 97642)
PASS: HWAddressSanitizer-aarch64 :: TestCases/Linux/create-thread-stress.cpp (28 of 97642)
PASS: SanitizerCommon-tsan-aarch64-Linux :: Linux/signal_segv_handler.cpp (29 of 97642)
PASS: ThreadSanitizer-aarch64 :: deadlock_detector_stress_test.cpp (30 of 97642)
PASS: Clang :: Analysis/a_flaky_crash.cpp (31 of 97642)
PASS: SanitizerCommon-lsan-aarch64-Linux :: Linux/signal_segv_handler.cpp (32 of 97642)
PASS: SanitizerCommon-asan-aarch64-Linux :: Linux/signal_segv_handler.cpp (33 of 97642)
PASS: Clang :: Driver/clang_f_opts.c (34 of 97642)
PASS: Clang :: OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp (35 of 97642)
PASS: Clang :: Preprocessor/aarch64-target-features.c (36 of 97642)
PASS: LLVM :: CodeGen/AMDGPU/sched-group-barrier-pipeline-solver.mir (37 of 97642)

[X86][AVX10.2] Replace nepbh with bf16 to match with others, NFCI

cf4be7b

phoebewang requested review from RKSimon, e-kud, KanRobert and FreddyLeaf April 3, 2025 12:31

llvmbot added clang Clang issues not falling into any other category backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang:codegen IR generation bugs: mangling, exceptions, etc. labels Apr 3, 2025

RKSimon reviewed Apr 3, 2025

View reviewed changes

RKSimon approved these changes Apr 3, 2025

View reviewed changes

e-kud approved these changes Apr 3, 2025

View reviewed changes

phoebewang merged commit 897f9a5 into llvm:main Apr 4, 2025
17 checks passed

phoebewang deleted the AVX10 branch April 4, 2025 03:27

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[X86][AVX10.2] Replace nepbh with bf16 to match with others, NFCI #134240

[X86][AVX10.2] Replace nepbh with bf16 to match with others, NFCI #134240

Uh oh!

phoebewang commented Apr 3, 2025

Uh oh!

llvmbot commented Apr 3, 2025 •

edited

Loading

Uh oh!

llvmbot commented Apr 3, 2025

Uh oh!

RKSimon left a comment

Uh oh!

phoebewang commented Apr 3, 2025

Uh oh!

RKSimon left a comment

Uh oh!

e-kud left a comment

Uh oh!

Uh oh!

llvm-ci commented Apr 4, 2025

Uh oh!

Uh oh!

[X86][AVX10.2] Replace nepbh with bf16 to match with others, NFCI #134240

[X86][AVX10.2] Replace nepbh with bf16 to match with others, NFCI #134240

Uh oh!

Conversation

phoebewang commented Apr 3, 2025

Uh oh!

llvmbot commented Apr 3, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Apr 3, 2025

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

phoebewang commented Apr 3, 2025

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

e-kud left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Apr 4, 2025

Uh oh!

Uh oh!

llvmbot commented Apr 3, 2025 •

edited

Loading