[X86][AMX] Support AMX-AVX512 #114070

phoebewang · 2024-10-29T15:14:20Z

No description provided.

llvmbot · 2024-10-29T15:14:55Z

@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-mc

@llvm/pr-subscribers-clang-driver

Author: Phoebe Wang (phoebewang)

Changes

Patch is 81.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114070.diff

31 Files Affected:

(modified) clang/docs/ReleaseNotes.rst (+2)
(modified) clang/include/clang/Basic/BuiltinsX86_64.def (+13)
(modified) clang/include/clang/Driver/Options.td (+2)
(modified) clang/lib/Basic/Targets/X86.cpp (+6)
(modified) clang/lib/Basic/Targets/X86.h (+1)
(modified) clang/lib/Headers/CMakeLists.txt (+1)
(added) clang/lib/Headers/amxavx512intrin.h (+381)
(modified) clang/lib/Headers/immintrin.h (+4)
(modified) clang/lib/Sema/SemaX86.cpp (+6)
(added) clang/test/CodeGen/X86/amx_avx512_api.c (+52)
(added) clang/test/CodeGen/X86/amxavx512-builtins.c (+41)
(modified) clang/test/CodeGen/attr-target-x86.c (+4-4)
(modified) clang/test/Driver/x86-target-features.c (+7)
(modified) clang/test/Preprocessor/x86_target_features.c (+7)
(modified) llvm/include/llvm/IR/IntrinsicsX86.td (+50)
(modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+1)
(modified) llvm/lib/Target/X86/X86.td (+4)
(modified) llvm/lib/Target/X86/X86ExpandPseudo.cpp (+60-4)
(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+76)
(modified) llvm/lib/Target/X86/X86InstrAMX.td (+147)
(modified) llvm/lib/Target/X86/X86InstrPredicates.td (+1)
(modified) llvm/lib/Target/X86/X86LowerAMXType.cpp (+11)
(modified) llvm/lib/Target/X86/X86PreTileConfig.cpp (+17-2)
(modified) llvm/lib/TargetParser/Host.cpp (+4)
(modified) llvm/lib/TargetParser/X86TargetParser.cpp (+2)
(added) llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll (+171)
(added) llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll (+116)
(added) llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll (+61)
(added) llvm/test/MC/Disassembler/X86/amx-avx512.txt (+106)
(added) llvm/test/MC/X86/amx-avx512-att.s (+105)
(added) llvm/test/MC/X86/amx-avx512-intel.s (+105)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ce046a305c89b6..d45bd1240dd173 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -611,6 +611,8 @@ X86 Support
   * Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
   ``*_(mask(z)))_minmax_s[s|d|h]``.
 
+- Support ISA of ``AMX-AVX512``.
+
 - All intrinsics in adcintrin.h can now be used in constant expressions.
 
 - All intrinsics in adxintrin.h can now be used in constant expressions.
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 2c591edb2835cd..70644f3f6b6054 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -128,6 +128,12 @@ TARGET_BUILTIN(__builtin_ia32_tdpbf16ps_internal, "V256iUsUsUsV256iV256iV256i",
 TARGET_BUILTIN(__builtin_ia32_tdpfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-fp16")
 TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
 TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, "V16fUsUsV256iUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h_internal, "V32yUsUsV256iUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l_internal, "V32yUsUsV256iUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh_internal, "V32xUsUsV256iUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl_internal, "V32xUsUsV256iUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tilemovrow_internal, "V16iUsUsV256iUi", "n", "amx-avx512")
 // AMX
 TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
 TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")
@@ -148,6 +154,13 @@ TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vUOi", "n", "ptwrite")
 TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps, "vIUcIUcIUc", "n", "amx-complex")
 TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps, "vIUcIUcIUc", "n", "amx-complex")
 
+TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps, "V16fIUcUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h, "V32yIUcUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l, "V32yIUcUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh, "V32xIUcUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl, "V32xIUcUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tilemovrow, "V16iIUcUi", "n", "amx-avx512")
+
 TARGET_BUILTIN(__builtin_ia32_prefetchi, "vvC*Ui", "nc", "prefetchi")
 TARGET_BUILTIN(__builtin_ia32_cmpccxadd32, "Siv*SiSiIi", "n", "cmpccxadd")
 TARGET_BUILTIN(__builtin_ia32_cmpccxadd64, "SLLiv*SLLiSLLiIi", "n", "cmpccxadd")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2ddb2f5312148e..fd200abebceb11 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6277,6 +6277,8 @@ def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>;
 def mno_fp_ret_in_387 : Flag<["-"], "mno-fp-ret-in-387">, Alias<mno_x87>;
 def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
 def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>;
+def mamx_avx512 : Flag<["-"], "mamx-avx512">, Group<m_x86_Features_Group>;
+def mno_amx_avx512 : Flag<["-"], "mno-amx-avx512">, Group<m_x86_Features_Group>;
 def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group<m_x86_Features_Group>;
 def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group<m_x86_Features_Group>;
 def mamx_complex : Flag<["-"], "mamx-complex">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 5448bd841959f4..52cab65cbd9451 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -418,6 +418,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAMXTILE = true;
     } else if (Feature == "+amx-complex") {
       HasAMXCOMPLEX = true;
+    } else if (Feature == "+amx-avx512") {
+      HasAMXAVX512 = true;
     } else if (Feature == "+cmpccxadd") {
       HasCMPCCXADD = true;
     } else if (Feature == "+raoint") {
@@ -935,6 +937,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__AMX_FP16__");
   if (HasAMXCOMPLEX)
     Builder.defineMacro("__AMX_COMPLEX__");
+  if (HasAMXAVX512)
+    Builder.defineMacro("__AMX_AVX512__");
   if (HasCMPCCXADD)
     Builder.defineMacro("__CMPCCXADD__");
   if (HasRAOINT)
@@ -1060,6 +1064,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
   return llvm::StringSwitch<bool>(Name)
       .Case("adx", true)
       .Case("aes", true)
+      .Case("amx-avx512", true)
       .Case("amx-bf16", true)
       .Case("amx-complex", true)
       .Case("amx-fp16", true)
@@ -1177,6 +1182,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
   return llvm::StringSwitch<bool>(Feature)
       .Case("adx", HasADX)
       .Case("aes", HasAES)
+      .Case("amx-avx512", HasAMXAVX512)
       .Case("amx-bf16", HasAMXBF16)
       .Case("amx-complex", HasAMXCOMPLEX)
       .Case("amx-fp16", HasAMXFP16)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index a99ae62984c7d5..ce7458ae99ad64 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -156,6 +156,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasAMXINT8 = false;
   bool HasAMXBF16 = false;
   bool HasAMXCOMPLEX = false;
+  bool HasAMXAVX512 = false;
   bool HasSERIALIZE = false;
   bool HasTSXLDTRK = false;
   bool HasUSERMSR = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..88e8f282bd7ec2 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -146,6 +146,7 @@ set(x86_files
   adcintrin.h
   adxintrin.h
   ammintrin.h
+  amxavx512intrin.h
   amxcomplexintrin.h
   amxfp16intrin.h
   amxintrin.h
diff --git a/clang/lib/Headers/amxavx512intrin.h b/clang/lib/Headers/amxavx512intrin.h
new file mode 100644
index 00000000000000..f819696f8086b7
--- /dev/null
+++ b/clang/lib/Headers/amxavx512intrin.h
@@ -0,0 +1,381 @@
+/*===--------------------- amxavx512intrin.h - AMXAVX512 --------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===------------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AMX_AVX512INTRIN_H
+#define __AMX_AVX512INTRIN_H
+#ifdef __x86_64__
+
+#define __DEFAULT_FN_ATTRS_AVX512                                              \
+  __attribute__((__always_inline__, __nodebug__, __target__("amx-avx512")))
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the int32 source elements to fp32. The row of the tile is selected by an
+///    32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowd2ps(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWD2PS instruction.
+///
+/// \param tsrc
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param row
+///    The row of the source tile
+#define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to bf16. It places the resulting bf16 elements
+///    in the high 16 bits within each dword. The row of the tile is selected
+///    by an 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+0] := 0
+///         dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2pbf16h(tsrc, row)                                       \
+  __builtin_ia32_tcvtrowps2pbf16h(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to bf16. It places the resulting bf16 elements
+///    in the low 16 bits within each dword. The row of the tile is selected
+///    by an 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+1] := 0
+///         dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2pbf16l(tsrc, row)                                       \
+  __builtin_ia32_tcvtrowps2pbf16l(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to fp16. It places the resulting fp16 elements
+///    in the high 16 bits within each dword. The row of the tile is selected
+///    by an 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2phh(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+0] := 0
+///         dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to fp16. It places the resulting fp16 elements
+///    in the low 16 bits within each dword. The row of the tile is selected
+///    by an 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2phl(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+1] := 0
+///         dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row)
+
+/// Move one row of a tile data to a v16f32 data.
+/// The row of the tile is selected by a 32b GPR.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m512 _tile_movrow(__tile a, unsigned b);
+/// \endcode
+///
+/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
+///
+/// \param a
+///     The 1st source tile. Max size is 1024 Bytes.
+/// \param b
+///     The 2nd source r32. Size is 4 Bytes.
+/// \returns
+///     The destination v16f32 data. Size is 64 Bytes.
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL>>3
+/// row_index := b&0xffff
+/// row_chunk := ((b>>16)&0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes-1)
+///     IF (row_chunk + i >= a.colsb)
+///             dst.byte[i] := 0
+///     ELSE
+///             dst.byte[i] := a.row[row_index].byte[row_chunk+i]
+/// ENDFOR
+/// \endcode
+#define _tile_movrow(a, b) __builtin_ia32_tilemovrow(a, b)
+
+/// This is internal intrinsic. C/C++ user should avoid calling it directly.
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
+_tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n,
+                               _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
+_tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n,
+                               _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512 _tile_movrow_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u);
+}
+
+/// Move a row from a tile (src0) to a v16f32 dst, converting the int32 source
+/// elements to fp32. No SIMD exceptions are generated. Rounding is done as if
+/// MXCSR.RC=RNE. Embedded rounding is not supported.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWD2PS </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v16f32 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowd2ps_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
+/// elements to bf16 at high 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16H </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32bf16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2pbf16h_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
+/// elements to bf16 at low 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16L </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32bf16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2pbf16l_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
+/// elements to fp16 at high 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PHH </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32fp16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
+/// elements to fp16 at low 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PHL </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32fp16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move one row of a tile data to a v16f32 data.
+/// The row of the tile is selected by a 32b GPR.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v16i32 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512i __tile_movrow(__tile1024i src0, unsigned src1) {
+  return (__m512i)_tile_movrow_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+#endif // __x86_64__
+#endif // __AMX_AVX512INTRIN_H
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..84e56238fcf2dc 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -638,6 +638,10 @@ _storebe_i64(void * __P, long long __D) {
 #include <amxcomplexintrin.h>
 #endif
 
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__)
+#include <amxavx512intrin.h>
+#endif
+
 #if !defined(__SCE__) || __has_feature(modules) ||                             \
     defined(__AVX512VP2INTERSECT__)
 #include <avx512vp2intersectintrin.h>
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 6a4d78f0ca9084..fba901473e6e18 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -631,6 +631,12 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCa...
[truncated]

llvmbot · 2024-10-29T15:14:55Z

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

Changes

Patch is 81.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114070.diff

31 Files Affected:

(modified) clang/docs/ReleaseNotes.rst (+2)
(modified) clang/include/clang/Basic/BuiltinsX86_64.def (+13)
(modified) clang/include/clang/Driver/Options.td (+2)
(modified) clang/lib/Basic/Targets/X86.cpp (+6)
(modified) clang/lib/Basic/Targets/X86.h (+1)
(modified) clang/lib/Headers/CMakeLists.txt (+1)
(added) clang/lib/Headers/amxavx512intrin.h (+381)
(modified) clang/lib/Headers/immintrin.h (+4)
(modified) clang/lib/Sema/SemaX86.cpp (+6)
(added) clang/test/CodeGen/X86/amx_avx512_api.c (+52)
(added) clang/test/CodeGen/X86/amxavx512-builtins.c (+41)
(modified) clang/test/CodeGen/attr-target-x86.c (+4-4)
(modified) clang/test/Driver/x86-target-features.c (+7)
(modified) clang/test/Preprocessor/x86_target_features.c (+7)
(modified) llvm/include/llvm/IR/IntrinsicsX86.td (+50)
(modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+1)
(modified) llvm/lib/Target/X86/X86.td (+4)
(modified) llvm/lib/Target/X86/X86ExpandPseudo.cpp (+60-4)
(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+76)
(modified) llvm/lib/Target/X86/X86InstrAMX.td (+147)
(modified) llvm/lib/Target/X86/X86InstrPredicates.td (+1)
(modified) llvm/lib/Target/X86/X86LowerAMXType.cpp (+11)
(modified) llvm/lib/Target/X86/X86PreTileConfig.cpp (+17-2)
(modified) llvm/lib/TargetParser/Host.cpp (+4)
(modified) llvm/lib/TargetParser/X86TargetParser.cpp (+2)
(added) llvm/test/CodeGen/X86/amx-across-func-tilemovrow.ll (+171)
(added) llvm/test/CodeGen/X86/amx-avx512-intrinsics.ll (+116)
(added) llvm/test/CodeGen/X86/amx-tile-avx512-internals.ll (+61)
(added) llvm/test/MC/Disassembler/X86/amx-avx512.txt (+106)
(added) llvm/test/MC/X86/amx-avx512-att.s (+105)
(added) llvm/test/MC/X86/amx-avx512-intel.s (+105)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ce046a305c89b6..d45bd1240dd173 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -611,6 +611,8 @@ X86 Support
   * Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
   ``*_(mask(z)))_minmax_s[s|d|h]``.
 
+- Support ISA of ``AMX-AVX512``.
+
 - All intrinsics in adcintrin.h can now be used in constant expressions.
 
 - All intrinsics in adxintrin.h can now be used in constant expressions.
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 2c591edb2835cd..70644f3f6b6054 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -128,6 +128,12 @@ TARGET_BUILTIN(__builtin_ia32_tdpbf16ps_internal, "V256iUsUsUsV256iV256iV256i",
 TARGET_BUILTIN(__builtin_ia32_tdpfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-fp16")
 TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
 TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, "V16fUsUsV256iUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h_internal, "V32yUsUsV256iUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l_internal, "V32yUsUsV256iUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh_internal, "V32xUsUsV256iUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl_internal, "V32xUsUsV256iUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tilemovrow_internal, "V16iUsUsV256iUi", "n", "amx-avx512")
 // AMX
 TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
 TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")
@@ -148,6 +154,13 @@ TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vUOi", "n", "ptwrite")
 TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps, "vIUcIUcIUc", "n", "amx-complex")
 TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps, "vIUcIUcIUc", "n", "amx-complex")
 
+TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps, "V16fIUcUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h, "V32yIUcUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l, "V32yIUcUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phh, "V32xIUcUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowps2phl, "V32xIUcUi", "n", "amx-avx512")
+TARGET_BUILTIN(__builtin_ia32_tilemovrow, "V16iIUcUi", "n", "amx-avx512")
+
 TARGET_BUILTIN(__builtin_ia32_prefetchi, "vvC*Ui", "nc", "prefetchi")
 TARGET_BUILTIN(__builtin_ia32_cmpccxadd32, "Siv*SiSiIi", "n", "cmpccxadd")
 TARGET_BUILTIN(__builtin_ia32_cmpccxadd64, "SLLiv*SLLiSLLiIi", "n", "cmpccxadd")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2ddb2f5312148e..fd200abebceb11 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6277,6 +6277,8 @@ def mno_80387 : Flag<["-"], "mno-80387">, Alias<mno_x87>;
 def mno_fp_ret_in_387 : Flag<["-"], "mno-fp-ret-in-387">, Alias<mno_x87>;
 def mmmx : Flag<["-"], "mmmx">, Group<m_x86_Features_Group>;
 def mno_mmx : Flag<["-"], "mno-mmx">, Group<m_x86_Features_Group>;
+def mamx_avx512 : Flag<["-"], "mamx-avx512">, Group<m_x86_Features_Group>;
+def mno_amx_avx512 : Flag<["-"], "mno-amx-avx512">, Group<m_x86_Features_Group>;
 def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group<m_x86_Features_Group>;
 def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group<m_x86_Features_Group>;
 def mamx_complex : Flag<["-"], "mamx-complex">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 5448bd841959f4..52cab65cbd9451 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -418,6 +418,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAMXTILE = true;
     } else if (Feature == "+amx-complex") {
       HasAMXCOMPLEX = true;
+    } else if (Feature == "+amx-avx512") {
+      HasAMXAVX512 = true;
     } else if (Feature == "+cmpccxadd") {
       HasCMPCCXADD = true;
     } else if (Feature == "+raoint") {
@@ -935,6 +937,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__AMX_FP16__");
   if (HasAMXCOMPLEX)
     Builder.defineMacro("__AMX_COMPLEX__");
+  if (HasAMXAVX512)
+    Builder.defineMacro("__AMX_AVX512__");
   if (HasCMPCCXADD)
     Builder.defineMacro("__CMPCCXADD__");
   if (HasRAOINT)
@@ -1060,6 +1064,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
   return llvm::StringSwitch<bool>(Name)
       .Case("adx", true)
       .Case("aes", true)
+      .Case("amx-avx512", true)
       .Case("amx-bf16", true)
       .Case("amx-complex", true)
       .Case("amx-fp16", true)
@@ -1177,6 +1182,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
   return llvm::StringSwitch<bool>(Feature)
       .Case("adx", HasADX)
       .Case("aes", HasAES)
+      .Case("amx-avx512", HasAMXAVX512)
       .Case("amx-bf16", HasAMXBF16)
       .Case("amx-complex", HasAMXCOMPLEX)
       .Case("amx-fp16", HasAMXFP16)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index a99ae62984c7d5..ce7458ae99ad64 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -156,6 +156,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasAMXINT8 = false;
   bool HasAMXBF16 = false;
   bool HasAMXCOMPLEX = false;
+  bool HasAMXAVX512 = false;
   bool HasSERIALIZE = false;
   bool HasTSXLDTRK = false;
   bool HasUSERMSR = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..88e8f282bd7ec2 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -146,6 +146,7 @@ set(x86_files
   adcintrin.h
   adxintrin.h
   ammintrin.h
+  amxavx512intrin.h
   amxcomplexintrin.h
   amxfp16intrin.h
   amxintrin.h
diff --git a/clang/lib/Headers/amxavx512intrin.h b/clang/lib/Headers/amxavx512intrin.h
new file mode 100644
index 00000000000000..f819696f8086b7
--- /dev/null
+++ b/clang/lib/Headers/amxavx512intrin.h
@@ -0,0 +1,381 @@
+/*===--------------------- amxavx512intrin.h - AMXAVX512 --------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===------------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AMX_AVX512INTRIN_H
+#define __AMX_AVX512INTRIN_H
+#ifdef __x86_64__
+
+#define __DEFAULT_FN_ATTRS_AVX512                                              \
+  __attribute__((__always_inline__, __nodebug__, __target__("amx-avx512")))
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the int32 source elements to fp32. The row of the tile is selected by an
+///    32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowd2ps(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWD2PS instruction.
+///
+/// \param tsrc
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param row
+///    The row of the source tile
+#define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to bf16. It places the resulting bf16 elements
+///    in the high 16 bits within each dword. The row of the tile is selected
+///    by an 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+0] := 0
+///         dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2pbf16h(tsrc, row)                                       \
+  __builtin_ia32_tcvtrowps2pbf16h(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to bf16. It places the resulting bf16 elements
+///    in the low 16 bits within each dword. The row of the tile is selected
+///    by an 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+1] := 0
+///         dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2pbf16l(tsrc, row)                                       \
+  __builtin_ia32_tcvtrowps2pbf16l(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to fp16. It places the resulting fp16 elements
+///    in the high 16 bits within each dword. The row of the tile is selected
+///    by an 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2phh(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+0] := 0
+///         dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row)
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to fp16. It places the resulting fp16 elements
+///    in the low 16 bits within each dword. The row of the tile is selected
+///    by an 32b GPR.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \code
+/// __m512i _tile_cvtrowps2phl(__tile tsrc, unsigned int row);
+/// \endcode
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL >> 3
+/// row_index := row & 0xffff
+/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes / 4) - 1
+///     IF i + row_chunk / 4 >= tsrc.colsb / 4
+///         dst.dword[i] := 0
+///     ELSE
+///         dst.word[2*i+1] := 0
+///         dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///     FI
+/// ENDFOR
+/// dst[MAX_VL-1:VL] := 0
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction.
+///
+/// \param tsrc
+///    The source tile. Max size is 1024 Bytes.
+/// \param row
+///    The the row of the source tile.
+#define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row)
+
+/// Move one row of a tile data to a v16f32 data.
+/// The row of the tile is selected by a 32b GPR.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m512 _tile_movrow(__tile a, unsigned b);
+/// \endcode
+///
+/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
+///
+/// \param a
+///     The 1st source tile. Max size is 1024 Bytes.
+/// \param b
+///     The 2nd source r32. Size is 4 Bytes.
+/// \returns
+///     The destination v16f32 data. Size is 64 Bytes.
+///
+/// \code{.operation}
+/// VL := 512
+/// VL_bytes := VL>>3
+/// row_index := b&0xffff
+/// row_chunk := ((b>>16)&0xffff) * VL_bytes
+/// FOR i := 0 TO (VL_bytes-1)
+///     IF (row_chunk + i >= a.colsb)
+///             dst.byte[i] := 0
+///     ELSE
+///             dst.byte[i] := a.row[row_index].byte[row_chunk+i]
+/// ENDFOR
+/// \endcode
+#define _tile_movrow(a, b) __builtin_ia32_tilemovrow(a, b)
+
+/// This is internal intrinsic. C/C++ user should avoid calling it directly.
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
+_tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n,
+                               _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u);
+}
+
+static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
+_tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n,
+                               _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512 _tile_movrow_internal(
+    unsigned short m, unsigned short n, _tile1024i src, unsigned u) {
+  return (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u);
+}
+
+/// Move a row from a tile (src0) to a v16f32 dst, converting the int32 source
+/// elements to fp32. No SIMD exceptions are generated. Rounding is done as if
+/// MXCSR.RC=RNE. Embedded rounding is not supported.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWD2PS </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v16f32 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowd2ps_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
+/// elements to bf16 at high 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16H </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32bf16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2pbf16h_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
+/// elements to bf16 at low 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16L </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32bf16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2pbf16l_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
+/// elements to fp16 at high 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PHH </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32fp16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
+/// elements to fp16 at low 16-bits of each dword.
+/// The row and chunk elements of tile is fetched from 32bit src1.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TCVTROWPS2PHL </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v32fp16 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) {
+  return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+/// Move one row of a tile data to a v16f32 data.
+/// The row of the tile is selected by a 32b GPR.
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TILEMOVROW </c> instruction.
+///
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 1st source r32. Size is 4 Bytes.
+/// \returns
+///    The destination v16i32 data. Size is 64 Bytes.
+__DEFAULT_FN_ATTRS_AVX512
+static __m512i __tile_movrow(__tile1024i src0, unsigned src1) {
+  return (__m512i)_tile_movrow_internal(src0.row, src0.col, src0.tile, src1);
+}
+
+#endif // __x86_64__
+#endif // __AMX_AVX512INTRIN_H
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..84e56238fcf2dc 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -638,6 +638,10 @@ _storebe_i64(void * __P, long long __D) {
 #include <amxcomplexintrin.h>
 #endif
 
+#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__)
+#include <amxavx512intrin.h>
+#endif
+
 #if !defined(__SCE__) || __has_feature(modules) ||                             \
     defined(__AVX512VP2INTERSECT__)
 #include <avx512vp2intersectintrin.h>
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 6a4d78f0ca9084..fba901473e6e18 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -631,6 +631,12 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCa...
[truncated]

fzou1 · 2024-11-01T08:50:23Z

clang/lib/Headers/amxavx512intrin.h

+  __attribute__((__always_inline__, __nodebug__, __target__("amx-avx512")))
+
+/// Moves a row from a tile register to a zmm destination register, converting
+///    the int32 source elements to fp32. The row of the tile is selected by an


fzou1 · 2024-11-01T08:51:08Z

clang/lib/Headers/amxavx512intrin.h

+/// This intrinsic corresponds to the \c TCVTROWD2PS instruction.
+///
+/// \param tsrc
+///    The 1st source tile. Max size is 1024 Bytes.


Remove "1st" since there is only one source tile. The following should be updated as this.

fzou1 · 2024-11-01T08:51:42Z

clang/lib/Headers/amxavx512intrin.h

+/// Moves a row from a tile register to a zmm destination register, converting
+///    the fp32 source elements to bf16. It places the resulting bf16 elements
+///    in the high 16 bits within each dword. The row of the tile is selected
+///    by an 32b GPR.


an -> a. The following "an 32b" should be updated to "a 32b" too.

fzou1 · 2024-11-01T09:13:56Z

llvm/lib/Target/X86/X86ExpandPseudo.cpp

+      Opc = X86::TILEMOVROWrri;
+      break;
+    default:
+      llvm_unreachable("Impossible Opcode!");


Better to change "Impossible" to "Invalid".

github-actions · 2024-11-06T09:59:34Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff 37ce18951fded6be1de319b05b968918cb45c00b ffcb6a7452a8ef41e62e47ce71747559eca1c83a --extensions c,cpp,h -- clang/lib/Headers/amxavx512intrin.h clang/test/CodeGen/X86/amx_avx512_api.c clang/test/CodeGen/X86/amxavx512-builtins.c clang/lib/Basic/Targets/X86.cpp clang/lib/Basic/Targets/X86.h clang/lib/Headers/immintrin.h clang/lib/Sema/SemaX86.cpp clang/test/CodeGen/attr-target-x86.c clang/test/Driver/x86-target-features.c clang/test/Preprocessor/x86_target_features.c llvm/lib/Target/X86/X86ExpandPseudo.cpp llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86LowerAMXType.cpp llvm/lib/Target/X86/X86PreTileConfig.cpp llvm/lib/TargetParser/Host.cpp llvm/lib/TargetParser/X86TargetParser.cpp

View the diff from clang-format here.

diff --git a/clang/lib/Headers/amxavx512intrin.h b/clang/lib/Headers/amxavx512intrin.h
index 945edea543..a01c6eff93 100644
--- a/clang/lib/Headers/amxavx512intrin.h
+++ b/clang/lib/Headers/amxavx512intrin.h
@@ -37,7 +37,8 @@
 ///     IF i + row_chunk / 4 >= tsrc.colsb / 4
 ///         dst.dword[i] := 0
 ///     ELSE
-///         dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
+///         dst.f32[i] :=
+///         CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE)
 ///     FI
 /// ENDFOR
 /// dst[MAX_VL-1:VL] := 0
@@ -73,7 +74,8 @@
 ///         dst.dword[i] := 0
 ///     ELSE
 ///         dst.word[2*i+0] := 0
-///         dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///         dst.bf16[2*i+1] :=
+///         CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
 ///     FI
 /// ENDFOR
 /// dst[MAX_VL-1:VL] := 0
@@ -110,7 +112,8 @@
 ///         dst.dword[i] := 0
 ///     ELSE
 ///         dst.word[2*i+1] := 0
-///         dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///         dst.bf16[2*i+0] :=
+///         CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
 ///     FI
 /// ENDFOR
 /// dst[MAX_VL-1:VL] := 0
@@ -147,7 +150,8 @@
 ///         dst.dword[i] := 0
 ///     ELSE
 ///         dst.word[2*i+0] := 0
-///         dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///         dst.fp16[2*i+1] :=
+///         CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
 ///     FI
 /// ENDFOR
 /// dst[MAX_VL-1:VL] := 0
@@ -183,7 +187,8 @@
 ///         dst.dword[i] := 0
 ///     ELSE
 ///         dst.word[2*i+1] := 0
-///         dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
+///         dst.fp16[2*i+0] :=
+///         CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE)
 ///     FI
 /// ENDFOR
 /// dst[MAX_VL-1:VL] := 0

fzou1 · 2024-11-07T02:15:29Z

clang/include/clang/Basic/BuiltinsX86_64.def

@@ -133,6 +133,12 @@ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z",
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, "V16fUsUsV256iUi", "n", "amx-avx512")


Is "avx10.2-512" feature needed to be added for the intrinsics here and there?

Good catch! Done.

Is it necessary to add avx10.2-512 feature for these internal APIs? With that, we may detect errors for new APIs if there is no AVX10.2-512 target feature.

Yes, sorry I missed that parts.

fzou1 · 2024-11-08T02:50:10Z

clang/include/clang/Basic/BuiltinsX86_64.def

@@ -133,6 +133,12 @@ TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z",
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose")
+TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, "V16fUsUsV256iUi", "n", "amx-avx512")


Is it necessary to add avx10.2-512 feature for these internal APIs? With that, we may detect errors for new APIs if there is no AVX10.2-512 target feature.

fzou1 · 2024-11-08T02:53:41Z

clang/lib/Headers/amxavx512intrin.h

+#ifdef __x86_64__
+
+#define __DEFAULT_FN_ATTRS_AVX512                                              \
+  __attribute__((__always_inline__, __nodebug__, __target__("amx-avx512")))


If AVX10.2-512 feature dependency is needed for internal APIs, we may create another attribute with AVX10.2-512 and add it to internal APIs.

Yes, we need them all. Good catch!

fzou1

LGTM except the last place probably missing avx10.2-512 dependency.

fzou1 · 2024-11-08T06:52:49Z

llvm/lib/Target/X86/X86InstrAMX.td

@@ -369,3 +369,150 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
    }
  }
 } // HasAMXTILE, HasAMXTRANSPOSE
+
+multiclass m_tcvtrowd2ps {
+  let Predicates = [HasAMXAVX512, In64BitMode] in {


Should add HasAVX10_2_512 in line 374, 390 and 454?

Done, thanks!

fzou1

LGTM

llvm-ci · 2024-11-08T11:16:22Z

LLVM Buildbot has detected a new failure on builder sanitizer-x86_64-linux-bootstrap-asan running on sanitizer-buildbot2 while building clang,llvm at step 2 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/52/builds/3564

Here is the relevant piece of the build log for the reference

Step 2 (annotate) failure: 'python ../sanitizer_buildbot/sanitizers/zorg/buildbot/builders/sanitizers/buildbot_selector.py' (failure)
...
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using lld-link: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/lld-link
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using ld64.lld: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld64.lld
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using wasm-ld: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/wasm-ld
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using ld.lld: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using lld-link: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/lld-link
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using ld64.lld: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld64.lld
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using wasm-ld: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/wasm-ld
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 87001 of 87002 tests, 88 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90.
FAIL: lld :: ELF/allow-shlib-undefined.s (84780 of 87001)
******************** TEST 'lld :: ELF/allow-shlib-undefined.s' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 3: rm -rf /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp && split-file /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp && cd /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp
+ rm -rf /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp
+ split-file /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp
+ cd /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp
RUN: at line 4: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 main.s -o main.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 main.s -o main.o
RUN: at line 5: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 def.s -o def.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 def.s -o def.o
RUN: at line 6: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 def-hidden.s -o def-hidden.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 def-hidden.s -o def-hidden.o
RUN: at line 7: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 ref.s -o ref.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 ref.s -o ref.o
RUN: at line 8: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o && /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld -shared a.o -o a.so
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld -shared a.o -o a.so
RUN: at line 9: cp a.so b.so
+ cp a.so b.so
RUN: at line 10: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 empty.s -o empty.o && /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld -shared empty.o -o empty.so
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 empty.s -o empty.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld -shared empty.o -o empty.so
RUN: at line 12: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld --allow-shlib-undefined main.o a.so -o /dev/null
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld --allow-shlib-undefined main.o a.so -o /dev/null
RUN: at line 13: not /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld --no-allow-shlib-undefined main.o a.so -o /dev/null 2>&1 | /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s
+ not /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld --no-allow-shlib-undefined main.o a.so -o /dev/null
RUN: at line 15: not /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so -o /dev/null 2>&1 | /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s
+ not /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so -o /dev/null
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s
RUN: at line 16: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so --noinhibit-exec -o /dev/null 2>&1 | /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s --check-prefix=WARN
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so --noinhibit-exec -o /dev/null
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s --check-prefix=WARN
RUN: at line 17: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so --warn-unresolved-symbols -o /dev/null 2>&1 | /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s --check-prefix=WARN
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so --warn-unresolved-symbols -o /dev/null
Step 10 (stage2/asan check) failure: stage2/asan check (failure)
...
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using lld-link: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/lld-link
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using ld64.lld: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld64.lld
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using wasm-ld: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/wasm-ld
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using ld.lld: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using lld-link: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/lld-link
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using ld64.lld: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld64.lld
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/llvm/config.py:506: note: using wasm-ld: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/wasm-ld
llvm-lit: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/llvm/utils/lit/lit/main.py:72: note: The test suite configuration requested an individual test timeout of 0 seconds but a timeout of 900 seconds was requested on the command line. Forcing timeout to be 900 seconds.
-- Testing: 87001 of 87002 tests, 88 workers --
Testing:  0.. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90.
FAIL: lld :: ELF/allow-shlib-undefined.s (84780 of 87001)
******************** TEST 'lld :: ELF/allow-shlib-undefined.s' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 3: rm -rf /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp && split-file /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp && cd /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp
+ rm -rf /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp
+ split-file /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp
+ cd /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/tools/lld/test/ELF/Output/allow-shlib-undefined.s.tmp
RUN: at line 4: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 main.s -o main.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 main.s -o main.o
RUN: at line 5: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 def.s -o def.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 def.s -o def.o
RUN: at line 6: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 def-hidden.s -o def-hidden.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 def-hidden.s -o def-hidden.o
RUN: at line 7: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 ref.s -o ref.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 ref.s -o ref.o
RUN: at line 8: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o && /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld -shared a.o -o a.so
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld -shared a.o -o a.so
RUN: at line 9: cp a.so b.so
+ cp a.so b.so
RUN: at line 10: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 empty.s -o empty.o && /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld -shared empty.o -o empty.so
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/llvm-mc -filetype=obj -triple=x86_64 empty.s -o empty.o
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld -shared empty.o -o empty.so
RUN: at line 12: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld --allow-shlib-undefined main.o a.so -o /dev/null
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld --allow-shlib-undefined main.o a.so -o /dev/null
RUN: at line 13: not /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld --no-allow-shlib-undefined main.o a.so -o /dev/null 2>&1 | /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s
+ not /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld --no-allow-shlib-undefined main.o a.so -o /dev/null
RUN: at line 15: not /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so -o /dev/null 2>&1 | /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s
+ not /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so -o /dev/null
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s
RUN: at line 16: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so --noinhibit-exec -o /dev/null 2>&1 | /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s --check-prefix=WARN
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so --noinhibit-exec -o /dev/null
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s --check-prefix=WARN
RUN: at line 17: /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so --warn-unresolved-symbols -o /dev/null 2>&1 | /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/FileCheck /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm-project/lld/test/ELF/allow-shlib-undefined.s --check-prefix=WARN
+ /home/b/sanitizer-x86_64-linux-bootstrap-asan/build/llvm_build_asan/bin/ld.lld main.o a.so --warn-unresolved-symbols -o /dev/null

llvm-ci · 2024-11-08T16:13:14Z

LLVM Buildbot has detected a new failure on builder clang-s390x-linux running on systemz-1 while building clang,llvm at step 5 "ninja check 1".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/42/builds/1812

Here is the relevant piece of the build log for the reference

Step 5 (ninja check 1) failure: stage 1 checked (failure)
******************** TEST 'libFuzzer-s390x-default-Linux :: fuzzer-timeout.test' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 1: /home/uweigand/sandbox/buildbot/clang-s390x-linux/stage1/./bin/clang    -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/lib/fuzzer  /home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/test/fuzzer/TimeoutTest.cpp -o /home/uweigand/sandbox/buildbot/clang-s390x-linux/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest
+ /home/uweigand/sandbox/buildbot/clang-s390x-linux/stage1/./bin/clang -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/lib/fuzzer /home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/test/fuzzer/TimeoutTest.cpp -o /home/uweigand/sandbox/buildbot/clang-s390x-linux/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest
RUN: at line 2: /home/uweigand/sandbox/buildbot/clang-s390x-linux/stage1/./bin/clang    -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/lib/fuzzer  /home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/test/fuzzer/TimeoutEmptyTest.cpp -o /home/uweigand/sandbox/buildbot/clang-s390x-linux/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutEmptyTest
+ /home/uweigand/sandbox/buildbot/clang-s390x-linux/stage1/./bin/clang -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta --driver-mode=g++ -O2 -gline-tables-only -fsanitize=address,fuzzer -I/home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/lib/fuzzer /home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/test/fuzzer/TimeoutEmptyTest.cpp -o /home/uweigand/sandbox/buildbot/clang-s390x-linux/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutEmptyTest
RUN: at line 3: not  /home/uweigand/sandbox/buildbot/clang-s390x-linux/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest -timeout=1 2>&1 | FileCheck /home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/test/fuzzer/fuzzer-timeout.test --check-prefix=TimeoutTest
+ not /home/uweigand/sandbox/buildbot/clang-s390x-linux/stage1/runtimes/runtimes-bins/compiler-rt/test/fuzzer/S390XDefaultLinuxConfig/Output/fuzzer-timeout.test.tmp-TimeoutTest -timeout=1
+ FileCheck /home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/test/fuzzer/fuzzer-timeout.test --check-prefix=TimeoutTest
/home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/test/fuzzer/fuzzer-timeout.test:7:14: error: TimeoutTest: expected string not found in input
TimeoutTest: #0
             ^
<stdin>:19:44: note: scanning from here
==3335512== ERROR: libFuzzer: timeout after 1 seconds
                                           ^
<stdin>:24:104: note: possible intended match here
AddressSanitizer: CHECK failed: asan_report.cpp:199 "((current_error_.kind)) == ((kErrorKindInvalid))" (0x1, 0x0) (tid=3335512)
                                                                                                       ^

Input file: <stdin>
Check file: /home/uweigand/sandbox/buildbot/clang-s390x-linux/llvm/compiler-rt/test/fuzzer/fuzzer-timeout.test

-dump-input=help explains the following input dump.

Input was:
<<<<<<
           .
           .
           .
          14: MS: 1 InsertByte-; base unit: bf397014ecbce0b1be8d9011c77f6181927a357f 
          15: 0x48,0x69,0x21,0x48, 
          16: Hi!H 
          17: artifact_prefix='./'; Test unit written to ./timeout-c9f7ef19d5ac7565f3dcaf7a3221ae711a187db5 
          18: Base64: SGkhSA== 
          19: ==3335512== ERROR: libFuzzer: timeout after 1 seconds 
check:7'0                                                X~~~~~~~~~~ error: no match found
          20: AddressSanitizer:DEADLYSIGNAL 
check:7'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          21: ================================================================= 
check:7'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          22: AddressSanitizer:DEADLYSIGNAL 
check:7'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          23: ================================================================= 
check:7'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
          24: AddressSanitizer: CHECK failed: asan_report.cpp:199 "((current_error_.kind)) == ((kErrorKindInvalid))" (0x1, 0x0) (tid=3335512) 
check:7'0     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
check:7'1                                                                                                            ?                         possible intended match
...

alanzhao1 · 2024-11-08T18:59:31Z

FYI this is causing Chrome X86 MacOS builds to fail due to error: unknown type name '__m512bh': https://crbug.com/378111077

alanzhao1 · 2024-11-08T21:53:20Z

FYI this is causing Chrome X86 MacOS builds to fail due to error: unknown type name '__m512bh': https://crbug.com/378111077

As I mentioned in https://crbug.com/378111077#comment3, the issue is that we pull in avx512bf16intrin.h because __SCE__ is not defined, but __SSE2__ is also not defined, so we don't pull in the typedef for __m512bh (since they're guarded by macros that look for __SSE2__) and so on. We then pull in amxavx512intrin.h which was introduced in this PR which then fails to compile because we don't have the typedefs.

pranavk · 2024-11-08T23:58:25Z

This is causing similar errors for us as well.

alanzhao1 · 2024-11-09T00:12:46Z

Figured out a repro - immintrin.h doesn't compile if both -msse and -mno-sse2 are passed to clang:

$ cat ~/src/test-mac.c
#include <immintrin.h>

$ bin/clang -msse -mno-sse2 -o /dev/null -c ~/src/test-mac.c
In file included from /usr/local/google/home/ayzhao/src/test-mac.c:1:
In file included from /usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/immintrin.h:660:
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:240:19: error: unknown type name '__m512bh'
  240 | static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
      |                   ^
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:243:10: error: returning '__attribute__((__vector_size__(32 * sizeof(__bf16)))) __bf16' (vector of 32 '__bf16' values) from a function with incompatible result type 'int'
  243 |   return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u);
      |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:246:19: error: unknown type name '__m512bh'
  246 | static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
      |                   ^
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:249:10: error: returning '__attribute__((__vector_size__(32 * sizeof(__bf16)))) __bf16' (vector of 32 '__bf16' values) from a function with incompatible result type 'int'
  249 |   return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u);
      |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:252:19: error: unknown type name '__m512h'
  252 | static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
      |                   ^
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:254:10: error: returning '__attribute__((__vector_size__(32 * sizeof(_Float16)))) _Float16' (vector of 32 '_Float16' values) from a function with incompatible result type 'int'
  254 |   return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u);
      |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:257:19: error: unknown type name '__m512h'
  257 | static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal(
      |                   ^
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:259:10: error: returning '__attribute__((__vector_size__(32 * sizeof(_Float16)))) _Float16' (vector of 32 '_Float16' values) from a function with incompatible result type 'int'
  259 |   return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u);
      |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:302:8: error: unknown type name '__m512bh'
  302 | static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
      |        ^
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:321:8: error: unknown type name '__m512bh'
  321 | static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) {
      |        ^
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:340:8: error: unknown type name '__m512h'
  340 | static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) {
      |        ^
/usr/local/google/home/ayzhao/src/llvm-project/build/lib/clang/20/include/amxavx512intrin.h:359:8: error: unknown type name '__m512h'
  359 | static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) {
      |        ^
12 errors generated.

Test was on an x64 linux system.

This reverts commit 58a17e1.

Reverts #114070 Reason: Causes `immintrin.h` to fail to compile if `-msse` and `-mno-sse2` are passed to clang: #114070 (comment)

phoebewang · 2024-11-09T05:27:09Z

Thanks all! Fixed in #115581

Reverts llvm#114070 Reason: Causes `immintrin.h` to fail to compile if `-msse` and `-mno-sse2` are passed to clang: llvm#114070 (comment)

[X86][AMX] Support AMX-AVX512

587d010

phoebewang requested review from RKSimon, xiangzh1 and FreddyLeaf October 29, 2024 15:14

fzou1 reviewed Nov 6, 2024

View reviewed changes

phoebewang added 2 commits November 6, 2024 17:53

Merge remote-tracking branch 'origin/main' into AMX-AVX512

65b821f

Address review comments

c38da4e

fzou1 reviewed Nov 7, 2024

View reviewed changes

Add avx10.2-512 predicate

1b93a9a

fzou1 reviewed Nov 8, 2024

View reviewed changes

phoebewang added 2 commits November 8, 2024 10:59

Add missing parts

6560806

Add avx10.2-512

6a78b18

fzou1 reviewed Nov 8, 2024

View reviewed changes

Add HasAVX10_2_512

ffcb6a7

fzou1 approved these changes Nov 8, 2024

View reviewed changes

phoebewang merged commit 58a17e1 into llvm:main Nov 8, 2024
6 of 9 checks passed

phoebewang deleted the AMX-AVX512 branch November 8, 2024 08:25

alanzhao1 added a commit that referenced this pull request Nov 9, 2024

Revert "[X86][AMX] Support AMX-AVX512 (#114070)"

e06af1d

This reverts commit 58a17e1.

alanzhao1 mentioned this pull request Nov 9, 2024

Revert "[X86][AMX] Support AMX-AVX512" #115570

Merged

alanzhao1 added a commit that referenced this pull request Nov 9, 2024

Revert "[X86][AMX] Support AMX-AVX512" (#115570)

ff22515

Reverts #114070 Reason: Causes `immintrin.h` to fail to compile if `-msse` and `-mno-sse2` are passed to clang: #114070 (comment)

Groverkss pushed a commit to iree-org/llvm-project that referenced this pull request Nov 15, 2024

[X86][AMX] Support AMX-AVX512 (llvm#114070)

afd7c34

[X86][AMX] Support AMX-AVX512 #114070

[X86][AMX] Support AMX-AVX512 #114070

Uh oh!

Conversation

phoebewang commented Oct 29, 2024

Uh oh!

llvmbot commented Oct 29, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Oct 29, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

github-actions bot commented Nov 6, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

fzou1 left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

fzou1 left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Nov 8, 2024

Uh oh!

llvm-ci commented Nov 8, 2024

Uh oh!

alanzhao1 commented Nov 8, 2024

Uh oh!

alanzhao1 commented Nov 8, 2024

Uh oh!

pranavk commented Nov 8, 2024

Uh oh!

alanzhao1 commented Nov 9, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

phoebewang commented Nov 9, 2024

Uh oh!

Uh oh!

llvmbot commented Oct 29, 2024 •

edited

Loading

github-actions bot commented Nov 6, 2024 •

edited

Loading

alanzhao1 commented Nov 9, 2024 •

edited

Loading