Skip to content

[X86][AMX-AVX512][NFC] Remove P from intrinsic and instruction name #123270

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions clang/include/clang/Basic/BuiltinsX86_64.td
Original file line number Diff line number Diff line change
Expand Up @@ -295,8 +295,8 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {

let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
def tcvtrowd2ps_internal : X86Builtin<"_Vector<16, float>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2pbf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2pbf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2bf16h_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2bf16l_internal : X86Builtin<"_Vector<32, __bf16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2phh_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tcvtrowps2phl_internal : X86Builtin<"_Vector<32, _Float16>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
def tilemovrow_internal : X86Builtin<"_Vector<16, int>(unsigned short, unsigned short, _Vector<256, int>, unsigned int)">;
Expand Down Expand Up @@ -387,8 +387,8 @@ let Features = "amx-complex,amx-transpose", Attributes = [NoThrow] in {

let Features = "amx-avx512,avx10.2-512", Attributes = [NoThrow] in {
def tcvtrowd2ps : X86Builtin<"_Vector<16, float>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2pbf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2pbf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2bf16h : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2bf16l : X86Builtin<"_Vector<32, __bf16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2phh : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">;
def tcvtrowps2phl : X86Builtin<"_Vector<32, _Float16>(_Constant unsigned char, unsigned int)">;
def tilemovrow : X86Builtin<"_Vector<16, int>(_Constant unsigned char, unsigned int)">;
Expand Down
40 changes: 20 additions & 20 deletions clang/lib/Headers/amxavx512intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512i _tile_cvtrowps2pbf16h(__tile tsrc, unsigned int row);
/// __m512i _tile_cvtrowps2bf16h(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
Expand All @@ -80,14 +80,14 @@
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PBF16H instruction.
/// This intrinsic corresponds to the \c TCVTROWPS2BF16H instruction.
///
/// \param tsrc
/// The source tile. Max size is 1024 Bytes.
/// \param row
/// The row of the source tile.
#define _tile_cvtrowps2pbf16h(tsrc, row) \
__builtin_ia32_tcvtrowps2pbf16h(tsrc, row)
#define _tile_cvtrowps2bf16h(tsrc, row) \
__builtin_ia32_tcvtrowps2bf16h(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to bf16. It places the resulting bf16 elements
Expand All @@ -97,7 +97,7 @@
/// \headerfile <x86intrin.h>
///
/// \code
/// __m512i _tile_cvtrowps2pbf16l(__tile tsrc, unsigned int row);
/// __m512i _tile_cvtrowps2bf16l(__tile tsrc, unsigned int row);
/// \endcode
///
/// \code{.operation}
Expand All @@ -117,14 +117,14 @@
/// zero_tileconfig_start()
/// \endcode
///
/// This intrinsic corresponds to the \c TCVTROWPS2PBF16L instruction.
/// This intrinsic corresponds to the \c TCVTROWPS2BF16L instruction.
///
/// \param tsrc
/// The source tile. Max size is 1024 Bytes.
/// \param row
/// The row of the source tile.
#define _tile_cvtrowps2pbf16l(tsrc, row) \
__builtin_ia32_tcvtrowps2pbf16l(tsrc, row)
#define _tile_cvtrowps2bf16l(tsrc, row) \
__builtin_ia32_tcvtrowps2bf16l(tsrc, row)

/// Moves a row from a tile register to a zmm destination register, converting
/// the fp32 source elements to fp16. It places the resulting fp16 elements
Expand Down Expand Up @@ -238,15 +238,15 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal(
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowps2pbf16h_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2pbf16h_internal(m, n, src, u);
_tile_cvtrowps2bf16h_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2bf16h_internal(m, n, src, u);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512
_tile_cvtrowps2pbf16l_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2pbf16l_internal(m, n, src, u);
_tile_cvtrowps2bf16l_internal(unsigned short m, unsigned short n,
_tile1024i src, unsigned u) {
return __builtin_ia32_tcvtrowps2bf16l_internal(m, n, src, u);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal(
Expand Down Expand Up @@ -290,7 +290,7 @@ static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16H </c> instruction.
/// This intrinsic corresponds to the <c> TCVTROWPS2BF16H </c> instruction.
///
/// \param src0
/// The 1st source tile. Max size is 1024 Bytes.
Expand All @@ -299,8 +299,8 @@ static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) {
/// \returns
/// The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2pbf16h_internal(src0.row, src0.col, src0.tile, src1);
static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2bf16h_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source
Expand All @@ -309,7 +309,7 @@ static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> TCVTROWPS2PBF16L </c> instruction.
/// This intrinsic corresponds to the <c> TCVTROWPS2BF16L </c> instruction.
///
/// \param src0
/// The 1st source tile. Max size is 1024 Bytes.
Expand All @@ -318,8 +318,8 @@ static __m512bh __tile_cvtrowps2pbf16h(__tile1024i src0, unsigned src1) {
/// \returns
/// The destination v32bf16 data. Size is 64 Bytes.
__DEFAULT_FN_ATTRS_AVX512
static __m512bh __tile_cvtrowps2pbf16l(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2pbf16l_internal(src0.row, src0.col, src0.tile, src1);
static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) {
return _tile_cvtrowps2bf16l_internal(src0.row, src0.col, src0.tile, src1);
}

/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Sema/SemaX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,8 +641,8 @@ bool SemaX86::CheckBuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_t2rpntlvwz1rs:
case X86::BI__builtin_ia32_t2rpntlvwz1rst1:
case X86::BI__builtin_ia32_t2rpntlvwz0rs:
case X86::BI__builtin_ia32_tcvtrowps2pbf16h:
case X86::BI__builtin_ia32_tcvtrowps2pbf16l:
case X86::BI__builtin_ia32_tcvtrowps2bf16h:
case X86::BI__builtin_ia32_tcvtrowps2bf16l:
case X86::BI__builtin_ia32_tcvtrowps2phh:
case X86::BI__builtin_ia32_tcvtrowps2phl:
case X86::BI__builtin_ia32_tcvtrowd2ps:
Expand Down
16 changes: 8 additions & 8 deletions clang/test/CodeGen/X86/amx_avx512_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@ __m512 test_tile_cvtrowd2ps(__tile1024i a, unsigned b) {
return __tile_cvtrowd2ps(a, b);
}

__m512bh test_tile_cvtrowps2pbf16h(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2pbf16h
__m512bh test_tile_cvtrowps2bf16h(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2bf16h
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h.internal
return __tile_cvtrowps2pbf16h(a, b);
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h.internal
return __tile_cvtrowps2bf16h(a, b);
}

__m512bh test_tile_cvtrowps2pbf16l(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2pbf16l
__m512bh test_tile_cvtrowps2bf16l(__tile1024i a, unsigned b) {
//CHECK-LABEL: @test_tile_cvtrowps2bf16l
//CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l.internal
return __tile_cvtrowps2pbf16l(a, b);
//CHECK-DAG: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l.internal
return __tile_cvtrowps2bf16l(a, b);
}

__m512h test_tile_cvtrowps2phh(__tile1024i a, unsigned b) {
Expand Down
16 changes: 8 additions & 8 deletions clang/test/CodeGen/X86/amxavx512-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ __m512 test_tile_cvtrowd2ps(unsigned int A) {
return _tile_cvtrowd2ps(1, A);
}

__m512bh test_tile_cvtrowps2pbf16h(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2pbf16h(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16h(i8 1, i32 %{{.*}})
return _tile_cvtrowps2pbf16h(1, A);
__m512bh test_tile_cvtrowps2bf16h(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2bf16h(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16h(i8 1, i32 %{{.*}})
return _tile_cvtrowps2bf16h(1, A);
}

__m512bh test_tile_cvtrowps2pbf16l(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2pbf16l(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2pbf16l(i8 1, i32 %{{.*}})
return _tile_cvtrowps2pbf16l(1, A);
__m512bh test_tile_cvtrowps2bf16l(unsigned int A) {
// CHECK-LABEL: @test_tile_cvtrowps2bf16l(
// CHECK: call <32 x bfloat> @llvm.x86.tcvtrowps2bf16l(i8 1, i32 %{{.*}})
return _tile_cvtrowps2bf16l(1, A);
}

__m512h test_tile_cvtrowps2phh(unsigned int A) {
Expand Down
14 changes: 7 additions & 7 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -5999,10 +5999,10 @@ let TargetPrefix = "x86" in {
def int_x86_tcvtrowd2ps : ClangBuiltin<"__builtin_ia32_tcvtrowd2ps">,
Intrinsic<[llvm_v16f32_ty], [llvm_i8_ty, llvm_i32_ty],
[ImmArg<ArgIndex<0>>]>;
def int_x86_tcvtrowps2pbf16h : ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16h">,
def int_x86_tcvtrowps2bf16h : ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16h">,
Intrinsic<[llvm_v32bf16_ty], [llvm_i8_ty, llvm_i32_ty],
[ImmArg<ArgIndex<0>>]>;
def int_x86_tcvtrowps2pbf16l : ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16l">,
def int_x86_tcvtrowps2bf16l : ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16l">,
Intrinsic<[llvm_v32bf16_ty], [llvm_i8_ty, llvm_i32_ty],
[ImmArg<ArgIndex<0>>]>;
def int_x86_tcvtrowps2phh : ClangBuiltin<"__builtin_ia32_tcvtrowps2phh">,
Expand Down Expand Up @@ -6181,13 +6181,13 @@ let TargetPrefix = "x86" in {
Intrinsic<[llvm_v16f32_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
[]>;
def int_x86_tcvtrowps2pbf16h_internal :
ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16h_internal">,
def int_x86_tcvtrowps2bf16h_internal :
ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16h_internal">,
Intrinsic<[llvm_v32bf16_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
[]>;
def int_x86_tcvtrowps2pbf16l_internal :
ClangBuiltin<"__builtin_ia32_tcvtrowps2pbf16l_internal">,
def int_x86_tcvtrowps2bf16l_internal :
ClangBuiltin<"__builtin_ia32_tcvtrowps2bf16l_internal">,
Intrinsic<[llvm_v32bf16_ty],
[llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_i32_ty],
[]>;
Expand Down Expand Up @@ -7893,4 +7893,4 @@ def int_x86_movrsdi : ClangBuiltin<"__builtin_ia32_movrsdi">,
[IntrReadMem]>;
def int_x86_prefetchrs : ClangBuiltin<"__builtin_ia32_prefetchrs">,
Intrinsic<[], [llvm_ptr_ty], []>;
}
}
24 changes: 12 additions & 12 deletions llvm/lib/Target/X86/X86ExpandPseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,10 +563,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::PTILELOADDRST1V:
case X86::PTCVTROWD2PSrreV:
case X86::PTCVTROWD2PSrriV:
case X86::PTCVTROWPS2PBF16HrreV:
case X86::PTCVTROWPS2PBF16HrriV:
case X86::PTCVTROWPS2PBF16LrreV:
case X86::PTCVTROWPS2PBF16LrriV:
case X86::PTCVTROWPS2BF16HrreV:
case X86::PTCVTROWPS2BF16HrriV:
case X86::PTCVTROWPS2BF16LrreV:
case X86::PTCVTROWPS2BF16LrriV:
case X86::PTCVTROWPS2PHHrreV:
case X86::PTCVTROWPS2PHHrriV:
case X86::PTCVTROWPS2PHLrreV:
Expand Down Expand Up @@ -595,17 +595,17 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::PTCVTROWD2PSrriV:
Opc = X86::TCVTROWD2PSrri;
break;
case X86::PTCVTROWPS2PBF16HrreV:
Opc = X86::TCVTROWPS2PBF16Hrre;
case X86::PTCVTROWPS2BF16HrreV:
Opc = X86::TCVTROWPS2BF16Hrre;
break;
case X86::PTCVTROWPS2PBF16HrriV:
Opc = X86::TCVTROWPS2PBF16Hrri;
case X86::PTCVTROWPS2BF16HrriV:
Opc = X86::TCVTROWPS2BF16Hrri;
break;
case X86::PTCVTROWPS2PBF16LrreV:
Opc = X86::TCVTROWPS2PBF16Lrre;
case X86::PTCVTROWPS2BF16LrreV:
Opc = X86::TCVTROWPS2BF16Lrre;
break;
case X86::PTCVTROWPS2PBF16LrriV:
Opc = X86::TCVTROWPS2PBF16Lrri;
case X86::PTCVTROWPS2BF16LrriV:
Opc = X86::TCVTROWPS2BF16Lrri;
break;
case X86::PTCVTROWPS2PHHrreV:
Opc = X86::TCVTROWPS2PHHrre;
Expand Down
24 changes: 12 additions & 12 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37890,8 +37890,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::PTCVTROWPS2PBF16Hrri:
case X86::PTCVTROWPS2PBF16Lrri:
case X86::PTCVTROWPS2BF16Hrri:
case X86::PTCVTROWPS2BF16Lrri:
case X86::PTCVTROWPS2PHHrri:
case X86::PTCVTROWPS2PHLrri:
case X86::PTCVTROWD2PSrri:
Expand All @@ -37904,14 +37904,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTCVTROWD2PSrri:
Opc = X86::TCVTROWD2PSrri;
break;
case X86::PTCVTROWPS2PBF16Hrri:
Opc = X86::TCVTROWPS2PBF16Hrri;
case X86::PTCVTROWPS2BF16Hrri:
Opc = X86::TCVTROWPS2BF16Hrri;
break;
case X86::PTCVTROWPS2PHHrri:
Opc = X86::TCVTROWPS2PHHrri;
break;
case X86::PTCVTROWPS2PBF16Lrri:
Opc = X86::TCVTROWPS2PBF16Lrri;
case X86::PTCVTROWPS2BF16Lrri:
Opc = X86::TCVTROWPS2BF16Lrri;
break;
case X86::PTCVTROWPS2PHLrri:
Opc = X86::TCVTROWPS2PHLrri;
Expand All @@ -37928,8 +37928,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
case X86::PTCVTROWPS2PBF16Hrre:
case X86::PTCVTROWPS2PBF16Lrre:
case X86::PTCVTROWPS2BF16Hrre:
case X86::PTCVTROWPS2BF16Lrre:
case X86::PTCVTROWPS2PHHrre:
case X86::PTCVTROWPS2PHLrre:
case X86::PTCVTROWD2PSrre:
Expand All @@ -37942,11 +37942,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTCVTROWD2PSrre:
Opc = X86::TCVTROWD2PSrre;
break;
case X86::PTCVTROWPS2PBF16Hrre:
Opc = X86::TCVTROWPS2PBF16Hrre;
case X86::PTCVTROWPS2BF16Hrre:
Opc = X86::TCVTROWPS2BF16Hrre;
break;
case X86::PTCVTROWPS2PBF16Lrre:
Opc = X86::TCVTROWPS2PBF16Lrre;
case X86::PTCVTROWPS2BF16Lrre:
Opc = X86::TCVTROWPS2BF16Lrre;
break;
case X86::PTCVTROWPS2PHHrre:
Opc = X86::TCVTROWPS2PHHrre;
Expand Down
44 changes: 22 additions & 22 deletions llvm/lib/Target/X86/X86InstrAMX.td
Original file line number Diff line number Diff line change
Expand Up @@ -590,26 +590,26 @@ let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
[(set VR512: $dst,
(int_x86_tcvtrowd2ps_internal GR16:$src1, GR16:$src2,
TILE:$src3, GR32:$src4))]>;
def PTCVTROWPS2PBF16HrriV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2pbf16h_internal GR16:$src1, GR16:$src2,
TILE:$src3, imm:$src4))]>;
def PTCVTROWPS2PBF16HrreV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2pbf16h_internal GR16:$src1, GR16:$src2,
TILE:$src3, GR32:$src4))]>;
def PTCVTROWPS2PBF16LrriV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2pbf16l_internal GR16:$src1, GR16:$src2,
TILE:$src3, imm:$src4))]>;
def PTCVTROWPS2PBF16LrreV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2pbf16l_internal GR16:$src1, GR16:$src2,
TILE:$src3, GR32:$src4))]>;
def PTCVTROWPS2BF16HrriV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
TILE:$src3, imm:$src4))]>;
def PTCVTROWPS2BF16HrreV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2bf16h_internal GR16:$src1, GR16:$src2,
TILE:$src3, GR32:$src4))]>;
def PTCVTROWPS2BF16LrriV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
TILE:$src3, imm:$src4))]>;
def PTCVTROWPS2BF16LrreV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, GR32:$src4),
[(set VR512: $dst,
(int_x86_tcvtrowps2bf16l_internal GR16:$src1, GR16:$src2,
TILE:$src3, GR32:$src4))]>;
def PTCVTROWPS2PHHrriV : PseudoI<(outs VR512:$dst),
(ins GR16:$src1, GR16:$src2, TILE:$src3, i32u8imm:$src4),
[(set VR512: $dst,
Expand Down Expand Up @@ -659,8 +659,8 @@ multiclass AMXAVX512_BASE<bits<8> Opcode1, bits<8> Opcode2, string Opstr,

defm TCVTROWPS2PHH : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2phh", PS, PS>;
defm TCVTROWPS2PHL : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2phl", PD, XD>;
defm TCVTROWPS2PBF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2pbf16h", XD, XD>;
defm TCVTROWPS2PBF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2pbf16l", XS, XS>;
defm TCVTROWPS2BF16H : AMXAVX512_BASE<0x6d, 0x07, "tcvtrowps2bf16h", XD, XD>;
defm TCVTROWPS2BF16L : AMXAVX512_BASE<0x6d, 0x77, "tcvtrowps2bf16l", XS, XS>;

multiclass m_tilemovrow {
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
Expand Down
Loading
Loading