Skip to content

Commit 661881d

Browse files
committed
[X86] Add AMX-FP16 instructions.
Differential Revision: https://reviews.llvm.org/D135941
1 parent b5afa79 commit 661881d

File tree

27 files changed

+182
-2
lines changed

27 files changed

+182
-2
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,7 @@ X86 Support in Clang
595595
--------------------
596596
- Support ``-mindirect-branch-cs-prefix`` for call and jmp to indirect thunk.
597597
- Fix 32-bit ``__fastcall`` and ``__vectorcall`` ABI mismatch with MSVC.
598+
- Add ISA of ``AMX-FP16`` which support ``_tile_dpfp16ps``.
598599
- Switch ``AVX512-BF16`` intrinsics types from ``short`` to ``__bf16``.
599600
- Add support for ``PREFETCHI`` instructions.
600601

clang/include/clang/Basic/BuiltinsX86_64.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vUOi", "n", "ptwrite")
135135

136136
TARGET_BUILTIN(__builtin_ia32_prefetchi, "vvC*Ui", "nc", "prefetchi")
137137

138+
// AMX_FP16 FP16
139+
TARGET_BUILTIN(__builtin_ia32_tdpfp16ps, "vIUcIUcIUc", "n", "amx-fp16")
140+
138141
#undef BUILTIN
139142
#undef TARGET_BUILTIN
140143
#undef TARGET_HEADER_BUILTIN

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4522,6 +4522,8 @@ def m3dnowa : Flag<["-"], "m3dnowa">, Group<m_x86_Features_Group>;
45224522
def mno_3dnowa : Flag<["-"], "mno-3dnowa">, Group<m_x86_Features_Group>;
45234523
def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group<m_x86_Features_Group>;
45244524
def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group<m_x86_Features_Group>;
4525+
def mamx_fp16 : Flag<["-"], "mamx-fp16">, Group<m_x86_Features_Group>;
4526+
def mno_amx_fp16 : Flag<["-"], "mno-amx-fp16">, Group<m_x86_Features_Group>;
45254527
def mamx_int8 : Flag<["-"], "mamx-int8">, Group<m_x86_Features_Group>;
45264528
def mno_amx_int8 : Flag<["-"], "mno-amx-int8">, Group<m_x86_Features_Group>;
45274529
def mamx_tile : Flag<["-"], "mamx-tile">, Group<m_x86_Features_Group>;

clang/lib/Basic/Targets/X86.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
328328
HasHRESET = true;
329329
} else if (Feature == "+amx-bf16") {
330330
HasAMXBF16 = true;
331+
} else if (Feature == "+amx-fp16") {
332+
HasAMXFP16 = true;
331333
} else if (Feature == "+amx-int8") {
332334
HasAMXINT8 = true;
333335
} else if (Feature == "+amx-tile") {
@@ -778,6 +780,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
778780
Builder.defineMacro("__AMXINT8__");
779781
if (HasAMXBF16)
780782
Builder.defineMacro("__AMXBF16__");
783+
if (HasAMXFP16)
784+
Builder.defineMacro("__AMXFP16__");
781785
if (HasAVXVNNI)
782786
Builder.defineMacro("__AVXVNNI__");
783787
if (HasSERIALIZE)
@@ -881,6 +885,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
881885
.Case("adx", true)
882886
.Case("aes", true)
883887
.Case("amx-bf16", true)
888+
.Case("amx-fp16", true)
884889
.Case("amx-int8", true)
885890
.Case("amx-tile", true)
886891
.Case("avx", true)
@@ -976,6 +981,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
976981
.Case("adx", HasADX)
977982
.Case("aes", HasAES)
978983
.Case("amx-bf16", HasAMXBF16)
984+
.Case("amx-fp16", HasAMXFP16)
979985
.Case("amx-int8", HasAMXINT8)
980986
.Case("amx-tile", HasAMXTILE)
981987
.Case("avxvnni", HasAVXVNNI)

clang/lib/Basic/Targets/X86.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
137137
bool HasPTWRITE = false;
138138
bool HasINVPCID = false;
139139
bool HasENQCMD = false;
140+
bool HasAMXFP16 = false;
140141
bool HasKL = false; // For key locker
141142
bool HasWIDEKL = false; // For wide key locker
142143
bool HasHRESET = false;

clang/lib/Headers/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ set(x86_files
111111
# Intrinsics
112112
adxintrin.h
113113
ammintrin.h
114+
amxfp16intrin.h
114115
amxintrin.h
115116
avx2intrin.h
116117
avx512bf16intrin.h

clang/lib/Headers/amxfp16intrin.h

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*===------------- amxfp16intrin.h - AMX_FP16 intrinsics -*- C++ -*---------===
2+
*
3+
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
* See https://llvm.org/LICENSE.txt for license information.
5+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
*
7+
*===------------------------------------------------------------------------===
8+
*/
9+
10+
#ifndef __IMMINTRIN_H
11+
#error "Never use <amxfp16intrin.h> directly; use <immintrin.h> instead."
12+
#endif /* __IMMINTRIN_H */
13+
14+
#ifndef __AMX_FP16INTRIN_H
15+
#define __AMX_FP16INTRIN_H
16+
#ifdef __x86_64__
17+
18+
/// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles \a a
19+
/// and \a b, accumulating the intermediate single-precision (32-bit)
20+
/// floating-point elements with elements in \a dst, and store the 32-bit
21+
/// result back to tile \a dst.
22+
///
23+
/// \headerfile <x86intrin.h>
24+
///
25+
/// \code
26+
/// void _tile_dpfp16ps (__tile dst, __tile a, __tile b)
27+
/// \endcode
28+
///
29+
/// \code{.operation}
30+
/// FOR m := 0 TO dst.rows - 1
31+
/// tmp := dst.row[m]
32+
/// FOR k := 0 TO (a.colsb / 4) - 1
33+
/// FOR n := 0 TO (dst.colsb / 4) - 1
34+
/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) *
35+
/// FP32(b.row[k].fp16[2*n+0])
36+
/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) *
37+
/// FP32(b.row[k].fp16[2*n+1])
38+
/// ENDFOR
39+
/// ENDFOR
40+
/// write_row_and_zero(dst, m, tmp, dst.colsb)
41+
/// ENDFOR
42+
/// zero_upper_rows(dst, dst.rows)
43+
/// zero_tileconfig_start()
44+
/// \endcode
45+
///
46+
/// This intrinsic corresponds to the \c TDPFP16PS instruction.
47+
///
48+
/// \param dst
49+
/// The destination tile. Max size is 1024 Bytes.
50+
/// \param a
51+
/// The 1st source tile. Max size is 1024 Bytes.
52+
/// \param b
53+
/// The 2nd source tile. Max size is 1024 Bytes.
54+
#define _tile_dpfp16ps(dst, a, b) \
55+
__builtin_ia32_tdpfp16ps(dst, a, b)
56+
57+
#endif /* __x86_64__ */
58+
#endif /* __AMX_FP16INTRIN_H */

clang/lib/Headers/cpuid.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@
202202
/* Features in %eax for leaf 7 sub-leaf 1 */
203203
#define bit_AVXVNNI 0x00000010
204204
#define bit_AVX512BF16 0x00000020
205+
#define bit_AMXFP16 0x00200000
205206
#define bit_HRESET 0x00400000
206207

207208
/* Features in %edx for leaf 7 sub-leaf 1 */

clang/lib/Headers/immintrin.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,10 @@ _storebe_i64(void * __P, long long __D) {
508508
defined(__INVPCID__)
509509
#include <invpcidintrin.h>
510510
#endif
511+
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
512+
defined(__AMXFP16__)
513+
#include <amxfp16intrin.h>
514+
#endif
511515

512516
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
513517
defined(__KL__) || defined(__WIDEKL__)

clang/lib/Sema/SemaChecking.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5026,6 +5026,7 @@ bool Sema::CheckX86BuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall) {
50265026
case X86::BI__builtin_ia32_tdpbusd:
50275027
case X86::BI__builtin_ia32_tdpbuud:
50285028
case X86::BI__builtin_ia32_tdpbf16ps:
5029+
case X86::BI__builtin_ia32_tdpfp16ps:
50295030
return CheckX86BuiltinTileRangeAndDuplicate(TheCall, {0, 1, 2});
50305031
}
50315032
}

clang/test/CodeGen/X86/amx_errors.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile -target-feature +amx-int8 -target-feature +amx-bf16 -emit-llvm -fsyntax-only -verify
1+
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +amx-tile \
2+
// RUN: -target-feature +amx-int8 -target-feature +amx-bf16 -target-feature +amx-fp16 -emit-llvm -fsyntax-only -verify
23

34
#include <immintrin.h>
45

@@ -14,4 +15,7 @@ void test_amx(void *data) {
1415
_tile_dpbsud(7, 1, 7); // expected-error {{tile arguments must refer to different tiles}}
1516
_tile_dpbsud(4, 3, 3); // expected-error {{tile arguments must refer to different tiles}}
1617
_tile_dpbf16ps(4, 3, 3); // expected-error {{tile arguments must refer to different tiles}}
18+
_tile_dpfp16ps(1, 1, 3); // expected-error {{tile arguments must refer to different tiles}}
19+
_tile_dpfp16ps(1, 2, 1); // expected-error {{tile arguments must refer to different tiles}}
20+
_tile_dpfp16ps(1, 2, 2); // expected-error {{tile arguments must refer to different tiles}}
1721
}

clang/test/CodeGen/amx_fp16.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown \
2+
// RUN: -target-feature +amx-tile -target-feature +amx-int8 -target-feature +amx-bf16 -target-feature +amx-fp16 -emit-llvm -o - -Wall -Werror -pedantic \
3+
// RUN: -Wno-gnu-statement-expression| FileCheck %s
4+
5+
#include <immintrin.h>
6+
#include <stddef.h>
7+
void test_tile_dpfp16ps(void) {
8+
// CHECK-LABEL: @test_tile_dpfp16ps
9+
// CHECK: call void @llvm.x86.tdpfp16ps(i8 1, i8 2, i8 3)
10+
_tile_dpfp16ps(1, 2, 3);
11+
}

clang/test/Driver/x86-target-features.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,13 @@
290290
// AMX-INT8: "-target-feature" "+amx-int8"
291291
// NO-AMX-INT8: "-target-feature" "-amx-int8"
292292

293+
// RUN: %clang --target=x86_64 -mamx-fp16 %s \
294+
// RUN: -### -o %t.o 2>&1 | FileCheck -check-prefix=AMX-FP16 %s
295+
// RUN: %clang --target=x86_64 -mno-amx-fp16 \
296+
// RUN: %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-AMX-FP16 %s
297+
// AMX-FP16: "-target-feature" "+amx-fp16"
298+
// NO-AMX-FP16: "-target-feature" "-amx-fp16"
299+
293300
// RUN: %clang --target=i386 -march=i386 -mhreset %s -### 2>&1 | FileCheck -check-prefix=HRESET %s
294301
// RUN: %clang --target=i386 -march=i386 -mno-hreset %s -### 2>&1 | FileCheck -check-prefix=NO-HRESET %s
295302
// HRESET: "-target-feature" "+hreset"

clang/test/Preprocessor/x86_target_features.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,20 @@
545545

546546
// NOUINTR-NOT: #define __UINTR__ 1
547547

548+
// RUN: %clang -target x86_64-unknown-linux-gnu -march=atom -mamx-fp16 -x c \
549+
// RUN: -E -dM -o - %s | FileCheck -check-prefix=AMX-FP16 %s
550+
551+
// AMX-FP16: #define __AMXFP16__ 1
552+
// AMX-FP16: #define __AMXTILE__ 1
553+
554+
// RUN: %clang -target x86_64-unknown-linux-gnu -march=atom -mno-amx-fp16 \
555+
// RUN: -x c -E -dM -o - %s | FileCheck -check-prefix=NO-AMX-FP16 %s
556+
// RUN: %clang -target x86_64-unknown-linux-gnu -march=atom -mamx-fp16 \
557+
// RUN: -mno-amx-tile -x c -E -dM -o - %s | FileCheck -check-prefix=NO-AMX-FP16 %s
558+
559+
// NO-AMX-FP16-NOT: #define __AMXFP16__ 1
560+
// NO-AMX-FP16-NOT: #define __AMXTILE__ 1
561+
548562
// RUN: %clang -target i386-unknown-unknown -march=atom -mavxvnni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVXVNNI %s
549563

550564
// AVXVNNI: #define __AVX2__ 1

llvm/docs/ReleaseNotes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ Changes to the X86 Backend
138138

139139
* Add support for the ``RDMSRLIST and WRMSRLIST`` instructions.
140140
* Add support for the ``WRMSRNS`` instruction.
141+
* Support ISA of ``AMX-FP16`` which contains ``tdpfp16ps`` instruction.
141142

142143
Changes to the OCaml bindings
143144
-----------------------------

llvm/include/llvm/IR/IntrinsicsX86.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5115,6 +5115,14 @@ let TargetPrefix = "x86" in {
51155115
Intrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>;
51165116
}
51175117

5118+
//===----------------------------------------------------------------------===//
5119+
let TargetPrefix = "x86" in {
5120+
// AMX_FP16 - Intel FP16 AMX extensions
5121+
def int_x86_tdpfp16ps : ClangBuiltin<"__builtin_ia32_tdpfp16ps">,
5122+
Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
5123+
[ImmArg<ArgIndex<0>>,
5124+
ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
5125+
}
51185126
//===----------------------------------------------------------------------===//
51195127
// UINTR - User Level Interrupt
51205128

llvm/include/llvm/Support/X86TargetParser.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ X86_FEATURE (XSAVEOPT, "xsaveopt")
202202
X86_FEATURE (XSAVES, "xsaves")
203203
X86_FEATURE (HRESET, "hreset")
204204
X86_FEATURE (AVX512FP16, "avx512fp16")
205+
X86_FEATURE (AMX_FP16, "amx-fp16")
205206
X86_FEATURE (AVXVNNI, "avxvnni")
206207
// These features aren't really CPU features, but the frontend can set them.
207208
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")

llvm/lib/Support/Host.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1807,6 +1807,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
18071807
MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
18081808
Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
18091809
Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
1810+
Features["amxfp16"] = HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave;
18101811
Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
18111812
Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
18121813

llvm/lib/Support/X86TargetParser.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ constexpr FeatureBitset ImpliedFeaturesXOP = FeatureFMA4;
578578
// AMX Features
579579
constexpr FeatureBitset ImpliedFeaturesAMX_TILE = {};
580580
constexpr FeatureBitset ImpliedFeaturesAMX_BF16 = FeatureAMX_TILE;
581+
constexpr FeatureBitset ImpliedFeaturesAMX_FP16 = FeatureAMX_TILE;
581582
constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
582583
constexpr FeatureBitset ImpliedFeaturesHRESET = {};
583584

llvm/lib/Target/X86/X86.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,10 @@ def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
257257
def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
258258
"Support AMX-BF16 instructions",
259259
[FeatureAMXTILE]>;
260+
def FeatureAMXFP16 : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
261+
"Support AMX amx-fp16 instructions",
262+
[FeatureAMXTILE]>;
263+
260264
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
261265
"Invalidate Process-Context Identifier">;
262266
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36925,7 +36925,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3692536925
case X86::PTDPBSUD:
3692636926
case X86::PTDPBUSD:
3692736927
case X86::PTDPBUUD:
36928-
case X86::PTDPBF16PS: {
36928+
case X86::PTDPBF16PS:
36929+
case X86::PTDPFP16PS: {
3692936930
unsigned Opc;
3693036931
switch (MI.getOpcode()) {
3693136932
default: llvm_unreachable("illegal opcode!");
@@ -36934,6 +36935,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3693436935
case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
3693536936
case X86::PTDPBUUD: Opc = X86::TDPBUUD; break;
3693636937
case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break;
36938+
case X86::PTDPFP16PS: Opc = X86::TDPFP16PS; break;
3693736939
}
3693836940

3693936941
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));

llvm/lib/Target/X86/X86InstrAMX.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,21 @@ let Predicates = [HasAMXBF16, In64BitMode] in {
185185
}
186186
}
187187
} // HasAMXTILE, HasAMXBF16
188+
189+
//AMX-FP16
190+
let Predicates = [HasAMXFP16, In64BitMode] in {
191+
let SchedRW = [WriteSystem] in {
192+
let Constraints = "$src1 = $dst" in {
193+
def TDPFP16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
194+
(ins TILE:$src1, TILE:$src2, TILE:$src3),
195+
"tdpfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
196+
[]>, VEX_4V, T8XD;
197+
}
198+
let usesCustomInserter = 1 in {
199+
def PTDPFP16PS : PseudoI<(outs), (ins u8imm:$src1,
200+
u8imm:$src2, u8imm:$src3),
201+
[(int_x86_tdpfp16ps timm:$src1,
202+
timm:$src2, timm:$src3)]>;
203+
}
204+
}
205+
} // HasAMXTILE, HasAMXFP16

llvm/lib/Target/X86/X86InstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -980,6 +980,7 @@ def HasCX8 : Predicate<"Subtarget->hasCX8()">;
980980
def HasCX16 : Predicate<"Subtarget->hasCX16()">;
981981
def HasPCONFIG : Predicate<"Subtarget->hasPCONFIG()">;
982982
def HasENQCMD : Predicate<"Subtarget->hasENQCMD()">;
983+
def HasAMXFP16 : Predicate<"Subtarget->hasAMXFP16()">;
983984
def HasKL : Predicate<"Subtarget->hasKL()">;
984985
def HasWIDEKL : Predicate<"Subtarget->hasWIDEKL()">;
985986
def HasHRESET : Predicate<"Subtarget->hasHRESET()">;
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-fp16 | FileCheck %s
2+
3+
; CHECK-LABEL: test_amx:
4+
; CHECK: # %bb.0:
5+
; CHECK: tdpfp16ps %tmm1, %tmm2, %tmm3
6+
7+
define void @test_amx() {
8+
call void @llvm.x86.tdpfp16ps(i8 3, i8 2, i8 1)
9+
10+
ret void
11+
}
12+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13+
declare void @llvm.x86.tdpfp16ps(i8 %tile3, i8 %tile2, i8 %tile1)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck -check-prefix=ATT %s
2+
# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck -check-prefix=INTEL %s
3+
4+
# ATT: tdpfp16ps %tmm5, %tmm4, %tmm3
5+
# INTEL: tdpfp16ps tmm3, tmm4, tmm5
6+
0xc4,0xe2,0x53,0x5c,0xdc
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
2+
3+
// CHECK: tdpfp16ps %tmm5, %tmm4, %tmm3
4+
// CHECK: encoding: [0xc4,0xe2,0x53,0x5c,0xdc]
5+
tdpfp16ps %tmm5, %tmm4, %tmm3
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
2+
3+
// CHECK: tdpfp16ps tmm3, tmm4, tmm5
4+
// CHECK: encoding: [0xc4,0xe2,0x53,0x5c,0xdc]
5+
tdpfp16ps tmm3, tmm4, tmm5

0 commit comments

Comments
 (0)