-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86][APX] Support APX + AMX-MOVRS/AMX-TRANSPOSE #123267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) Changes: Ref.: https://cdrdv2.intel.com/v1/dl/getContent/784266 Patch is 55.77 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/123267.diff 13 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index fc8a0eaed140d0..7fbba7f05e0a5e 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -578,10 +578,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned Opc;
switch (Opcode) {
case X86::PTILELOADDRSV:
- Opc = X86::TILELOADDRS;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
break;
case X86::PTILELOADDRST1V:
- Opc = X86::TILELOADDRST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
break;
case X86::PTILELOADDV:
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
@@ -737,28 +737,28 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned Opc;
switch (Opcode) {
case X86::PT2RPNTLVWZ0V:
- Opc = X86::T2RPNTLVWZ0;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
break;
case X86::PT2RPNTLVWZ0T1V:
- Opc = X86::T2RPNTLVWZ0T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
break;
case X86::PT2RPNTLVWZ1V:
- Opc = X86::T2RPNTLVWZ1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
break;
case X86::PT2RPNTLVWZ1T1V:
- Opc = X86::T2RPNTLVWZ1T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
break;
case X86::PT2RPNTLVWZ0RSV:
- Opc = X86::T2RPNTLVWZ0RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
break;
case X86::PT2RPNTLVWZ0RST1V:
- Opc = X86::T2RPNTLVWZ0RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
break;
case X86::PT2RPNTLVWZ1RSV:
- Opc = X86::T2RPNTLVWZ1RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
break;
case X86::PT2RPNTLVWZ1RST1V:
- Opc = X86::T2RPNTLVWZ1RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
break;
default:
llvm_unreachable("Impossible Opcode!");
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 90e3e15b1fb46c..6d69665c17565a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37800,14 +37800,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED:
Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
break;
-#undef GET_EGPR_IF_ENABLED
case X86::PTILELOADDRS:
- Opc = X86::TILELOADDRS;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
break;
case X86::PTILELOADDRST1:
- Opc = X86::TILELOADDRST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
break;
}
+#undef GET_EGPR_IF_ENABLED
MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
unsigned CurOp = 0;
@@ -37838,34 +37838,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PT2RPNTLVWZ1RST1: {
const DebugLoc &DL = MI.getDebugLoc();
unsigned Opc;
+#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instruction!");
case X86::PT2RPNTLVWZ0:
- Opc = X86::T2RPNTLVWZ0;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
break;
case X86::PT2RPNTLVWZ0T1:
- Opc = X86::T2RPNTLVWZ0T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
break;
case X86::PT2RPNTLVWZ1:
- Opc = X86::T2RPNTLVWZ1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
break;
case X86::PT2RPNTLVWZ1T1:
- Opc = X86::T2RPNTLVWZ1T1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
break;
case X86::PT2RPNTLVWZ0RS:
- Opc = X86::T2RPNTLVWZ0RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
break;
case X86::PT2RPNTLVWZ0RST1:
- Opc = X86::T2RPNTLVWZ0RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
break;
case X86::PT2RPNTLVWZ1RS:
- Opc = X86::T2RPNTLVWZ1RS;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
break;
case X86::PT2RPNTLVWZ1RST1:
- Opc = X86::T2RPNTLVWZ1RST1;
+ Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
break;
}
+#undef GET_EGPR_IF_ENABLED
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
MIB.addReg(TMMImmToTMMPair(MI.getOperand(0).getImm()), RegState::Define);
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index a055ba91d3e171..b5d99f52f15c23 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -349,22 +349,22 @@ let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
let SchedRW = [WriteSystem] in {
def T2RPNTLVWZ0 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
- []>, VEX, WIG, T8,PS;
+ []>, VEX, T8, PS;
def T2RPNTLVWZ0T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8,PS;
+ []>, VEX, T8, PS;
def T2RPNTLVWZ1 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8,PD;
+ []>, VEX, T8, PD;
def T2RPNTLVWZ1T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
- []>, VEX, T8,PD;
+ []>, VEX, T8, PD;
def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
- "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8,XS;
+ "ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
let isPseudo = true in {
def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
@@ -554,6 +554,48 @@ let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
}
} // HasAMXMOVRS, In64BitMode
+let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
+ def T2RPNTLVWZ0_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PS;
+
+ def T2RPNTLVWZ0T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PS;
+
+ def T2RPNTLVWZ1_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PD;
+
+ def T2RPNTLVWZ1T1_EVEX : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
+ []>, EVEX, NoCD8, T8, PD;
+} // HasAMXTRANSPOSE, HasEGPR, In64BitMode
+
+let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
+ def T2RPNTLVWZ0RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz0rs\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5;
+ def T2RPNTLVWZ0RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz0rst1\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5;
+ def T2RPNTLVWZ1RS_EVEX : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz1rs\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5, PD;
+ def T2RPNTLVWZ1RST1_EVEX : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
+ (ins sibmem:$src1), "t2rpntlvwz1rst1\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T_MAP5, PD;
+} // HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode
+
+let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in {
+ def TILELOADDRS_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
+ (ins sibmem:$src1), "tileloaddrs\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T8, XD;
+ def TILELOADDRST1_EVEX : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
+ (ins sibmem:$src1), "tileloaddrst1\t{$src1, $dst|$dst, $src1}",
+ []>, EVEX, NoCD8, T8, PD;
+} // HasAMXMOVRS, HasEGPR, In64BitMode
+
multiclass m_tcvtrowd2ps {
let Predicates = [HasAMXAVX512, HasAVX10_2_512, In64BitMode] in {
let SchedRW = [WriteSystem] in {
diff --git a/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
index da212a1850964e..67688326c17500 100755
--- a/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-LABEL: test_amx_internal:
@@ -35,6 +36,44 @@ define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_internal:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: pushq %rbp # encoding: [0x55]
+; APXF-NEXT: .cfi_def_cfa_offset 16
+; APXF-NEXT: .cfi_offset %rbp, -16
+; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
+; APXF-NEXT: .cfi_def_cfa_register %rbp
+; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
+; APXF-NEXT: # imm = 0xFC00
+; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
+; APXF-NEXT: # imm = 0xC00
+; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
+; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
+; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
+; APXF-NEXT: # implicit-def: $al
+; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: tileloaddrs (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x04,0x32]
+; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
+; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
+; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
+; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
+; APXF-NEXT: popq %rbp # encoding: [0x5d]
+; APXF-NEXT: .cfi_def_cfa %rsp, 8
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrs64.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -48,6 +87,12 @@ define void @test_amx_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: tileloaddrs (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_old:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; APXF-NEXT: tileloaddrs (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x14,0x02]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrs64(i8 2, ptr %buf, i64 32)
ret void
@@ -88,6 +133,44 @@ define void @test_amx_t1_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_t1_internal:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: pushq %rbp # encoding: [0x55]
+; APXF-NEXT: .cfi_def_cfa_offset 16
+; APXF-NEXT: .cfi_offset %rbp, -16
+; APXF-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
+; APXF-NEXT: .cfi_def_cfa_register %rbp
+; APXF-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
+; APXF-NEXT: # imm = 0xFC00
+; APXF-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
+; APXF-NEXT: # imm = 0xC00
+; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
+; APXF-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
+; APXF-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; APXF-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; APXF-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
+; APXF-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
+; APXF-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
+; APXF-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
+; APXF-NEXT: # implicit-def: $al
+; APXF-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
+; APXF-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
+; APXF-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; APXF-NEXT: tileloaddrst1 (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x04,0x32]
+; APXF-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
+; APXF-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
+; APXF-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
+; APXF-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
+; APXF-NEXT: popq %rbp # encoding: [0x5d]
+; APXF-NEXT: .cfi_def_cfa %rsp, 8
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrst164.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
@@ -101,6 +184,12 @@ define void @test_amx_t1_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx_t1_old:
+; APXF: # %bb.0: # %entry
+; APXF-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; APXF-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x14,0x02]
+; APXF-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrst164(i8 2, ptr %buf, i64 32)
ret void
diff --git a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
index 146b69773eb186..0d5b85f2bb1088 100755
--- a/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
+++ b/llvm/test/CodeGen/X86/amx_movrs_transpose_intrinsics.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O0
; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs | FileCheck %s --check-prefixes=CHECK,O2
+; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-transpose,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF
define void @test_amx(i64 %stride, i8* %addr1) #0 {
; CHECK-LABEL: test_amx:
@@ -10,6 +11,14 @@ define void @test_amx(i64 %stride, i8* %addr1) #0 {
; CHECK-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0
; CHECK-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2
; CHECK-NEXT: retq
+;
+; APXF-LABEL: test_amx:
+; APXF: # %bb.0:
+; APXF-NEXT: t2rpntlvwz0rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x04,0x3e]
+; APXF-NEXT: t2rpntlvwz0rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x14,0x3e]
+; APXF-NEXT: t2rpntlvwz1rs (%rsi,%rdi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x04,0x3e]
+; APXF-NEXT: t2rpntlvwz1rst1 (%rsi,%rdi), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x14,0x3e]
+; APXF-NEXT: retq # encoding: [0xc3]
call void @llvm.x86.t2rpntlvwz0rs(i8 1, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz0rst1(i8 2, i8* %addr1, i64 %stride)
call void @llvm.x86.t2rpntlvwz1rs(i8 1, i8* %addr1, i64 %stride)
@@ -80,6 +89,27 @@ define void @test_amx2(i8* %base, i64 %stride) #0 {
; O2-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4
; O2-NEXT: tilerelease
; O2-NEXT: retq
+;
+; APXF-LABEL: test_amx2:
+; APXF: # %bb.0:
+; APXF-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xc0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xd0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xe0]
+; APXF-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x44,0x24,0xf0]
+; APXF-NEXT: movb $1, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xc0,0x01]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf4,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xd8,0x08,0x00]
+; APXF-NEXT: movb $8, -{{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0xf5,0x08]
+; APXF-NEXT: movw $8, -{{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0xda,0x08,0x00]
+; APXF-NEXT: ldtilecfg -{{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0xc0]
+; APXF-NEXT: movw $8, %ax # encoding: [0x66,0xb8,0x08,0x00]
+; APXF-NEXT: t2rpntlvwz0rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf8,0x24,0x37]
+; APXF-NEXT: t2rpntlvwz0rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x78,0xf9,0x24,0x37]
+; APXF-NEXT: t2rpntlvwz1rs (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf8,0x24,0x37]
+; APXF-NEXT: t2rpntlvwz1rst1 (%rdi,%rsi), %tmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe5,0x79,0xf9,0x24,0x37]
+; APXF-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; APXF-NEXT: retq # encoding: [0xc3]
call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0rs.internal(i16 8, i16 8, i16 8, i8* %base, i64 %...
[truncated]
|
@@ -1,5 +1,6 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | |||
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs | FileCheck %s | |||
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=APXF |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the check prefix be EGPR rather than APXF, since this RUN line only enables `+egpr`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
llvm/lib/Target/X86/X86InstrAMX.td
Outdated
@@ -554,6 +554,48 @@ let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in { | |||
} | |||
} // HasAMXMOVRS, In64BitMode | |||
|
|||
let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in { | |||
def T2RPNTLVWZ0_EVEX : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
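Can these `_EVEX` definitions share code with the existing VEX definitions (e.g. via a multiclass) instead of duplicating them?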
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/784266