-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86][APX]Support lowering for APX promoted AMX-TILE instructions #78689
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: None (XinWang10) Changes: The encoding/decoding of the promoted AMX-TILE instructions has been supported since #76210. Patch is 30.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78689.diff 7 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index ecc7208e760722..95c4b02842ac57 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -556,16 +556,18 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case TargetOpcode::ICALL_BRANCH_FUNNEL:
ExpandICallBranchFunnel(&MBB, MBBI);
return true;
+#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
case X86::PLDTILECFGV: {
- MI.setDesc(TII->get(X86::LDTILECFG));
+ MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::LDTILECFG)));
return true;
}
case X86::PTILELOADDV:
case X86::PTILELOADDT1V: {
for (unsigned i = 2; i > 0; --i)
MI.removeOperand(i);
- unsigned Opc =
- Opcode == X86::PTILELOADDV ? X86::TILELOADD : X86::TILELOADDT1;
+ unsigned Opc = Opcode == X86::PTILELOADDV
+ ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
+ : GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
MI.setDesc(TII->get(Opc));
return true;
}
@@ -599,9 +601,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case X86::PTILESTOREDV: {
for (int i = 1; i >= 0; --i)
MI.removeOperand(i);
- MI.setDesc(TII->get(X86::TILESTORED));
+ MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::TILESTORED)));
return true;
}
+#undef GET_EGPR_IF_ENABLED
case X86::PTILEZEROV: {
for (int i = 2; i > 0; --i) // Remove row, col
MI.removeOperand(i);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 68634068fee31c..78bc14c977ad29 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36374,14 +36374,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned Opc;
switch (MI.getOpcode()) {
default: llvm_unreachable("illegal opcode!");
- case X86::PTILELOADD: Opc = X86::TILELOADD; break;
- case X86::PTILELOADDT1: Opc = X86::TILELOADDT1; break;
- case X86::PTILESTORED: Opc = X86::TILESTORED; break;
+#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
+ case X86::PTILELOADD: Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD); break;
+ case X86::PTILELOADDT1: Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDT1); break;
+ case X86::PTILESTORED: Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED); break;
+#undef GET_EGPR_IF_ENABLED
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
unsigned CurOp = 0;
- if (Opc != X86::TILESTORED)
+ if (Opc != X86::TILESTORED && Opc != X86::TILESTORED_EVEX)
MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
RegState::Define);
@@ -36391,7 +36393,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MIB.add(MI.getOperand(CurOp++)); // displacement
MIB.add(MI.getOperand(CurOp++)); // segment
- if (Opc == X86::TILESTORED)
+ if (Opc == X86::TILESTORED || Opc == X86::TILESTORED_EVEX)
MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
RegState::Undef);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 8b454a2cde4160..36022ef35118fe 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4382,7 +4382,10 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
case 1024:
assert(X86::TILERegClass.hasSubClassEq(RC) && "Unknown 1024-byte regclass");
assert(STI.hasAMXTILE() && "Using 8*1024-bit register requires AMX-TILE");
- return Load ? X86::TILELOADD : X86::TILESTORED;
+#define GET_EGPR_IF_ENABLED(OPC) (STI.hasEGPR() ? OPC##_EVEX : OPC)
+ return Load ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
+ : GET_EGPR_IF_ENABLED(X86::TILESTORED);
+#undef GET_EGPR_IF_ENABLED
}
}
@@ -4575,6 +4578,8 @@ static bool isAMXOpcode(unsigned Opc) {
return false;
case X86::TILELOADD:
case X86::TILESTORED:
+ case X86::TILELOADD_EVEX:
+ case X86::TILESTORED_EVEX:
return true;
}
}
@@ -4586,7 +4591,8 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
switch (Opc) {
default:
llvm_unreachable("Unexpected special opcode!");
- case X86::TILESTORED: {
+ case X86::TILESTORED:
+ case X86::TILESTORED_EVEX: {
// tilestored %tmm, (%sp, %idx)
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
@@ -4599,7 +4605,8 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
MO.setIsKill(true);
break;
}
- case X86::TILELOADD: {
+ case X86::TILELOADD:
+ case X86::TILELOADD_EVEX: {
// tileloadd (%sp, %idx), %tmm
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
index d6b42145859d89..e7afc49240e547 100644
--- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp
+++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
@@ -107,7 +107,8 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
// mov 64 %rax
BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
// tilestored %tmm, (%sp, %idx)
- unsigned Opc = X86::TILESTORED;
+#define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)
+ unsigned Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
MachineInstr *NewMI =
addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS)
.addReg(SrcReg, getKillRegState(SrcMO.isKill()));
@@ -115,7 +116,8 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
MO.setReg(GR64Cand);
MO.setIsKill(true);
// tileloadd (%sp, %idx), %tmm
- Opc = X86::TILELOADD;
+ Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
+#undef GET_EGPR_IF_ENABLED
NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
TileSS);
// restore %rax
diff --git a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
index e3c6f039cf0beb..4686361ad2fcfa 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f,+egpr --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=EGPR
define dso_local void @test1(ptr%buf) nounwind {
; CHECK-LABEL: test1:
@@ -63,6 +64,79 @@ define dso_local void @test1(ptr%buf) nounwind {
; CHECK-NEXT: tilerelease
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: test1:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: pushq %rbp # encoding: [0x55]
+; EGPR-NEXT: pushq %r15 # encoding: [0x41,0x57]
+; EGPR-NEXT: pushq %r14 # encoding: [0x41,0x56]
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: subq $4056, %rsp # encoding: [0x48,0x81,0xec,0xd8,0x0f,0x00,0x00]
+; EGPR-NEXT: # imm = 0xFD8
+; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; EGPR-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0x0f]
+; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf0,0x03,0x00,0x00,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd0,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf1,0x03,0x00,0x00,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd2,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf2,0x03,0x00,0x00,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd4,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf3,0x03,0x00,0x00,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd6,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; EGPR-NEXT: movl $64, %eax # encoding: [0xb8,0x40,0x00,0x00,0x00]
+; EGPR-NEXT: movw $8, %bp # encoding: [0x66,0xbd,0x08,0x00]
+; EGPR-NEXT: tileloadd (%rdi,%rax), %tmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x07]
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: testb %al, %al # encoding: [0x84,0xc0]
+; EGPR-NEXT: jne .LBB0_3 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB0_3-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1: # %loop.header.preheader
+; EGPR-NEXT: movq %rdi, %rbx # encoding: [0x48,0x89,0xfb]
+; EGPR-NEXT: xorl %r14d, %r14d # encoding: [0x45,0x31,0xf6]
+; EGPR-NEXT: movl $32, %r15d # encoding: [0x41,0xbf,0x20,0x00,0x00,0x00]
+; EGPR-NEXT: .p2align 4, 0x90
+; EGPR-NEXT: .LBB0_2: # %loop.header
+; EGPR-NEXT: # =>This Inner Loop Header: Depth=1
+; EGPR-NEXT: movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm3, 3024(%rsp,%rax) # 1024-byte Folded Spill
+; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0xd0,0x0b,0x00,0x00]
+; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT: callq foo # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: foo-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; EGPR-NEXT: movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+; EGPR-NEXT: tileloadd 3024(%rsp,%rax), %tmm3 # 1024-byte Folded Reload
+; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x9c,0x04,0xd0,0x0b,0x00,0x00]
+; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x3b]
+; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b]
+; EGPR-NEXT: # implicit-def: $rax
+; EGPR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NEXT: # encoding: [0x48,0x89,0x84,0x24,0xb8,0x03,0x00,0x00]
+; EGPR-NEXT: movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+; EGPR-NEXT: tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
+; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0x00,0x04,0x00,0x00]
+; EGPR-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
+; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x24,0x00,0x04,0x00,0x00]
+; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; EGPR-NEXT: # encoding: [0x48,0x8b,0x84,0x24,0xb8,0x03,0x00,0x00]
+; EGPR-NEXT: tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0]
+; EGPR-NEXT: tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b]
+; EGPR-NEXT: incl %r14d # encoding: [0x41,0xff,0xc6]
+; EGPR-NEXT: cmpw $100, %r14w # encoding: [0x66,0x41,0x83,0xfe,0x64]
+; EGPR-NEXT: jl .LBB0_2 # encoding: [0x7c,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: .LBB0_3: # %exit
+; EGPR-NEXT: addq $4056, %rsp # encoding: [0x48,0x81,0xc4,0xd8,0x0f,0x00,0x00]
+; EGPR-NEXT: # imm = 0xFD8
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: popq %r14 # encoding: [0x41,0x5e]
+; EGPR-NEXT: popq %r15 # encoding: [0x41,0x5f]
+; EGPR-NEXT: popq %rbp # encoding: [0x5d]
+; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%t1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, ptr %buf, i64 64)
br i1 undef, label %loop.header, label %exit
@@ -139,6 +213,60 @@ define dso_local void @test2(ptr%buf) nounwind {
; CHECK-NEXT: tilerelease
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: test2:
+; EGPR: # %bb.0: # %entry
+; EGPR-NEXT: pushq %rbp # encoding: [0x55]
+; EGPR-NEXT: pushq %r15 # encoding: [0x41,0x57]
+; EGPR-NEXT: pushq %r14 # encoding: [0x41,0x56]
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: subq $72, %rsp # encoding: [0x48,0x83,0xec,0x48]
+; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; EGPR-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x84,0x24,0x08,0x00,0x00,0x00]
+; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x08,0x01]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x38,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x18,0x08,0x00]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x39,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x1a,0x08,0x00]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x3a,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x1c,0x08,0x00]
+; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x08]
+; EGPR-NEXT: movw $8, %bp # encoding: [0x66,0xbd,0x08,0x00]
+; EGPR-NEXT: tilezero %tmm0 # encoding: [0xc4,0xe2,0x7b,0x49,0xc0]
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: testb %al, %al # encoding: [0x84,0xc0]
+; EGPR-NEXT: jne .LBB1_3 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB1_3-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1: # %loop.header.preheader
+; EGPR-NEXT: movq %rdi, %rbx # encoding: [0x48,0x89,0xfb]
+; EGPR-NEXT: xorl %r14d, %r14d # encoding: [0x45,0x31,0xf6]
+; EGPR-NEXT: movl $32, %r15d # encoding: [0x41,0xbf,0x20,0x00,0x00,0x00]
+; EGPR-NEXT: .p2align 4, 0x90
+; EGPR-NEXT: .LBB1_2: # %loop.header
+; EGPR-NEXT: # =>This Inner Loop Header: Depth=1
+; EGPR-NEXT: tilezero %tmm0 # encoding: [0xc4,0xe2,0x7b,0x49,0xc0]
+; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT: callq foo # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: foo-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x08]
+; EGPR-NEXT: tilezero %tmm2 # encoding: [0xc4,0xe2,0x7b,0x49,0xd0]
+; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x3b]
+; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b]
+; EGPR-NEXT: tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0]
+; EGPR-NEXT: tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b]
+; EGPR-NEXT: incl %r14d # encoding: [0x41,0xff,0xc6]
+; EGPR-NEXT: cmpw $100, %r14w # encoding: [0x66,0x41,0x83,0xfe,0x64]
+; EGPR-NEXT: jl .LBB1_2 # encoding: [0x7c,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: .LBB1_3: # %exit
+; EGPR-NEXT: addq $72, %rsp # encoding: [0x48,0x83,0xc4,0x48]
+; EGPR-NEXT: popq %rbx # encoding: [0x5b]
+; EGPR-NEXT: popq %r14 # encoding: [0x41,0x5e]
+; EGPR-NEXT: popq %r15 # encoding: [0x41,0x5f]
+; EGPR-NEXT: popq %rbp # encoding: [0x5d]
+; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%t1 = tail call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
br i1 undef, label %loop.header, label %exit
diff --git a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
index 9f0d8aee3c4ee2..c7c919c7cbb30d 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f,+egpr --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=EGPR
@buf = dso_local global [3072 x i8] zeroinitializer, align 64
@@ -88,6 +89,111 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
+;
+; EGPR-LABEL: test_api:
+; EGPR: # %bb.0:
+; EGPR-NEXT: pushq %rbp # encoding: [0x55]
+; EGPR-NEXT: pushq %r15 # encoding: [0x41,0x57]
+; EGPR-NEXT: pushq %r14 # encoding: [0x41,0x56]
+; EGPR-NEXT: pushq %rbx # encoding: [0x53]
+; EGPR-NEXT: subq $2120, %rsp # encoding: [0x48,0x81,0xec,0x48,0x08,0x00,0x00]
+; EGPR-NEXT: # imm = 0x848
+; EGPR-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3]
+; EGPR-NEXT: movl %edi, %ebp # encoding: [0x89,0xfd]
+; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; EGPR-NEXT: vmovups %zmm0, (%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
+; EGPR-NEXT: movb $1, (%rsp) # encoding: [0xc6,0x04,0x24,0x01]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x10,0x08,0x00]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x31,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x14,0x08,0x00]
+; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x33,0x08]
+; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x18,0x08,0x00]
+; EGPR-NEXT: movw %bx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x5c,0x24,0x1c]
+; EGPR-NEXT: movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x36]
+; EGPR-NEXT: movw %bx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x5c,0x24,0x1a]
+; EGPR-NEXT: movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x35]
+; EGPR-NEXT: movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x34]
+; EGPR-NEXT: movw %bx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x5c,0x24,0x16]
+; EGPR-NEXT: movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x32]
+; EGPR-NEXT: movw %bx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x5c,0x24,0x12]
+; EGPR-NEXT: movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x30]
+; EGPR-NEXT: ldtilecfg (%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x04,0x24]
+; EGPR-NEXT: movl $32, %r14d # encoding: [0x41,0xbe,0x20,0x00,0x00,0x00]
+; EGPR-NEXT: movl $buf+2048, %r15d # encoding: [0x41,0xbf,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 2, value: buf+2048, kind: FK_Data_4
+; EGPR-NEXT: tileloadd (%r15,%r14), %tmm5 # EVEX TO VEX Compression encoding: [0xc4,0x82,0x7b,0x4b,0x2c,0x37]
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: testb %al, %al # encoding: [0x84,0xc0]
+; EGPR-NEXT: jne .LBB0_2 # encoding: [0x75,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
+; EGPR-NEXT: # %bb.1: # %if.true
+; EGPR-NEXT: movl $buf, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: buf, kind: FK_Data_4
+; EGPR-NEXT: movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00]
+; EGPR-NEXT: tileloadd (%rax,%r14), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x30]
+; EGPR-NEXT: movl $buf+1024, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT: # fixup A - offset: 1, value: buf+1024, kind: FK_Data_4
+; EGPR-NEXT: tileloadd (%rax,%r14), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x30]
+; EGPR-NEXT: movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
The encoding/decoding of the promoted AMX-TILE instructions has been supported since #76210.
This patch adds lowering support for the promoted AMX-TILE instructions and integrates the new test coverage into the existing tests.