Skip to content

[X86][APX]Support lowering for APX promoted AMX-TILE instructions #78689

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 22, 2024

Conversation

XinWang10
Copy link
Contributor

The encoding/decoding of promoted AMX-TILE instructions has been supported since #76210.
This patch supports lowering for promoted AMX-TILE instructions and integrates the new test coverage into the existing tests.

@XinWang10 XinWang10 changed the title [X86][APX]Support lowering for promoted AMX-TILE instructions [X86][APX]Support lowering for APX promoted AMX-TILE instructions Jan 19, 2024
@XinWang10 XinWang10 requested a review from KanRobert January 19, 2024 09:52
@llvmbot
Copy link
Member

llvmbot commented Jan 19, 2024

@llvm/pr-subscribers-backend-x86

Author: None (XinWang10)

Changes

The encoding/decoding of promoted AMX-TILE instructions has been supported since #76210.
This patch supports lowering for promoted AMX-TILE instructions and integrates the new test coverage into the existing tests.


Patch is 30.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78689.diff

7 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ExpandPseudo.cpp (+7-4)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+7-5)
  • (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+10-3)
  • (modified) llvm/lib/Target/X86/X86LowerTileCopy.cpp (+4-2)
  • (modified) llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll (+128)
  • (modified) llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll (+163)
  • (modified) llvm/test/CodeGen/X86/AMX/amx-tile-intrinsics.ll (+12)
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index ecc7208e760722..95c4b02842ac57 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -556,16 +556,18 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
   case TargetOpcode::ICALL_BRANCH_FUNNEL:
     ExpandICallBranchFunnel(&MBB, MBBI);
     return true;
+#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
   case X86::PLDTILECFGV: {
-    MI.setDesc(TII->get(X86::LDTILECFG));
+    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::LDTILECFG)));
     return true;
   }
   case X86::PTILELOADDV:
   case X86::PTILELOADDT1V: {
     for (unsigned i = 2; i > 0; --i)
       MI.removeOperand(i);
-    unsigned Opc =
-        Opcode == X86::PTILELOADDV ? X86::TILELOADD : X86::TILELOADDT1;
+    unsigned Opc = Opcode == X86::PTILELOADDV
+                       ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
+                       : GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
     MI.setDesc(TII->get(Opc));
     return true;
   }
@@ -599,9 +601,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
   case X86::PTILESTOREDV: {
     for (int i = 1; i >= 0; --i)
       MI.removeOperand(i);
-    MI.setDesc(TII->get(X86::TILESTORED));
+    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::TILESTORED)));
     return true;
   }
+#undef GET_EGPR_IF_ENABLED
   case X86::PTILEZEROV: {
     for (int i = 2; i > 0; --i) // Remove row, col
       MI.removeOperand(i);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 68634068fee31c..78bc14c977ad29 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36374,14 +36374,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     unsigned Opc;
     switch (MI.getOpcode()) {
     default: llvm_unreachable("illegal opcode!");
-    case X86::PTILELOADD:   Opc = X86::TILELOADD;   break;
-    case X86::PTILELOADDT1: Opc = X86::TILELOADDT1; break;
-    case X86::PTILESTORED:  Opc = X86::TILESTORED;  break;
+#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
+    case X86::PTILELOADD:   Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);   break;
+    case X86::PTILELOADDT1: Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDT1); break;
+    case X86::PTILESTORED:  Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);  break;
+#undef GET_EGPR_IF_ENABLED
     }
 
     MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
     unsigned CurOp = 0;
-    if (Opc != X86::TILESTORED)
+    if (Opc != X86::TILESTORED && Opc != X86::TILESTORED_EVEX)
       MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
                  RegState::Define);
 
@@ -36391,7 +36393,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     MIB.add(MI.getOperand(CurOp++)); // displacement
     MIB.add(MI.getOperand(CurOp++)); // segment
 
-    if (Opc == X86::TILESTORED)
+    if (Opc == X86::TILESTORED || Opc == X86::TILESTORED_EVEX)
       MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
                  RegState::Undef);
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 8b454a2cde4160..36022ef35118fe 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4382,7 +4382,10 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
   case 1024:
     assert(X86::TILERegClass.hasSubClassEq(RC) && "Unknown 1024-byte regclass");
     assert(STI.hasAMXTILE() && "Using 8*1024-bit register requires AMX-TILE");
-    return Load ? X86::TILELOADD : X86::TILESTORED;
+#define GET_EGPR_IF_ENABLED(OPC) (STI.hasEGPR() ? OPC##_EVEX : OPC)
+    return Load ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
+                : GET_EGPR_IF_ENABLED(X86::TILESTORED);
+#undef GET_EGPR_IF_ENABLED
   }
 }
 
@@ -4575,6 +4578,8 @@ static bool isAMXOpcode(unsigned Opc) {
     return false;
   case X86::TILELOADD:
   case X86::TILESTORED:
+  case X86::TILELOADD_EVEX:
+  case X86::TILESTORED_EVEX:
     return true;
   }
 }
@@ -4586,7 +4591,8 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
   switch (Opc) {
   default:
     llvm_unreachable("Unexpected special opcode!");
-  case X86::TILESTORED: {
+  case X86::TILESTORED:
+  case X86::TILESTORED_EVEX: {
     // tilestored %tmm, (%sp, %idx)
     MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
     Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
@@ -4599,7 +4605,8 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
     MO.setIsKill(true);
     break;
   }
-  case X86::TILELOADD: {
+  case X86::TILELOADD:
+  case X86::TILELOADD_EVEX: {
     // tileloadd (%sp, %idx), %tmm
     MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
     Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
index d6b42145859d89..e7afc49240e547 100644
--- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp
+++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
@@ -107,7 +107,8 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
       // mov 64 %rax
       BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
       // tilestored %tmm, (%sp, %idx)
-      unsigned Opc = X86::TILESTORED;
+#define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)
+      unsigned Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
       MachineInstr *NewMI =
           addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS)
               .addReg(SrcReg, getKillRegState(SrcMO.isKill()));
@@ -115,7 +116,8 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
       MO.setReg(GR64Cand);
       MO.setIsKill(true);
       // tileloadd (%sp, %idx), %tmm
-      Opc = X86::TILELOADD;
+      Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
+#undef GET_EGPR_IF_ENABLED
       NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
                                 TileSS);
       // restore %rax
diff --git a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
index e3c6f039cf0beb..4686361ad2fcfa 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f,+egpr --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=EGPR
 
 define dso_local void @test1(ptr%buf) nounwind {
 ; CHECK-LABEL: test1:
@@ -63,6 +64,79 @@ define dso_local void @test1(ptr%buf) nounwind {
 ; CHECK-NEXT:    tilerelease
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
+;
+; EGPR-LABEL: test1:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    pushq %rbp # encoding: [0x55]
+; EGPR-NEXT:    pushq %r15 # encoding: [0x41,0x57]
+; EGPR-NEXT:    pushq %r14 # encoding: [0x41,0x56]
+; EGPR-NEXT:    pushq %rbx # encoding: [0x53]
+; EGPR-NEXT:    subq $4056, %rsp # encoding: [0x48,0x81,0xec,0xd8,0x0f,0x00,0x00]
+; EGPR-NEXT:    # imm = 0xFD8
+; EGPR-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; EGPR-NEXT:    vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0x0f]
+; EGPR-NEXT:    movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
+; EGPR-NEXT:    movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf0,0x03,0x00,0x00,0x08]
+; EGPR-NEXT:    movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd0,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT:    movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf1,0x03,0x00,0x00,0x08]
+; EGPR-NEXT:    movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd2,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT:    movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf2,0x03,0x00,0x00,0x08]
+; EGPR-NEXT:    movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd4,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT:    movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf3,0x03,0x00,0x00,0x08]
+; EGPR-NEXT:    movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd6,0x03,0x00,0x00,0x08,0x00]
+; EGPR-NEXT:    ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; EGPR-NEXT:    movl $64, %eax # encoding: [0xb8,0x40,0x00,0x00,0x00]
+; EGPR-NEXT:    movw $8, %bp # encoding: [0x66,0xbd,0x08,0x00]
+; EGPR-NEXT:    tileloadd (%rdi,%rax), %tmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x07]
+; EGPR-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT:    testb %al, %al # encoding: [0x84,0xc0]
+; EGPR-NEXT:    jne .LBB0_3 # encoding: [0x75,A]
+; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB0_3-1, kind: FK_PCRel_1
+; EGPR-NEXT:  # %bb.1: # %loop.header.preheader
+; EGPR-NEXT:    movq %rdi, %rbx # encoding: [0x48,0x89,0xfb]
+; EGPR-NEXT:    xorl %r14d, %r14d # encoding: [0x45,0x31,0xf6]
+; EGPR-NEXT:    movl $32, %r15d # encoding: [0x41,0xbf,0x20,0x00,0x00,0x00]
+; EGPR-NEXT:    .p2align 4, 0x90
+; EGPR-NEXT:  .LBB0_2: # %loop.header
+; EGPR-NEXT:    # =>This Inner Loop Header: Depth=1
+; EGPR-NEXT:    movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+; EGPR-NEXT:    tilestored %tmm3, 3024(%rsp,%rax) # 1024-byte Folded Spill
+; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0xd0,0x0b,0x00,0x00]
+; EGPR-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT:    callq foo # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT:    # fixup A - offset: 1, value: foo-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT:    ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
+; EGPR-NEXT:    movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+; EGPR-NEXT:    tileloadd 3024(%rsp,%rax), %tmm3 # 1024-byte Folded Reload
+; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x9c,0x04,0xd0,0x0b,0x00,0x00]
+; EGPR-NEXT:    tileloadd (%rbx,%r15), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x3b]
+; EGPR-NEXT:    tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b]
+; EGPR-NEXT:    # implicit-def: $rax
+; EGPR-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NEXT:    # encoding: [0x48,0x89,0x84,0x24,0xb8,0x03,0x00,0x00]
+; EGPR-NEXT:    movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
+; EGPR-NEXT:    tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
+; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0x00,0x04,0x00,0x00]
+; EGPR-NEXT:    tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
+; EGPR-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x24,0x00,0x04,0x00,0x00]
+; EGPR-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; EGPR-NEXT:    # encoding: [0x48,0x8b,0x84,0x24,0xb8,0x03,0x00,0x00]
+; EGPR-NEXT:    tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0]
+; EGPR-NEXT:    tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b]
+; EGPR-NEXT:    incl %r14d # encoding: [0x41,0xff,0xc6]
+; EGPR-NEXT:    cmpw $100, %r14w # encoding: [0x66,0x41,0x83,0xfe,0x64]
+; EGPR-NEXT:    jl .LBB0_2 # encoding: [0x7c,A]
+; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
+; EGPR-NEXT:  .LBB0_3: # %exit
+; EGPR-NEXT:    addq $4056, %rsp # encoding: [0x48,0x81,0xc4,0xd8,0x0f,0x00,0x00]
+; EGPR-NEXT:    # imm = 0xFD8
+; EGPR-NEXT:    popq %rbx # encoding: [0x5b]
+; EGPR-NEXT:    popq %r14 # encoding: [0x41,0x5e]
+; EGPR-NEXT:    popq %r15 # encoding: [0x41,0x5f]
+; EGPR-NEXT:    popq %rbp # encoding: [0x5d]
+; EGPR-NEXT:    tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; EGPR-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %t1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, ptr %buf, i64 64)
   br i1 undef, label %loop.header, label %exit
@@ -139,6 +213,60 @@ define dso_local void @test2(ptr%buf) nounwind {
 ; CHECK-NEXT:    tilerelease
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
+;
+; EGPR-LABEL: test2:
+; EGPR:       # %bb.0: # %entry
+; EGPR-NEXT:    pushq %rbp # encoding: [0x55]
+; EGPR-NEXT:    pushq %r15 # encoding: [0x41,0x57]
+; EGPR-NEXT:    pushq %r14 # encoding: [0x41,0x56]
+; EGPR-NEXT:    pushq %rbx # encoding: [0x53]
+; EGPR-NEXT:    subq $72, %rsp # encoding: [0x48,0x83,0xec,0x48]
+; EGPR-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; EGPR-NEXT:    vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x84,0x24,0x08,0x00,0x00,0x00]
+; EGPR-NEXT:    movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x08,0x01]
+; EGPR-NEXT:    movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x38,0x08]
+; EGPR-NEXT:    movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x18,0x08,0x00]
+; EGPR-NEXT:    movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x39,0x08]
+; EGPR-NEXT:    movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x1a,0x08,0x00]
+; EGPR-NEXT:    movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x3a,0x08]
+; EGPR-NEXT:    movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x1c,0x08,0x00]
+; EGPR-NEXT:    ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x08]
+; EGPR-NEXT:    movw $8, %bp # encoding: [0x66,0xbd,0x08,0x00]
+; EGPR-NEXT:    tilezero %tmm0 # encoding: [0xc4,0xe2,0x7b,0x49,0xc0]
+; EGPR-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT:    testb %al, %al # encoding: [0x84,0xc0]
+; EGPR-NEXT:    jne .LBB1_3 # encoding: [0x75,A]
+; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB1_3-1, kind: FK_PCRel_1
+; EGPR-NEXT:  # %bb.1: # %loop.header.preheader
+; EGPR-NEXT:    movq %rdi, %rbx # encoding: [0x48,0x89,0xfb]
+; EGPR-NEXT:    xorl %r14d, %r14d # encoding: [0x45,0x31,0xf6]
+; EGPR-NEXT:    movl $32, %r15d # encoding: [0x41,0xbf,0x20,0x00,0x00,0x00]
+; EGPR-NEXT:    .p2align 4, 0x90
+; EGPR-NEXT:  .LBB1_2: # %loop.header
+; EGPR-NEXT:    # =>This Inner Loop Header: Depth=1
+; EGPR-NEXT:    tilezero %tmm0 # encoding: [0xc4,0xe2,0x7b,0x49,0xc0]
+; EGPR-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT:    callq foo # encoding: [0xe8,A,A,A,A]
+; EGPR-NEXT:    # fixup A - offset: 1, value: foo-4, kind: reloc_branch_4byte_pcrel
+; EGPR-NEXT:    ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x08]
+; EGPR-NEXT:    tilezero %tmm2 # encoding: [0xc4,0xe2,0x7b,0x49,0xd0]
+; EGPR-NEXT:    tileloadd (%rbx,%r15), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x3b]
+; EGPR-NEXT:    tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b]
+; EGPR-NEXT:    tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0]
+; EGPR-NEXT:    tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b]
+; EGPR-NEXT:    incl %r14d # encoding: [0x41,0xff,0xc6]
+; EGPR-NEXT:    cmpw $100, %r14w # encoding: [0x66,0x41,0x83,0xfe,0x64]
+; EGPR-NEXT:    jl .LBB1_2 # encoding: [0x7c,A]
+; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
+; EGPR-NEXT:  .LBB1_3: # %exit
+; EGPR-NEXT:    addq $72, %rsp # encoding: [0x48,0x83,0xc4,0x48]
+; EGPR-NEXT:    popq %rbx # encoding: [0x5b]
+; EGPR-NEXT:    popq %r14 # encoding: [0x41,0x5e]
+; EGPR-NEXT:    popq %r15 # encoding: [0x41,0x5f]
+; EGPR-NEXT:    popq %rbp # encoding: [0x5d]
+; EGPR-NEXT:    tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
+; EGPR-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %t1 = tail call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
   br i1 undef, label %loop.header, label %exit
diff --git a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
index 9f0d8aee3c4ee2..c7c919c7cbb30d 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f,+egpr --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=EGPR
 
 @buf = dso_local global [3072 x i8] zeroinitializer, align 64
 
@@ -88,6 +89,111 @@ define dso_local void @test_api(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    tilerelease
 ; CHECK-NEXT:    retq
+;
+; EGPR-LABEL: test_api:
+; EGPR:       # %bb.0:
+; EGPR-NEXT:    pushq %rbp # encoding: [0x55]
+; EGPR-NEXT:    pushq %r15 # encoding: [0x41,0x57]
+; EGPR-NEXT:    pushq %r14 # encoding: [0x41,0x56]
+; EGPR-NEXT:    pushq %rbx # encoding: [0x53]
+; EGPR-NEXT:    subq $2120, %rsp # encoding: [0x48,0x81,0xec,0x48,0x08,0x00,0x00]
+; EGPR-NEXT:    # imm = 0x848
+; EGPR-NEXT:    movl %esi, %ebx # encoding: [0x89,0xf3]
+; EGPR-NEXT:    movl %edi, %ebp # encoding: [0x89,0xfd]
+; EGPR-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
+; EGPR-NEXT:    vmovups %zmm0, (%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x04,0x24]
+; EGPR-NEXT:    movb $1, (%rsp) # encoding: [0xc6,0x04,0x24,0x01]
+; EGPR-NEXT:    movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x10,0x08,0x00]
+; EGPR-NEXT:    movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x31,0x08]
+; EGPR-NEXT:    movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x14,0x08,0x00]
+; EGPR-NEXT:    movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x33,0x08]
+; EGPR-NEXT:    movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x18,0x08,0x00]
+; EGPR-NEXT:    movw %bx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x5c,0x24,0x1c]
+; EGPR-NEXT:    movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x36]
+; EGPR-NEXT:    movw %bx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x5c,0x24,0x1a]
+; EGPR-NEXT:    movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x35]
+; EGPR-NEXT:    movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x34]
+; EGPR-NEXT:    movw %bx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x5c,0x24,0x16]
+; EGPR-NEXT:    movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x32]
+; EGPR-NEXT:    movw %bx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x5c,0x24,0x12]
+; EGPR-NEXT:    movb %bpl, {{[0-9]+}}(%rsp) # encoding: [0x40,0x88,0x6c,0x24,0x30]
+; EGPR-NEXT:    ldtilecfg (%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x04,0x24]
+; EGPR-NEXT:    movl $32, %r14d # encoding: [0x41,0xbe,0x20,0x00,0x00,0x00]
+; EGPR-NEXT:    movl $buf+2048, %r15d # encoding: [0x41,0xbf,A,A,A,A]
+; EGPR-NEXT:    # fixup A - offset: 2, value: buf+2048, kind: FK_Data_4
+; EGPR-NEXT:    tileloadd (%r15,%r14), %tmm5 # EVEX TO VEX Compression encoding: [0xc4,0x82,0x7b,0x4b,0x2c,0x37]
+; EGPR-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT:    testb %al, %al # encoding: [0x84,0xc0]
+; EGPR-NEXT:    jne .LBB0_2 # encoding: [0x75,A]
+; EGPR-NEXT:    # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
+; EGPR-NEXT:  # %bb.1: # %if.true
+; EGPR-NEXT:    movl $buf, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT:    # fixup A - offset: 1, value: buf, kind: FK_Data_4
+; EGPR-NEXT:    movw $8, %cx # encoding: [0x66,0xb9,0x08,0x00]
+; EGPR-NEXT:    tileloadd (%rax,%r14), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x30]
+; EGPR-NEXT:    movl $buf+1024, %eax # encoding: [0xb8,A,A,A,A]
+; EGPR-NEXT:    # fixup A - offset: 1, value: buf+1024, kind: FK_Data_4
+; EGPR-NEXT:    tileloadd (%rax,%r14), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x30]
+; EGPR-NEXT:    movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,...
[truncated]

Copy link

github-actions bot commented Jan 19, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

Copy link
Contributor

@KanRobert KanRobert left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@XinWang10 XinWang10 merged commit dd6fec5 into llvm:main Jan 22, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants