Skip to content

Commit dd6fec5

Browse files
authored
[X86][APX] Support lowering for APX promoted AMX-TILE instructions (#78689)
Encoding and decoding of the promoted AMX-TILE instructions has been supported since #76210. This patch adds lowering support for the promoted AMX-TILE instructions and integrates the new test coverage into the existing tests.
1 parent d3cd1ce commit dd6fec5

File tree

7 files changed

+337
-14
lines changed

7 files changed

+337
-14
lines changed

llvm/lib/Target/X86/X86ExpandPseudo.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -556,16 +556,18 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
556556
case TargetOpcode::ICALL_BRANCH_FUNNEL:
557557
ExpandICallBranchFunnel(&MBB, MBBI);
558558
return true;
559+
#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
559560
case X86::PLDTILECFGV: {
560-
MI.setDesc(TII->get(X86::LDTILECFG));
561+
MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::LDTILECFG)));
561562
return true;
562563
}
563564
case X86::PTILELOADDV:
564565
case X86::PTILELOADDT1V: {
565566
for (unsigned i = 2; i > 0; --i)
566567
MI.removeOperand(i);
567-
unsigned Opc =
568-
Opcode == X86::PTILELOADDV ? X86::TILELOADD : X86::TILELOADDT1;
568+
unsigned Opc = Opcode == X86::PTILELOADDV
569+
? GET_EGPR_IF_ENABLED(X86::TILELOADD)
570+
: GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
569571
MI.setDesc(TII->get(Opc));
570572
return true;
571573
}
@@ -599,9 +601,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
599601
case X86::PTILESTOREDV: {
600602
for (int i = 1; i >= 0; --i)
601603
MI.removeOperand(i);
602-
MI.setDesc(TII->get(X86::TILESTORED));
604+
MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::TILESTORED)));
603605
return true;
604606
}
607+
#undef GET_EGPR_IF_ENABLED
605608
case X86::PTILEZEROV: {
606609
for (int i = 2; i > 0; --i) // Remove row, col
607610
MI.removeOperand(i);

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36374,14 +36374,22 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3637436374
unsigned Opc;
3637536375
switch (MI.getOpcode()) {
3637636376
default: llvm_unreachable("illegal opcode!");
36377-
case X86::PTILELOADD: Opc = X86::TILELOADD; break;
36378-
case X86::PTILELOADDT1: Opc = X86::TILELOADDT1; break;
36379-
case X86::PTILESTORED: Opc = X86::TILESTORED; break;
36377+
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
36378+
case X86::PTILELOADD:
36379+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
36380+
break;
36381+
case X86::PTILELOADDT1:
36382+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
36383+
break;
36384+
case X86::PTILESTORED:
36385+
Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
36386+
break;
36387+
#undef GET_EGPR_IF_ENABLED
3638036388
}
3638136389

3638236390
MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
3638336391
unsigned CurOp = 0;
36384-
if (Opc != X86::TILESTORED)
36392+
if (Opc != X86::TILESTORED && Opc != X86::TILESTORED_EVEX)
3638536393
MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
3638636394
RegState::Define);
3638736395

@@ -36391,7 +36399,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3639136399
MIB.add(MI.getOperand(CurOp++)); // displacement
3639236400
MIB.add(MI.getOperand(CurOp++)); // segment
3639336401

36394-
if (Opc == X86::TILESTORED)
36402+
if (Opc == X86::TILESTORED || Opc == X86::TILESTORED_EVEX)
3639536403
MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
3639636404
RegState::Undef);
3639736405

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4382,7 +4382,10 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
43824382
case 1024:
43834383
assert(X86::TILERegClass.hasSubClassEq(RC) && "Unknown 1024-byte regclass");
43844384
assert(STI.hasAMXTILE() && "Using 8*1024-bit register requires AMX-TILE");
4385-
return Load ? X86::TILELOADD : X86::TILESTORED;
4385+
#define GET_EGPR_IF_ENABLED(OPC) (STI.hasEGPR() ? OPC##_EVEX : OPC)
4386+
return Load ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
4387+
: GET_EGPR_IF_ENABLED(X86::TILESTORED);
4388+
#undef GET_EGPR_IF_ENABLED
43864389
}
43874390
}
43884391

@@ -4575,6 +4578,8 @@ static bool isAMXOpcode(unsigned Opc) {
45754578
return false;
45764579
case X86::TILELOADD:
45774580
case X86::TILESTORED:
4581+
case X86::TILELOADD_EVEX:
4582+
case X86::TILESTORED_EVEX:
45784583
return true;
45794584
}
45804585
}
@@ -4586,7 +4591,8 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
45864591
switch (Opc) {
45874592
default:
45884593
llvm_unreachable("Unexpected special opcode!");
4589-
case X86::TILESTORED: {
4594+
case X86::TILESTORED:
4595+
case X86::TILESTORED_EVEX: {
45904596
// tilestored %tmm, (%sp, %idx)
45914597
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
45924598
Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
@@ -4599,7 +4605,8 @@ void X86InstrInfo::loadStoreTileReg(MachineBasicBlock &MBB,
45994605
MO.setIsKill(true);
46004606
break;
46014607
}
4602-
case X86::TILELOADD: {
4608+
case X86::TILELOADD:
4609+
case X86::TILELOADD_EVEX: {
46034610
// tileloadd (%sp, %idx), %tmm
46044611
MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
46054612
Register VirtReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);

llvm/lib/Target/X86/X86LowerTileCopy.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,15 +107,17 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
107107
// mov 64 %rax
108108
BuildMI(MBB, MI, DL, TII->get(X86::MOV64ri), GR64Cand).addImm(64);
109109
// tilestored %tmm, (%sp, %idx)
110-
unsigned Opc = X86::TILESTORED;
110+
#define GET_EGPR_IF_ENABLED(OPC) (ST.hasEGPR() ? OPC##_EVEX : OPC)
111+
unsigned Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
111112
MachineInstr *NewMI =
112113
addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc)), TileSS)
113114
.addReg(SrcReg, getKillRegState(SrcMO.isKill()));
114115
MachineOperand &MO = NewMI->getOperand(2);
115116
MO.setReg(GR64Cand);
116117
MO.setIsKill(true);
117118
// tileloadd (%sp, %idx), %tmm
118-
Opc = X86::TILELOADD;
119+
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
120+
#undef GET_EGPR_IF_ENABLED
119121
NewMI = addFrameReference(BuildMI(MBB, MI, DL, TII->get(Opc), DstReg),
120122
TileSS);
121123
// restore %rax

llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f,+egpr --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefix=EGPR
34

45
define dso_local void @test1(ptr%buf) nounwind {
56
; CHECK-LABEL: test1:
@@ -63,6 +64,79 @@ define dso_local void @test1(ptr%buf) nounwind {
6364
; CHECK-NEXT: tilerelease
6465
; CHECK-NEXT: vzeroupper
6566
; CHECK-NEXT: retq
67+
;
68+
; EGPR-LABEL: test1:
69+
; EGPR: # %bb.0: # %entry
70+
; EGPR-NEXT: pushq %rbp # encoding: [0x55]
71+
; EGPR-NEXT: pushq %r15 # encoding: [0x41,0x57]
72+
; EGPR-NEXT: pushq %r14 # encoding: [0x41,0x56]
73+
; EGPR-NEXT: pushq %rbx # encoding: [0x53]
74+
; EGPR-NEXT: subq $4056, %rsp # encoding: [0x48,0x81,0xec,0xd8,0x0f,0x00,0x00]
75+
; EGPR-NEXT: # imm = 0xFD8
76+
; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
77+
; EGPR-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x44,0x24,0x0f]
78+
; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
79+
; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf0,0x03,0x00,0x00,0x08]
80+
; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd0,0x03,0x00,0x00,0x08,0x00]
81+
; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf1,0x03,0x00,0x00,0x08]
82+
; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd2,0x03,0x00,0x00,0x08,0x00]
83+
; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf2,0x03,0x00,0x00,0x08]
84+
; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd4,0x03,0x00,0x00,0x08,0x00]
85+
; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xf3,0x03,0x00,0x00,0x08]
86+
; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x84,0x24,0xd6,0x03,0x00,0x00,0x08,0x00]
87+
; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
88+
; EGPR-NEXT: movl $64, %eax # encoding: [0xb8,0x40,0x00,0x00,0x00]
89+
; EGPR-NEXT: movw $8, %bp # encoding: [0x66,0xbd,0x08,0x00]
90+
; EGPR-NEXT: tileloadd (%rdi,%rax), %tmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x1c,0x07]
91+
; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
92+
; EGPR-NEXT: testb %al, %al # encoding: [0x84,0xc0]
93+
; EGPR-NEXT: jne .LBB0_3 # encoding: [0x75,A]
94+
; EGPR-NEXT: # fixup A - offset: 1, value: .LBB0_3-1, kind: FK_PCRel_1
95+
; EGPR-NEXT: # %bb.1: # %loop.header.preheader
96+
; EGPR-NEXT: movq %rdi, %rbx # encoding: [0x48,0x89,0xfb]
97+
; EGPR-NEXT: xorl %r14d, %r14d # encoding: [0x45,0x31,0xf6]
98+
; EGPR-NEXT: movl $32, %r15d # encoding: [0x41,0xbf,0x20,0x00,0x00,0x00]
99+
; EGPR-NEXT: .p2align 4, 0x90
100+
; EGPR-NEXT: .LBB0_2: # %loop.header
101+
; EGPR-NEXT: # =>This Inner Loop Header: Depth=1
102+
; EGPR-NEXT: movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
103+
; EGPR-NEXT: tilestored %tmm3, 3024(%rsp,%rax) # 1024-byte Folded Spill
104+
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0xd0,0x0b,0x00,0x00]
105+
; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
106+
; EGPR-NEXT: callq foo # encoding: [0xe8,A,A,A,A]
107+
; EGPR-NEXT: # fixup A - offset: 1, value: foo-4, kind: reloc_branch_4byte_pcrel
108+
; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
109+
; EGPR-NEXT: movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
110+
; EGPR-NEXT: tileloadd 3024(%rsp,%rax), %tmm3 # 1024-byte Folded Reload
111+
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x9c,0x04,0xd0,0x0b,0x00,0x00]
112+
; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x3b]
113+
; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b]
114+
; EGPR-NEXT: # implicit-def: $rax
115+
; EGPR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
116+
; EGPR-NEXT: # encoding: [0x48,0x89,0x84,0x24,0xb8,0x03,0x00,0x00]
117+
; EGPR-NEXT: movabsq $64, %rax # encoding: [0x48,0xb8,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
118+
; EGPR-NEXT: tilestored %tmm3, 1024(%rsp,%rax) # 1024-byte Folded Spill
119+
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x9c,0x04,0x00,0x04,0x00,0x00]
120+
; EGPR-NEXT: tileloadd {{[-0-9]+}}(%r{{[sb]}}p), %tmm2 # 1024-byte Folded Reload
121+
; EGPR-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4b,0x94,0x24,0x00,0x04,0x00,0x00]
122+
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
123+
; EGPR-NEXT: # encoding: [0x48,0x8b,0x84,0x24,0xb8,0x03,0x00,0x00]
124+
; EGPR-NEXT: tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0]
125+
; EGPR-NEXT: tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b]
126+
; EGPR-NEXT: incl %r14d # encoding: [0x41,0xff,0xc6]
127+
; EGPR-NEXT: cmpw $100, %r14w # encoding: [0x66,0x41,0x83,0xfe,0x64]
128+
; EGPR-NEXT: jl .LBB0_2 # encoding: [0x7c,A]
129+
; EGPR-NEXT: # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
130+
; EGPR-NEXT: .LBB0_3: # %exit
131+
; EGPR-NEXT: addq $4056, %rsp # encoding: [0x48,0x81,0xc4,0xd8,0x0f,0x00,0x00]
132+
; EGPR-NEXT: # imm = 0xFD8
133+
; EGPR-NEXT: popq %rbx # encoding: [0x5b]
134+
; EGPR-NEXT: popq %r14 # encoding: [0x41,0x5e]
135+
; EGPR-NEXT: popq %r15 # encoding: [0x41,0x5f]
136+
; EGPR-NEXT: popq %rbp # encoding: [0x5d]
137+
; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
138+
; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
139+
; EGPR-NEXT: retq # encoding: [0xc3]
66140
entry:
67141
%t1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 8, ptr %buf, i64 64)
68142
br i1 undef, label %loop.header, label %exit
@@ -139,6 +213,60 @@ define dso_local void @test2(ptr%buf) nounwind {
139213
; CHECK-NEXT: tilerelease
140214
; CHECK-NEXT: vzeroupper
141215
; CHECK-NEXT: retq
216+
;
217+
; EGPR-LABEL: test2:
218+
; EGPR: # %bb.0: # %entry
219+
; EGPR-NEXT: pushq %rbp # encoding: [0x55]
220+
; EGPR-NEXT: pushq %r15 # encoding: [0x41,0x57]
221+
; EGPR-NEXT: pushq %r14 # encoding: [0x41,0x56]
222+
; EGPR-NEXT: pushq %rbx # encoding: [0x53]
223+
; EGPR-NEXT: subq $72, %rsp # encoding: [0x48,0x83,0xec,0x48]
224+
; EGPR-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
225+
; EGPR-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp) # encoding: [0x62,0xf1,0x7c,0x48,0x11,0x84,0x24,0x08,0x00,0x00,0x00]
226+
; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x08,0x01]
227+
; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x38,0x08]
228+
; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x18,0x08,0x00]
229+
; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x39,0x08]
230+
; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x1a,0x08,0x00]
231+
; EGPR-NEXT: movb $8, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x44,0x24,0x3a,0x08]
232+
; EGPR-NEXT: movw $8, {{[0-9]+}}(%rsp) # encoding: [0x66,0xc7,0x44,0x24,0x1c,0x08,0x00]
233+
; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x08]
234+
; EGPR-NEXT: movw $8, %bp # encoding: [0x66,0xbd,0x08,0x00]
235+
; EGPR-NEXT: tilezero %tmm0 # encoding: [0xc4,0xe2,0x7b,0x49,0xc0]
236+
; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
237+
; EGPR-NEXT: testb %al, %al # encoding: [0x84,0xc0]
238+
; EGPR-NEXT: jne .LBB1_3 # encoding: [0x75,A]
239+
; EGPR-NEXT: # fixup A - offset: 1, value: .LBB1_3-1, kind: FK_PCRel_1
240+
; EGPR-NEXT: # %bb.1: # %loop.header.preheader
241+
; EGPR-NEXT: movq %rdi, %rbx # encoding: [0x48,0x89,0xfb]
242+
; EGPR-NEXT: xorl %r14d, %r14d # encoding: [0x45,0x31,0xf6]
243+
; EGPR-NEXT: movl $32, %r15d # encoding: [0x41,0xbf,0x20,0x00,0x00,0x00]
244+
; EGPR-NEXT: .p2align 4, 0x90
245+
; EGPR-NEXT: .LBB1_2: # %loop.header
246+
; EGPR-NEXT: # =>This Inner Loop Header: Depth=1
247+
; EGPR-NEXT: tilezero %tmm0 # encoding: [0xc4,0xe2,0x7b,0x49,0xc0]
248+
; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
249+
; EGPR-NEXT: callq foo # encoding: [0xe8,A,A,A,A]
250+
; EGPR-NEXT: # fixup A - offset: 1, value: foo-4, kind: reloc_branch_4byte_pcrel
251+
; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x44,0x24,0x08]
252+
; EGPR-NEXT: tilezero %tmm2 # encoding: [0xc4,0xe2,0x7b,0x49,0xd0]
253+
; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x04,0x3b]
254+
; EGPR-NEXT: tileloadd (%rbx,%r15), %tmm1 # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7b,0x4b,0x0c,0x3b]
255+
; EGPR-NEXT: tdpbssd %tmm1, %tmm0, %tmm2 # encoding: [0xc4,0xe2,0x73,0x5e,0xd0]
256+
; EGPR-NEXT: tilestored %tmm2, (%rbx,%r15) # EVEX TO VEX Compression encoding: [0xc4,0xa2,0x7a,0x4b,0x14,0x3b]
257+
; EGPR-NEXT: incl %r14d # encoding: [0x41,0xff,0xc6]
258+
; EGPR-NEXT: cmpw $100, %r14w # encoding: [0x66,0x41,0x83,0xfe,0x64]
259+
; EGPR-NEXT: jl .LBB1_2 # encoding: [0x7c,A]
260+
; EGPR-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
261+
; EGPR-NEXT: .LBB1_3: # %exit
262+
; EGPR-NEXT: addq $72, %rsp # encoding: [0x48,0x83,0xc4,0x48]
263+
; EGPR-NEXT: popq %rbx # encoding: [0x5b]
264+
; EGPR-NEXT: popq %r14 # encoding: [0x41,0x5e]
265+
; EGPR-NEXT: popq %r15 # encoding: [0x41,0x5f]
266+
; EGPR-NEXT: popq %rbp # encoding: [0x5d]
267+
; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
268+
; EGPR-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
269+
; EGPR-NEXT: retq # encoding: [0xc3]
142270
entry:
143271
%t1 = tail call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 8)
144272
br i1 undef, label %loop.header, label %exit

0 commit comments

Comments
 (0)