Skip to content

[X86][APX] Support APX + AMX-MOVRS/AMX-TRANSPOSE #123267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions llvm/lib/Target/X86/X86ExpandPseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,10 +578,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned Opc;
switch (Opcode) {
case X86::PTILELOADDRSV:
Opc = X86::TILELOADDRS;
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
break;
case X86::PTILELOADDRST1V:
Opc = X86::TILELOADDRST1;
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
break;
case X86::PTILELOADDV:
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADD);
Expand Down Expand Up @@ -737,28 +737,28 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned Opc;
switch (Opcode) {
case X86::PT2RPNTLVWZ0V:
Opc = X86::T2RPNTLVWZ0;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
break;
case X86::PT2RPNTLVWZ0T1V:
Opc = X86::T2RPNTLVWZ0T1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
break;
case X86::PT2RPNTLVWZ1V:
Opc = X86::T2RPNTLVWZ1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
break;
case X86::PT2RPNTLVWZ1T1V:
Opc = X86::T2RPNTLVWZ1T1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
break;
case X86::PT2RPNTLVWZ0RSV:
Opc = X86::T2RPNTLVWZ0RS;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
break;
case X86::PT2RPNTLVWZ0RST1V:
Opc = X86::T2RPNTLVWZ0RST1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
break;
case X86::PT2RPNTLVWZ1RSV:
Opc = X86::T2RPNTLVWZ1RS;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
break;
case X86::PT2RPNTLVWZ1RST1V:
Opc = X86::T2RPNTLVWZ1RST1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
break;
default:
llvm_unreachable("Impossible Opcode!");
Expand Down
24 changes: 13 additions & 11 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37800,14 +37800,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED:
Opc = GET_EGPR_IF_ENABLED(X86::TILESTORED);
break;
#undef GET_EGPR_IF_ENABLED
case X86::PTILELOADDRS:
Opc = X86::TILELOADDRS;
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRS);
break;
case X86::PTILELOADDRST1:
Opc = X86::TILELOADDRST1;
Opc = GET_EGPR_IF_ENABLED(X86::TILELOADDRST1);
break;
}
#undef GET_EGPR_IF_ENABLED

MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
unsigned CurOp = 0;
Expand Down Expand Up @@ -37838,34 +37838,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PT2RPNTLVWZ1RST1: {
const DebugLoc &DL = MI.getDebugLoc();
unsigned Opc;
#define GET_EGPR_IF_ENABLED(OPC) (Subtarget.hasEGPR() ? OPC##_EVEX : OPC)
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instruction!");
case X86::PT2RPNTLVWZ0:
Opc = X86::T2RPNTLVWZ0;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0);
break;
case X86::PT2RPNTLVWZ0T1:
Opc = X86::T2RPNTLVWZ0T1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0T1);
break;
case X86::PT2RPNTLVWZ1:
Opc = X86::T2RPNTLVWZ1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1);
break;
case X86::PT2RPNTLVWZ1T1:
Opc = X86::T2RPNTLVWZ1T1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1T1);
break;
case X86::PT2RPNTLVWZ0RS:
Opc = X86::T2RPNTLVWZ0RS;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RS);
break;
case X86::PT2RPNTLVWZ0RST1:
Opc = X86::T2RPNTLVWZ0RST1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ0RST1);
break;
case X86::PT2RPNTLVWZ1RS:
Opc = X86::T2RPNTLVWZ1RS;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RS);
break;
case X86::PT2RPNTLVWZ1RST1:
Opc = X86::T2RPNTLVWZ1RST1;
Opc = GET_EGPR_IF_ENABLED(X86::T2RPNTLVWZ1RST1);
break;
}
#undef GET_EGPR_IF_ENABLED
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
MIB.addReg(TMMImmToTMMPair(MI.getOperand(0).getImm()), RegState::Define);

Expand Down
75 changes: 35 additions & 40 deletions llvm/lib/Target/X86/X86InstrAMX.td
Original file line number Diff line number Diff line change
Expand Up @@ -345,26 +345,33 @@ let Predicates = [HasAMXTILE, In64BitMode], isPseudo = true, SchedRW = [WriteSys
def PTILEPAIRLOAD : PseudoI<(outs TILEPair:$dst), (ins opaquemem:$src), []>;
}

let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
let SchedRW = [WriteSystem] in {
def T2RPNTLVWZ0 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0\t{$src, $dst|$dst, $src}",
[]>, VEX, WIG, T8,PS;
// Emits the four t2rpntlvw* instruction records for one encoding flavor.
//   op1    - opcode byte used by the records without the T1 infix.
//   op2    - opcode byte used by the records with the T1 infix.
//   rs     - string pasted into each record name after Z0/Z1; its lower-cased
//            form (via !tolower) is pasted into the assembly mnemonic.
//   suffix - string pasted onto the end of each record name only; it never
//            appears in the assembly mnemonic.
// Every record uses the MRMSrcMemFSIB form, takes a sibmem input ($src) and
// produces a TILEPair output ($dst), and carries no selection pattern ([]).
multiclass T2RPNTLVW_Base<bits<8> op1, bits<8> op2, string rs, string suffix> {
// Z0 records are tagged PS.
def Z0#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
"t2rpntlvwz0" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PS;
def Z0#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
"t2rpntlvwz0" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PS;
// Z1 records reuse the same opcode bytes but are tagged PD instead of PS.
def Z1#rs#suffix : I<op1, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
"t2rpntlvwz1" #!tolower(rs)# "\t{$src, $dst|$dst, $src}", []>, PD;
def Z1#rs#T1#suffix : I<op2, MRMSrcMemFSIB, (outs TILEPair:$dst), (ins sibmem:$src),
"t2rpntlvwz1" #!tolower(rs)# "t1\t{$src, $dst|$dst, $src}", []>, PD;
}

def T2RPNTLVWZ0T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz0t1\t{$src, $dst|$dst, $src}",
[]>, VEX, T8,PS;
let Predicates = [HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "">, T8, VEX;

def T2RPNTLVWZ1 : I<0x6e, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1\t{$src, $dst|$dst, $src}",
[]>, VEX, T8,PD;
let Predicates = [HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
defm T2RPNTLVW : T2RPNTLVW_Base<0x6e, 0x6f, "", "_EVEX">, T8, EVEX, NoCD8;

def T2RPNTLVWZ1T1 : I<0x6f, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src), "t2rpntlvwz1t1\t{$src, $dst|$dst, $src}",
[]>, VEX, T8,PD;
let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in
defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "">, T_MAP5, VEX;

let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
defm T2RPNTLVW : T2RPNTLVW_Base<0xf8, 0xf9, "RS", "_EVEX">, T_MAP5, EVEX, NoCD8;

let Predicates = [HasAMXTRANSPOSE, In64BitMode] in {
let SchedRW = [WriteSystem] in {
def TTRANSPOSED : I<0x5f, MRMSrcReg, (outs TILE:$dst), (ins TILE:$src),
"ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8,XS;
"ttransposed\t{$src, $dst|$dst, $src}", []>, VEX, T8, XS;
let isPseudo = true in {
def PT2RPNTLVWZ0V : PseudoI<(outs TILEPair:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
Expand Down Expand Up @@ -491,22 +498,6 @@ let Predicates = [HasAMXCOMPLEX, HasAMXTRANSPOSE, In64BitMode], SchedRW = [Write
}

let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSystem] in {
def T2RPNTLVWZ0RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src1),
"t2rpntlvwz0rs\t{$src1, $dst|$dst, $src1}",
[]>, VEX, T_MAP5;
def T2RPNTLVWZ0RST1 : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src1),
"t2rpntlvwz0rst1\t{$src1, $dst|$dst, $src1}",
[]>, VEX, T_MAP5;
def T2RPNTLVWZ1RS : I<0xf8, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src1),
"t2rpntlvwz1rs\t{$src1, $dst|$dst, $src1}",
[]>, VEX, T_MAP5, PD;
def T2RPNTLVWZ1RST1 : I<0xf9, MRMSrcMemFSIB, (outs TILEPair:$dst),
(ins sibmem:$src1),
"t2rpntlvwz1rst1\t{$src1, $dst|$dst, $src1}",
[]>, VEX, T_MAP5, PD;
let isPseudo = true in {
def PT2RPNTLVWZ0RSV : PseudoI<(outs TILEPair:$dst),
(ins GR16:$src1, GR16:$src2, GR16:$src3, opaquemem:$src4),
Expand All @@ -529,16 +520,20 @@ let Predicates = [HasAMXMOVRS, HasAMXTRANSPOSE, In64BitMode], SchedRW = [WriteSy
}
} // HasAMXMOVRS, HasAMXTRANSPOSE

let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
def TILELOADDRS : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
(ins sibmem:$src1),
"tileloaddrs\t{$src1, $dst|$dst, $src1}",
[]>, VEX, T8, XD;
def TILELOADDRST1 : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst),
(ins sibmem:$src1),
"tileloaddrst1\t{$src1, $dst|$dst, $src1}",
[]>, VEX, T8, PD;
// Emits the two tileloaddrs* instruction records for one encoding flavor.
//   suffix - string used to form the record names (suffix and T1#suffix, each
//            prefixed by the name given at the defm site); it does not appear
//            in the assembly mnemonic.
// Both records use opcode 0x4a in the MRMSrcMemFSIB form, take a sibmem input
// ($src1), produce a TILE output ($dst), and carry no selection pattern ([]).
multiclass TILELOADDRS_Base<string suffix> {
// Plain form: tagged T8, XD.
def suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
"tileloaddrs\t{$src1, $dst|$dst, $src1}", []>, T8, XD;
// T1 form: same opcode, distinguished by the PD tag instead of XD.
def T1#suffix : I<0x4a, MRMSrcMemFSIB, (outs TILE:$dst), (ins sibmem:$src1),
"tileloaddrst1\t{$src1, $dst|$dst, $src1}", []>, T8, PD;
}

let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in
defm TILELOADDRS : TILELOADDRS_Base<"">, VEX;

let Predicates = [HasAMXMOVRS, HasEGPR, In64BitMode], SchedRW = [WriteSystem] in
defm TILELOADDRS : TILELOADDRS_Base<"_EVEX">, EVEX, NoCD8;

let Predicates = [HasAMXMOVRS, In64BitMode], SchedRW = [WriteSystem] in {
let isPseudo = true, mayLoad = 1 in {
def PTILELOADDRSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
GR16:$src2,
Expand Down
89 changes: 89 additions & 0 deletions llvm/test/CodeGen/X86/amx_movrs_intrinsics.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs | FileCheck %s
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-movrs,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR

define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-LABEL: test_amx_internal:
Expand Down Expand Up @@ -35,6 +36,44 @@ define void @test_amx_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
;
; EGPR-LABEL: test_amx_internal:
; EGPR: # %bb.0: # %entry
; EGPR-NEXT: pushq %rbp # encoding: [0x55]
; EGPR-NEXT: .cfi_def_cfa_offset 16
; EGPR-NEXT: .cfi_offset %rbp, -16
; EGPR-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
; EGPR-NEXT: .cfi_def_cfa_register %rbp
; EGPR-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
; EGPR-NEXT: # imm = 0xFC00
; EGPR-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
; EGPR-NEXT: # imm = 0xC00
; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
; EGPR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; EGPR-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
; EGPR-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
; EGPR-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
; EGPR-NEXT: # implicit-def: $al
; EGPR-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
; EGPR-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
; EGPR-NEXT: tileloaddrs (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x04,0x32]
; EGPR-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
; EGPR-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
; EGPR-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
; EGPR-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
; EGPR-NEXT: popq %rbp # encoding: [0x5d]
; EGPR-NEXT: .cfi_def_cfa %rsp, 8
; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrs64.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
Expand All @@ -48,6 +87,12 @@ define void @test_amx_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: tileloaddrs (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
;
; EGPR-LABEL: test_amx_old:
; EGPR: # %bb.0: # %entry
; EGPR-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
; EGPR-NEXT: tileloaddrs (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0x4a,0x14,0x02]
; EGPR-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrs64(i8 2, ptr %buf, i64 32)
ret void
Expand Down Expand Up @@ -88,6 +133,44 @@ define void @test_amx_t1_internal(i16 %m, i16 %n, ptr %buf, i64 %s) {
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: tilerelease
; CHECK-NEXT: retq
;
; EGPR-LABEL: test_amx_t1_internal:
; EGPR: # %bb.0: # %entry
; EGPR-NEXT: pushq %rbp # encoding: [0x55]
; EGPR-NEXT: .cfi_def_cfa_offset 16
; EGPR-NEXT: .cfi_offset %rbp, -16
; EGPR-NEXT: movq %rsp, %rbp # encoding: [0x48,0x89,0xe5]
; EGPR-NEXT: .cfi_def_cfa_register %rbp
; EGPR-NEXT: andq $-1024, %rsp # encoding: [0x48,0x81,0xe4,0x00,0xfc,0xff,0xff]
; EGPR-NEXT: # imm = 0xFC00
; EGPR-NEXT: subq $3072, %rsp # encoding: [0x48,0x81,0xec,0x00,0x0c,0x00,0x00]
; EGPR-NEXT: # imm = 0xC00
; EGPR-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0]
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xc0,0x03,0x00,0x00]
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xd0,0x03,0x00,0x00]
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xe0,0x03,0x00,0x00]
; EGPR-NEXT: movups %xmm0, {{[0-9]+}}(%rsp) # encoding: [0x0f,0x11,0x84,0x24,0xf0,0x03,0x00,0x00]
; EGPR-NEXT: movb $1, {{[0-9]+}}(%rsp) # encoding: [0xc6,0x84,0x24,0xc0,0x03,0x00,0x00,0x01]
; EGPR-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: # encoding: [0x48,0x89,0x8c,0x24,0xb8,0x03,0x00,0x00]
; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; EGPR-NEXT: # encoding: [0x48,0x8b,0xb4,0x24,0xb8,0x03,0x00,0x00]
; EGPR-NEXT: movw %ax, %cx # encoding: [0x66,0x89,0xc1]
; EGPR-NEXT: movw %di, %ax # encoding: [0x66,0x89,0xf8]
; EGPR-NEXT: # implicit-def: $al
; EGPR-NEXT: movb %al, {{[0-9]+}}(%rsp) # encoding: [0x88,0x84,0x24,0xf0,0x03,0x00,0x00]
; EGPR-NEXT: movw %cx, {{[0-9]+}}(%rsp) # encoding: [0x66,0x89,0x8c,0x24,0xd0,0x03,0x00,0x00]
; EGPR-NEXT: ldtilecfg {{[0-9]+}}(%rsp) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x78,0x49,0x84,0x24,0xc0,0x03,0x00,0x00]
; EGPR-NEXT: tileloaddrst1 (%rdx,%rsi), %tmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x04,0x32]
; EGPR-NEXT: movl $64, %esi # encoding: [0xbe,0x40,0x00,0x00,0x00]
; EGPR-NEXT: leaq {{[0-9]+}}(%rsp), %rdx # encoding: [0x48,0x8d,0x94,0x24,0x00,0x04,0x00,0x00]
; EGPR-NEXT: tilestored %tmm0, (%rdx,%rsi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0x4b,0x04,0x32]
; EGPR-NEXT: movq %rbp, %rsp # encoding: [0x48,0x89,0xec]
; EGPR-NEXT: popq %rbp # encoding: [0x5d]
; EGPR-NEXT: .cfi_def_cfa %rsp, 8
; EGPR-NEXT: tilerelease # encoding: [0xc4,0xe2,0x78,0x49,0xc0]
; EGPR-NEXT: retq # encoding: [0xc3]
entry:
%t1 = call x86_amx @llvm.x86.tileloaddrst164.internal(i16 %m, i16 %n, ptr %buf, i64 %s)
%t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1)
Expand All @@ -101,6 +184,12 @@ define void @test_amx_t1_old(i16 %m, i16 %n, ptr %buf) {
; CHECK-NEXT: movl $32, %eax
; CHECK-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2
; CHECK-NEXT: retq
;
; EGPR-LABEL: test_amx_t1_old:
; EGPR: # %bb.0: # %entry
; EGPR-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
; EGPR-NEXT: tileloaddrst1 (%rdx,%rax), %tmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x4a,0x14,0x02]
; EGPR-NEXT: retq # encoding: [0xc3]
entry:
call void @llvm.x86.tileloaddrst164(i8 2, ptr %buf, i64 32)
ret void
Expand Down
Loading
Loading