Skip to content

Commit e9c11c1

Browse files
committed
[X86] Zero AMX config buffer for non AVX512 cases.
Zero AMX config buffer for non AVX512 cases. Differential Revision: https://reviews.llvm.org/D96927
1 parent da59c2e commit e9c11c1

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

llvm/lib/Target/X86/X86PreTileConfig.cpp

Lines changed: 30 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -103,16 +103,44 @@ static void buildConfigMI(MachineBasicBlock::iterator MI, int FrameIdx,
103103
const X86Subtarget *ST) {
104104
auto *MBB = MI->getParent();
105105

106-
// FIXME: AMX should assume AVX512 enabled.
106+
// Zero stack slot.
107107
if (ST->hasAVX512()) {
108-
// Zero stack slot.
109108
Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
110109
BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VPXORDZrr), Zmm)
111110
.addReg(Zmm, RegState::Undef)
112111
.addReg(Zmm, RegState::Undef);
113112
addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSZmr)),
114113
FrameIdx)
115114
.addReg(Zmm);
115+
} else if (ST->hasAVX2()) {
116+
Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
117+
BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VPXORYrr), Ymm)
118+
.addReg(Ymm, RegState::Undef)
119+
.addReg(Ymm, RegState::Undef);
120+
addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSYmr)),
121+
FrameIdx)
122+
.addReg(Ymm);
123+
addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::VMOVUPSYmr)),
124+
FrameIdx, 32)
125+
.addReg(Ymm);
126+
} else {
127+
assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
128+
Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
129+
BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::PXORrr), Xmm)
130+
.addReg(Xmm, RegState::Undef)
131+
.addReg(Xmm, RegState::Undef);
132+
addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)),
133+
FrameIdx)
134+
.addReg(Xmm);
135+
addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)),
136+
FrameIdx, 16)
137+
.addReg(Xmm);
138+
addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)),
139+
FrameIdx, 32)
140+
.addReg(Xmm);
141+
addFrameReference(BuildMI(*MBB, MI, DebugLoc(), TII->get(X86::MOVUPSmr)),
142+
FrameIdx, 48)
143+
.addReg(Xmm);
116144
}
117145

118146
// build pseudo ldtilecfg

llvm/test/CodeGen/X86/AMX/amx-config.ll

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,9 @@ define dso_local void @test_api(i32 %0, i16 signext %1, i16 signext %2) {
4545
; AVX2: # %bb.0:
4646
; AVX2-NEXT: testl %edi, %edi
4747
; AVX2-NEXT: movsbl %sil, %eax
48+
; AVX2-NEXT: vxorps %ymm0, %ymm0, %ymm0
49+
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
50+
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
4851
; AVX2-NEXT: movb $1, -{{[0-9]+}}(%rsp)
4952
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
5053
; AVX2-NEXT: movw %si, -{{[0-9]+}}(%rsp)
@@ -69,12 +72,18 @@ define dso_local void @test_api(i32 %0, i16 signext %1, i16 signext %2) {
6972
; AVX2-NEXT: movl $32, %esi
7073
; AVX2-NEXT: tilestored %tmm1, (%rcx,%rsi)
7174
; AVX2-NEXT: tilerelease
75+
; AVX2-NEXT: vzeroupper
7276
; AVX2-NEXT: retq
7377
;
7478
; SSE2-LABEL: test_api:
7579
; SSE2: # %bb.0:
7680
; SSE2-NEXT: testl %edi, %edi
7781
; SSE2-NEXT: movsbl %sil, %eax
82+
; SSE2-NEXT: xorps %xmm0, %xmm0
83+
; SSE2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
84+
; SSE2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
85+
; SSE2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
86+
; SSE2-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
7887
; SSE2-NEXT: movb $1, -{{[0-9]+}}(%rsp)
7988
; SSE2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
8089
; SSE2-NEXT: movw %si, -{{[0-9]+}}(%rsp)

0 commit comments

Comments
 (0)