Skip to content

Commit 9dcfb95

Browse files
committed
[X86] Add AVX2/SSE2 checks for AMX config buffer zeroing. NFC
1 parent 016eca8 commit 9dcfb95

File tree

1 file changed

+95
-33
lines changed

1 file changed

+95
-33
lines changed

llvm/test/CodeGen/X86/AMX/amx-config.ll

Lines changed: 95 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1,43 +1,105 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefix=AVX512
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx2 -verify-machineinstrs | FileCheck %s --check-prefix=AVX2
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -verify-machineinstrs | FileCheck %s --check-prefix=SSE2
35

46
@buf = dso_local global [1024 x i8] zeroinitializer, align 64
57
@buf2 = dso_local global [1024 x i8] zeroinitializer, align 64
68

79
; Function Attrs: nounwind uwtable
810
define dso_local void @test_api(i32 %0, i16 signext %1, i16 signext %2) {
9-
; CHECK-LABEL: test_api:
10-
; CHECK: # %bb.0:
11-
; CHECK-NEXT: testl %edi, %edi
12-
; CHECK-NEXT: movsbl %sil, %eax
13-
; CHECK-NEXT: vpxord %zmm0, %zmm0, %zmm0
14-
; CHECK-NEXT: vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
15-
; CHECK-NEXT: movb $1, -{{[0-9]+}}(%rsp)
16-
; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
17-
; CHECK-NEXT: movw %si, -{{[0-9]+}}(%rsp)
18-
; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
19-
; CHECK-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
20-
; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
21-
; CHECK-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
22-
; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
23-
; CHECK-NEXT: je .LBB0_2
24-
; CHECK-NEXT: # %bb.1:
25-
; CHECK-NEXT: movl $buf, %ecx
26-
; CHECK-NEXT: jmp .LBB0_3
27-
; CHECK-NEXT: .LBB0_2:
28-
; CHECK-NEXT: movl $buf2, %ecx
29-
; CHECK-NEXT: .LBB0_3:
30-
; CHECK-NEXT: movl $32, %edi
31-
; CHECK-NEXT: tileloadd (%rcx,%rdi), %tmm0
32-
; CHECK-NEXT: tileloadd (%rcx,%rdi), %tmm2
33-
; CHECK-NEXT: tileloadd (%rcx,%rdi), %tmm1
34-
; CHECK-NEXT: tdpbssd %tmm2, %tmm0, %tmm1
35-
; CHECK-NEXT: movl $buf, %ecx
36-
; CHECK-NEXT: movl $32, %esi
37-
; CHECK-NEXT: tilestored %tmm1, (%rcx,%rsi)
38-
; CHECK-NEXT: tilerelease
39-
; CHECK-NEXT: vzeroupper
40-
; CHECK-NEXT: retq
11+
; AVX512-LABEL: test_api:
12+
; AVX512: # %bb.0:
13+
; AVX512-NEXT: testl %edi, %edi
14+
; AVX512-NEXT: movsbl %sil, %eax
15+
; AVX512-NEXT: vpxord %zmm0, %zmm0, %zmm0
16+
; AVX512-NEXT: vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp)
17+
; AVX512-NEXT: movb $1, -{{[0-9]+}}(%rsp)
18+
; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
19+
; AVX512-NEXT: movw %si, -{{[0-9]+}}(%rsp)
20+
; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
21+
; AVX512-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
22+
; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
23+
; AVX512-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
24+
; AVX512-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
25+
; AVX512-NEXT: je .LBB0_2
26+
; AVX512-NEXT: # %bb.1:
27+
; AVX512-NEXT: movl $buf, %ecx
28+
; AVX512-NEXT: jmp .LBB0_3
29+
; AVX512-NEXT: .LBB0_2:
30+
; AVX512-NEXT: movl $buf2, %ecx
31+
; AVX512-NEXT: .LBB0_3:
32+
; AVX512-NEXT: movl $32, %edi
33+
; AVX512-NEXT: tileloadd (%rcx,%rdi), %tmm0
34+
; AVX512-NEXT: tileloadd (%rcx,%rdi), %tmm2
35+
; AVX512-NEXT: tileloadd (%rcx,%rdi), %tmm1
36+
; AVX512-NEXT: tdpbssd %tmm2, %tmm0, %tmm1
37+
; AVX512-NEXT: movl $buf, %ecx
38+
; AVX512-NEXT: movl $32, %esi
39+
; AVX512-NEXT: tilestored %tmm1, (%rcx,%rsi)
40+
; AVX512-NEXT: tilerelease
41+
; AVX512-NEXT: vzeroupper
42+
; AVX512-NEXT: retq
43+
;
44+
; AVX2-LABEL: test_api:
45+
; AVX2: # %bb.0:
46+
; AVX2-NEXT: testl %edi, %edi
47+
; AVX2-NEXT: movsbl %sil, %eax
48+
; AVX2-NEXT: movb $1, -{{[0-9]+}}(%rsp)
49+
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
50+
; AVX2-NEXT: movw %si, -{{[0-9]+}}(%rsp)
51+
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
52+
; AVX2-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
53+
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
54+
; AVX2-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
55+
; AVX2-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
56+
; AVX2-NEXT: je .LBB0_2
57+
; AVX2-NEXT: # %bb.1:
58+
; AVX2-NEXT: movl $buf, %ecx
59+
; AVX2-NEXT: jmp .LBB0_3
60+
; AVX2-NEXT: .LBB0_2:
61+
; AVX2-NEXT: movl $buf2, %ecx
62+
; AVX2-NEXT: .LBB0_3:
63+
; AVX2-NEXT: movl $32, %edi
64+
; AVX2-NEXT: tileloadd (%rcx,%rdi), %tmm0
65+
; AVX2-NEXT: tileloadd (%rcx,%rdi), %tmm2
66+
; AVX2-NEXT: tileloadd (%rcx,%rdi), %tmm1
67+
; AVX2-NEXT: tdpbssd %tmm2, %tmm0, %tmm1
68+
; AVX2-NEXT: movl $buf, %ecx
69+
; AVX2-NEXT: movl $32, %esi
70+
; AVX2-NEXT: tilestored %tmm1, (%rcx,%rsi)
71+
; AVX2-NEXT: tilerelease
72+
; AVX2-NEXT: retq
73+
;
74+
; SSE2-LABEL: test_api:
75+
; SSE2: # %bb.0:
76+
; SSE2-NEXT: testl %edi, %edi
77+
; SSE2-NEXT: movsbl %sil, %eax
78+
; SSE2-NEXT: movb $1, -{{[0-9]+}}(%rsp)
79+
; SSE2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
80+
; SSE2-NEXT: movw %si, -{{[0-9]+}}(%rsp)
81+
; SSE2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
82+
; SSE2-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
83+
; SSE2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
84+
; SSE2-NEXT: movw %dx, -{{[0-9]+}}(%rsp)
85+
; SSE2-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
86+
; SSE2-NEXT: je .LBB0_2
87+
; SSE2-NEXT: # %bb.1:
88+
; SSE2-NEXT: movl $buf, %ecx
89+
; SSE2-NEXT: jmp .LBB0_3
90+
; SSE2-NEXT: .LBB0_2:
91+
; SSE2-NEXT: movl $buf2, %ecx
92+
; SSE2-NEXT: .LBB0_3:
93+
; SSE2-NEXT: movl $32, %edi
94+
; SSE2-NEXT: tileloadd (%rcx,%rdi), %tmm0
95+
; SSE2-NEXT: tileloadd (%rcx,%rdi), %tmm2
96+
; SSE2-NEXT: tileloadd (%rcx,%rdi), %tmm1
97+
; SSE2-NEXT: tdpbssd %tmm2, %tmm0, %tmm1
98+
; SSE2-NEXT: movl $buf, %ecx
99+
; SSE2-NEXT: movl $32, %esi
100+
; SSE2-NEXT: tilestored %tmm1, (%rcx,%rsi)
101+
; SSE2-NEXT: tilerelease
102+
; SSE2-NEXT: retq
41103
%4 = icmp eq i32 %0, 0
42104
%5 = shl i16 %1, 8
43105
%6 = ashr exact i16 %5, 8

0 commit comments

Comments (0)