1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,ZNVER,AVX512BW-VNNI
3
- ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,ZNVER,AVX-VNNI
2
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,AVX512VL-VNNI
3
+ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,AVX-VNNI
4
4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+fast-dpwssd | FileCheck %s --check-prefixes=CHECK,AVX512-VNNI
5
5
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl,+fast-dpwssd | FileCheck %s --check-prefixes=CHECK,AVX512VL-VNNI
6
6
@@ -14,31 +14,11 @@ define <16 x i32> @vpdpwssd_test(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) {
14
14
}
15
15
16
16
define <16 x i32> @vpdpwssd_v16i32_accumulate(<32 x i16> %a0, <32 x i16> %a1, <16 x i32> %a2) {
17
- ; ZNVER-LABEL: vpdpwssd_v16i32_accumulate:
18
- ; ZNVER: # %bb.0:
19
- ; ZNVER-NEXT: vpdpwssd %zmm1, %zmm0, %zmm2
20
- ; ZNVER-NEXT: vmovdqa64 %zmm2, %zmm0
21
- ; ZNVER-NEXT: retq
22
- ;
23
- ; AVX512-VNNI-LABEL: vpdpwssd_v16i32_accumulate:
24
- ; AVX512-VNNI: # %bb.0:
25
- ; AVX512-VNNI-NEXT: vextracti64x4 $1, %zmm1, %ymm3
26
- ; AVX512-VNNI-NEXT: vextracti64x4 $1, %zmm0, %ymm4
27
- ; AVX512-VNNI-NEXT: vpmaddwd %ymm3, %ymm4, %ymm3
28
- ; AVX512-VNNI-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
29
- ; AVX512-VNNI-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
30
- ; AVX512-VNNI-NEXT: vpaddd %zmm2, %zmm0, %zmm0
31
- ; AVX512-VNNI-NEXT: retq
32
- ;
33
- ; AVX512VL-VNNI-LABEL: vpdpwssd_v16i32_accumulate:
34
- ; AVX512VL-VNNI: # %bb.0:
35
- ; AVX512VL-VNNI-NEXT: vextracti64x4 $1, %zmm1, %ymm3
36
- ; AVX512VL-VNNI-NEXT: vextracti64x4 $1, %zmm0, %ymm4
37
- ; AVX512VL-VNNI-NEXT: vpmaddwd %ymm3, %ymm4, %ymm3
38
- ; AVX512VL-VNNI-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
39
- ; AVX512VL-VNNI-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
40
- ; AVX512VL-VNNI-NEXT: vpaddd %zmm2, %zmm0, %zmm0
41
- ; AVX512VL-VNNI-NEXT: retq
17
+ ; CHECK-LABEL: vpdpwssd_v16i32_accumulate:
18
+ ; CHECK: # %bb.0:
19
+ ; CHECK-NEXT: vpdpwssd %zmm1, %zmm0, %zmm2
20
+ ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
21
+ ; CHECK-NEXT: retq
42
22
%x0 = sext <32 x i16> %a0 to <32 x i32>
43
23
%x1 = sext <32 x i16> %a1 to <32 x i32>
44
24
%m = mul nsw <32 x i32> %x0, %x1
@@ -50,11 +30,11 @@ define <16 x i32> @vpdpwssd_v16i32_accumulate(<32 x i16> %a0, <32 x i16> %a1, <1
50
30
}
51
31
52
32
define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x i32> %a2) {
53
- ; AVX512BW-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
54
- ; AVX512BW-VNNI: # %bb.0:
55
- ; AVX512BW-VNNI-NEXT: vpdpwssd %ymm1, %ymm0, %ymm2
56
- ; AVX512BW-VNNI-NEXT: vmovdqa %ymm2, %ymm0
57
- ; AVX512BW-VNNI-NEXT: retq
33
+ ; AVX512VL-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
34
+ ; AVX512VL-VNNI: # %bb.0:
35
+ ; AVX512VL-VNNI-NEXT: vpdpwssd %ymm1, %ymm0, %ymm2
36
+ ; AVX512VL-VNNI-NEXT: vmovdqa %ymm2, %ymm0
37
+ ; AVX512VL-VNNI-NEXT: retq
58
38
;
59
39
; AVX-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
60
40
; AVX-VNNI: # %bb.0:
@@ -67,12 +47,6 @@ define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x
67
47
; AVX512-VNNI-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
68
48
; AVX512-VNNI-NEXT: vpaddd %ymm2, %ymm0, %ymm0
69
49
; AVX512-VNNI-NEXT: retq
70
- ;
71
- ; AVX512VL-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
72
- ; AVX512VL-VNNI: # %bb.0:
73
- ; AVX512VL-VNNI-NEXT: vpdpwssd %ymm1, %ymm0, %ymm2
74
- ; AVX512VL-VNNI-NEXT: vmovdqa %ymm2, %ymm0
75
- ; AVX512VL-VNNI-NEXT: retq
76
50
%x0 = sext <16 x i16> %a0 to <16 x i32>
77
51
%x1 = sext <16 x i16> %a1 to <16 x i32>
78
52
%m = mul nsw <16 x i32> %x0, %x1
@@ -84,11 +58,11 @@ define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x
84
58
}
85
59
86
60
define <4 x i32> @vpdpwssd_v4i32_accumulate(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
87
- ; AVX512BW-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
88
- ; AVX512BW-VNNI: # %bb.0:
89
- ; AVX512BW-VNNI-NEXT: vpdpwssd %xmm1, %xmm0, %xmm2
90
- ; AVX512BW-VNNI-NEXT: vmovdqa %xmm2, %xmm0
91
- ; AVX512BW-VNNI-NEXT: retq
61
+ ; AVX512VL-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
62
+ ; AVX512VL-VNNI: # %bb.0:
63
+ ; AVX512VL-VNNI-NEXT: vpdpwssd %xmm1, %xmm0, %xmm2
64
+ ; AVX512VL-VNNI-NEXT: vmovdqa %xmm2, %xmm0
65
+ ; AVX512VL-VNNI-NEXT: retq
92
66
;
93
67
; AVX-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
94
68
; AVX-VNNI: # %bb.0:
@@ -101,12 +75,6 @@ define <4 x i32> @vpdpwssd_v4i32_accumulate(<8 x i16> %a0, <8 x i16> %a1, <4 x i
101
75
; AVX512-VNNI-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
102
76
; AVX512-VNNI-NEXT: vpaddd %xmm2, %xmm0, %xmm0
103
77
; AVX512-VNNI-NEXT: retq
104
- ;
105
- ; AVX512VL-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
106
- ; AVX512VL-VNNI: # %bb.0:
107
- ; AVX512VL-VNNI-NEXT: vpdpwssd %xmm1, %xmm0, %xmm2
108
- ; AVX512VL-VNNI-NEXT: vmovdqa %xmm2, %xmm0
109
- ; AVX512VL-VNNI-NEXT: retq
110
78
%x0 = sext <8 x i16> %a0 to <8 x i32>
111
79
%x1 = sext <8 x i16> %a1 to <8 x i32>
112
80
%m = mul nsw <8 x i32> %x0, %x1
0 commit comments