Skip to content

Commit 140680c

Browse files
committed
[X86] Add peephole for (add (concat_vectors vpmaddwd, vpmaddwd)) -> vpdpwssd on VNNI targets
Cleanup for #118433
1 parent d6ec7c8 commit 140680c

File tree

2 files changed

+34
-49
lines changed

2 files changed

+34
-49
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56847,6 +56847,23 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
5684756847
}
5684856848
}
5684956849

56850+
// Peephole for 512-bit VPDPWSSD on non-VLX targets.
56851+
// TODO: Should this be part of matchPMADDWD/matchPMADDWD_2?
56852+
if (Subtarget.hasVNNI() && VT == MVT::v16i32) {
56853+
using namespace SDPatternMatch;
56854+
SDValue Accum, Lo0, Lo1, Hi0, Hi1;
56855+
if (sd_match(N, m_Add(m_Value(Accum),
56856+
m_Node(ISD::CONCAT_VECTORS,
56857+
m_BinOp(X86ISD::VPMADDWD, m_Value(Lo0),
56858+
m_Value(Lo1)),
56859+
m_BinOp(X86ISD::VPMADDWD, m_Value(Hi0),
56860+
m_Value(Hi1)))))) {
56861+
return DAG.getNode(X86ISD::VPDPWSSD, DL, VT, Accum,
56862+
concatSubVectors(Lo0, Hi0, DAG, DL),
56863+
concatSubVectors(Lo1, Hi1, DAG, DL));
56864+
}
56865+
}
56866+
5685056867
// Fold ADD(ADC(Y,0,W),X) -> ADC(X,Y,W)
5685156868
if (Op0.getOpcode() == X86ISD::ADC && Op0->hasOneUse() &&
5685256869
X86::isZeroNode(Op0.getOperand(1))) {

llvm/test/CodeGen/X86/vpdpwssd.ll

Lines changed: 17 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,ZNVER,AVX512BW-VNNI
3-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,ZNVER,AVX-VNNI
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,AVX512VL-VNNI
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,AVX-VNNI
44
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+fast-dpwssd | FileCheck %s --check-prefixes=CHECK,AVX512-VNNI
55
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl,+fast-dpwssd | FileCheck %s --check-prefixes=CHECK,AVX512VL-VNNI
66

@@ -14,31 +14,11 @@ define <16 x i32> @vpdpwssd_test(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) {
1414
}
1515

1616
define <16 x i32> @vpdpwssd_v16i32_accumulate(<32 x i16> %a0, <32 x i16> %a1, <16 x i32> %a2) {
17-
; ZNVER-LABEL: vpdpwssd_v16i32_accumulate:
18-
; ZNVER: # %bb.0:
19-
; ZNVER-NEXT: vpdpwssd %zmm1, %zmm0, %zmm2
20-
; ZNVER-NEXT: vmovdqa64 %zmm2, %zmm0
21-
; ZNVER-NEXT: retq
22-
;
23-
; AVX512-VNNI-LABEL: vpdpwssd_v16i32_accumulate:
24-
; AVX512-VNNI: # %bb.0:
25-
; AVX512-VNNI-NEXT: vextracti64x4 $1, %zmm1, %ymm3
26-
; AVX512-VNNI-NEXT: vextracti64x4 $1, %zmm0, %ymm4
27-
; AVX512-VNNI-NEXT: vpmaddwd %ymm3, %ymm4, %ymm3
28-
; AVX512-VNNI-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
29-
; AVX512-VNNI-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
30-
; AVX512-VNNI-NEXT: vpaddd %zmm2, %zmm0, %zmm0
31-
; AVX512-VNNI-NEXT: retq
32-
;
33-
; AVX512VL-VNNI-LABEL: vpdpwssd_v16i32_accumulate:
34-
; AVX512VL-VNNI: # %bb.0:
35-
; AVX512VL-VNNI-NEXT: vextracti64x4 $1, %zmm1, %ymm3
36-
; AVX512VL-VNNI-NEXT: vextracti64x4 $1, %zmm0, %ymm4
37-
; AVX512VL-VNNI-NEXT: vpmaddwd %ymm3, %ymm4, %ymm3
38-
; AVX512VL-VNNI-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
39-
; AVX512VL-VNNI-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0
40-
; AVX512VL-VNNI-NEXT: vpaddd %zmm2, %zmm0, %zmm0
41-
; AVX512VL-VNNI-NEXT: retq
17+
; CHECK-LABEL: vpdpwssd_v16i32_accumulate:
18+
; CHECK: # %bb.0:
19+
; CHECK-NEXT: vpdpwssd %zmm1, %zmm0, %zmm2
20+
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
21+
; CHECK-NEXT: retq
4222
%x0 = sext <32 x i16> %a0 to <32 x i32>
4323
%x1 = sext <32 x i16> %a1 to <32 x i32>
4424
%m = mul nsw <32 x i32> %x0, %x1
@@ -50,11 +30,11 @@ define <16 x i32> @vpdpwssd_v16i32_accumulate(<32 x i16> %a0, <32 x i16> %a1, <1
5030
}
5131

5232
define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x i32> %a2) {
53-
; AVX512BW-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
54-
; AVX512BW-VNNI: # %bb.0:
55-
; AVX512BW-VNNI-NEXT: vpdpwssd %ymm1, %ymm0, %ymm2
56-
; AVX512BW-VNNI-NEXT: vmovdqa %ymm2, %ymm0
57-
; AVX512BW-VNNI-NEXT: retq
33+
; AVX512VL-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
34+
; AVX512VL-VNNI: # %bb.0:
35+
; AVX512VL-VNNI-NEXT: vpdpwssd %ymm1, %ymm0, %ymm2
36+
; AVX512VL-VNNI-NEXT: vmovdqa %ymm2, %ymm0
37+
; AVX512VL-VNNI-NEXT: retq
5838
;
5939
; AVX-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
6040
; AVX-VNNI: # %bb.0:
@@ -67,12 +47,6 @@ define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x
6747
; AVX512-VNNI-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
6848
; AVX512-VNNI-NEXT: vpaddd %ymm2, %ymm0, %ymm0
6949
; AVX512-VNNI-NEXT: retq
70-
;
71-
; AVX512VL-VNNI-LABEL: vpdpwssd_v8i32_accumulate:
72-
; AVX512VL-VNNI: # %bb.0:
73-
; AVX512VL-VNNI-NEXT: vpdpwssd %ymm1, %ymm0, %ymm2
74-
; AVX512VL-VNNI-NEXT: vmovdqa %ymm2, %ymm0
75-
; AVX512VL-VNNI-NEXT: retq
7650
%x0 = sext <16 x i16> %a0 to <16 x i32>
7751
%x1 = sext <16 x i16> %a1 to <16 x i32>
7852
%m = mul nsw <16 x i32> %x0, %x1
@@ -84,11 +58,11 @@ define <8 x i32> @vpdpwssd_v8i32_accumulate(<16 x i16> %a0, <16 x i16> %a1, <8 x
8458
}
8559

8660
define <4 x i32> @vpdpwssd_v4i32_accumulate(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
87-
; AVX512BW-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
88-
; AVX512BW-VNNI: # %bb.0:
89-
; AVX512BW-VNNI-NEXT: vpdpwssd %xmm1, %xmm0, %xmm2
90-
; AVX512BW-VNNI-NEXT: vmovdqa %xmm2, %xmm0
91-
; AVX512BW-VNNI-NEXT: retq
61+
; AVX512VL-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
62+
; AVX512VL-VNNI: # %bb.0:
63+
; AVX512VL-VNNI-NEXT: vpdpwssd %xmm1, %xmm0, %xmm2
64+
; AVX512VL-VNNI-NEXT: vmovdqa %xmm2, %xmm0
65+
; AVX512VL-VNNI-NEXT: retq
9266
;
9367
; AVX-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
9468
; AVX-VNNI: # %bb.0:
@@ -101,12 +75,6 @@ define <4 x i32> @vpdpwssd_v4i32_accumulate(<8 x i16> %a0, <8 x i16> %a1, <4 x i
10175
; AVX512-VNNI-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0
10276
; AVX512-VNNI-NEXT: vpaddd %xmm2, %xmm0, %xmm0
10377
; AVX512-VNNI-NEXT: retq
104-
;
105-
; AVX512VL-VNNI-LABEL: vpdpwssd_v4i32_accumulate:
106-
; AVX512VL-VNNI: # %bb.0:
107-
; AVX512VL-VNNI-NEXT: vpdpwssd %xmm1, %xmm0, %xmm2
108-
; AVX512VL-VNNI-NEXT: vmovdqa %xmm2, %xmm0
109-
; AVX512VL-VNNI-NEXT: retq
11078
%x0 = sext <8 x i16> %a0 to <8 x i32>
11179
%x1 = sext <8 x i16> %a1 to <8 x i32>
11280
%m = mul nsw <8 x i32> %x0, %x1

0 commit comments

Comments
 (0)