Skip to content

Commit 4c50112

Browse files
committed
[AArch64] Add patterns for 64bit vector addp
This extends the existing patterns for addp to 64-bit outputs with a single input. Whilst the general pattern is similar to the 128-bit patterns (add(uzp1(extract_lo, extract_hi), uzp2(extract_lo, extract_hi))), by this late stage other optimizations have already turned the first uzp1 into a trunc and the second into extract(uzp2) with undef. Fixes #109108
1 parent 4ec4ac1 commit 4c50112

File tree

2 files changed

+38
-4
lines changed

2 files changed

+38
-4
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9634,6 +9634,18 @@ def : Pat<(v16i8 (add (AArch64uzp1 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)),
96349634
(AArch64uzp2 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)))),
96359635
(v16i8 (ADDPv16i8 $Rn, $Rm))>;
96369636

9637+
// 64-bit pairwise add of a single 128-bit input, v2i32 result.
// Matches add(zip1(lo, hi), zip2(lo, hi)), where lo and hi are the
// extract_subvector of $Rn at element index 0 and 2 respectively.
// Selects ADDPv4i32 with $Rn as both operands and takes the dsub
// subregister of the result.
def : Pat<(v2i32 (add (AArch64zip1 (extract_subvector (v4i32 FPR128:$Rn), (i64 0)),
9638+
(extract_subvector (v4i32 FPR128:$Rn), (i64 2))),
9639+
(AArch64zip2 (extract_subvector (v4i32 FPR128:$Rn), (i64 0)),
9640+
(extract_subvector (v4i32 FPR128:$Rn), (i64 2))))),
9641+
(EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub)>;
9642+
// 64-bit pairwise add of a single 128-bit input, v4i16 result.
// The first add operand is a trunc of $Rn bitconverted to v4i32; the
// second is the subvector at index 0 of uzp2($Rn as v8i16, undef).
// Selects ADDPv8i16 with $Rn as both operands and takes the dsub
// subregister of the result.
def : Pat<(v4i16 (add (trunc (v4i32 (bitconvert FPR128:$Rn))),
9643+
(extract_subvector (AArch64uzp2 (v8i16 FPR128:$Rn), undef), (i64 0)))),
9644+
(EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub)>;
9645+
// 64-bit pairwise add of a single 128-bit input, v8i8 result.
// The first add operand is a trunc of $Rn bitconverted to v8i16; the
// second is the subvector at index 0 of uzp2($Rn as v16i8, undef).
// Selects ADDPv16i8 with $Rn as both operands and takes the dsub
// subregister of the result.
def : Pat<(v8i8 (add (trunc (v8i16 (bitconvert FPR128:$Rn))),
9646+
(extract_subvector (AArch64uzp2 (v16i8 FPR128:$Rn), undef), (i64 0)))),
9647+
(EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub)>;
9648+
96379649
// Floating-point pairwise add, v2f64: fadd(zip1($Rn, $Rm), zip2($Rn, $Rm))
// over two 128-bit inputs selects FADDPv2f64 $Rn, $Rm.
def : Pat<(v2f64 (fadd (AArch64zip1 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)),
96389650
(AArch64zip2 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)))),
96399651
(v2f64 (FADDPv2f64 $Rn, $Rm))>;

llvm/test/CodeGen/AArch64/addp-shuffle.ll

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,8 @@ define <4 x i32> @deinterleave_shuffle_v8i32_c(<8 x i32> %a) {
2727
define <2 x i32> @deinterleave_shuffle_v4i32(<4 x i32> %a) {
2828
; CHECK-LABEL: deinterleave_shuffle_v4i32:
2929
; CHECK: // %bb.0:
30-
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
31-
; CHECK-NEXT: zip1 v2.2s, v0.2s, v1.2s
32-
; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s
33-
; CHECK-NEXT: add v0.2s, v2.2s, v0.2s
30+
; CHECK-NEXT: addp v0.4s, v0.4s, v0.4s
31+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
3432
; CHECK-NEXT: ret
3533
%r0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
3634
%r1 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
@@ -49,6 +47,18 @@ define <8 x i16> @deinterleave_shuffle_v16i16(<16 x i16> %a) {
4947
ret <8 x i16> %o
5048
}
5149

50+
; Deinterleaves one <8 x i16> input into its even lanes (0,2,4,6) and odd
; lanes (1,3,5,7) and adds the two <4 x i16> halves.
; The CHECK lines expect a single addp on v0.8h with v0 as both sources,
; followed only by the $d0/$q0 kill annotation and ret.
define <4 x i16> @deinterleave_shuffle_v8i16(<8 x i16> %a) {
51+
; CHECK-LABEL: deinterleave_shuffle_v8i16:
52+
; CHECK: // %bb.0:
53+
; CHECK-NEXT: addp v0.8h, v0.8h, v0.8h
54+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
55+
; CHECK-NEXT: ret
56+
%r0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
57+
%r1 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
58+
%o = add <4 x i16> %r0, %r1
59+
ret <4 x i16> %o
60+
}
61+
5262
define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) {
5363
; CHECK-LABEL: deinterleave_shuffle_v32i8:
5464
; CHECK: // %bb.0:
@@ -60,6 +70,18 @@ define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) {
6070
ret <16 x i8> %o
6171
}
6272

73+
; Deinterleaves one <16 x i8> input into its even lanes (0,2,...,14) and odd
; lanes (1,3,...,15) and adds the two <8 x i8> halves.
; The CHECK lines expect a single addp on v0.16b with v0 as both sources,
; followed only by the $d0/$q0 kill annotation and ret.
define <8 x i8> @deinterleave_shuffle_v16i8(<16 x i8> %a) {
74+
; CHECK-LABEL: deinterleave_shuffle_v16i8:
75+
; CHECK: // %bb.0:
76+
; CHECK-NEXT: addp v0.16b, v0.16b, v0.16b
77+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
78+
; CHECK-NEXT: ret
79+
%r0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
80+
%r1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
81+
%o = add <8 x i8> %r0, %r1
82+
ret <8 x i8> %o
83+
}
84+
6385
define <4 x i64> @deinterleave_shuffle_v8i64(<8 x i64> %a) {
6486
; CHECK-LABEL: deinterleave_shuffle_v8i64:
6587
; CHECK: // %bb.0:

0 commit comments

Comments
 (0)