Skip to content

Commit 757384a

Browse files
committed
[AArch64][SVE][InstCombine] Fold redundant zip1/2(uzp1/2) operations
zip1(uzp1(A, B), uzp2(A, B)) --> A
zip2(uzp1(A, B), uzp2(A, B)) --> B
Differential Revision: https://reviews.llvm.org/D109666
Change-Id: I4a6578db2fcef9ff71ad0e77b9fe08354e6dbfcd
1 parent 0db9481 commit 757384a

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,21 @@ static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
792792
return IC.replaceInstUsesWith(II, VectorSplat);
793793
}
794794

795+
// Fold a zip1/zip2 whose operands are the uzp1/uzp2 of one common pair of
// values back into the corresponding original value:
//   zip1(uzp1(A, B), uzp2(A, B)) --> A
//   zip2(uzp1(A, B), uzp2(A, B)) --> B
// Returns None when the operands do not have this shape.
static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
                                                 IntrinsicInst &II) {
  Value *First = nullptr;
  Value *Second = nullptr;

  // Operand 0 has to be uzp1(First, Second); a successful match binds both.
  if (!match(II.getArgOperand(0), m_Intrinsic<Intrinsic::aarch64_sve_uzp1>(
                                      m_Value(First), m_Value(Second))))
    return None;

  // Operand 1 has to be uzp2 of exactly the same two values. m_Specific
  // captures the pointers when it is constructed, so this matcher must only
  // be built after the uzp1 match above has bound First and Second.
  if (!match(II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
                                      m_Specific(First), m_Specific(Second))))
    return None;

  // zip1 yields the first uzp operand; otherwise (zip2) yield the second.
  Value *Replacement =
      II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? First : Second;
  return IC.replaceInstUsesWith(II, Replacement);
}
809+
795810
Optional<Instruction *>
796811
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
797812
IntrinsicInst &II) const {
@@ -835,6 +850,9 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
835850
case Intrinsic::aarch64_sve_sunpkhi:
836851
case Intrinsic::aarch64_sve_sunpklo:
837852
return instCombineSVEUnpack(IC, II);
853+
case Intrinsic::aarch64_sve_zip1:
854+
case Intrinsic::aarch64_sve_zip2:
855+
return instCombineSVEZip(IC, II);
838856
}
839857

840858
return None;
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -instcombine < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; Builds uzp1/uzp2 from %a and %b, re-zips them with zip1/zip2, and adds the
; two results. The CHECK lines expect only the add of the two original
; arguments and the ret to remain after instcombine.
define <vscale x 4 x i32> @redundant_zip_unzip(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
7+
; CHECK-LABEL: @redundant_zip_unzip(
8+
; CHECK-NEXT: [[RET:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
9+
; CHECK-NEXT: ret <vscale x 4 x i32> [[RET]]
10+
;
11+
%uzp1 = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
12+
%uzp2 = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
13+
%zip1 = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %uzp1, <vscale x 4 x i32> %uzp2)
14+
%zip2 = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %uzp1, <vscale x 4 x i32> %uzp2)
15+
%ret = add <vscale x 4 x i32> %zip1, %zip2
16+
ret <vscale x 4 x i32> %ret
17+
}
18+
19+
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
20+
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
21+
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
22+
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
23+
24+
attributes #0 = { "target-features"="+sve" }

0 commit comments

Comments
 (0)