Skip to content

Commit 325a308

Browse files
committed
[AArch64] Adjust operand sequence for Add+Sub to combine more inline shift
((X << C) - Y) + Z --> (Z - Y) + (X << C). Fixes the AArch64 part of #55714. Reviewed By: dmgreen. Differential Revision: https://reviews.llvm.org/D136158
1 parent 72e9447 commit 325a308

File tree

2 files changed

+127
-9
lines changed

2 files changed

+127
-9
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16854,6 +16854,44 @@ static SDValue performBuildVectorCombine(SDNode *N,
1685416854
return SDValue();
1685516855
}
1685616856

16857+
// ((X >> C) - Y) + Z --> (Z - Y) + (X >> C)
16858+
static SDValue performAddCombineSubShift(SDNode *N, SDValue SUB, SDValue Z,
16859+
SelectionDAG &DAG) {
16860+
auto IsOneUseShiftC = [&](SDValue Shift) {
16861+
if (!Shift.hasOneUse())
16862+
return false;
16863+
16864+
// TODO: support SRL and SRA also
16865+
if (Shift.getOpcode() != ISD::SHL)
16866+
return false;
16867+
16868+
if (!isa<ConstantSDNode>(Shift.getOperand(1)))
16869+
return false;
16870+
return true;
16871+
};
16872+
16873+
// DAGCombiner will revert the combination when Z is constant cause
16874+
// dead loop. So don't enable the combination when Z is constant.
16875+
// If Z is one use shift C, we also can't do the optimization.
16876+
// It will falling to self infinite loop.
16877+
if (isa<ConstantSDNode>(Z) || IsOneUseShiftC(Z))
16878+
return SDValue();
16879+
16880+
if (SUB.getOpcode() != ISD::SUB || !SUB.hasOneUse())
16881+
return SDValue();
16882+
16883+
SDValue Shift = SUB.getOperand(0);
16884+
if (!IsOneUseShiftC(Shift))
16885+
return SDValue();
16886+
16887+
SDLoc DL(N);
16888+
EVT VT = N->getValueType(0);
16889+
16890+
SDValue Y = SUB.getOperand(1);
16891+
SDValue NewSub = DAG.getNode(ISD::SUB, DL, VT, Z, Y);
16892+
return DAG.getNode(ISD::ADD, DL, VT, NewSub, Shift);
16893+
}
16894+
1685716895
static SDValue performAddCombineForShiftedOperands(SDNode *N,
1685816896
SelectionDAG &DAG) {
1685916897
// NOTE: Swapping LHS and RHS is not done for SUB, since SUB is not
@@ -16871,6 +16909,11 @@ static SDValue performAddCombineForShiftedOperands(SDNode *N,
1687116909
SDValue LHS = N->getOperand(0);
1687216910
SDValue RHS = N->getOperand(1);
1687316911

16912+
if (SDValue Val = performAddCombineSubShift(N, LHS, RHS, DAG))
16913+
return Val;
16914+
if (SDValue Val = performAddCombineSubShift(N, RHS, LHS, DAG))
16915+
return Val;
16916+
1687416917
uint64_t LHSImm = 0, RHSImm = 0;
1687516918
// If both operand are shifted by imm and shift amount is not greater than 4
1687616919
// for one operand, swap LHS and RHS to put operand with smaller shift amount

llvm/test/CodeGen/AArch64/addsub.ll

Lines changed: 84 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -694,40 +694,115 @@ if.end: ; preds = %if.then, %lor.lhs.f
694694
ret i32 undef
695695
}
696696

697+
; ((X << C) - Y) + Z --> (Z - Y) + (X << C)
697698
define i32 @commute_subop0(i32 %x, i32 %y, i32 %z) {
698699
; CHECK-LABEL: commute_subop0:
699700
; CHECK: // %bb.0:
700-
; CHECK-NEXT: lsl w8, w0, #3
701-
; CHECK-NEXT: sub w8, w8, w1
702-
; CHECK-NEXT: add w0, w8, w2
701+
; CHECK-NEXT: sub w8, w2, w1
702+
; CHECK-NEXT: add w0, w8, w0, lsl #3
703703
; CHECK-NEXT: ret
704704
%shl = shl i32 %x, 3
705705
%sub = sub i32 %shl, %y
706706
%add = add i32 %sub, %z
707707
ret i32 %add
708708
}
709709

710+
; negative case: ((X >> C) - Y) + Z is not combined yet for logical shift right
711+
define i32 @commute_subop0_lshr(i32 %x, i32 %y, i32 %z) {
712+
; CHECK-LABEL: commute_subop0_lshr:
713+
; CHECK: // %bb.0:
714+
; CHECK-NEXT: lsr w8, w0, #3
715+
; CHECK-NEXT: sub w8, w8, w1
716+
; CHECK-NEXT: add w0, w8, w2
717+
; CHECK-NEXT: ret
718+
%lshr = lshr i32 %x, 3
719+
%sub = sub i32 %lshr, %y
720+
%add = add i32 %sub, %z
721+
ret i32 %add
722+
}
723+
724+
; negative case: ((X >> C) - Y) + Z is not combined yet for arithmetic shift right
725+
define i32 @commute_subop0_ashr(i32 %x, i32 %y, i32 %z) {
726+
; CHECK-LABEL: commute_subop0_ashr:
727+
; CHECK: // %bb.0:
728+
; CHECK-NEXT: asr w8, w0, #3
729+
; CHECK-NEXT: sub w8, w8, w1
730+
; CHECK-NEXT: add w0, w8, w2
731+
; CHECK-NEXT: ret
732+
%ashr = ashr i32 %x, 3
733+
%sub = sub i32 %ashr, %y
734+
%add = add i32 %sub, %z
735+
ret i32 %add
736+
}
737+
738+
; Z + ((X << C) - Y) --> (Z - Y) + (X << C)
710739
define i32 @commute_subop0_cadd(i32 %x, i32 %y, i32 %z) {
711740
; CHECK-LABEL: commute_subop0_cadd:
712741
; CHECK: // %bb.0:
713-
; CHECK-NEXT: lsl w8, w0, #3
714-
; CHECK-NEXT: sub w8, w8, w1
715-
; CHECK-NEXT: add w0, w2, w8
742+
; CHECK-NEXT: sub w8, w2, w1
743+
; CHECK-NEXT: add w0, w8, w0, lsl #3
716744
; CHECK-NEXT: ret
717745
%shl = shl i32 %x, 3
718746
%sub = sub i32 %shl, %y
719747
%add = add i32 %z, %sub
720748
ret i32 %add
721749
}
722750

751+
; (X * 7) + Y, i.e. ((X << 3) - X) + Y --> (Y - X) + (X << 3)
723752
define i32 @commute_subop0_mul(i32 %x, i32 %y) {
724753
; CHECK-LABEL: commute_subop0_mul:
725754
; CHECK: // %bb.0:
726-
; CHECK-NEXT: lsl w8, w0, #3
727-
; CHECK-NEXT: sub w8, w8, w0
728-
; CHECK-NEXT: add w0, w8, w1
755+
; CHECK-NEXT: sub w8, w1, w0
756+
; CHECK-NEXT: add w0, w8, w0, lsl #3
729757
; CHECK-NEXT: ret
730758
%mul = mul i32 %x, 7
731759
%add = add i32 %mul, %y
732760
ret i32 %add
733761
}
762+
763+
; negative case for ((X << C) - Y) + Z --> (Z - Y) + (X << C)
764+
; Z can't be a constant, to avoid an infinite combine loop
765+
define i32 @commute_subop0_zconst(i32 %x, i32 %y) {
766+
; CHECK-LABEL: commute_subop0_zconst:
767+
; CHECK: // %bb.0:
768+
; CHECK-NEXT: lsl w8, w0, #3
769+
; CHECK-NEXT: sub w8, w8, w1
770+
; CHECK-NEXT: add w0, w8, #1
771+
; CHECK-NEXT: ret
772+
%shl = shl i32 %x, 3
773+
%sub = sub i32 %shl, %y
774+
%add = add i32 %sub, 1
775+
ret i32 %add
776+
}
777+
778+
; negative case for ((X << C) - Y) + Z --> (Z - Y) + (X << C)
779+
; Z can't be a one-use shift by a constant either, to avoid an infinite combine loop
780+
define i32 @commute_subop0_zshiftc_oneuse(i32 %x, i32 %y, i32 %z) {
781+
; CHECK-LABEL: commute_subop0_zshiftc_oneuse:
782+
; CHECK: // %bb.0:
783+
; CHECK-NEXT: lsl w8, w0, #3
784+
; CHECK-NEXT: sub w8, w8, w1
785+
; CHECK-NEXT: add w0, w8, w2, lsl #2
786+
; CHECK-NEXT: ret
787+
%xshl = shl i32 %x, 3
788+
%sub = sub i32 %xshl, %y
789+
%zshl = shl i32 %z, 2
790+
%add = add i32 %sub, %zshl
791+
ret i32 %add
792+
}
793+
794+
define i32 @commute_subop0_zshiftc(i32 %x, i32 %y, i32 %z) {
795+
; CHECK-LABEL: commute_subop0_zshiftc:
796+
; CHECK: // %bb.0:
797+
; CHECK-NEXT: lsl w8, w2, #2
798+
; CHECK-NEXT: sub w9, w8, w1
799+
; CHECK-NEXT: add w9, w9, w0, lsl #3
800+
; CHECK-NEXT: eor w0, w8, w9
801+
; CHECK-NEXT: ret
802+
%xshl = shl i32 %x, 3
803+
%sub = sub i32 %xshl, %y
804+
%zshl = shl i32 %z, 2
805+
%add = add i32 %sub, %zshl
806+
%r = xor i32 %zshl, %add
807+
ret i32 %r
808+
}

0 commit comments

Comments
 (0)