Skip to content

Commit 94a393a

Browse files
author
git apple-llvm automerger
committed
Merge commit 'e12364301e33' from llvm.org/main into next
2 parents 85ee866 + e123643 commit 94a393a

File tree

2 files changed

+103
-2
lines changed

2 files changed

+103
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17882,6 +17882,23 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
1788217882
return false;
1788317883
};
1788417884

17885+
// Can the constant C be decomposed into (1 - (1 - 2^M) * 2^N)? E.g.:
17886+
// C = 29 is equal to 1 - (1 - 2^3) * 2^2.
17887+
auto isPowMinusMinusOneConst = [](APInt C, APInt &M, APInt &N) {
17888+
APInt CVMinus1 = C - 1;
17889+
if (CVMinus1.isNegative())
17890+
return false;
17891+
unsigned TrailingZeroes = CVMinus1.countr_zero();
17892+
APInt CVPlus1 = CVMinus1.ashr(TrailingZeroes) + 1;
17893+
if (CVPlus1.isPowerOf2()) {
17894+
unsigned BitWidth = CVPlus1.getBitWidth();
17895+
M = APInt(BitWidth, CVPlus1.logBase2());
17896+
N = APInt(BitWidth, TrailingZeroes);
17897+
return true;
17898+
}
17899+
return false;
17900+
};
17901+
1788517902
if (ConstValue.isNonNegative()) {
1788617903
// (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
1788717904
// (mul x, 2^N - 1) => (sub (shl x, N), x)
@@ -17890,6 +17907,8 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
1789017907
// => MV = (add (shl x, M), x); (add (shl MV, N), MV)
1789117908
// (mul x, (2^M + 1) * 2^N + 1)
1789217909
// => MV = (add (shl x, M), x); (add (shl MV, N), x)
17910+
// (mul x, 1 - (1 - 2^M) * 2^N)
17911+
// => MV = (sub x, (shl x, M)); (sub x, (shl MV, N))
1789317912
APInt SCVMinus1 = ShiftedConstValue - 1;
1789417913
APInt SCVPlus1 = ShiftedConstValue + 1;
1789517914
APInt CVPlus1 = ConstValue + 1;
@@ -17926,6 +17945,17 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
1792617945
return Add(Shl(MVal, CVN.getZExtValue()), N0);
1792717946
}
1792817947
}
17948+
17949+
if (Subtarget->hasALULSLFast() &&
17950+
isPowMinusMinusOneConst(ConstValue, CVM, CVN)) {
17951+
unsigned ShiftM = CVM.getZExtValue();
17952+
unsigned ShiftN = CVN.getZExtValue();
17953+
// ALULSLFast implies that shifts of <= 4 places are fast.
17954+
if (ShiftM <= 4 && ShiftN <= 4) {
17955+
SDValue MVal = Sub(N0, Shl(N0, CVM.getZExtValue()));
17956+
return Sub(N0, Shl(MVal, CVN.getZExtValue()));
17957+
}
17958+
}
1792917959
} else {
1793017960
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
1793117961
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)

llvm/test/CodeGen/AArch64/mul_pow2.ll

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,23 @@ define i32 @test25_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
527527
ret i32 %mul
528528
}
529529

530+
define i32 @test29_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
531+
; CHECK-LABEL: test29_fast_shift:
532+
; CHECK: // %bb.0:
533+
; CHECK-NEXT: sub w8, w0, w0, lsl #3
534+
; CHECK-NEXT: sub w0, w0, w8, lsl #2
535+
; CHECK-NEXT: ret
536+
;
537+
; GISEL-LABEL: test29_fast_shift:
538+
; GISEL: // %bb.0:
539+
; GISEL-NEXT: mov w8, #29 // =0x1d
540+
; GISEL-NEXT: mul w0, w0, w8
541+
; GISEL-NEXT: ret
542+
543+
%mul = mul nsw i32 %x, 29 ; 29 = 1 - (1-8) * 4
544+
ret i32 %mul
545+
}
546+
530547
define i32 @test45_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
531548
; CHECK-LABEL: test45_fast_shift:
532549
; CHECK: // %bb.0:
@@ -615,6 +632,42 @@ define i32 @test97_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
615632
ret i32 %mul
616633
}
617634

635+
; Negative test: The shift amount 5 exceeds the fast-shift limit of 4.
636+
define i32 @test125_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
637+
; CHECK-LABEL: test125_fast_shift:
638+
; CHECK: // %bb.0:
639+
; CHECK-NEXT: mov w8, #125 // =0x7d
640+
; CHECK-NEXT: mul w0, w0, w8
641+
; CHECK-NEXT: ret
642+
;
643+
; GISEL-LABEL: test125_fast_shift:
644+
; GISEL: // %bb.0:
645+
; GISEL-NEXT: mov w8, #125 // =0x7d
646+
; GISEL-NEXT: mul w0, w0, w8
647+
; GISEL-NEXT: ret
648+
649+
%mul = mul nsw i32 %x, 125 ; 125 = 1 - ((1-32) << 2)
650+
ret i32 %mul
651+
}
652+
653+
; TODO: Support constants of the form (1 - 2^M) * (1 - 2^N).
654+
define i32 @test225_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
655+
; CHECK-LABEL: test225_fast_shift:
656+
; CHECK: // %bb.0:
657+
; CHECK-NEXT: mov w8, #225 // =0xe1
658+
; CHECK-NEXT: mul w0, w0, w8
659+
; CHECK-NEXT: ret
660+
;
661+
; GISEL-LABEL: test225_fast_shift:
662+
; GISEL: // %bb.0:
663+
; GISEL-NEXT: mov w8, #225 // =0xe1
664+
; GISEL-NEXT: mul w0, w0, w8
665+
; GISEL-NEXT: ret
666+
667+
%mul = mul nsw i32 %x, 225 ; 225 = (1-16)*(1-16)
668+
ret i32 %mul
669+
}
670+
618671
; Negative test: The shift amount 5 is larger than 4.
619672
define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
620673
; CHECK-LABEL: test297_fast_shift:
@@ -633,6 +686,24 @@ define i32 @test297_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
633686
ret i32 %mul
634687
}
635688

689+
; Negative test: The shift amount 5 exceeds the fast-shift limit of 4.
690+
define i32 @test481_fast_shift(i32 %x) "target-features"="+alu-lsl-fast" {
691+
; CHECK-LABEL: test481_fast_shift:
692+
; CHECK: // %bb.0:
693+
; CHECK-NEXT: mov w8, #481 // =0x1e1
694+
; CHECK-NEXT: mul w0, w0, w8
695+
; CHECK-NEXT: ret
696+
;
697+
; GISEL-LABEL: test481_fast_shift:
698+
; GISEL: // %bb.0:
699+
; GISEL-NEXT: mov w8, #481 // =0x1e1
700+
; GISEL-NEXT: mul w0, w0, w8
701+
; GISEL-NEXT: ret
702+
703+
%mul = mul nsw i32 %x, 481 ; 481 = 1 - ((1-16) << 5)
704+
ret i32 %mul
705+
}
706+
636707
; Convert mul x, -pow2 to shift.
637708
; Convert mul x, -(pow2 +/- 1) to shift + add/sub.
638709
; Lowering other negative constants is not supported yet.
@@ -910,9 +981,9 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) {
910981
;
911982
; GISEL-LABEL: muladd_demand_commute:
912983
; GISEL: // %bb.0:
913-
; GISEL-NEXT: adrp x8, .LCPI52_0
984+
; GISEL-NEXT: adrp x8, .LCPI56_0
914985
; GISEL-NEXT: movi v3.4s, #1, msl #16
915-
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI52_0]
986+
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI56_0]
916987
; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s
917988
; GISEL-NEXT: and v0.16b, v1.16b, v3.16b
918989
; GISEL-NEXT: ret

0 commit comments

Comments
 (0)