Skip to content

Commit b027ce0

Browse files
committed
[DAG] Fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
When the ADD carries the nsw flag, the sign-extension can be distributed over its operands, which lets the add be merged with a SHL op by a fold similar to the existing (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) fold. Note that c1 is sign-extended to the wider type before it is shifted by c2.

This is most useful for exposing address math on X86, but it also changes the output of several AArch64 test cases.

Alive2: https://alive2.llvm.org/ce/z/2UpSbJ

Differential Revision: https://reviews.llvm.org/D159198
1 parent e821914 commit b027ce0

File tree

4 files changed

+30
-8
lines changed

4 files changed

+30
-8
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10009,6 +10009,27 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
1000910009
}
1001010010
}
1001110011

10012+
// fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10013+
// TODO: Add zext/add_nuw variant with suitable test coverage
10014+
// TODO: Should we limit this with isLegalAddImmediate?
10015+
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10016+
N0.getOperand(0).getOpcode() == ISD::ADD &&
10017+
N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
10018+
N0.getOperand(0)->hasOneUse() &&
10019+
TLI.isDesirableToCommuteWithShift(N, Level)) {
10020+
SDValue Add = N0.getOperand(0);
10021+
SDLoc DL(N0);
10022+
if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10023+
{Add.getOperand(1)})) {
10024+
if (SDValue ShlC =
10025+
DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10026+
SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10027+
SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10028+
return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10029+
}
10030+
}
10031+
}
10032+
1001210033
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
1001310034
if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
1001410035
SDValue N01 = N0.getOperand(1);

llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -275,8 +275,9 @@ entry:
275275
define i64 @extendedLeftShiftintToint64By4(i32 %a) nounwind readnone ssp {
276276
; CHECK-LABEL: extendedLeftShiftintToint64By4:
277277
; CHECK: ; %bb.0: ; %entry
278-
; CHECK-NEXT: add w8, w0, #1
279-
; CHECK-NEXT: sbfiz x0, x8, #4, #32
278+
; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0
279+
; CHECK-NEXT: sbfiz x8, x0, #4, #32
280+
; CHECK-NEXT: add x0, x8, #16
280281
; CHECK-NEXT: ret
281282
entry:
282283
%inc = add nsw i32 %a, 1

llvm/test/CodeGen/AArch64/arm64-trunc-store.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,10 @@ define void @fct32(i32 %arg, i64 %var) {
2020
; CHECK-LABEL: fct32:
2121
; CHECK: // %bb.0: // %bb
2222
; CHECK-NEXT: adrp x8, :got:zptr32
23-
; CHECK-NEXT: sub w9, w0, #1
2423
; CHECK-NEXT: ldr x8, [x8, :got_lo12:zptr32]
2524
; CHECK-NEXT: ldr x8, [x8]
26-
; CHECK-NEXT: str w1, [x8, w9, sxtw #2]
25+
; CHECK-NEXT: add x8, x8, w0, sxtw #2
26+
; CHECK-NEXT: stur w1, [x8, #-4]
2727
; CHECK-NEXT: ret
2828
bb:
2929
%.pre37 = load ptr, ptr @zptr32, align 8
@@ -39,10 +39,10 @@ define void @fct16(i32 %arg, i64 %var) {
3939
; CHECK-LABEL: fct16:
4040
; CHECK: // %bb.0: // %bb
4141
; CHECK-NEXT: adrp x8, :got:zptr16
42-
; CHECK-NEXT: sub w9, w0, #1
4342
; CHECK-NEXT: ldr x8, [x8, :got_lo12:zptr16]
4443
; CHECK-NEXT: ldr x8, [x8]
45-
; CHECK-NEXT: strh w1, [x8, w9, sxtw #1]
44+
; CHECK-NEXT: add x8, x8, w0, sxtw #1
45+
; CHECK-NEXT: sturh w1, [x8, #-2]
4646
; CHECK-NEXT: ret
4747
bb:
4848
%.pre37 = load ptr, ptr @zptr16, align 8

llvm/test/CodeGen/X86/addr-mode-matcher-2.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,8 +52,8 @@ define void @foo_sext_nsw(i1 zeroext, i32) nounwind {
5252
; X64-NEXT: .p2align 4, 0x90
5353
; X64-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
5454
; X64-NEXT: cltq
55-
; X64-NEXT: leaq 4(,%rax,4), %rax
56-
; X64-NEXT: leaq (%rax,%rax,4), %rdi
55+
; X64-NEXT: shlq $2, %rax
56+
; X64-NEXT: leaq 20(%rax,%rax,4), %rdi
5757
; X64-NEXT: callq bar@PLT
5858
; X64-NEXT: jmp .LBB0_2
5959
br i1 %0, label %9, label %3

0 commit comments

Comments
 (0)