[X86] combineAdd - fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y) #142734

Merged

1 commit merged into llvm:main from the x86-lea-add-sub branch on Jun 5, 2025

Conversation

@RKSimon (Collaborator) commented Jun 4, 2025

Attempt to keep adds/shifts closer together for LEA matching

Fixes #55714
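
For illustration (this snippet is not part of the patch): the motivating case from #55714 is n + q*7. X86 lowers the multiply-by-7 as (q << 3) - q, so the expression becomes ((q << 3) - q) + n, which is exactly the (add (sub (shl x, c), y), z) shape this fold canonicalizes.

```c++
// C sketch mirroring the IR in llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
// (updated in the diff below). Before this fold, x86 codegen emitted
//   leal (,%rsi,8), %eax ; subl %esi, %eax ; addl %edi, %eax
// With the add reassociated next to the shl, the shl+add pair matches a
// single LEA:
//   leal (%rdi,%rsi,8), %eax ; subl %esi, %eax
unsigned PR55714_i32(unsigned n, unsigned q) { return q * 7 + n; }
```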

@llvmbot (Member) commented Jun 4, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Attempt to keep adds/shifts closer together for LEA matching

Fixes #55714


Full diff: https://github.com/llvm/llvm-project/pull/142734.diff

5 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+17)
  • (modified) llvm/test/CodeGen/X86/addr-mode-matcher-3.ll (+7-9)
  • (modified) llvm/test/CodeGen/X86/apx/reloc-opt.ll (+3-5)
  • (modified) llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll (+7-9)
  • (modified) llvm/test/CodeGen/X86/mul-constant-i64.ll (+10-10)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2399936ffd827..becd03e619d32 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58010,6 +58010,23 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
     return V;
 
+  // Canonicalize hidden LEA pattern:
+  // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y)
+  // iff c < 4
+  if (VT == MVT::i32 || VT == MVT::i64) {
+    SDValue Y, Z, Shift;
+    APInt Amt;
+    if (sd_match(
+            N, m_Add(m_OneUse(m_Sub(m_AllOf(m_Value(Shift),
+                                            m_Shl(m_Value(), m_ConstInt(Amt))),
+                                    m_Value(Y))),
+                     m_Value(Z))) &&
+        Amt.ult(4) && !isa<ConstantSDNode>(Z)) {
+      return DAG.getNode(ISD::SUB, DL, VT,
+                         DAG.getNode(ISD::ADD, DL, VT, Shift, Z), Y);
+    }
+  }
+
   // add(psadbw(X,0),psadbw(Y,0)) -> psadbw(add(X,Y),0)
   // iff X and Y won't overflow.
   if (Op0.getOpcode() == X86ISD::PSADBW && Op1.getOpcode() == X86ISD::PSADBW &&
diff --git a/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll b/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
index 522b42e07c6e0..beea6d36fe874 100644
--- a/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
+++ b/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
@@ -104,16 +104,16 @@ define i32 @PR55714_i32(i32 %n, i32 %q) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal (,%ecx,8), %eax
-; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    subl %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR55714_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    leal (,%rsi,8), %eax
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi,8), %eax
 ; X64-NEXT:    subl %esi, %eax
-; X64-NEXT:    addl %edi, %eax
 ; X64-NEXT:    retq
   %mul = mul i32 %q, 7
   %add = add i32 %mul, %n
@@ -123,21 +123,19 @@ define i32 @PR55714_i32(i32 %n, i32 %q) {
 define i64 @PR55714_i64(i64 %n, i64 %q) {
 ; X86-LABEL: PR55714_i64:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal (,%eax,8), %ecx
-; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl $7, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    leal (%edx,%ecx,8), %edx
+; X86-NEXT:    subl %ecx, %edx
 ; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR55714_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    leaq (,%rsi,8), %rax
+; X64-NEXT:    leaq (%rdi,%rsi,8), %rax
 ; X64-NEXT:    subq %rsi, %rax
-; X64-NEXT:    addq %rdi, %rax
 ; X64-NEXT:    retq
   %mul = mul i64 %q, 7
   %add = add i64 %mul, %n
diff --git a/llvm/test/CodeGen/X86/apx/reloc-opt.ll b/llvm/test/CodeGen/X86/apx/reloc-opt.ll
index a5ab94b00d64b..ecc3d3297ceab 100644
--- a/llvm/test/CodeGen/X86/apx/reloc-opt.ll
+++ b/llvm/test/CodeGen/X86/apx/reloc-opt.ll
@@ -13,11 +13,9 @@
 
 
 ; CHECK-LABEL: test_regclass_not_updated_by_regalloc_1
-; APXREL: movq    (%rip), %r16
-; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
-; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
-; NOAPXREL: movq    (%rip), %rdi
-; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4
+; CHECK-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
+; CHECK: movq    (%rip), %rdi
+; CHECK-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4
 
 @gvar = external global [20000 x i8]
 
diff --git a/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll b/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
index 345014edd0e9d..5e94598565aa9 100644
--- a/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
+++ b/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
@@ -7,7 +7,6 @@
 define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE2-LABEL: dot_ext_v8i8_v8i32:
 ; SSE2:       # %bb.0: # %entry
-; SSE2-NEXT:    pushq %r14
 ; SSE2-NEXT:    pushq %rbx
 ; SSE2-NEXT:    movzbl (%rdi), %eax
 ; SSE2-NEXT:    movzbl (%rdi,%rsi), %ecx
@@ -18,9 +17,9 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE2-NEXT:    leaq (%rsi,%rsi,4), %rbx
 ; SSE2-NEXT:    movzbl (%rdi,%rbx), %ebx
 ; SSE2-NEXT:    movzbl (%rdi,%r9,2), %r9d
-; SSE2-NEXT:    leaq (,%rsi,8), %r14
-; SSE2-NEXT:    subq %rsi, %r14
-; SSE2-NEXT:    movzbl (%rdi,%r14), %esi
+; SSE2-NEXT:    leaq (%rdi,%rsi,8), %rdi
+; SSE2-NEXT:    subq %rsi, %rdi
+; SSE2-NEXT:    movzbl (%rdi), %esi
 ; SSE2-NEXT:    shll $16, %ecx
 ; SSE2-NEXT:    orl %eax, %ecx
 ; SSE2-NEXT:    movd %ecx, %xmm0
@@ -38,7 +37,6 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE2-NEXT:    paddd %xmm0, %xmm1
 ; SSE2-NEXT:    movd %xmm1, %eax
 ; SSE2-NEXT:    popq %rbx
-; SSE2-NEXT:    popq %r14
 ; SSE2-NEXT:    retq
 ;
 ; SSE4-LABEL: dot_ext_v8i8_v8i32:
@@ -46,7 +44,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE4-NEXT:    movzbl (%rdi), %eax
 ; SSE4-NEXT:    leaq (%rsi,%rsi,4), %rcx
 ; SSE4-NEXT:    leaq (%rsi,%rsi,2), %r8
-; SSE4-NEXT:    leaq (,%rsi,8), %r9
+; SSE4-NEXT:    leaq (%rdi,%rsi,8), %r9
 ; SSE4-NEXT:    subq %rsi, %r9
 ; SSE4-NEXT:    movd %eax, %xmm0
 ; SSE4-NEXT:    pinsrb $2, (%rdi,%rsi), %xmm0
@@ -55,7 +53,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; SSE4-NEXT:    pinsrb $8, (%rdi,%rsi,4), %xmm0
 ; SSE4-NEXT:    pinsrb $10, (%rdi,%rcx), %xmm0
 ; SSE4-NEXT:    pinsrb $12, (%rdi,%r8,2), %xmm0
-; SSE4-NEXT:    pinsrb $14, (%rdi,%r9), %xmm0
+; SSE4-NEXT:    pinsrb $14, (%r9), %xmm0
 ; SSE4-NEXT:    movdqu (%rdx), %xmm1
 ; SSE4-NEXT:    pmaddwd %xmm0, %xmm1
 ; SSE4-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
@@ -70,7 +68,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; AVX-NEXT:    movzbl (%rdi), %eax
 ; AVX-NEXT:    leaq (%rsi,%rsi,2), %rcx
 ; AVX-NEXT:    leaq (%rsi,%rsi,4), %r8
-; AVX-NEXT:    leaq (,%rsi,8), %r9
+; AVX-NEXT:    leaq (%rdi,%rsi,8), %r9
 ; AVX-NEXT:    subq %rsi, %r9
 ; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    vpinsrb $2, (%rdi,%rsi), %xmm0, %xmm0
@@ -79,7 +77,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
 ; AVX-NEXT:    vpinsrb $8, (%rdi,%rsi,4), %xmm0, %xmm0
 ; AVX-NEXT:    vpinsrb $10, (%rdi,%r8), %xmm0, %xmm0
 ; AVX-NEXT:    vpinsrb $12, (%rdi,%rcx,2), %xmm0, %xmm0
-; AVX-NEXT:    vpinsrb $14, (%rdi,%r9), %xmm0, %xmm0
+; AVX-NEXT:    vpinsrb $14, (%r9), %xmm0, %xmm0
 ; AVX-NEXT:    vpmaddwd (%rdx), %xmm0, %xmm0
 ; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
 ; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll
index 03dd5351c78ac..40d591f8d1be8 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i64.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll
@@ -166,12 +166,11 @@ define i64 @test_mul_by_6(i64 %x) {
 define i64 @test_mul_by_7(i64 %x) {
 ; X86-LABEL: test_mul_by_7:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal (,%eax,8), %ecx
-; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl $7, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    leal (%edx,%ecx,8), %edx
+; X86-NEXT:    subl %ecx, %edx
 ; X86-NEXT:    retl
 ;
 ; X86-NOOPT-LABEL: test_mul_by_7:
@@ -733,16 +732,17 @@ define i64 @test_mul_by_22(i64 %x) {
   ret i64 %mul
 }
 
-define i64 @test_mul_by_23(i64 %x) {
+define i64 @test_mul_by_23(i64 %x) nounwind {
 ; X86-LABEL: test_mul_by_23:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal (%eax,%eax,2), %ecx
-; X86-NEXT:    shll $3, %ecx
-; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %esi
 ; X86-NEXT:    movl $23, %eax
 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NEXT:    addl %ecx, %edx
+; X86-NEXT:    leal (%edx,%esi,8), %edx
+; X86-NEXT:    subl %ecx, %edx
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X86-NOOPT-LABEL: test_mul_by_23:
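
A standalone sketch (not part of the patch or its tests) of why the reassociation is safe and why the shift amount is capped: ((x << c) - y) + z and ((x << c) + z) - y always agree in two's-complement arithmetic, and the Amt.ult(4) guard restricts the fold to shift amounts 0-3 because LEA scale factors are limited to 1, 2, 4 and 8. The patch additionally requires the sub to have a single use and skips constant z operands, presumably so constant offsets keep flowing through the existing folds.

```c++
// Standalone sanity check of the reassociation performed by the combine.
// Function names and test values here are illustrative only.
#include <cassert>
#include <cstdint>

// (add (sub (shl x, c), y), z) as scalar arithmetic.
static uint64_t beforeFold(uint64_t x, uint64_t y, uint64_t z, unsigned c) {
  return ((x << c) - y) + z;
}

// (sub (add (shl x, c), z), y): the add now sits next to the shl, so the
// pair can be matched as an LEA with base z and index x scaled by 1 << c.
static uint64_t afterFold(uint64_t x, uint64_t y, uint64_t z, unsigned c) {
  return ((x << c) + z) - y;
}

int main() {
  for (unsigned c = 0; c < 4; ++c) // c < 4 mirrors the Amt.ult(4) guard
    assert(beforeFold(0x1234, 0x56, 0x789, c) ==
           afterFold(0x1234, 0x56, 0x789, c));
  return 0;
}
```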

@phoebewang (Contributor) left a comment

LGTM.

@RKSimon merged commit dba4188 into llvm:main on Jun 5, 2025 (13 checks passed)
@RKSimon deleted the x86-lea-add-sub branch on June 5, 2025 at 07:20
rorth pushed a commit to rorth/llvm-project that referenced this pull request Jun 11, 2025
DhruvSrivastavaX pushed a commit to DhruvSrivastavaX/lldb-for-aix that referenced this pull request Jun 12, 2025
@mysterymath (Contributor) commented

Hey there! The Fuchsia team is seeing dramatic slowdowns on our Mac AArch64 toolchain builders, from 3.5 hours to a timeout hit at 5 hours. This is one of two plausible commits in the blamelist. It only seems to appear in our LTO 2-stage prod builds. We do build builtins for X86 and other platforms. Do you think something in this change could cause this kind of slowdown? We haven't been able to directly determine the culprit beyond a rough range.

@RKSimon (Collaborator, Author) commented Jun 18, 2025

Test case? I'm not going to rule it out but I doubt it.

@topperc (Collaborator) commented Jun 18, 2025

Is it a slowdown or is the compiler hung?

@mysterymath (Contributor) commented

It's difficult to tell at the moment; when our builders time out we lose access to the ninja traces. I'm working on getting them back; I'll post back when I have more information.

@mysterymath (Contributor) commented

It's definitely a compiler hang; it occurs during the first CMake ABI check against the just-built clang. It only occurs in LTO mode, which I didn't realize. I'm running through a fresh round of trials with reverts for the 4 changes in the blamelist (this is still one of them); I'll post back with either an exoneration or confirmation that this is causing the hang.

mysterymath added a commit to mysterymath/llvm-project that referenced this pull request Jun 20, 2025
@mysterymath (Contributor) commented

I was able to reproduce the issue, and reverting this PR didn't resolve it. Sorry for the false alarm.

Successfully merging this pull request may close these issues.

Failure to optimize pattern to x * 8