Skip to content

Commit dceb722

Browse files
RKSimon authored and rorth committed
[X86] combineAdd - fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y) (llvm#142734)
Attempt to keep adds/shifts closer together for LEA matching. Fixes llvm#55714.
1 parent d41aef9 commit dceb722

File tree

5 files changed: +44 -33 lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58048,6 +58048,23 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
5804858048
if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
5804958049
return V;
5805058050

58051+
// Canonicalize hidden LEA pattern:
58052+
// Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y)
58053+
// iff c < 4
58054+
if (VT == MVT::i32 || VT == MVT::i64) {
58055+
SDValue Y, Z, Shift;
58056+
APInt Amt;
58057+
if (sd_match(
58058+
N, m_Add(m_OneUse(m_Sub(m_AllOf(m_Value(Shift),
58059+
m_Shl(m_Value(), m_ConstInt(Amt))),
58060+
m_Value(Y))),
58061+
m_Value(Z))) &&
58062+
Amt.ult(4) && !isa<ConstantSDNode>(Z)) {
58063+
return DAG.getNode(ISD::SUB, DL, VT,
58064+
DAG.getNode(ISD::ADD, DL, VT, Shift, Z), Y);
58065+
}
58066+
}
58067+
5805158068
// add(psadbw(X,0),psadbw(Y,0)) -> psadbw(add(X,Y),0)
5805258069
// iff X and Y won't overflow.
5805358070
if (Op0.getOpcode() == X86ISD::PSADBW && Op1.getOpcode() == X86ISD::PSADBW &&

llvm/test/CodeGen/X86/addr-mode-matcher-3.ll

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -104,16 +104,16 @@ define i32 @PR55714_i32(i32 %n, i32 %q) {
104104
; X86: # %bb.0:
105105
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
106106
; X86-NEXT: leal (,%ecx,8), %eax
107-
; X86-NEXT: subl %ecx, %eax
108107
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
108+
; X86-NEXT: subl %ecx, %eax
109109
; X86-NEXT: retl
110110
;
111111
; X64-LABEL: PR55714_i32:
112112
; X64: # %bb.0:
113113
; X64-NEXT: # kill: def $esi killed $esi def $rsi
114-
; X64-NEXT: leal (,%rsi,8), %eax
114+
; X64-NEXT: # kill: def $edi killed $edi def $rdi
115+
; X64-NEXT: leal (%rdi,%rsi,8), %eax
115116
; X64-NEXT: subl %esi, %eax
116-
; X64-NEXT: addl %edi, %eax
117117
; X64-NEXT: retq
118118
%mul = mul i32 %q, 7
119119
%add = add i32 %mul, %n
@@ -123,21 +123,19 @@ define i32 @PR55714_i32(i32 %n, i32 %q) {
123123
define i64 @PR55714_i64(i64 %n, i64 %q) {
124124
; X86-LABEL: PR55714_i64:
125125
; X86: # %bb.0:
126-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
127-
; X86-NEXT: leal (,%eax,8), %ecx
128-
; X86-NEXT: subl %eax, %ecx
126+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
129127
; X86-NEXT: movl $7, %eax
130128
; X86-NEXT: mull {{[0-9]+}}(%esp)
131-
; X86-NEXT: addl %ecx, %edx
129+
; X86-NEXT: leal (%edx,%ecx,8), %edx
130+
; X86-NEXT: subl %ecx, %edx
132131
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
133132
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
134133
; X86-NEXT: retl
135134
;
136135
; X64-LABEL: PR55714_i64:
137136
; X64: # %bb.0:
138-
; X64-NEXT: leaq (,%rsi,8), %rax
137+
; X64-NEXT: leaq (%rdi,%rsi,8), %rax
139138
; X64-NEXT: subq %rsi, %rax
140-
; X64-NEXT: addq %rdi, %rax
141139
; X64-NEXT: retq
142140
%mul = mul i64 %q, 7
143141
%add = add i64 %mul, %n

llvm/test/CodeGen/X86/apx/reloc-opt.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,9 @@
1313

1414

1515
; CHECK-LABEL: test_regclass_not_updated_by_regalloc_1
16-
; APXREL: movq (%rip), %r16
17-
; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
18-
; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
19-
; NOAPXREL: movq (%rip), %rdi
20-
; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4
16+
; CHECK-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
17+
; CHECK: movq (%rip), %rdi
18+
; CHECK-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4
2119

2220
@gvar = external global [20000 x i8]
2321

llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
88
; SSE2-LABEL: dot_ext_v8i8_v8i32:
99
; SSE2: # %bb.0: # %entry
10-
; SSE2-NEXT: pushq %r14
1110
; SSE2-NEXT: pushq %rbx
1211
; SSE2-NEXT: movzbl (%rdi), %eax
1312
; SSE2-NEXT: movzbl (%rdi,%rsi), %ecx
@@ -18,9 +17,9 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
1817
; SSE2-NEXT: leaq (%rsi,%rsi,4), %rbx
1918
; SSE2-NEXT: movzbl (%rdi,%rbx), %ebx
2019
; SSE2-NEXT: movzbl (%rdi,%r9,2), %r9d
21-
; SSE2-NEXT: leaq (,%rsi,8), %r14
22-
; SSE2-NEXT: subq %rsi, %r14
23-
; SSE2-NEXT: movzbl (%rdi,%r14), %esi
20+
; SSE2-NEXT: leaq (%rdi,%rsi,8), %rdi
21+
; SSE2-NEXT: subq %rsi, %rdi
22+
; SSE2-NEXT: movzbl (%rdi), %esi
2423
; SSE2-NEXT: shll $16, %ecx
2524
; SSE2-NEXT: orl %eax, %ecx
2625
; SSE2-NEXT: movd %ecx, %xmm0
@@ -38,15 +37,14 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
3837
; SSE2-NEXT: paddd %xmm0, %xmm1
3938
; SSE2-NEXT: movd %xmm1, %eax
4039
; SSE2-NEXT: popq %rbx
41-
; SSE2-NEXT: popq %r14
4240
; SSE2-NEXT: retq
4341
;
4442
; SSE4-LABEL: dot_ext_v8i8_v8i32:
4543
; SSE4: # %bb.0: # %entry
4644
; SSE4-NEXT: movzbl (%rdi), %eax
4745
; SSE4-NEXT: leaq (%rsi,%rsi,4), %rcx
4846
; SSE4-NEXT: leaq (%rsi,%rsi,2), %r8
49-
; SSE4-NEXT: leaq (,%rsi,8), %r9
47+
; SSE4-NEXT: leaq (%rdi,%rsi,8), %r9
5048
; SSE4-NEXT: subq %rsi, %r9
5149
; SSE4-NEXT: movd %eax, %xmm0
5250
; SSE4-NEXT: pinsrb $2, (%rdi,%rsi), %xmm0
@@ -55,7 +53,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
5553
; SSE4-NEXT: pinsrb $8, (%rdi,%rsi,4), %xmm0
5654
; SSE4-NEXT: pinsrb $10, (%rdi,%rcx), %xmm0
5755
; SSE4-NEXT: pinsrb $12, (%rdi,%r8,2), %xmm0
58-
; SSE4-NEXT: pinsrb $14, (%rdi,%r9), %xmm0
56+
; SSE4-NEXT: pinsrb $14, (%r9), %xmm0
5957
; SSE4-NEXT: movdqu (%rdx), %xmm1
6058
; SSE4-NEXT: pmaddwd %xmm0, %xmm1
6159
; SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
@@ -70,7 +68,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
7068
; AVX-NEXT: movzbl (%rdi), %eax
7169
; AVX-NEXT: leaq (%rsi,%rsi,2), %rcx
7270
; AVX-NEXT: leaq (%rsi,%rsi,4), %r8
73-
; AVX-NEXT: leaq (,%rsi,8), %r9
71+
; AVX-NEXT: leaq (%rdi,%rsi,8), %r9
7472
; AVX-NEXT: subq %rsi, %r9
7573
; AVX-NEXT: vmovd %eax, %xmm0
7674
; AVX-NEXT: vpinsrb $2, (%rdi,%rsi), %xmm0, %xmm0
@@ -79,7 +77,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
7977
; AVX-NEXT: vpinsrb $8, (%rdi,%rsi,4), %xmm0, %xmm0
8078
; AVX-NEXT: vpinsrb $10, (%rdi,%r8), %xmm0, %xmm0
8179
; AVX-NEXT: vpinsrb $12, (%rdi,%rcx,2), %xmm0, %xmm0
82-
; AVX-NEXT: vpinsrb $14, (%rdi,%r9), %xmm0, %xmm0
80+
; AVX-NEXT: vpinsrb $14, (%r9), %xmm0, %xmm0
8381
; AVX-NEXT: vpmaddwd (%rdx), %xmm0, %xmm0
8482
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
8583
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0

llvm/test/CodeGen/X86/mul-constant-i64.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,11 @@ define i64 @test_mul_by_6(i64 %x) {
166166
define i64 @test_mul_by_7(i64 %x) {
167167
; X86-LABEL: test_mul_by_7:
168168
; X86: # %bb.0:
169-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
170-
; X86-NEXT: leal (,%eax,8), %ecx
171-
; X86-NEXT: subl %eax, %ecx
169+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
172170
; X86-NEXT: movl $7, %eax
173171
; X86-NEXT: mull {{[0-9]+}}(%esp)
174-
; X86-NEXT: addl %ecx, %edx
172+
; X86-NEXT: leal (%edx,%ecx,8), %edx
173+
; X86-NEXT: subl %ecx, %edx
175174
; X86-NEXT: retl
176175
;
177176
; X86-NOOPT-LABEL: test_mul_by_7:
@@ -733,16 +732,17 @@ define i64 @test_mul_by_22(i64 %x) {
733732
ret i64 %mul
734733
}
735734

736-
define i64 @test_mul_by_23(i64 %x) {
735+
define i64 @test_mul_by_23(i64 %x) nounwind {
737736
; X86-LABEL: test_mul_by_23:
738737
; X86: # %bb.0:
739-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
740-
; X86-NEXT: leal (%eax,%eax,2), %ecx
741-
; X86-NEXT: shll $3, %ecx
742-
; X86-NEXT: subl %eax, %ecx
738+
; X86-NEXT: pushl %esi
739+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
740+
; X86-NEXT: leal (%ecx,%ecx,2), %esi
743741
; X86-NEXT: movl $23, %eax
744742
; X86-NEXT: mull {{[0-9]+}}(%esp)
745-
; X86-NEXT: addl %ecx, %edx
743+
; X86-NEXT: leal (%edx,%esi,8), %edx
744+
; X86-NEXT: subl %ecx, %edx
745+
; X86-NEXT: popl %esi
746746
; X86-NEXT: retl
747747
;
748748
; X86-NOOPT-LABEL: test_mul_by_23:

0 commit comments

Comments
 (0)