Skip to content

Commit 3a677d3

Browse files
committed
[X86] Improve transform for add-like nodes to add
We previously did this only in tablegen, but that approach catches far fewer potential cases.
1 parent d1db5f3 commit 3a677d3

File tree

115 files changed

+1241
-1139
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

115 files changed

+1241
-1139
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5295,6 +5295,20 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
52955295
if (tryVPTERNLOG(Node))
52965296
return;
52975297

5298+
// Convert addlike to add before final selection. Do this before we drop
5299+
// flags like `disjoint`.
5300+
// NB: Conversion to add is preferable so we use `lea` in codegen.
5301+
if (NVT.isScalarInteger() &&
5302+
(Opcode == ISD::OR ||
5303+
(NVT == MVT::i8 || NVT == MVT::i16 || NVT == MVT::i32)) &&
5304+
CurDAG->isADDLike(SDValue(Node, 0))) {
5305+
SDValue AsAdd = CurDAG->getNode(ISD::ADD, SDLoc(Node), NVT,
5306+
Node->getOperand(0), Node->getOperand(1));
5307+
ReplaceUses(SDValue(Node, 0), AsAdd);
5308+
CurDAG->RemoveDeadNode(Node);
5309+
Node = AsAdd.getNode();
5310+
Opcode = ISD::ADD;
5311+
}
52985312
[[fallthrough]];
52995313
case ISD::ADD:
53005314
if (Opcode == ISD::ADD && matchBitExtract(Node))

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,21 +1560,40 @@ let Predicates = [HasNDD] in {
15601560
}
15611561

15621562
// Depositing value to 8/16 bit subreg:
1563-
def : Pat<(or (and GR64:$dst, -256),
1563+
def : Pat<(or (and GR64:$dst, -256),
15641564
(i64 (zextloadi8 addr:$src))),
1565-
(INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
1565+
(INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
15661566

1567-
def : Pat<(or (and GR32:$dst, -256),
1567+
def : Pat<(or (and GR32:$dst, -256),
15681568
(i32 (zextloadi8 addr:$src))),
1569-
(INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
1569+
(INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
15701570

1571-
def : Pat<(or (and GR64:$dst, -65536),
1571+
def : Pat<(or (and GR64:$dst, -65536),
15721572
(i64 (zextloadi16 addr:$src))),
15731573
(INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
15741574

1575-
def : Pat<(or (and GR32:$dst, -65536),
1575+
def : Pat<(or (and GR32:$dst, -65536),
15761576
(i32 (zextloadi16 addr:$src))),
1577-
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
1577+
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
1578+
1579+
// Same pattern as above but supporting `add` as the join
1580+
// operator. Need to support `add` as well, as we can convert `or` ->
1581+
// `add` when the `or` is `disjoint` (as in this pattern's case).
1582+
def : Pat<(add (and GR64:$dst, -256),
1583+
(i64 (zextloadi8 addr:$src))),
1584+
(INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
1585+
1586+
def : Pat<(add (and GR32:$dst, -256),
1587+
(i32 (zextloadi8 addr:$src))),
1588+
(INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
1589+
1590+
def : Pat<(add (and GR64:$dst, -65536),
1591+
(i64 (zextloadi16 addr:$src))),
1592+
(INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
1593+
1594+
def : Pat<(add (and GR32:$dst, -65536),
1595+
(i32 (zextloadi16 addr:$src))),
1596+
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
15781597

15791598
// To avoid needing to materialize an immediate in a register, use a 32-bit and
15801599
// with implicit zero-extension instead of a 64-bit and if the immediate has at

llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ define i64 @foo(i64 %b) nounwind readnone {
2121
; CHECK: # %bb.0: # %entry
2222
; CHECK-NEXT: movsbq %dil, %rax
2323
; CHECK-NEXT: shlq $8, %rax
24-
; CHECK-NEXT: orq $1, %rax
24+
; CHECK-NEXT: incq %rax
2525
; CHECK-NEXT: retq
2626
entry:
2727
%shl = shl i64 %b, 56 ; <i64> [#uses=1]

llvm/test/CodeGen/X86/3addr-or.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ define i64 @test2(i8 %A, i8 %B) nounwind {
2424
; CHECK-NEXT: andl $48, %edi
2525
; CHECK-NEXT: movzbl %sil, %eax
2626
; CHECK-NEXT: shrl $4, %eax
27-
; CHECK-NEXT: orl %edi, %eax
27+
; CHECK-NEXT: addl %edi, %eax
2828
; CHECK-NEXT: retq
2929
%C = zext i8 %A to i64
3030
%D = shl i64 %C, 4
@@ -42,7 +42,7 @@ define void @test3(i32 %x, ptr %P) nounwind readnone ssp {
4242
; CHECK-LABEL: test3:
4343
; CHECK: # %bb.0:
4444
; CHECK-NEXT: shll $5, %edi
45-
; CHECK-NEXT: orl $3, %edi
45+
; CHECK-NEXT: addl $3, %edi
4646
; CHECK-NEXT: movl %edi, (%rsi)
4747
; CHECK-NEXT: retq
4848
%t0 = shl i32 %x, 5
@@ -71,7 +71,7 @@ define void @test5(i32 %a, i32 %b, ptr nocapture %P) nounwind ssp {
7171
; CHECK: # %bb.0:
7272
; CHECK-NEXT: andl $6, %edi
7373
; CHECK-NEXT: andl $16, %esi
74-
; CHECK-NEXT: orl %edi, %esi
74+
; CHECK-NEXT: addl %edi, %esi
7575
; CHECK-NEXT: movl %esi, (%rdx)
7676
; CHECK-NEXT: retq
7777
%and = and i32 %a, 6

llvm/test/CodeGen/X86/addcarry2.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ define void @adc_load_store_32_127(ptr inreg %x, ptr inreg %x2, i32 inreg %y) no
138138
; X64-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
139139
; X64-NEXT: shlq $32, %rax # encoding: [0x48,0xc1,0xe0,0x20]
140140
; X64-NEXT: movl %edx, %ecx # encoding: [0x89,0xd1]
141-
; X64-NEXT: orq %rax, %rcx # encoding: [0x48,0x09,0xc1]
141+
; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
142142
; X64-NEXT: movabsq $545460846593, %rax # encoding: [0x48,0xb8,0x01,0x00,0x00,0x00,0x7f,0x00,0x00,0x00]
143143
; X64-NEXT: # imm = 0x7F00000001
144144
; X64-NEXT: xorl %edx, %edx # encoding: [0x31,0xd2]
@@ -178,7 +178,7 @@ define void @adc_load_store_32_128(ptr inreg %x, ptr inreg %x2, i32 inreg %y) no
178178
; X64-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
179179
; X64-NEXT: shlq $32, %rax # encoding: [0x48,0xc1,0xe0,0x20]
180180
; X64-NEXT: movl %edx, %ecx # encoding: [0x89,0xd1]
181-
; X64-NEXT: orq %rax, %rcx # encoding: [0x48,0x09,0xc1]
181+
; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
182182
; X64-NEXT: movabsq $549755813889, %rax # encoding: [0x48,0xb8,0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00]
183183
; X64-NEXT: # imm = 0x8000000001
184184
; X64-NEXT: xorl %edx, %edx # encoding: [0x31,0xd2]

llvm/test/CodeGen/X86/and-or-fold.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ define i32 @test1(i32 %x, i16 %y) {
4545
; DARWIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
4646
; DARWIN-NEXT: movl {{[0-9]+}}(%esp), %eax
4747
; DARWIN-NEXT: shll $16, %eax
48-
; DARWIN-NEXT: orl %ecx, %eax
48+
; DARWIN-NEXT: addl %ecx, %eax
4949
; DARWIN-NEXT: andl $16711807, %eax ## imm = 0xFF007F
5050
; DARWIN-NEXT: retl
5151
;
@@ -54,7 +54,7 @@ define i32 @test1(i32 %x, i16 %y) {
5454
; DARWIN-OPT-NEXT: andl $127, %esi
5555
; DARWIN-OPT-NEXT: movzbl %dil, %eax
5656
; DARWIN-OPT-NEXT: shll $16, %eax
57-
; DARWIN-OPT-NEXT: orl %esi, %eax
57+
; DARWIN-OPT-NEXT: addl %esi, %eax
5858
; DARWIN-OPT-NEXT: retq
5959
%tmp1 = zext i16 %y to i32
6060
%tmp2 = and i32 %tmp1, 127

llvm/test/CodeGen/X86/andimm8.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,15 @@ define void @foo(i64 %zed, ptr %x) nounwind {
2929
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
3030
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x08]
3131
; X86-NEXT: andl $-4, %ecx # encoding: [0x83,0xe1,0xfc]
32-
; X86-NEXT: orl $2, %ecx # encoding: [0x83,0xc9,0x02]
32+
; X86-NEXT: addl $2, %ecx # encoding: [0x83,0xc1,0x02]
3333
; X86-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04]
3434
; X86-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
3535
; X86-NEXT: retl # encoding: [0xc3]
3636
;
3737
; X64-LABEL: foo:
3838
; X64: # %bb.0:
3939
; X64-NEXT: andq $-4, %rdi # encoding: [0x48,0x83,0xe7,0xfc]
40-
; X64-NEXT: orq $2, %rdi # encoding: [0x48,0x83,0xcf,0x02]
40+
; X64-NEXT: addq $2, %rdi # encoding: [0x48,0x83,0xc7,0x02]
4141
; X64-NEXT: movq %rdi, (%rsi) # encoding: [0x48,0x89,0x3e]
4242
; X64-NEXT: retq # encoding: [0xc3]
4343
%t1 = and i64 %zed, -4

llvm/test/CodeGen/X86/atomic-unordered.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2359,7 +2359,7 @@ define i16 @load_combine(ptr %p) {
23592359
; CHECK-O3-NEXT: movzbl (%rdi), %ecx
23602360
; CHECK-O3-NEXT: movzbl 1(%rdi), %eax
23612361
; CHECK-O3-NEXT: shll $8, %eax
2362-
; CHECK-O3-NEXT: orl %ecx, %eax
2362+
; CHECK-O3-NEXT: addl %ecx, %eax
23632363
; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax
23642364
; CHECK-O3-NEXT: retq
23652365
%v1 = load atomic i8, ptr %p unordered, align 2

0 commit comments

Comments
 (0)