Skip to content

Topodagworklistx86 #77475

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
81 changes: 59 additions & 22 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1817,13 +1817,15 @@ void DAGCombiner::Run(CombineLevel AtLevel) {

WorklistInserter AddNodes(*this);

DAG.AssignTopologicalOrder();

// Add all the dag nodes to the worklist.
//
// Note: Not all nodes are added to PruningList here. The only nodes which
// can be deleted are those which have no uses, and all other nodes which
// would otherwise be added to the worklist by the first call to
// getNextWorklistEntry are already present in it.
for (SDNode &Node : DAG.allnodes())
for (SDNode &Node : reverse(DAG.allnodes()))
AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());

// Create a dummy node (which is not added to allnodes), that adds a reference
Expand Down Expand Up @@ -3488,7 +3490,6 @@ static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
return SDValue();
}


auto cancelDiamond = [&](SDValue A,SDValue B) {
SDLoc DL(N);
SDValue NewY =
Expand Down Expand Up @@ -3559,46 +3560,82 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
SDValue Carry0 = getAsCarry(TLI, N0);
if (!Carry0)
return SDValue();

SDValue Carry1 = getAsCarry(TLI, N1);
if (!Carry1)
return SDValue();

unsigned Opcode = Carry0.getOpcode();
if (Opcode != Carry1.getOpcode())
// Try to view N as an overflow-producing add/sub of two values.
//
// Accepted forms:
//   * UADDO / USUBO: A and B are operands 0 and 1.
//   * UADDO_CARRY / USUBO_CARRY whose operand 1 is a null constant:
//     A is operand 0 and B is operand 2.
//
// IsAdd is always written (true for UADDO / UADDO_CARRY, false otherwise),
// even when the match fails; A and B are only written on success.
// Returns true when N matched one of the accepted forms.
auto matchCarry = [](SDValue N, SDValue &A, SDValue &B, bool &IsAdd) {
  const unsigned Opc = N.getOpcode();
  IsAdd = (Opc == ISD::UADDO_CARRY || Opc == ISD::UADDO);

  const bool IsPlainForm = (Opc == ISD::UADDO || Opc == ISD::USUBO);
  const bool IsCarryForm =
      (Opc == ISD::UADDO_CARRY || Opc == ISD::USUBO_CARRY);

  // Operand 1 is only inspected for the *_CARRY opcodes (short-circuit).
  if (!IsPlainForm && !(IsCarryForm && isNullConstant(N.getOperand(1))))
    return false;

  // The second value sits at index 1 for the plain form and at index 2 for
  // the carry form (whose operand 1 was just checked to be zero).
  A = N.getOperand(0);
  B = N.getOperand(IsPlainForm ? 1 : 2);
  return true;
};

SDValue X, Y;
bool IsAdd;
if (!matchCarry(Carry0, X, Y, IsAdd))
return SDValue();

SDValue A, B;
bool IsAdd1;
if (!matchCarry(Carry1, A, B, IsAdd1))
return SDValue();
if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)

if (IsAdd != IsAdd1)
return SDValue();

unsigned NewOp = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
return SDValue();

// Canonicalize the add/sub of A and B (the top node in the above ASCII art)
// as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
if (Carry0.getValue(0) == A || Carry0.getValue(0) == B) {
std::swap(Carry0, Carry1);
std::swap(A, X);
std::swap(B, Y);
}

// Check if nodes are connected in expected way.
if (Carry1.getOperand(0) != Carry0.getValue(0) &&
Carry1.getOperand(1) != Carry0.getValue(0))
return SDValue();

// The carry in value must be on the righthand side for subtraction.
unsigned CarryInOperandNum =
Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
return SDValue();
SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);

unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
if (Carry1.getValue(0) != X && (!IsAdd || Carry1.getValue(0) != Y))
return SDValue();

// Verify that the carry/borrow in is plausibly a carry/borrow bit.
// TODO: make getAsCarry() aware of how partial carries are merged.
SDValue CarryIn = Carry1.getValue(0) == X ? Y : X;
CarryIn = getAsCarry(TLI, CarryIn, true);
if (!CarryIn)
return SDValue();

SDLoc DL(N);
SDValue Merged =
DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
Carry0.getOperand(1), CarryIn);
DAG.getNode(NewOp, DL, Carry1->getVTList(), A, B, CarryIn);

LLVM_DEBUG(
dbgs() << "Combine carry diamond:\n";
dbgs() << "\tCarry0: "; Carry0->dump();
dbgs() << "\tX: "; X->dump();
dbgs() << "\tY: "; Y->dump();
dbgs() << "\tCarry1: "; Carry1->dump();
dbgs() << "\tA: "; A->dump();
dbgs() << "\tB: "; B->dump();
dbgs() << "\tCarryIn: "; CarryIn->dump();
dbgs() << "\tMerged: "; Merged->dump();
dbgs() << "\n");

// Please note that because we have proven that the result of the UADDO/USUBO
// of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
Expand All @@ -3613,7 +3650,7 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
// carry flags; and that AND can return a constant zero.
//
// TODO: match other operations that can merge flags (ADD, etc)
DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
DAG.ReplaceAllUsesOfValueWith(Carry0.getValue(0), Merged.getValue(0));
if (N->getOpcode() == ISD::AND)
return DAG.getConstant(0, DL, MVT::i1);
return Merged.getValue(1);
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ define void @test(<1 x i64> %c64, <1 x i64> %mask1, ptr %P) {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT: movq (%esp), %mm0
; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm1
; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0
; CHECK-NEXT: movq (%esp), %mm1
; CHECK-NEXT: maskmovq %mm0, %mm1
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: popl %edi
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ define void @passing2(i64 %str.0, i64 %str.1, i16 signext %s, i32 %j, i8 signex
; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %ah, {{[0-9]+}}(%rsp)
; CHECK-NEXT: shrq $16, %rax
; CHECK-NEXT: shrl $16, %esi
; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp)
; CHECK-NEXT: shrl $24, %eax
; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %ah, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %r8b, {{[0-9]+}}(%rsp)
Expand Down
12 changes: 7 additions & 5 deletions llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ define i16 @f(i64 %x, double %y) {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: movsd %xmm1, atomic
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: movsd %xmm1, atomic2
; CHECK-NEXT: movsd %xmm0, anything
; CHECK-NEXT: movsd %xmm0, atomic
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movsd %xmm0, atomic2
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %ecx, anything+4
; CHECK-NEXT: movl %eax, anything
; CHECK-NEXT: movl ioport, %ecx
; CHECK-NEXT: movl ioport, %eax
; CHECK-NEXT: shrl $16, %eax
Expand Down
7 changes: 5 additions & 2 deletions llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ define ptr @test(ptr %a, ptr %L, ptr %P) nounwind {
; CHECK-LABEL: test:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: addl $-2, %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: addl %ecx, %edx
; CHECK-NEXT: subl %edx, %eax
; CHECK-NEXT: leal -2(%eax,%ecx), %eax
; CHECK-NEXT: retl
entry:
%0 = ptrtoint ptr %a to i32
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ define void @cpuid(ptr %data) nounwind {
; CHECK-NEXT: ## InlineAsm Start
; CHECK-NEXT: cpuid
; CHECK-NEXT: ## InlineAsm End
; CHECK-NEXT: movl %ebx, 8(%esi)
; CHECK-NEXT: movl %ecx, 12(%esi)
; CHECK-NEXT: movl %edx, 16(%esi)
; CHECK-NEXT: movl %ecx, 12(%esi)
; CHECK-NEXT: movl %ebx, 8(%esi)
; CHECK-NEXT: movl %eax, 4(%esi)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %ebx
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@ define fastcc i32 @cli_magic_scandesc(ptr %in) nounwind ssp {
; CHECK-NEXT: movq %rdx, (%rsp)
; CHECK-NEXT: movq 24(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 16(%rdi), %rdx
; CHECK-NEXT: movq 56(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 32(%rdi), %rdx
; CHECK-NEXT: movq 48(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 40(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 48(%rdi), %rdx
; CHECK-NEXT: movq 32(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 56(%rdi), %rdx
; CHECK-NEXT: movq 16(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %al, (%rsp)
; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq __stack_chk_guard(%rip), %rax
Expand Down
22 changes: 10 additions & 12 deletions llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,19 @@ target triple = "x86_64-unknown-linux-gnu"
define dso_local i32 @main() nounwind uwtable {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl i(%rip), %esi
; CHECK-NEXT: movl j(%rip), %eax
; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: movq i(%rip), %rdx
; CHECK-NEXT: movq j(%rip), %rsi
; CHECK-NEXT: movsbl %sil, %eax
; CHECK-NEXT: idivb %dl
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrl $8, %edx
; CHECK-NEXT: movsbl %al, %ecx
; CHECK-NEXT: shrl $8, %eax
; CHECK-NEXT: cbtw
; CHECK-NEXT: shrl $8, %esi
; CHECK-NEXT: movsbl %sil, %eax
; CHECK-NEXT: idivb %dl
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: idivb %sil
; CHECK-NEXT: movzbl %dl, %ecx
; CHECK-NEXT: movzbl %cl, %ecx
; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: movd %eax, %xmm0
; CHECK-NEXT: pinsrb $1, %ecx, %xmm0
; CHECK-NEXT: pinsrb $1, %eax, %xmm0
; CHECK-NEXT: pextrw $0, %xmm0, res(%rip)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
Expand Down
64 changes: 44 additions & 20 deletions llvm/test/CodeGen/X86/abds.ll
Original file line number Diff line number Diff line change
Expand Up @@ -129,21 +129,27 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i16_i32:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: xorl %esi, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i16_i32:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: movswq %di, %rcx
; X64-NEXT: movslq %esi, %rax
; X64-NEXT: movswl %di, %ecx
; X64-NEXT: movslq %ecx, %rcx
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
Expand Down Expand Up @@ -191,13 +197,19 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i32:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: xorl %esi, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32:
Expand All @@ -221,20 +233,26 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; X86-LABEL: abd_ext_i32_i16:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: xorl %esi, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32_i16:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movslq %edi, %rcx
; X64-NEXT: movswl %si, %eax
; X64-NEXT: cltq
; X64-NEXT: movswq %si, %rax
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
Expand All @@ -252,13 +270,19 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i32_undef:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: subl %ecx, %edx
; X86-NEXT: negl %edx
; X86-NEXT: movl %eax, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: cmovlel %edx, %eax
; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: sarl $31, %esi
; X86-NEXT: xorl %esi, %eax
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32_undef:
Expand Down
Loading