Skip to content

Commit b25b9a7

Browse files
committed
[DAG] visitSELECT - add "select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)" fold (and neg equivalent)
Handle cases where CodeGenPrepare (CGP) has already merged the CMP+SUB pair into a single USUBO node; this improves a few outstanding codegen niggles from #100810.
1 parent 7afdc6b commit b25b9a7

File tree

3 files changed

+89
-75
lines changed

3 files changed

+89
-75
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11719,6 +11719,24 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
1171911719
N2_2, Flags);
1172011720
}
1172111721
}
11722+
11723+
// select usubo(x, y).overflow, (sub y, x), (usubo x, y) -> abdu(x, y)
11724+
if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
11725+
N2.getNode() == N0.getNode() && N2.getResNo() == 0 &&
11726+
N1.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
11727+
N2.getOperand(1) == N1.getOperand(0) &&
11728+
(!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
11729+
return DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1));
11730+
11731+
// select usubo(x, y).overflow, (usubo x, y), (sub y, x) -> neg (abdu x, y)
11732+
if (N0.getOpcode() == ISD::USUBO && N0.getResNo() == 1 &&
11733+
N1.getNode() == N0.getNode() && N1.getResNo() == 0 &&
11734+
N2.getOpcode() == ISD::SUB && N2.getOperand(0) == N1.getOperand(1) &&
11735+
N2.getOperand(1) == N1.getOperand(0) &&
11736+
(!LegalOperations || TLI.isOperationLegal(ISD::ABDU, VT)))
11737+
return DAG.getNegative(
11738+
DAG.getNode(ISD::ABDU, DL, VT, N0.getOperand(0), N0.getOperand(1)),
11739+
DL, VT);
1172211740
}
1172311741

1172411742
// Fold selects based on a setcc into other things, such as min/max/abs.
@@ -11776,6 +11794,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
1177611794
return SelectNode;
1177711795
}
1177811796

11797+
if (SDValue ABD = foldSelectToABD(Cond0, Cond1, N1, N2, CC, DL))
11798+
return ABD;
11799+
1177911800
if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
1178011801
return NewSel;
1178111802
}

llvm/test/CodeGen/X86/abdu-neg.ll

Lines changed: 55 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -751,27 +751,23 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
751751
define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
752752
; X86-LABEL: abd_cmp_i16:
753753
; X86: # %bb.0:
754-
; X86-NEXT: pushl %esi
754+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
755755
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
756-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
757-
; X86-NEXT: movl %ecx, %esi
758-
; X86-NEXT: subw %dx, %si
759-
; X86-NEXT: movl %esi, %eax
756+
; X86-NEXT: subl %eax, %ecx
757+
; X86-NEXT: movl %ecx, %eax
760758
; X86-NEXT: negl %eax
761-
; X86-NEXT: cmpw %dx, %cx
762-
; X86-NEXT: cmovbl %esi, %eax
759+
; X86-NEXT: cmovnsl %ecx, %eax
763760
; X86-NEXT: # kill: def $ax killed $ax killed $eax
764-
; X86-NEXT: popl %esi
765761
; X86-NEXT: retl
766762
;
767763
; X64-LABEL: abd_cmp_i16:
768764
; X64: # %bb.0:
769-
; X64-NEXT: movl %edi, %ecx
770-
; X64-NEXT: subw %si, %cx
765+
; X64-NEXT: movzwl %si, %eax
766+
; X64-NEXT: movzwl %di, %ecx
767+
; X64-NEXT: subl %eax, %ecx
771768
; X64-NEXT: movl %ecx, %eax
772769
; X64-NEXT: negl %eax
773-
; X64-NEXT: cmpw %si, %di
774-
; X64-NEXT: cmovbl %ecx, %eax
770+
; X64-NEXT: cmovnsl %ecx, %eax
775771
; X64-NEXT: # kill: def $ax killed $ax killed $eax
776772
; X64-NEXT: retq
777773
%cmp = icmp ult i16 %a, %b
@@ -811,33 +807,30 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
811807
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
812808
; X86-LABEL: abd_cmp_i64:
813809
; X86: # %bb.0:
814-
; X86-NEXT: pushl %ebx
815-
; X86-NEXT: pushl %edi
816810
; X86-NEXT: pushl %esi
817811
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
818-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
819812
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
820-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
821-
; X86-NEXT: movl %ecx, %edi
822-
; X86-NEXT: subl %eax, %edi
823-
; X86-NEXT: movl %esi, %ebx
824-
; X86-NEXT: sbbl %edx, %ebx
825-
; X86-NEXT: subl %ecx, %eax
826-
; X86-NEXT: sbbl %esi, %edx
827-
; X86-NEXT: cmovael %edi, %eax
828-
; X86-NEXT: cmovael %ebx, %edx
813+
; X86-NEXT: xorl %edx, %edx
814+
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
815+
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
816+
; X86-NEXT: movl $0, %esi
817+
; X86-NEXT: sbbl %esi, %esi
818+
; X86-NEXT: xorl %esi, %ecx
819+
; X86-NEXT: xorl %esi, %eax
820+
; X86-NEXT: subl %esi, %eax
821+
; X86-NEXT: sbbl %esi, %ecx
822+
; X86-NEXT: negl %eax
823+
; X86-NEXT: sbbl %ecx, %edx
829824
; X86-NEXT: popl %esi
830-
; X86-NEXT: popl %edi
831-
; X86-NEXT: popl %ebx
832825
; X86-NEXT: retl
833826
;
834827
; X64-LABEL: abd_cmp_i64:
835828
; X64: # %bb.0:
836829
; X64-NEXT: movq %rdi, %rax
837830
; X64-NEXT: subq %rsi, %rax
831+
; X64-NEXT: subq %rdi, %rsi
832+
; X64-NEXT: cmovaeq %rsi, %rax
838833
; X64-NEXT: negq %rax
839-
; X64-NEXT: subq %rsi, %rdi
840-
; X64-NEXT: cmovbq %rdi, %rax
841834
; X64-NEXT: retq
842835
%cmp = icmp ult i64 %a, %b
843836
%ab = sub i64 %a, %b
@@ -853,34 +846,36 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
853846
; X86-NEXT: pushl %ebx
854847
; X86-NEXT: pushl %edi
855848
; X86-NEXT: pushl %esi
856-
; X86-NEXT: pushl %eax
849+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
857850
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
858851
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
859-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
860-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
861852
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
862-
; X86-NEXT: subl %edx, %eax
863-
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
864-
; X86-NEXT: sbbl %esi, %ebx
865-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
866-
; X86-NEXT: sbbl %ecx, %ebp
867-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
868853
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
869-
; X86-NEXT: sbbl %edi, %eax
854+
; X86-NEXT: xorl %edi, %edi
870855
; X86-NEXT: subl {{[0-9]+}}(%esp), %edx
856+
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx
871857
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi
872858
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
873-
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi
874-
; X86-NEXT: cmovael (%esp), %edx # 4-byte Folded Reload
875-
; X86-NEXT: cmovael %ebx, %esi
876-
; X86-NEXT: cmovael %ebp, %ecx
877-
; X86-NEXT: cmovael %eax, %edi
878-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
879-
; X86-NEXT: movl %edi, 12(%eax)
880-
; X86-NEXT: movl %ecx, 8(%eax)
881-
; X86-NEXT: movl %esi, 4(%eax)
859+
; X86-NEXT: movl $0, %ebp
860+
; X86-NEXT: sbbl %ebp, %ebp
861+
; X86-NEXT: xorl %ebp, %ecx
862+
; X86-NEXT: xorl %ebp, %esi
863+
; X86-NEXT: xorl %ebp, %ebx
864+
; X86-NEXT: xorl %ebp, %edx
865+
; X86-NEXT: subl %ebp, %edx
866+
; X86-NEXT: sbbl %ebp, %ebx
867+
; X86-NEXT: sbbl %ebp, %esi
868+
; X86-NEXT: sbbl %ebp, %ecx
869+
; X86-NEXT: negl %edx
870+
; X86-NEXT: movl $0, %ebp
871+
; X86-NEXT: sbbl %ebx, %ebp
872+
; X86-NEXT: movl $0, %ebx
873+
; X86-NEXT: sbbl %esi, %ebx
874+
; X86-NEXT: sbbl %ecx, %edi
882875
; X86-NEXT: movl %edx, (%eax)
883-
; X86-NEXT: addl $4, %esp
876+
; X86-NEXT: movl %ebp, 4(%eax)
877+
; X86-NEXT: movl %ebx, 8(%eax)
878+
; X86-NEXT: movl %edi, 12(%eax)
884879
; X86-NEXT: popl %esi
885880
; X86-NEXT: popl %edi
886881
; X86-NEXT: popl %ebx
@@ -889,15 +884,19 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
889884
;
890885
; X64-LABEL: abd_cmp_i128:
891886
; X64: # %bb.0:
892-
; X64-NEXT: movq %rdx, %rax
893-
; X64-NEXT: subq %rdi, %rax
894-
; X64-NEXT: movq %rcx, %r8
895-
; X64-NEXT: sbbq %rsi, %r8
896-
; X64-NEXT: subq %rdx, %rdi
887+
; X64-NEXT: movq %rdi, %rax
888+
; X64-NEXT: xorl %edi, %edi
889+
; X64-NEXT: subq %rdx, %rax
897890
; X64-NEXT: sbbq %rcx, %rsi
898-
; X64-NEXT: cmovbq %rdi, %rax
899-
; X64-NEXT: cmovbq %rsi, %r8
900-
; X64-NEXT: movq %r8, %rdx
891+
; X64-NEXT: movl $0, %ecx
892+
; X64-NEXT: sbbq %rcx, %rcx
893+
; X64-NEXT: xorq %rcx, %rsi
894+
; X64-NEXT: xorq %rcx, %rax
895+
; X64-NEXT: subq %rcx, %rax
896+
; X64-NEXT: sbbq %rcx, %rsi
897+
; X64-NEXT: negq %rax
898+
; X64-NEXT: sbbq %rsi, %rdi
899+
; X64-NEXT: movq %rdi, %rdx
901900
; X64-NEXT: retq
902901
%cmp = icmp ult i128 %a, %b
903902
%ab = sub i128 %a, %b

llvm/test/CodeGen/X86/abdu.ll

Lines changed: 13 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -608,25 +608,21 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
608608
; X86: # %bb.0:
609609
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
610610
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
611-
; X86-NEXT: movl %eax, %edx
612-
; X86-NEXT: subb %cl, %dl
613-
; X86-NEXT: negb %dl
614-
; X86-NEXT: subb %cl, %al
615-
; X86-NEXT: movzbl %al, %ecx
616-
; X86-NEXT: movzbl %dl, %eax
617-
; X86-NEXT: cmovael %ecx, %eax
611+
; X86-NEXT: subl %eax, %ecx
612+
; X86-NEXT: movl %ecx, %eax
613+
; X86-NEXT: negl %eax
614+
; X86-NEXT: cmovsl %ecx, %eax
618615
; X86-NEXT: # kill: def $al killed $al killed $eax
619616
; X86-NEXT: retl
620617
;
621618
; X64-LABEL: abd_cmp_i8:
622619
; X64: # %bb.0:
623-
; X64-NEXT: movl %esi, %eax
624-
; X64-NEXT: subb %dil, %al
625-
; X64-NEXT: negb %al
626-
; X64-NEXT: subb %dil, %sil
620+
; X64-NEXT: movzbl %dil, %eax
627621
; X64-NEXT: movzbl %sil, %ecx
628-
; X64-NEXT: movzbl %al, %eax
629-
; X64-NEXT: cmovael %ecx, %eax
622+
; X64-NEXT: subl %eax, %ecx
623+
; X64-NEXT: movl %ecx, %eax
624+
; X64-NEXT: negl %eax
625+
; X64-NEXT: cmovsl %ecx, %eax
630626
; X64-NEXT: # kill: def $al killed $al killed $eax
631627
; X64-NEXT: retq
632628
%cmp = icmp ugt i8 %a, %b
@@ -670,9 +666,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
670666
; X86: # %bb.0:
671667
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
672668
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
673-
; X86-NEXT: movl %eax, %edx
674-
; X86-NEXT: subl %ecx, %edx
675-
; X86-NEXT: negl %edx
669+
; X86-NEXT: movl %ecx, %edx
670+
; X86-NEXT: subl %eax, %edx
676671
; X86-NEXT: subl %ecx, %eax
677672
; X86-NEXT: cmovbl %edx, %eax
678673
; X86-NEXT: retl
@@ -681,9 +676,8 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
681676
; X64: # %bb.0:
682677
; X64-NEXT: movl %edi, %eax
683678
; X64-NEXT: subl %esi, %eax
684-
; X64-NEXT: negl %eax
685-
; X64-NEXT: subl %esi, %edi
686-
; X64-NEXT: cmovael %edi, %eax
679+
; X64-NEXT: subl %edi, %esi
680+
; X64-NEXT: cmovael %esi, %eax
687681
; X64-NEXT: retq
688682
%cmp = icmp ult i32 %a, %b
689683
%ab = sub i32 %a, %b

0 commit comments

Comments
 (0)