Skip to content

Commit 6ec889e

Browse files
committed
[DAG] Add support for neg(abd(x,y)) patterns.
Currently limited to cases that have legal/custom ABDS/ABDU handling - I'll extend this to all targets in the future (similar to how we support neg(abs(x))) once I've addressed some outstanding regressions on AArch64/RISC-V. This helps avoid a lot of extra cmov instructions, on x86 in particular, and allows us to more easily improve the codegen in future commits.
1 parent 1f70fce commit 6ec889e

File tree

4 files changed

+154
-132
lines changed

4 files changed

+154
-132
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4103,12 +4103,24 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
41034103
sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
41044104
return DAG.getNode(ISD::ABDS, DL, VT, A, B);
41054105

4106+
// smin(a,b) - smax(a,b) --> neg(abds(a,b))
4107+
if (hasOperation(ISD::ABDS, VT) &&
4108+
sd_match(N0, m_SMin(m_Value(A), m_Value(B))) &&
4109+
sd_match(N1, m_SMax(m_Specific(A), m_Specific(B))))
4110+
return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT);
4111+
41064112
// umax(a,b) - umin(a,b) --> abdu(a,b)
41074113
if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
41084114
sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
41094115
sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
41104116
return DAG.getNode(ISD::ABDU, DL, VT, A, B);
41114117

4118+
// umin(a,b) - umax(a,b) --> neg(abdu(a,b))
4119+
if (hasOperation(ISD::ABDU, VT) &&
4120+
sd_match(N0, m_UMin(m_Value(A), m_Value(B))) &&
4121+
sd_match(N1, m_UMax(m_Specific(A), m_Specific(B))))
4122+
return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT);
4123+
41124124
return SDValue();
41134125
}
41144126

@@ -11605,6 +11617,10 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
1160511617
if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
1160611618
sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))))
1160711619
return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11620+
if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11621+
sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11622+
hasOperation(ABDOpc, VT))
11623+
return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
1160811624
break;
1160911625
case ISD::SETLT:
1161011626
case ISD::SETLE:
@@ -11613,6 +11629,10 @@ SDValue DAGCombiner::foldSelectToABD(SDValue LHS, SDValue RHS, SDValue True,
1161311629
if (sd_match(True, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
1161411630
sd_match(False, m_Sub(m_Specific(LHS), m_Specific(RHS))))
1161511631
return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
11632+
if (sd_match(True, m_Sub(m_Specific(LHS), m_Specific(RHS))) &&
11633+
sd_match(False, m_Sub(m_Specific(RHS), m_Specific(LHS))) &&
11634+
hasOperation(ABDOpc, VT))
11635+
return DAG.getNegative(DAG.getNode(ABDOpc, DL, VT, LHS, RHS), DL, VT);
1161611636
break;
1161711637
default:
1161811638
break;

llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,13 +1933,19 @@ define <16 x i8> @absd_int8_sle(<16 x i8>, <16 x i8>) {
19331933
; some cases we are unable to optimize
19341934
; check whether goes beyond the scope
19351935
define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
1936-
; CHECK-LABEL: absd_int32_ugt_opp:
1937-
; CHECK: # %bb.0:
1938-
; CHECK-NEXT: vcmpgtuw v4, v2, v3
1939-
; CHECK-NEXT: vsubuwm v5, v2, v3
1940-
; CHECK-NEXT: vsubuwm v2, v3, v2
1941-
; CHECK-NEXT: xxsel v2, v5, v2, v4
1942-
; CHECK-NEXT: blr
1936+
; CHECK-PWR9-LABEL: absd_int32_ugt_opp:
1937+
; CHECK-PWR9: # %bb.0:
1938+
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
1939+
; CHECK-PWR9-NEXT: vnegw v2, v2
1940+
; CHECK-PWR9-NEXT: blr
1941+
;
1942+
; CHECK-PWR78-LABEL: absd_int32_ugt_opp:
1943+
; CHECK-PWR78: # %bb.0:
1944+
; CHECK-PWR78-NEXT: vcmpgtuw v4, v2, v3
1945+
; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
1946+
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
1947+
; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
1948+
; CHECK-PWR78-NEXT: blr
19431949
%3 = icmp ugt <4 x i32> %0, %1
19441950
%4 = sub <4 x i32> %0, %1
19451951
%5 = sub <4 x i32> %1, %0

llvm/test/CodeGen/X86/abds-neg.ll

Lines changed: 67 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -522,23 +522,25 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind {
522522
define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
523523
; X86-LABEL: abd_minmax_i8:
524524
; X86: # %bb.0:
525-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
526-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
527-
; X86-NEXT: cmpb %cl, %dl
525+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
526+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
527+
; X86-NEXT: subl %eax, %ecx
528528
; X86-NEXT: movl %ecx, %eax
529-
; X86-NEXT: cmovll %edx, %eax
530-
; X86-NEXT: cmovgl %edx, %ecx
531-
; X86-NEXT: subb %cl, %al
529+
; X86-NEXT: negl %eax
530+
; X86-NEXT: cmovsl %ecx, %eax
531+
; X86-NEXT: negb %al
532532
; X86-NEXT: # kill: def $al killed $al killed $eax
533533
; X86-NEXT: retl
534534
;
535535
; X64-LABEL: abd_minmax_i8:
536536
; X64: # %bb.0:
537-
; X64-NEXT: cmpb %sil, %dil
538-
; X64-NEXT: movl %esi, %eax
539-
; X64-NEXT: cmovll %edi, %eax
540-
; X64-NEXT: cmovgl %edi, %esi
541-
; X64-NEXT: subb %sil, %al
537+
; X64-NEXT: movsbl %sil, %eax
538+
; X64-NEXT: movsbl %dil, %ecx
539+
; X64-NEXT: subl %eax, %ecx
540+
; X64-NEXT: movl %ecx, %eax
541+
; X64-NEXT: negl %eax
542+
; X64-NEXT: cmovsl %ecx, %eax
543+
; X64-NEXT: negb %al
542544
; X64-NEXT: # kill: def $al killed $al killed $eax
543545
; X64-NEXT: retq
544546
%min = call i8 @llvm.smin.i8(i8 %a, i8 %b)
@@ -550,23 +552,23 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
550552
define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
551553
; X86-LABEL: abd_minmax_i16:
552554
; X86: # %bb.0:
553-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
554-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
555-
; X86-NEXT: cmpw %cx, %dx
555+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
556+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
557+
; X86-NEXT: subl %eax, %ecx
556558
; X86-NEXT: movl %ecx, %eax
557-
; X86-NEXT: cmovll %edx, %eax
558-
; X86-NEXT: cmovgl %edx, %ecx
559-
; X86-NEXT: subl %ecx, %eax
559+
; X86-NEXT: negl %eax
560+
; X86-NEXT: cmovnsl %ecx, %eax
560561
; X86-NEXT: # kill: def $ax killed $ax killed $eax
561562
; X86-NEXT: retl
562563
;
563564
; X64-LABEL: abd_minmax_i16:
564565
; X64: # %bb.0:
565-
; X64-NEXT: cmpw %si, %di
566-
; X64-NEXT: movl %esi, %eax
567-
; X64-NEXT: cmovll %edi, %eax
568-
; X64-NEXT: cmovgl %edi, %esi
569-
; X64-NEXT: subl %esi, %eax
566+
; X64-NEXT: movswl %si, %eax
567+
; X64-NEXT: movswl %di, %ecx
568+
; X64-NEXT: subl %eax, %ecx
569+
; X64-NEXT: movl %ecx, %eax
570+
; X64-NEXT: negl %eax
571+
; X64-NEXT: cmovnsl %ecx, %eax
570572
; X64-NEXT: # kill: def $ax killed $ax killed $eax
571573
; X64-NEXT: retq
572574
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -578,22 +580,22 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
578580
define i32 @abd_minmax_i32(i32 %a, i32 %b) nounwind {
579581
; X86-LABEL: abd_minmax_i32:
580582
; X86: # %bb.0:
583+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
581584
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
582-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
583-
; X86-NEXT: cmpl %ecx, %edx
584-
; X86-NEXT: movl %ecx, %eax
585-
; X86-NEXT: cmovll %edx, %eax
586-
; X86-NEXT: cmovgl %edx, %ecx
585+
; X86-NEXT: movl %ecx, %edx
586+
; X86-NEXT: subl %eax, %edx
587587
; X86-NEXT: subl %ecx, %eax
588+
; X86-NEXT: cmovll %edx, %eax
589+
; X86-NEXT: negl %eax
588590
; X86-NEXT: retl
589591
;
590592
; X64-LABEL: abd_minmax_i32:
591593
; X64: # %bb.0:
592-
; X64-NEXT: cmpl %esi, %edi
593-
; X64-NEXT: movl %esi, %eax
594-
; X64-NEXT: cmovll %edi, %eax
595-
; X64-NEXT: cmovgl %edi, %esi
594+
; X64-NEXT: movl %edi, %eax
596595
; X64-NEXT: subl %esi, %eax
596+
; X64-NEXT: subl %edi, %esi
597+
; X64-NEXT: cmovgel %esi, %eax
598+
; X64-NEXT: negl %eax
597599
; X64-NEXT: retq
598600
%min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
599601
%max = call i32 @llvm.smax.i32(i32 %a, i32 %b)
@@ -634,11 +636,11 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind {
634636
;
635637
; X64-LABEL: abd_minmax_i64:
636638
; X64: # %bb.0:
637-
; X64-NEXT: cmpq %rsi, %rdi
638-
; X64-NEXT: movq %rsi, %rax
639-
; X64-NEXT: cmovlq %rdi, %rax
640-
; X64-NEXT: cmovgq %rdi, %rsi
639+
; X64-NEXT: movq %rdi, %rax
641640
; X64-NEXT: subq %rsi, %rax
641+
; X64-NEXT: subq %rdi, %rsi
642+
; X64-NEXT: cmovgeq %rsi, %rax
643+
; X64-NEXT: negq %rax
642644
; X64-NEXT: retq
643645
%min = call i64 @llvm.smin.i64(i64 %a, i64 %b)
644646
%max = call i64 @llvm.smax.i64(i64 %a, i64 %b)
@@ -736,27 +738,25 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind {
736738
define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
737739
; X86-LABEL: abd_cmp_i8:
738740
; X86: # %bb.0:
739-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
740-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
741-
; X86-NEXT: movl %eax, %edx
742-
; X86-NEXT: subb %cl, %dl
743-
; X86-NEXT: negb %dl
744-
; X86-NEXT: subb %cl, %al
745-
; X86-NEXT: movzbl %al, %ecx
746-
; X86-NEXT: movzbl %dl, %eax
747-
; X86-NEXT: cmovlel %ecx, %eax
741+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
742+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
743+
; X86-NEXT: subl %eax, %ecx
744+
; X86-NEXT: movl %ecx, %eax
745+
; X86-NEXT: negl %eax
746+
; X86-NEXT: cmovsl %ecx, %eax
747+
; X86-NEXT: negb %al
748748
; X86-NEXT: # kill: def $al killed $al killed $eax
749749
; X86-NEXT: retl
750750
;
751751
; X64-LABEL: abd_cmp_i8:
752752
; X64: # %bb.0:
753-
; X64-NEXT: movl %edi, %eax
754-
; X64-NEXT: subb %sil, %al
753+
; X64-NEXT: movsbl %sil, %eax
754+
; X64-NEXT: movsbl %dil, %ecx
755+
; X64-NEXT: subl %eax, %ecx
756+
; X64-NEXT: movl %ecx, %eax
757+
; X64-NEXT: negl %eax
758+
; X64-NEXT: cmovsl %ecx, %eax
755759
; X64-NEXT: negb %al
756-
; X64-NEXT: subb %sil, %dil
757-
; X64-NEXT: movzbl %dil, %ecx
758-
; X64-NEXT: movzbl %al, %eax
759-
; X64-NEXT: cmovlel %ecx, %eax
760760
; X64-NEXT: # kill: def $al killed $al killed $eax
761761
; X64-NEXT: retq
762762
%cmp = icmp sle i8 %a, %b
@@ -769,27 +769,23 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
769769
define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
770770
; X86-LABEL: abd_cmp_i16:
771771
; X86: # %bb.0:
772-
; X86-NEXT: pushl %esi
773-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
774-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
775-
; X86-NEXT: movl %ecx, %esi
776-
; X86-NEXT: subw %dx, %si
777-
; X86-NEXT: movl %esi, %eax
772+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
773+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
774+
; X86-NEXT: subl %eax, %ecx
775+
; X86-NEXT: movl %ecx, %eax
778776
; X86-NEXT: negl %eax
779-
; X86-NEXT: cmpw %dx, %cx
780-
; X86-NEXT: cmovll %esi, %eax
777+
; X86-NEXT: cmovnsl %ecx, %eax
781778
; X86-NEXT: # kill: def $ax killed $ax killed $eax
782-
; X86-NEXT: popl %esi
783779
; X86-NEXT: retl
784780
;
785781
; X64-LABEL: abd_cmp_i16:
786782
; X64: # %bb.0:
787-
; X64-NEXT: movl %edi, %ecx
788-
; X64-NEXT: subw %si, %cx
783+
; X64-NEXT: movswl %si, %eax
784+
; X64-NEXT: movswl %di, %ecx
785+
; X64-NEXT: subl %eax, %ecx
789786
; X64-NEXT: movl %ecx, %eax
790787
; X64-NEXT: negl %eax
791-
; X64-NEXT: cmpw %si, %di
792-
; X64-NEXT: cmovll %ecx, %eax
788+
; X64-NEXT: cmovnsl %ecx, %eax
793789
; X64-NEXT: # kill: def $ax killed $ax killed $eax
794790
; X64-NEXT: retq
795791
%cmp = icmp slt i16 %a, %b
@@ -804,20 +800,20 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
804800
; X86: # %bb.0:
805801
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
806802
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
807-
; X86-NEXT: movl %eax, %edx
808-
; X86-NEXT: subl %ecx, %edx
809-
; X86-NEXT: negl %edx
803+
; X86-NEXT: movl %ecx, %edx
804+
; X86-NEXT: subl %eax, %edx
810805
; X86-NEXT: subl %ecx, %eax
811-
; X86-NEXT: cmovgel %edx, %eax
806+
; X86-NEXT: cmovll %edx, %eax
807+
; X86-NEXT: negl %eax
812808
; X86-NEXT: retl
813809
;
814810
; X64-LABEL: abd_cmp_i32:
815811
; X64: # %bb.0:
816812
; X64-NEXT: movl %edi, %eax
817813
; X64-NEXT: subl %esi, %eax
814+
; X64-NEXT: subl %edi, %esi
815+
; X64-NEXT: cmovgel %esi, %eax
818816
; X64-NEXT: negl %eax
819-
; X64-NEXT: subl %esi, %edi
820-
; X64-NEXT: cmovll %edi, %eax
821817
; X64-NEXT: retq
822818
%cmp = icmp sge i32 %a, %b
823819
%ab = sub i32 %a, %b
@@ -853,9 +849,9 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
853849
; X64: # %bb.0:
854850
; X64-NEXT: movq %rdi, %rax
855851
; X64-NEXT: subq %rsi, %rax
852+
; X64-NEXT: subq %rdi, %rsi
853+
; X64-NEXT: cmovgeq %rsi, %rax
856854
; X64-NEXT: negq %rax
857-
; X64-NEXT: subq %rsi, %rdi
858-
; X64-NEXT: cmovlq %rdi, %rax
859855
; X64-NEXT: retq
860856
%cmp = icmp slt i64 %a, %b
861857
%ab = sub i64 %a, %b

0 commit comments

Comments
 (0)