Skip to content

Commit b98aa6f

Browse files
committed
[X86] LowerABD - lower i8/i16 cases directly to CMOV(SUB(X,Y),SUB(Y,X)) pattern
This gives better codegen (a shorter dependency chain, and therefore better ILP) than going via the TRUNC(ABS(SUB(EXT(LHS),EXT(RHS)))) expansion.
1 parent d57be19 commit b98aa6f

File tree

5 files changed

+305
-292
lines changed

5 files changed

+305
-292
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28541,12 +28541,14 @@ static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
2854128541
bool IsSigned = Op.getOpcode() == ISD::ABDS;
2854228542
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2854328543

28544-
if (VT.isScalarInteger()) {
28544+
if (Subtarget.canUseCMOV() && VT.isScalarInteger()) {
28545+
X86::CondCode CC = IsSigned ? X86::COND_L : X86::COND_B;
28546+
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
28547+
2854528548
// abds(lhs, rhs) -> select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
2854628549
// abdu(lhs, rhs) -> select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
28547-
if (Subtarget.canUseCMOV() && VT.bitsGE(MVT::i32)) {
28550+
if (VT.bitsGE(MVT::i32)) {
2854828551
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
28549-
X86::CondCode CC = IsSigned ? X86::COND_L : X86::COND_B;
2855028552
SDValue LHS = DAG.getFreeze(Op.getOperand(0));
2855128553
SDValue RHS = DAG.getFreeze(Op.getOperand(1));
2855228554
SDValue Diff0 = DAG.getNode(X86ISD::SUB, dl, VTs, LHS, RHS);
@@ -28556,17 +28558,19 @@ static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
2855628558
Diff1.getValue(1));
2855728559
}
2855828560

28559-
// TODO: Move to TargetLowering expandABD() once we have ABD promotion.
2856028561
// abds(lhs, rhs) -> trunc(select(slt(x,y),sub(y,x),sub(x,y))) with x/y = sext(lhs/rhs)
2856128562
// abdu(lhs, rhs) -> trunc(select(ult(x,y),sub(y,x),sub(x,y))) with x/y = zext(lhs/rhs)
2856228563
unsigned WideBits = std::max<unsigned>(2 * VT.getScalarSizeInBits(), 32u);
2856328564
MVT WideVT = MVT::getIntegerVT(WideBits);
2856428565
if (TLI.isTypeLegal(WideVT)) {
28565-
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
28566+
SDVTList WideVTs = DAG.getVTList(WideVT, MVT::i32);
2856628567
SDValue LHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(0));
2856728568
SDValue RHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(1));
28568-
SDValue Diff = DAG.getNode(ISD::SUB, dl, WideVT, LHS, RHS);
28569-
SDValue AbsDiff = DAG.getNode(ISD::ABS, dl, WideVT, Diff);
28569+
SDValue Diff0 = DAG.getNode(X86ISD::SUB, dl, WideVTs, LHS, RHS);
28570+
SDValue Diff1 = DAG.getNode(X86ISD::SUB, dl, WideVTs, RHS, LHS);
28571+
SDValue AbsDiff = DAG.getNode(X86ISD::CMOV, dl, WideVT, Diff1, Diff0,
28572+
DAG.getTargetConstant(CC, dl, MVT::i8),
28573+
Diff1.getValue(1));
2857028574
return DAG.getNode(ISD::TRUNCATE, dl, VT, AbsDiff);
2857128575
}
2857228576
}
@@ -55526,8 +55530,13 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
5552655530
SDVTList VTs = DAG.getVTList(N->getValueType(0));
5552755531
if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
5552855532
SDValue Op(N, 0);
55529-
if (Negate)
55533+
if (Negate) {
55534+
// Bail if this is only used by a user of the x86 add/sub.
55535+
if (GenericAddSub->hasOneUse() &&
55536+
GenericAddSub->use_begin()->isOnlyUserOf(N))
55537+
return;
5553055538
Op = DAG.getNegative(Op, DL, VT);
55539+
}
5553155540
DCI.CombineTo(GenericAddSub, Op);
5553255541
}
5553355542
};

llvm/test/CodeGen/X86/abds-neg.ll

Lines changed: 72 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
1111
; X86: # %bb.0:
1212
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
1313
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
14-
; X86-NEXT: subl %eax, %ecx
15-
; X86-NEXT: movl %ecx, %eax
16-
; X86-NEXT: negl %eax
17-
; X86-NEXT: cmovsl %ecx, %eax
14+
; X86-NEXT: movl %ecx, %edx
15+
; X86-NEXT: subl %eax, %edx
16+
; X86-NEXT: subl %ecx, %eax
17+
; X86-NEXT: cmovll %edx, %eax
1818
; X86-NEXT: negb %al
1919
; X86-NEXT: # kill: def $al killed $al killed $eax
2020
; X86-NEXT: retl
@@ -23,10 +23,10 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
2323
; X64: # %bb.0:
2424
; X64-NEXT: movsbl %sil, %eax
2525
; X64-NEXT: movsbl %dil, %ecx
26-
; X64-NEXT: subl %eax, %ecx
27-
; X64-NEXT: movl %ecx, %eax
28-
; X64-NEXT: negl %eax
29-
; X64-NEXT: cmovsl %ecx, %eax
26+
; X64-NEXT: movl %ecx, %edx
27+
; X64-NEXT: subl %eax, %edx
28+
; X64-NEXT: subl %ecx, %eax
29+
; X64-NEXT: cmovll %edx, %eax
3030
; X64-NEXT: negb %al
3131
; X64-NEXT: # kill: def $al killed $al killed $eax
3232
; X64-NEXT: retq
@@ -44,22 +44,21 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
4444
; X86: # %bb.0:
4545
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
4646
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
47-
; X86-NEXT: subl %eax, %ecx
48-
; X86-NEXT: movl %ecx, %eax
49-
; X86-NEXT: negl %eax
50-
; X86-NEXT: cmovsl %ecx, %eax
47+
; X86-NEXT: movl %ecx, %edx
48+
; X86-NEXT: subl %eax, %edx
49+
; X86-NEXT: subl %ecx, %eax
50+
; X86-NEXT: cmovll %edx, %eax
5151
; X86-NEXT: negb %al
5252
; X86-NEXT: # kill: def $al killed $al killed $eax
5353
; X86-NEXT: retl
5454
;
5555
; X64-LABEL: abd_ext_i8_i16:
5656
; X64: # %bb.0:
57-
; X64-NEXT: movswl %si, %eax
5857
; X64-NEXT: movsbl %dil, %ecx
59-
; X64-NEXT: subl %eax, %ecx
60-
; X64-NEXT: movl %ecx, %eax
61-
; X64-NEXT: negl %eax
62-
; X64-NEXT: cmovsl %ecx, %eax
58+
; X64-NEXT: subl %esi, %edi
59+
; X64-NEXT: movswl %si, %eax
60+
; X64-NEXT: subl %ecx, %eax
61+
; X64-NEXT: cmovll %edi, %eax
6362
; X64-NEXT: negb %al
6463
; X64-NEXT: # kill: def $al killed $al killed $eax
6564
; X64-NEXT: retq
@@ -77,10 +76,10 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
7776
; X86: # %bb.0:
7877
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
7978
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
80-
; X86-NEXT: subl %eax, %ecx
81-
; X86-NEXT: movl %ecx, %eax
82-
; X86-NEXT: negl %eax
83-
; X86-NEXT: cmovsl %ecx, %eax
79+
; X86-NEXT: movl %ecx, %edx
80+
; X86-NEXT: subl %eax, %edx
81+
; X86-NEXT: subl %ecx, %eax
82+
; X86-NEXT: cmovll %edx, %eax
8483
; X86-NEXT: negb %al
8584
; X86-NEXT: # kill: def $al killed $al killed $eax
8685
; X86-NEXT: retl
@@ -89,10 +88,10 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
8988
; X64: # %bb.0:
9089
; X64-NEXT: movsbl %sil, %eax
9190
; X64-NEXT: movsbl %dil, %ecx
92-
; X64-NEXT: subl %eax, %ecx
93-
; X64-NEXT: movl %ecx, %eax
94-
; X64-NEXT: negl %eax
95-
; X64-NEXT: cmovsl %ecx, %eax
91+
; X64-NEXT: movl %ecx, %edx
92+
; X64-NEXT: subl %eax, %edx
93+
; X64-NEXT: subl %ecx, %eax
94+
; X64-NEXT: cmovll %edx, %eax
9695
; X64-NEXT: negb %al
9796
; X64-NEXT: # kill: def $al killed $al killed $eax
9897
; X64-NEXT: retq
@@ -110,21 +109,22 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
110109
; X86: # %bb.0:
111110
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
112111
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
113-
; X86-NEXT: subl %eax, %ecx
114-
; X86-NEXT: movl %ecx, %eax
112+
; X86-NEXT: movl %ecx, %edx
113+
; X86-NEXT: subl %eax, %edx
114+
; X86-NEXT: subl %ecx, %eax
115+
; X86-NEXT: cmovll %edx, %eax
115116
; X86-NEXT: negl %eax
116-
; X86-NEXT: cmovnsl %ecx, %eax
117117
; X86-NEXT: # kill: def $ax killed $ax killed $eax
118118
; X86-NEXT: retl
119119
;
120120
; X64-LABEL: abd_ext_i16:
121121
; X64: # %bb.0:
122-
; X64-NEXT: movswl %si, %eax
123122
; X64-NEXT: movswl %di, %ecx
124-
; X64-NEXT: subl %eax, %ecx
125-
; X64-NEXT: movl %ecx, %eax
123+
; X64-NEXT: subl %esi, %edi
124+
; X64-NEXT: movswl %si, %eax
125+
; X64-NEXT: subl %ecx, %eax
126+
; X64-NEXT: cmovll %edi, %eax
126127
; X64-NEXT: negl %eax
127-
; X64-NEXT: cmovnsl %ecx, %eax
128128
; X64-NEXT: # kill: def $ax killed $ax killed $eax
129129
; X64-NEXT: retq
130130
%aext = sext i16 %a to i64
@@ -173,21 +173,22 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
173173
; X86: # %bb.0:
174174
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
175175
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
176-
; X86-NEXT: subl %eax, %ecx
177-
; X86-NEXT: movl %ecx, %eax
176+
; X86-NEXT: movl %ecx, %edx
177+
; X86-NEXT: subl %eax, %edx
178+
; X86-NEXT: subl %ecx, %eax
179+
; X86-NEXT: cmovll %edx, %eax
178180
; X86-NEXT: negl %eax
179-
; X86-NEXT: cmovnsl %ecx, %eax
180181
; X86-NEXT: # kill: def $ax killed $ax killed $eax
181182
; X86-NEXT: retl
182183
;
183184
; X64-LABEL: abd_ext_i16_undef:
184185
; X64: # %bb.0:
185-
; X64-NEXT: movswl %si, %eax
186186
; X64-NEXT: movswl %di, %ecx
187-
; X64-NEXT: subl %eax, %ecx
188-
; X64-NEXT: movl %ecx, %eax
187+
; X64-NEXT: subl %esi, %edi
188+
; X64-NEXT: movswl %si, %eax
189+
; X64-NEXT: subl %ecx, %eax
190+
; X64-NEXT: cmovll %edi, %eax
189191
; X64-NEXT: negl %eax
190-
; X64-NEXT: cmovnsl %ecx, %eax
191192
; X64-NEXT: # kill: def $ax killed $ax killed $eax
192193
; X64-NEXT: retq
193194
%aext = sext i16 %a to i64
@@ -524,10 +525,10 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
524525
; X86: # %bb.0:
525526
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
526527
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
527-
; X86-NEXT: subl %eax, %ecx
528-
; X86-NEXT: movl %ecx, %eax
529-
; X86-NEXT: negl %eax
530-
; X86-NEXT: cmovsl %ecx, %eax
528+
; X86-NEXT: movl %ecx, %edx
529+
; X86-NEXT: subl %eax, %edx
530+
; X86-NEXT: subl %ecx, %eax
531+
; X86-NEXT: cmovll %edx, %eax
531532
; X86-NEXT: negb %al
532533
; X86-NEXT: # kill: def $al killed $al killed $eax
533534
; X86-NEXT: retl
@@ -536,10 +537,10 @@ define i8 @abd_minmax_i8(i8 %a, i8 %b) nounwind {
536537
; X64: # %bb.0:
537538
; X64-NEXT: movsbl %sil, %eax
538539
; X64-NEXT: movsbl %dil, %ecx
539-
; X64-NEXT: subl %eax, %ecx
540-
; X64-NEXT: movl %ecx, %eax
541-
; X64-NEXT: negl %eax
542-
; X64-NEXT: cmovsl %ecx, %eax
540+
; X64-NEXT: movl %ecx, %edx
541+
; X64-NEXT: subl %eax, %edx
542+
; X64-NEXT: subl %ecx, %eax
543+
; X64-NEXT: cmovll %edx, %eax
543544
; X64-NEXT: negb %al
544545
; X64-NEXT: # kill: def $al killed $al killed $eax
545546
; X64-NEXT: retq
@@ -554,21 +555,22 @@ define i16 @abd_minmax_i16(i16 %a, i16 %b) nounwind {
554555
; X86: # %bb.0:
555556
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
556557
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
557-
; X86-NEXT: subl %eax, %ecx
558-
; X86-NEXT: movl %ecx, %eax
558+
; X86-NEXT: movl %ecx, %edx
559+
; X86-NEXT: subl %eax, %edx
560+
; X86-NEXT: subl %ecx, %eax
561+
; X86-NEXT: cmovll %edx, %eax
559562
; X86-NEXT: negl %eax
560-
; X86-NEXT: cmovnsl %ecx, %eax
561563
; X86-NEXT: # kill: def $ax killed $ax killed $eax
562564
; X86-NEXT: retl
563565
;
564566
; X64-LABEL: abd_minmax_i16:
565567
; X64: # %bb.0:
566-
; X64-NEXT: movswl %si, %eax
567568
; X64-NEXT: movswl %di, %ecx
568-
; X64-NEXT: subl %eax, %ecx
569-
; X64-NEXT: movl %ecx, %eax
569+
; X64-NEXT: subl %esi, %edi
570+
; X64-NEXT: movswl %si, %eax
571+
; X64-NEXT: subl %ecx, %eax
572+
; X64-NEXT: cmovll %edi, %eax
570573
; X64-NEXT: negl %eax
571-
; X64-NEXT: cmovnsl %ecx, %eax
572574
; X64-NEXT: # kill: def $ax killed $ax killed $eax
573575
; X64-NEXT: retq
574576
%min = call i16 @llvm.smin.i16(i16 %a, i16 %b)
@@ -740,10 +742,10 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
740742
; X86: # %bb.0:
741743
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
742744
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
743-
; X86-NEXT: subl %eax, %ecx
744-
; X86-NEXT: movl %ecx, %eax
745-
; X86-NEXT: negl %eax
746-
; X86-NEXT: cmovsl %ecx, %eax
745+
; X86-NEXT: movl %ecx, %edx
746+
; X86-NEXT: subl %eax, %edx
747+
; X86-NEXT: subl %ecx, %eax
748+
; X86-NEXT: cmovll %edx, %eax
747749
; X86-NEXT: negb %al
748750
; X86-NEXT: # kill: def $al killed $al killed $eax
749751
; X86-NEXT: retl
@@ -752,10 +754,10 @@ define i8 @abd_cmp_i8(i8 %a, i8 %b) nounwind {
752754
; X64: # %bb.0:
753755
; X64-NEXT: movsbl %sil, %eax
754756
; X64-NEXT: movsbl %dil, %ecx
755-
; X64-NEXT: subl %eax, %ecx
756-
; X64-NEXT: movl %ecx, %eax
757-
; X64-NEXT: negl %eax
758-
; X64-NEXT: cmovsl %ecx, %eax
757+
; X64-NEXT: movl %ecx, %edx
758+
; X64-NEXT: subl %eax, %edx
759+
; X64-NEXT: subl %ecx, %eax
760+
; X64-NEXT: cmovll %edx, %eax
759761
; X64-NEXT: negb %al
760762
; X64-NEXT: # kill: def $al killed $al killed $eax
761763
; X64-NEXT: retq
@@ -771,21 +773,22 @@ define i16 @abd_cmp_i16(i16 %a, i16 %b) nounwind {
771773
; X86: # %bb.0:
772774
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
773775
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
774-
; X86-NEXT: subl %eax, %ecx
775-
; X86-NEXT: movl %ecx, %eax
776+
; X86-NEXT: movl %ecx, %edx
777+
; X86-NEXT: subl %eax, %edx
778+
; X86-NEXT: subl %ecx, %eax
779+
; X86-NEXT: cmovll %edx, %eax
776780
; X86-NEXT: negl %eax
777-
; X86-NEXT: cmovnsl %ecx, %eax
778781
; X86-NEXT: # kill: def $ax killed $ax killed $eax
779782
; X86-NEXT: retl
780783
;
781784
; X64-LABEL: abd_cmp_i16:
782785
; X64: # %bb.0:
783-
; X64-NEXT: movswl %si, %eax
784786
; X64-NEXT: movswl %di, %ecx
785-
; X64-NEXT: subl %eax, %ecx
786-
; X64-NEXT: movl %ecx, %eax
787+
; X64-NEXT: subl %esi, %edi
788+
; X64-NEXT: movswl %si, %eax
789+
; X64-NEXT: subl %ecx, %eax
790+
; X64-NEXT: cmovll %edi, %eax
787791
; X64-NEXT: negl %eax
788-
; X64-NEXT: cmovnsl %ecx, %eax
789792
; X64-NEXT: # kill: def $ax killed $ax killed $eax
790793
; X64-NEXT: retq
791794
%cmp = icmp slt i16 %a, %b

0 commit comments

Comments
 (0)