Skip to content

Commit b05c554

Browse files
authored
[X86] LowerABD - simplify i32/i64 to use sub+sub+cmov instead of repeating nodes via abs (#102174)
Using X86ISD::SUB nodes directly allows us to drive the X86ISD::CMOV node with exact flags instead of trying to clean up the generic codegen via ICMP/SUBO nodes.
1 parent 0cc6b46 commit b05c554

File tree

5 files changed

+197
-257
lines changed

5 files changed

+197
-257
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28443,13 +28443,27 @@ static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
2844328443
bool IsSigned = Op.getOpcode() == ISD::ABDS;
2844428444
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2844528445

28446-
// TODO: Move to TargetLowering expandABD() once we have ABD promotion.
2844728446
if (VT.isScalarInteger()) {
28447+
// abds(lhs, rhs) -> select(slt(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
28448+
// abdu(lhs, rhs) -> select(ult(lhs,rhs),sub(rhs,lhs),sub(lhs,rhs))
28449+
if (Subtarget.canUseCMOV() && VT.bitsGE(MVT::i32)) {
28450+
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
28451+
X86::CondCode CC = IsSigned ? X86::COND_L : X86::COND_B;
28452+
SDValue LHS = DAG.getFreeze(Op.getOperand(0));
28453+
SDValue RHS = DAG.getFreeze(Op.getOperand(1));
28454+
SDValue Diff0 = DAG.getNode(X86ISD::SUB, dl, VTs, LHS, RHS);
28455+
SDValue Diff1 = DAG.getNode(X86ISD::SUB, dl, VTs, RHS, LHS);
28456+
return DAG.getNode(X86ISD::CMOV, dl, VT, Diff1, Diff0,
28457+
DAG.getTargetConstant(CC, dl, MVT::i8),
28458+
Diff1.getValue(1));
28459+
}
28460+
28461+
// TODO: Move to TargetLowering expandABD() once we have ABD promotion.
28462+
// abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs))))
28463+
// abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs))))
2844828464
unsigned WideBits = std::max<unsigned>(2 * VT.getScalarSizeInBits(), 32u);
2844928465
MVT WideVT = MVT::getIntegerVT(WideBits);
2845028466
if (TLI.isTypeLegal(WideVT)) {
28451-
// abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs))))
28452-
// abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs))))
2845328467
unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2845428468
SDValue LHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(0));
2845528469
SDValue RHS = DAG.getNode(ExtOpc, dl, WideVT, Op.getOperand(1));

llvm/test/CodeGen/X86/abds-neg.ll

Lines changed: 41 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -139,28 +139,25 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
139139
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
140140
; X86-LABEL: abd_ext_i16_i32:
141141
; X86: # %bb.0:
142-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
143-
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
144-
; X86-NEXT: movl %eax, %edx
145-
; X86-NEXT: subl %ecx, %edx
146-
; X86-NEXT: negl %edx
142+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
143+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
144+
; X86-NEXT: movl %ecx, %edx
145+
; X86-NEXT: subl %eax, %edx
147146
; X86-NEXT: subl %ecx, %eax
148-
; X86-NEXT: cmovlel %edx, %eax
147+
; X86-NEXT: cmovll %edx, %eax
149148
; X86-NEXT: negl %eax
150149
; X86-NEXT: # kill: def $ax killed $ax killed $eax
151150
; X86-NEXT: retl
152151
;
153152
; X64-LABEL: abd_ext_i16_i32:
154153
; X64: # %bb.0:
155-
; X64-NEXT: # kill: def $edi killed $edi def $rdi
156-
; X64-NEXT: movswq %di, %rcx
157-
; X64-NEXT: movslq %esi, %rax
158-
; X64-NEXT: subq %rax, %rcx
159-
; X64-NEXT: movq %rcx, %rax
160-
; X64-NEXT: negq %rax
161-
; X64-NEXT: cmovsq %rcx, %rax
154+
; X64-NEXT: movswl %di, %ecx
155+
; X64-NEXT: movl %edi, %eax
156+
; X64-NEXT: subl %esi, %eax
157+
; X64-NEXT: subl %ecx, %esi
158+
; X64-NEXT: cmovgel %esi, %eax
162159
; X64-NEXT: negl %eax
163-
; X64-NEXT: # kill: def $ax killed $ax killed $rax
160+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
164161
; X64-NEXT: retq
165162
%aext = sext i16 %a to i64
166163
%bext = sext i32 %b to i64
@@ -205,26 +202,22 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
205202
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
206203
; X86-LABEL: abd_ext_i32:
207204
; X86: # %bb.0:
208-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
209205
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
210-
; X86-NEXT: movl %eax, %edx
211-
; X86-NEXT: subl %ecx, %edx
212-
; X86-NEXT: negl %edx
206+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
207+
; X86-NEXT: movl %ecx, %edx
208+
; X86-NEXT: subl %eax, %edx
213209
; X86-NEXT: subl %ecx, %eax
214-
; X86-NEXT: cmovlel %edx, %eax
210+
; X86-NEXT: cmovll %edx, %eax
215211
; X86-NEXT: negl %eax
216212
; X86-NEXT: retl
217213
;
218214
; X64-LABEL: abd_ext_i32:
219215
; X64: # %bb.0:
220-
; X64-NEXT: movslq %esi, %rax
221-
; X64-NEXT: movslq %edi, %rcx
222-
; X64-NEXT: subq %rax, %rcx
223-
; X64-NEXT: movq %rcx, %rax
224-
; X64-NEXT: negq %rax
225-
; X64-NEXT: cmovsq %rcx, %rax
216+
; X64-NEXT: movl %edi, %eax
217+
; X64-NEXT: subl %esi, %eax
218+
; X64-NEXT: subl %edi, %esi
219+
; X64-NEXT: cmovgel %esi, %eax
226220
; X64-NEXT: negl %eax
227-
; X64-NEXT: # kill: def $eax killed $eax killed $rax
228221
; X64-NEXT: retq
229222
%aext = sext i32 %a to i64
230223
%bext = sext i32 %b to i64
@@ -238,27 +231,23 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
238231
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
239232
; X86-LABEL: abd_ext_i32_i16:
240233
; X86: # %bb.0:
241-
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
242-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
243-
; X86-NEXT: movl %eax, %edx
244-
; X86-NEXT: subl %ecx, %edx
245-
; X86-NEXT: negl %edx
234+
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
235+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
236+
; X86-NEXT: movl %ecx, %edx
237+
; X86-NEXT: subl %eax, %edx
246238
; X86-NEXT: subl %ecx, %eax
247-
; X86-NEXT: cmovlel %edx, %eax
239+
; X86-NEXT: cmovll %edx, %eax
248240
; X86-NEXT: negl %eax
249241
; X86-NEXT: retl
250242
;
251243
; X64-LABEL: abd_ext_i32_i16:
252244
; X64: # %bb.0:
253-
; X64-NEXT: # kill: def $esi killed $esi def $rsi
254-
; X64-NEXT: movswq %si, %rax
255-
; X64-NEXT: movslq %edi, %rcx
256-
; X64-NEXT: subq %rax, %rcx
257-
; X64-NEXT: movq %rcx, %rax
258-
; X64-NEXT: negq %rax
259-
; X64-NEXT: cmovsq %rcx, %rax
245+
; X64-NEXT: movswl %si, %eax
246+
; X64-NEXT: movl %edi, %ecx
247+
; X64-NEXT: subl %eax, %ecx
248+
; X64-NEXT: subl %edi, %eax
249+
; X64-NEXT: cmovll %ecx, %eax
260250
; X64-NEXT: negl %eax
261-
; X64-NEXT: # kill: def $eax killed $eax killed $rax
262251
; X64-NEXT: retq
263252
%aext = sext i32 %a to i64
264253
%bext = sext i16 %b to i64
@@ -272,26 +261,22 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
272261
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
273262
; X86-LABEL: abd_ext_i32_undef:
274263
; X86: # %bb.0:
275-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
276264
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
277-
; X86-NEXT: movl %eax, %edx
278-
; X86-NEXT: subl %ecx, %edx
279-
; X86-NEXT: negl %edx
265+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
266+
; X86-NEXT: movl %ecx, %edx
267+
; X86-NEXT: subl %eax, %edx
280268
; X86-NEXT: subl %ecx, %eax
281-
; X86-NEXT: cmovlel %edx, %eax
269+
; X86-NEXT: cmovll %edx, %eax
282270
; X86-NEXT: negl %eax
283271
; X86-NEXT: retl
284272
;
285273
; X64-LABEL: abd_ext_i32_undef:
286274
; X64: # %bb.0:
287-
; X64-NEXT: movslq %esi, %rax
288-
; X64-NEXT: movslq %edi, %rcx
289-
; X64-NEXT: subq %rax, %rcx
290-
; X64-NEXT: movq %rcx, %rax
291-
; X64-NEXT: negq %rax
292-
; X64-NEXT: cmovsq %rcx, %rax
275+
; X64-NEXT: movl %edi, %eax
276+
; X64-NEXT: subl %esi, %eax
277+
; X64-NEXT: subl %edi, %esi
278+
; X64-NEXT: cmovgel %esi, %eax
293279
; X64-NEXT: negl %eax
294-
; X64-NEXT: # kill: def $eax killed $eax killed $rax
295280
; X64-NEXT: retq
296281
%aext = sext i32 %a to i64
297282
%bext = sext i32 %b to i64
@@ -332,9 +317,8 @@ define i64 @abd_ext_i64(i64 %a, i64 %b) nounwind {
332317
; X64: # %bb.0:
333318
; X64-NEXT: movq %rdi, %rax
334319
; X64-NEXT: subq %rsi, %rax
335-
; X64-NEXT: negq %rax
336-
; X64-NEXT: subq %rsi, %rdi
337-
; X64-NEXT: cmovgq %rdi, %rax
320+
; X64-NEXT: subq %rdi, %rsi
321+
; X64-NEXT: cmovgeq %rsi, %rax
338322
; X64-NEXT: negq %rax
339323
; X64-NEXT: retq
340324
%aext = sext i64 %a to i128
@@ -376,9 +360,8 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind {
376360
; X64: # %bb.0:
377361
; X64-NEXT: movq %rdi, %rax
378362
; X64-NEXT: subq %rsi, %rax
379-
; X64-NEXT: negq %rax
380-
; X64-NEXT: subq %rsi, %rdi
381-
; X64-NEXT: cmovgq %rdi, %rax
363+
; X64-NEXT: subq %rdi, %rsi
364+
; X64-NEXT: cmovgeq %rsi, %rax
382365
; X64-NEXT: negq %rax
383366
; X64-NEXT: retq
384367
%aext = sext i64 %a to i128

0 commit comments

Comments
 (0)