Skip to content

Commit 27ec0f1

Browse files
AZero13 authored and yuxuanchen1997 committed
[AArch64] Use isKnownNonZero to optimize eligible compares to cmn and ccmn (#96349)
Summary: The problematic case for unsigned comparisons occurs only when the second argument is zero. The problematic case for signed comparisons occurs only when the second argument is the signed minimum value. We can use KnownBits to know when we don't have to worry about this. Source: https://devblogs.microsoft.com/oldnewthing/20210607-00/?p=105288 Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251645
1 parent ee68c3e commit 27ec0f1

File tree

3 files changed

+476
-17
lines changed

3 files changed

+476
-17
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3403,19 +3403,26 @@ static bool isLegalArithImmed(uint64_t C) {
34033403
return IsLegal;
34043404
}
34053405

3406+
static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
3407+
KnownBits KnownSrc = DAG.computeKnownBits(CheckedVal);
3408+
return !KnownSrc.getSignedMinValue().isMinSignedValue();
3409+
}
3410+
34063411
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
34073412
// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
34083413
// can be set differently by this operation. It comes down to whether
34093414
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
34103415
// everything is fine. If not then the optimization is wrong. Thus general
34113416
// comparisons are only valid if op2 != 0.
34123417
//
3413-
// So, finally, the only LLVM-native comparisons that don't mention C and V
3414-
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
3415-
// the absence of information about op2.
3416-
static bool isCMN(SDValue Op, ISD::CondCode CC) {
3418+
// So, finally, the only LLVM-native comparisons that don't mention C or V
3419+
// are SETEQ and SETNE. Unsigned comparisons additionally need op2 != 0 and
3420+
// signed comparisons need op2 != INT_MIN before CMN can safely be used.
3421+
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
34173422
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
3418-
(CC == ISD::SETEQ || CC == ISD::SETNE);
3423+
(isIntEqualitySetCC(CC) ||
3424+
(isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(Op.getOperand(1))) ||
3425+
(isSignedIntSetCC(CC) && cannotBeIntMin(Op.getOperand(1), DAG)));
34193426
}
34203427

34213428
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
@@ -3460,11 +3467,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
34603467
// register to WZR/XZR if it ends up being unused.
34613468
unsigned Opcode = AArch64ISD::SUBS;
34623469

3463-
if (isCMN(RHS, CC)) {
3470+
if (isCMN(RHS, CC, DAG)) {
34643471
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction ?
34653472
Opcode = AArch64ISD::ADDS;
34663473
RHS = RHS.getOperand(1);
3467-
} else if (isCMN(LHS, CC)) {
3474+
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3475+
isIntEqualitySetCC(CC)) {
34683476
// As we are looking for EQ/NE compares, the operands can be commuted ; can
34693477
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
34703478
Opcode = AArch64ISD::ADDS;
@@ -3566,13 +3574,15 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
35663574
Opcode = AArch64ISD::CCMN;
35673575
RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
35683576
}
3569-
} else if (RHS.getOpcode() == ISD::SUB) {
3570-
SDValue SubOp0 = RHS.getOperand(0);
3571-
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3572-
// See emitComparison() on why we can only do this for SETEQ and SETNE.
3573-
Opcode = AArch64ISD::CCMN;
3574-
RHS = RHS.getOperand(1);
3575-
}
3577+
} else if (isCMN(RHS, CC, DAG)) {
3578+
Opcode = AArch64ISD::CCMN;
3579+
RHS = RHS.getOperand(1);
3580+
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3581+
isIntEqualitySetCC(CC)) {
3582+
// As we are looking for EQ/NE compares, the operands can be commuted ; can
3583+
// we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
3584+
Opcode = AArch64ISD::CCMN;
3585+
LHS = LHS.getOperand(1);
35763586
}
35773587
if (Opcode == 0)
35783588
Opcode = AArch64ISD::CCMP;
@@ -3890,8 +3900,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
38903900
// cmp w12, w11, lsl #1
38913901
if (!isa<ConstantSDNode>(RHS) ||
38923902
!isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
3893-
bool LHSIsCMN = isCMN(LHS, CC);
3894-
bool RHSIsCMN = isCMN(RHS, CC);
3903+
bool LHSIsCMN = isCMN(LHS, CC, DAG);
3904+
bool RHSIsCMN = isCMN(RHS, CC, DAG);
38953905
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
38963906
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
38973907

@@ -3904,7 +3914,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
39043914

39053915
SDValue Cmp;
39063916
AArch64CC::CondCode AArch64CC;
3907-
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
3917+
if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) {
39083918
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
39093919

39103920
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.

llvm/test/CodeGen/AArch64/cmp-chains.ll

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,3 +258,246 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) {
258258
ret i32 %retval.0
259259
}
260260

261+
; (b > -(d | 1) && a < c)
262+
define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
263+
; SDISEL-LABEL: neg_range_int_comp:
264+
; SDISEL: // %bb.0:
265+
; SDISEL-NEXT: orr w8, w3, #0x1
266+
; SDISEL-NEXT: cmp w0, w2
267+
; SDISEL-NEXT: ccmn w1, w8, #4, lt
268+
; SDISEL-NEXT: csel w0, w1, w0, gt
269+
; SDISEL-NEXT: ret
270+
;
271+
; GISEL-LABEL: neg_range_int_comp:
272+
; GISEL: // %bb.0:
273+
; GISEL-NEXT: orr w8, w3, #0x1
274+
; GISEL-NEXT: cmp w0, w2
275+
; GISEL-NEXT: neg w8, w8
276+
; GISEL-NEXT: ccmp w1, w8, #4, lt
277+
; GISEL-NEXT: csel w0, w1, w0, gt
278+
; GISEL-NEXT: ret
279+
%dor = or i32 %d, 1
280+
%negd = sub i32 0, %dor
281+
%cmp = icmp sgt i32 %b, %negd
282+
%cmp1 = icmp slt i32 %a, %c
283+
%or.cond = and i1 %cmp, %cmp1
284+
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
285+
ret i32 %retval.0
286+
}
287+
288+
; (b >u -(d | 1) && a < c)
289+
define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
290+
; SDISEL-LABEL: neg_range_int_comp_u:
291+
; SDISEL: // %bb.0:
292+
; SDISEL-NEXT: orr w8, w3, #0x1
293+
; SDISEL-NEXT: cmp w0, w2
294+
; SDISEL-NEXT: ccmn w1, w8, #0, lt
295+
; SDISEL-NEXT: csel w0, w1, w0, hi
296+
; SDISEL-NEXT: ret
297+
;
298+
; GISEL-LABEL: neg_range_int_comp_u:
299+
; GISEL: // %bb.0:
300+
; GISEL-NEXT: orr w8, w3, #0x1
301+
; GISEL-NEXT: cmp w0, w2
302+
; GISEL-NEXT: neg w8, w8
303+
; GISEL-NEXT: ccmp w1, w8, #0, lt
304+
; GISEL-NEXT: csel w0, w1, w0, hi
305+
; GISEL-NEXT: ret
306+
%dor = or i32 %d, 1
307+
%negd = sub i32 0, %dor
308+
%cmp = icmp ugt i32 %b, %negd
309+
%cmp1 = icmp slt i32 %a, %c
310+
%or.cond = and i1 %cmp, %cmp1
311+
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
312+
ret i32 %retval.0
313+
}
314+
315+
; (b > -(d | 1) && a u < c)
316+
define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) {
317+
; SDISEL-LABEL: neg_range_int_comp_ua:
318+
; SDISEL: // %bb.0:
319+
; SDISEL-NEXT: orr w8, w3, #0x1
320+
; SDISEL-NEXT: cmp w0, w2
321+
; SDISEL-NEXT: ccmn w1, w8, #4, lo
322+
; SDISEL-NEXT: csel w0, w1, w0, gt
323+
; SDISEL-NEXT: ret
324+
;
325+
; GISEL-LABEL: neg_range_int_comp_ua:
326+
; GISEL: // %bb.0:
327+
; GISEL-NEXT: orr w8, w3, #0x1
328+
; GISEL-NEXT: cmp w0, w2
329+
; GISEL-NEXT: neg w8, w8
330+
; GISEL-NEXT: ccmp w1, w8, #4, lo
331+
; GISEL-NEXT: csel w0, w1, w0, gt
332+
; GISEL-NEXT: ret
333+
%dor = or i32 %d, 1
334+
%negd = sub i32 0, %dor
335+
%cmp = icmp sgt i32 %b, %negd
336+
%cmp1 = icmp ult i32 %a, %c
337+
%or.cond = and i1 %cmp, %cmp1
338+
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
339+
ret i32 %retval.0
340+
}
341+
342+
; (b >= -3 && a > c)
343+
define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) {
344+
; SDISEL-LABEL: neg_range_int_2:
345+
; SDISEL: // %bb.0:
346+
; SDISEL-NEXT: cmp w0, w2
347+
; SDISEL-NEXT: ccmn w1, #4, #4, gt
348+
; SDISEL-NEXT: csel w0, w1, w0, gt
349+
; SDISEL-NEXT: ret
350+
;
351+
; GISEL-LABEL: neg_range_int_2:
352+
; GISEL: // %bb.0:
353+
; GISEL-NEXT: cmp w0, w2
354+
; GISEL-NEXT: ccmn w1, #3, #8, gt
355+
; GISEL-NEXT: csel w0, w1, w0, ge
356+
; GISEL-NEXT: ret
357+
%cmp = icmp sge i32 %b, -3
358+
%cmp1 = icmp sgt i32 %a, %c
359+
%or.cond = and i1 %cmp, %cmp1
360+
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
361+
ret i32 %retval.0
362+
}
363+
364+
; (b < -(d | 1) && a >= c)
365+
define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
366+
; SDISEL-LABEL: neg_range_int_comp2:
367+
; SDISEL: // %bb.0:
368+
; SDISEL-NEXT: orr w8, w3, #0x1
369+
; SDISEL-NEXT: cmp w0, w2
370+
; SDISEL-NEXT: ccmn w1, w8, #0, ge
371+
; SDISEL-NEXT: csel w0, w1, w0, lt
372+
; SDISEL-NEXT: ret
373+
;
374+
; GISEL-LABEL: neg_range_int_comp2:
375+
; GISEL: // %bb.0:
376+
; GISEL-NEXT: orr w8, w3, #0x1
377+
; GISEL-NEXT: cmp w0, w2
378+
; GISEL-NEXT: neg w8, w8
379+
; GISEL-NEXT: ccmp w1, w8, #0, ge
380+
; GISEL-NEXT: csel w0, w1, w0, lt
381+
; GISEL-NEXT: ret
382+
%dor = or i32 %d, 1
383+
%negd = sub i32 0, %dor
384+
%cmp = icmp slt i32 %b, %negd
385+
%cmp1 = icmp sge i32 %a, %c
386+
%or.cond = and i1 %cmp, %cmp1
387+
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
388+
ret i32 %retval.0
389+
}
390+
391+
; (b <u -(d | 1) && a > c)
392+
define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
393+
; SDISEL-LABEL: neg_range_int_comp_u2:
394+
; SDISEL: // %bb.0:
395+
; SDISEL-NEXT: orr w8, w3, #0x1
396+
; SDISEL-NEXT: cmp w0, w2
397+
; SDISEL-NEXT: ccmn w1, w8, #2, gt
398+
; SDISEL-NEXT: csel w0, w1, w0, lo
399+
; SDISEL-NEXT: ret
400+
;
401+
; GISEL-LABEL: neg_range_int_comp_u2:
402+
; GISEL: // %bb.0:
403+
; GISEL-NEXT: orr w8, w3, #0x1
404+
; GISEL-NEXT: cmp w0, w2
405+
; GISEL-NEXT: neg w8, w8
406+
; GISEL-NEXT: ccmp w1, w8, #2, gt
407+
; GISEL-NEXT: csel w0, w1, w0, lo
408+
; GISEL-NEXT: ret
409+
%dor = or i32 %d, 1
410+
%negd = sub i32 0, %dor
411+
%cmp = icmp ult i32 %b, %negd
412+
%cmp1 = icmp sgt i32 %a, %c
413+
%or.cond = and i1 %cmp, %cmp1
414+
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
415+
ret i32 %retval.0
416+
}
417+
418+
; (b > -(d | 1) && a u > c)
419+
define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {
420+
; SDISEL-LABEL: neg_range_int_comp_ua2:
421+
; SDISEL: // %bb.0:
422+
; SDISEL-NEXT: orr w8, w3, #0x1
423+
; SDISEL-NEXT: cmp w0, w2
424+
; SDISEL-NEXT: ccmn w1, w8, #4, hi
425+
; SDISEL-NEXT: csel w0, w1, w0, gt
426+
; SDISEL-NEXT: ret
427+
;
428+
; GISEL-LABEL: neg_range_int_comp_ua2:
429+
; GISEL: // %bb.0:
430+
; GISEL-NEXT: orr w8, w3, #0x1
431+
; GISEL-NEXT: cmp w0, w2
432+
; GISEL-NEXT: neg w8, w8
433+
; GISEL-NEXT: ccmp w1, w8, #4, hi
434+
; GISEL-NEXT: csel w0, w1, w0, gt
435+
; GISEL-NEXT: ret
436+
%dor = or i32 %d, 1
437+
%negd = sub i32 0, %dor
438+
%cmp = icmp sgt i32 %b, %negd
439+
%cmp1 = icmp ugt i32 %a, %c
440+
%or.cond = and i1 %cmp, %cmp1
441+
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
442+
ret i32 %retval.0
443+
}
444+
445+
; (b > -(d | 1) && a u == c)
446+
define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) {
447+
; SDISEL-LABEL: neg_range_int_comp_ua3:
448+
; SDISEL: // %bb.0:
449+
; SDISEL-NEXT: orr w8, w3, #0x1
450+
; SDISEL-NEXT: cmp w0, w2
451+
; SDISEL-NEXT: ccmn w1, w8, #4, eq
452+
; SDISEL-NEXT: csel w0, w1, w0, gt
453+
; SDISEL-NEXT: ret
454+
;
455+
; GISEL-LABEL: neg_range_int_comp_ua3:
456+
; GISEL: // %bb.0:
457+
; GISEL-NEXT: orr w8, w3, #0x1
458+
; GISEL-NEXT: cmp w0, w2
459+
; GISEL-NEXT: neg w8, w8
460+
; GISEL-NEXT: ccmp w1, w8, #4, eq
461+
; GISEL-NEXT: csel w0, w1, w0, gt
462+
; GISEL-NEXT: ret
463+
%dor = or i32 %d, 1
464+
%negd = sub i32 0, %dor
465+
%cmp = icmp sgt i32 %b, %negd
466+
%cmp1 = icmp eq i32 %a, %c
467+
%or.cond = and i1 %cmp, %cmp1
468+
%retval.0 = select i1 %or.cond, i32 %b, i32 %a
469+
ret i32 %retval.0
470+
}
471+
472+
; (-(a | 1) >u (b | 3) && a >u c)
473+
define i32 @neg_range_int_c(i32 %a, i32 %b, i32 %c) {
474+
; SDISEL-LABEL: neg_range_int_c:
475+
; SDISEL: // %bb.0: // %entry
476+
; SDISEL-NEXT: orr w8, w0, #0x1
477+
; SDISEL-NEXT: orr w9, w1, #0x3
478+
; SDISEL-NEXT: cmn w9, w8
479+
; SDISEL-NEXT: ccmp w2, w0, #2, lo
480+
; SDISEL-NEXT: cset w0, lo
481+
; SDISEL-NEXT: ret
482+
;
483+
; GISEL-LABEL: neg_range_int_c:
484+
; GISEL: // %bb.0: // %entry
485+
; GISEL-NEXT: orr w8, w0, #0x1
486+
; GISEL-NEXT: orr w9, w1, #0x3
487+
; GISEL-NEXT: neg w8, w8
488+
; GISEL-NEXT: cmp w9, w8
489+
; GISEL-NEXT: cset w8, lo
490+
; GISEL-NEXT: cmp w2, w0
491+
; GISEL-NEXT: cset w9, lo
492+
; GISEL-NEXT: and w0, w8, w9
493+
; GISEL-NEXT: ret
494+
entry:
495+
%or = or i32 %a, 1
496+
%sub = sub i32 0, %or
497+
%or1 = or i32 %b, 3
498+
%cmp = icmp ult i32 %or1, %sub
499+
%cmp2 = icmp ult i32 %c, %a
500+
%0 = and i1 %cmp, %cmp2
501+
%land.ext = zext i1 %0 to i32
502+
ret i32 %land.ext
503+
}

0 commit comments

Comments
 (0)