Skip to content

Commit 44c2883

Browse files
committed
[AArch64] Use isKnownNonZero to optimize eligible compares to cmn
Turning a cmp into a cmn saves an extra mov and negate instruction, so take that into account when choosing whether to flip the compare operands. Also, do not consider right-hand operands whose absolute value can be encoded into a cmn. Adding 0 (adds) and subtracting 0 (subs) differ in how they set the carry flag, which matters for unsigned comparisons. The problematic case for unsigned comparisons occurs only when the second argument is zero. Source: https://devblogs.microsoft.com/oldnewthing/20210607-00/?p=105288
1 parent 9f4e952 commit 44c2883

File tree

3 files changed

+155
-97
lines changed

3 files changed

+155
-97
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3403,19 +3403,26 @@ static bool isLegalArithImmed(uint64_t C) {
34033403
return IsLegal;
34043404
}
34053405

3406+
static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
3407+
KnownBits KnownSrc = DAG.computeKnownBits(CheckedVal);
3408+
return !KnownSrc.getSignedMinValue().isMinSignedValue();
3409+
}
3410+
34063411
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
34073412
// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
34083413
// can be set differently by this operation. It comes down to whether
34093414
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
34103415
// everything is fine. If not then the optimization is wrong. Thus general
34113416
// comparisons are only valid if op2 != 0.
34123417
//
3413-
// So, finally, the only LLVM-native comparisons that don't mention C and V
3414-
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
3415-
// the absence of information about op2.
3416-
static bool isCMN(SDValue Op, ISD::CondCode CC) {
3418+
// So, finally, the only LLVM-native comparisons that don't mention C or V
3419+
// are the ones that aren't unsigned comparisons. They're the only ones we can
3420+
// safely use CMN for in the absence of information about op2.
3421+
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
34173422
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
3418-
(CC == ISD::SETEQ || CC == ISD::SETNE);
3423+
(isIntEqualitySetCC(CC) ||
3424+
(isUnsignedIntSetCC(CC) ? DAG.isKnownNeverZero(Op.getOperand(1))
3425+
: cannotBeIntMin(Op.getOperand(1), DAG)));
34193426
}
34203427

34213428
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
@@ -3460,11 +3467,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
34603467
// register to WZR/XZR if it ends up being unused.
34613468
unsigned Opcode = AArch64ISD::SUBS;
34623469

3463-
if (isCMN(RHS, CC)) {
3470+
if (isCMN(RHS, CC, DAG)) {
34643471
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction ?
34653472
Opcode = AArch64ISD::ADDS;
34663473
RHS = RHS.getOperand(1);
3467-
} else if (isCMN(LHS, CC)) {
3474+
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3475+
isIntEqualitySetCC(CC)) {
34683476
// As we are looking for EQ/NE compares, the operands can be commuted ; can
34693477
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
34703478
Opcode = AArch64ISD::ADDS;
@@ -3566,13 +3574,15 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
35663574
Opcode = AArch64ISD::CCMN;
35673575
RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
35683576
}
3569-
} else if (RHS.getOpcode() == ISD::SUB) {
3570-
SDValue SubOp0 = RHS.getOperand(0);
3571-
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3572-
// See emitComparison() on why we can only do this for SETEQ and SETNE.
3573-
Opcode = AArch64ISD::CCMN;
3574-
RHS = RHS.getOperand(1);
3575-
}
3577+
} else if (isCMN(RHS, CC, DAG)) {
3578+
Opcode = AArch64ISD::CCMN;
3579+
RHS = RHS.getOperand(1);
3580+
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3581+
isIntEqualitySetCC(CC)) {
3582+
// As we are looking for EQ/NE compares, the operands can be commuted ; can
3583+
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
3584+
Opcode = AArch64ISD::CCMN;
3585+
LHS = LHS.getOperand(1);
35763586
}
35773587
if (Opcode == 0)
35783588
Opcode = AArch64ISD::CCMP;
@@ -3890,8 +3900,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
38903900
// cmp w12, w11, lsl #1
38913901
if (!isa<ConstantSDNode>(RHS) ||
38923902
!isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
3893-
bool LHSIsCMN = isCMN(LHS, CC);
3894-
bool RHSIsCMN = isCMN(RHS, CC);
3903+
bool LHSIsCMN = isCMN(LHS, CC, DAG);
3904+
bool RHSIsCMN = isCMN(RHS, CC, DAG);
38953905
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
38963906
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
38973907

@@ -3904,7 +3914,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
39043914

39053915
SDValue Cmp;
39063916
AArch64CC::CondCode AArch64CC;
3907-
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
3917+
if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) {
39083918
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
39093919

39103920
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.

llvm/test/CodeGen/AArch64/cmp-chains.ll

Lines changed: 112 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -260,14 +260,22 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) {
260260

261261
; (b > -(d | 1) && a < c)
262262
define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
263-
; CHECK-LABEL: neg_range_int_comp:
264-
; CHECK: // %bb.0:
265-
; CHECK-NEXT: orr w8, w3, #0x1
266-
; CHECK-NEXT: cmp w0, w2
267-
; CHECK-NEXT: neg w8, w8
268-
; CHECK-NEXT: ccmp w1, w8, #4, lt
269-
; CHECK-NEXT: csel w0, w1, w0, gt
270-
; CHECK-NEXT: ret
263+
; SDISEL-LABEL: neg_range_int_comp:
264+
; SDISEL: // %bb.0:
265+
; SDISEL-NEXT: orr w8, w3, #0x1
266+
; SDISEL-NEXT: cmp w0, w2
267+
; SDISEL-NEXT: ccmn w1, w8, #4, lt
268+
; SDISEL-NEXT: csel w0, w1, w0, gt
269+
; SDISEL-NEXT: ret
270+
;
271+
; GISEL-LABEL: neg_range_int_comp:
272+
; GISEL: // %bb.0:
273+
; GISEL-NEXT: orr w8, w3, #0x1
274+
; GISEL-NEXT: cmp w0, w2
275+
; GISEL-NEXT: neg w8, w8
276+
; GISEL-NEXT: ccmp w1, w8, #4, lt
277+
; GISEL-NEXT: csel w0, w1, w0, gt
278+
; GISEL-NEXT: ret
271279
%dor = or i32 %d, 1
272280
%negd = sub i32 0, %dor
273281
%cmp = icmp sgt i32 %b, %negd
@@ -279,14 +287,22 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
279287

280288
; (b >u -(d | 1) && a < c)
281289
define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
282-
; CHECK-LABEL: neg_range_int_comp_u:
283-
; CHECK: // %bb.0:
284-
; CHECK-NEXT: orr w8, w3, #0x1
285-
; CHECK-NEXT: cmp w0, w2
286-
; CHECK-NEXT: neg w8, w8
287-
; CHECK-NEXT: ccmp w1, w8, #0, lt
288-
; CHECK-NEXT: csel w0, w1, w0, hi
289-
; CHECK-NEXT: ret
290+
; SDISEL-LABEL: neg_range_int_comp_u:
291+
; SDISEL: // %bb.0:
292+
; SDISEL-NEXT: orr w8, w3, #0x1
293+
; SDISEL-NEXT: cmp w0, w2
294+
; SDISEL-NEXT: ccmn w1, w8, #0, lt
295+
; SDISEL-NEXT: csel w0, w1, w0, hi
296+
; SDISEL-NEXT: ret
297+
;
298+
; GISEL-LABEL: neg_range_int_comp_u:
299+
; GISEL: // %bb.0:
300+
; GISEL-NEXT: orr w8, w3, #0x1
301+
; GISEL-NEXT: cmp w0, w2
302+
; GISEL-NEXT: neg w8, w8
303+
; GISEL-NEXT: ccmp w1, w8, #0, lt
304+
; GISEL-NEXT: csel w0, w1, w0, hi
305+
; GISEL-NEXT: ret
290306
%dor = or i32 %d, 1
291307
%negd = sub i32 0, %dor
292308
%cmp = icmp ugt i32 %b, %negd
@@ -298,14 +314,22 @@ define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
298314

299315
; (b > -(d | 1) && a u < c)
300316
define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) {
301-
; CHECK-LABEL: neg_range_int_comp_ua:
302-
; CHECK: // %bb.0:
303-
; CHECK-NEXT: orr w8, w3, #0x1
304-
; CHECK-NEXT: cmp w0, w2
305-
; CHECK-NEXT: neg w8, w8
306-
; CHECK-NEXT: ccmp w1, w8, #4, lo
307-
; CHECK-NEXT: csel w0, w1, w0, gt
308-
; CHECK-NEXT: ret
317+
; SDISEL-LABEL: neg_range_int_comp_ua:
318+
; SDISEL: // %bb.0:
319+
; SDISEL-NEXT: orr w8, w3, #0x1
320+
; SDISEL-NEXT: cmp w0, w2
321+
; SDISEL-NEXT: ccmn w1, w8, #4, lo
322+
; SDISEL-NEXT: csel w0, w1, w0, gt
323+
; SDISEL-NEXT: ret
324+
;
325+
; GISEL-LABEL: neg_range_int_comp_ua:
326+
; GISEL: // %bb.0:
327+
; GISEL-NEXT: orr w8, w3, #0x1
328+
; GISEL-NEXT: cmp w0, w2
329+
; GISEL-NEXT: neg w8, w8
330+
; GISEL-NEXT: ccmp w1, w8, #4, lo
331+
; GISEL-NEXT: csel w0, w1, w0, gt
332+
; GISEL-NEXT: ret
309333
%dor = or i32 %d, 1
310334
%negd = sub i32 0, %dor
311335
%cmp = icmp sgt i32 %b, %negd
@@ -339,14 +363,22 @@ define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) {
339363

340364
; (b < -(d | 1) && a >= c)
341365
define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
342-
; CHECK-LABEL: neg_range_int_comp2:
343-
; CHECK: // %bb.0:
344-
; CHECK-NEXT: orr w8, w3, #0x1
345-
; CHECK-NEXT: cmp w0, w2
346-
; CHECK-NEXT: neg w8, w8
347-
; CHECK-NEXT: ccmp w1, w8, #0, ge
348-
; CHECK-NEXT: csel w0, w1, w0, lt
349-
; CHECK-NEXT: ret
366+
; SDISEL-LABEL: neg_range_int_comp2:
367+
; SDISEL: // %bb.0:
368+
; SDISEL-NEXT: orr w8, w3, #0x1
369+
; SDISEL-NEXT: cmp w0, w2
370+
; SDISEL-NEXT: ccmn w1, w8, #0, ge
371+
; SDISEL-NEXT: csel w0, w1, w0, lt
372+
; SDISEL-NEXT: ret
373+
;
374+
; GISEL-LABEL: neg_range_int_comp2:
375+
; GISEL: // %bb.0:
376+
; GISEL-NEXT: orr w8, w3, #0x1
377+
; GISEL-NEXT: cmp w0, w2
378+
; GISEL-NEXT: neg w8, w8
379+
; GISEL-NEXT: ccmp w1, w8, #0, ge
380+
; GISEL-NEXT: csel w0, w1, w0, lt
381+
; GISEL-NEXT: ret
350382
%dor = or i32 %d, 1
351383
%negd = sub i32 0, %dor
352384
%cmp = icmp slt i32 %b, %negd
@@ -358,14 +390,22 @@ define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
358390

359391
; (b <u -(d | 1) && a > c)
360392
define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
361-
; CHECK-LABEL: neg_range_int_comp_u2:
362-
; CHECK: // %bb.0:
363-
; CHECK-NEXT: orr w8, w3, #0x1
364-
; CHECK-NEXT: cmp w0, w2
365-
; CHECK-NEXT: neg w8, w8
366-
; CHECK-NEXT: ccmp w1, w8, #2, gt
367-
; CHECK-NEXT: csel w0, w1, w0, lo
368-
; CHECK-NEXT: ret
393+
; SDISEL-LABEL: neg_range_int_comp_u2:
394+
; SDISEL: // %bb.0:
395+
; SDISEL-NEXT: orr w8, w3, #0x1
396+
; SDISEL-NEXT: cmp w0, w2
397+
; SDISEL-NEXT: ccmn w1, w8, #2, gt
398+
; SDISEL-NEXT: csel w0, w1, w0, lo
399+
; SDISEL-NEXT: ret
400+
;
401+
; GISEL-LABEL: neg_range_int_comp_u2:
402+
; GISEL: // %bb.0:
403+
; GISEL-NEXT: orr w8, w3, #0x1
404+
; GISEL-NEXT: cmp w0, w2
405+
; GISEL-NEXT: neg w8, w8
406+
; GISEL-NEXT: ccmp w1, w8, #2, gt
407+
; GISEL-NEXT: csel w0, w1, w0, lo
408+
; GISEL-NEXT: ret
369409
%dor = or i32 %d, 1
370410
%negd = sub i32 0, %dor
371411
%cmp = icmp ult i32 %b, %negd
@@ -377,14 +417,22 @@ define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
377417

378418
; (b > -(d | 1) && a u > c)
379419
define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {
380-
; CHECK-LABEL: neg_range_int_comp_ua2:
381-
; CHECK: // %bb.0:
382-
; CHECK-NEXT: orr w8, w3, #0x1
383-
; CHECK-NEXT: cmp w0, w2
384-
; CHECK-NEXT: neg w8, w8
385-
; CHECK-NEXT: ccmp w1, w8, #4, hi
386-
; CHECK-NEXT: csel w0, w1, w0, gt
387-
; CHECK-NEXT: ret
420+
; SDISEL-LABEL: neg_range_int_comp_ua2:
421+
; SDISEL: // %bb.0:
422+
; SDISEL-NEXT: orr w8, w3, #0x1
423+
; SDISEL-NEXT: cmp w0, w2
424+
; SDISEL-NEXT: ccmn w1, w8, #4, hi
425+
; SDISEL-NEXT: csel w0, w1, w0, gt
426+
; SDISEL-NEXT: ret
427+
;
428+
; GISEL-LABEL: neg_range_int_comp_ua2:
429+
; GISEL: // %bb.0:
430+
; GISEL-NEXT: orr w8, w3, #0x1
431+
; GISEL-NEXT: cmp w0, w2
432+
; GISEL-NEXT: neg w8, w8
433+
; GISEL-NEXT: ccmp w1, w8, #4, hi
434+
; GISEL-NEXT: csel w0, w1, w0, gt
435+
; GISEL-NEXT: ret
388436
%dor = or i32 %d, 1
389437
%negd = sub i32 0, %dor
390438
%cmp = icmp sgt i32 %b, %negd
@@ -396,14 +444,22 @@ define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {
396444

397445
; (b > -(d | 1) && a u == c)
398446
define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) {
399-
; CHECK-LABEL: neg_range_int_comp_ua3:
400-
; CHECK: // %bb.0:
401-
; CHECK-NEXT: orr w8, w3, #0x1
402-
; CHECK-NEXT: cmp w0, w2
403-
; CHECK-NEXT: neg w8, w8
404-
; CHECK-NEXT: ccmp w1, w8, #4, eq
405-
; CHECK-NEXT: csel w0, w1, w0, gt
406-
; CHECK-NEXT: ret
447+
; SDISEL-LABEL: neg_range_int_comp_ua3:
448+
; SDISEL: // %bb.0:
449+
; SDISEL-NEXT: orr w8, w3, #0x1
450+
; SDISEL-NEXT: cmp w0, w2
451+
; SDISEL-NEXT: ccmn w1, w8, #4, eq
452+
; SDISEL-NEXT: csel w0, w1, w0, gt
453+
; SDISEL-NEXT: ret
454+
;
455+
; GISEL-LABEL: neg_range_int_comp_ua3:
456+
; GISEL: // %bb.0:
457+
; GISEL-NEXT: orr w8, w3, #0x1
458+
; GISEL-NEXT: cmp w0, w2
459+
; GISEL-NEXT: neg w8, w8
460+
; GISEL-NEXT: ccmp w1, w8, #4, eq
461+
; GISEL-NEXT: csel w0, w1, w0, gt
462+
; GISEL-NEXT: ret
407463
%dor = or i32 %d, 1
408464
%negd = sub i32 0, %dor
409465
%cmp = icmp sgt i32 %b, %negd

0 commit comments

Comments
 (0)