Skip to content

Commit 8865a3f

Browse files
committed
[AArch64] Use isKnownNonZero to optimize eligible compares to cmn and ccmn
The problematic case for unsigned comparisons occurs only when the second argument is zero, and for signed comparisons only when the second argument is the minimum possible signed integer. If we can prove that the register value cannot be one of those, it is safe to fold the comparison into CMN or CCMN. Source: https://devblogs.microsoft.com/oldnewthing/20210607-00/?p=105288
1 parent 3d58110 commit 8865a3f

File tree

3 files changed

+162
-99
lines changed

3 files changed

+162
-99
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3403,19 +3403,26 @@ static bool isLegalArithImmed(uint64_t C) {
34033403
return IsLegal;
34043404
}
34053405

3406+
static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
3407+
KnownBits KnownSrc = DAG.computeKnownBits(CheckedVal);
3408+
return !KnownSrc.getSignedMinValue().isMinSignedValue();
3409+
}
3410+
34063411
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
34073412
// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
34083413
// can be set differently by this operation. It comes down to whether
34093414
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
34103415
// everything is fine. If not then the optimization is wrong. Thus general
34113416
// comparisons are only valid if op2 != 0.
34123417
//
3413-
// So, finally, the only LLVM-native comparisons that don't mention C and V
3414-
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
3415-
// the absence of information about op2.
3416-
static bool isCMN(SDValue Op, ISD::CondCode CC) {
3418+
// So, finally, the only LLVM-native comparisons that don't mention C or V
3419+
// are the equality comparisons (SETEQ and SETNE). They're the only ones we can
3420+
// safely use CMN for in the absence of information about op2.
3421+
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
34173422
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
3418-
(CC == ISD::SETEQ || CC == ISD::SETNE);
3423+
(isIntEqualitySetCC(CC) ||
3424+
(isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(Op.getOperand(1))) ||
3425+
(isSignedIntSetCC(CC) && cannotBeIntMin(Op.getOperand(1), DAG)));
34193426
}
34203427

34213428
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
@@ -3460,11 +3467,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
34603467
// register to WZR/XZR if it ends up being unused.
34613468
unsigned Opcode = AArch64ISD::SUBS;
34623469

3463-
if (isCMN(RHS, CC)) {
3470+
if (isCMN(RHS, CC, DAG)) {
34643471
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
34653472
Opcode = AArch64ISD::ADDS;
34663473
RHS = RHS.getOperand(1);
3467-
} else if (isCMN(LHS, CC)) {
3474+
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3475+
isIntEqualitySetCC(CC)) {
34683476
// As we are looking for EQ/NE compares, the operands can be commuted ; can
34693477
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
34703478
Opcode = AArch64ISD::ADDS;
@@ -3566,13 +3574,21 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
35663574
Opcode = AArch64ISD::CCMN;
35673575
RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
35683576
}
3569-
} else if (RHS.getOpcode() == ISD::SUB) {
3570-
SDValue SubOp0 = RHS.getOperand(0);
3571-
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3572-
// See emitComparison() on why we can only do this for SETEQ and SETNE.
3573-
Opcode = AArch64ISD::CCMN;
3574-
RHS = RHS.getOperand(1);
3575-
}
3577+
} else if (isCMN(RHS, CC, DAG)) {
3578+
Opcode = AArch64ISD::CCMN;
3579+
RHS = RHS.getOperand(1);
3580+
} else if (isCMN(LHS, CC, DAG) &&
3581+
(isIntEqualitySetCC(CC) ||
3582+
(isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(RHS)) ||
3583+
(isSignedIntSetCC(CC) && cannotBeIntMin(RHS, DAG)))) {
3584+
// We can commute (CMP (sub 0, op1), op2) if neither LHS nor RHS can be
3585+
// INT_MIN if a signed comparison, or 0 if unsigned.
3586+
Opcode = AArch64ISD::CCMN;
3587+
LHS = LHS.getOperand(1);
3588+
// Swap LHS and RHS if it wasn't an equality comparison
3589+
// So we don't have to worry about changing the CC
3590+
// a < b -> -b < -a
3591+
std::swap(LHS, RHS);
35763592
}
35773593
if (Opcode == 0)
35783594
Opcode = AArch64ISD::CCMP;
@@ -3890,8 +3906,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
38903906
// cmp w12, w11, lsl #1
38913907
if (!isa<ConstantSDNode>(RHS) ||
38923908
!isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
3893-
bool LHSIsCMN = isCMN(LHS, CC);
3894-
bool RHSIsCMN = isCMN(RHS, CC);
3909+
bool LHSIsCMN = isCMN(LHS, CC, DAG);
3910+
bool RHSIsCMN = isCMN(RHS, CC, DAG);
38953911
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
38963912
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
38973913

@@ -3904,7 +3920,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
39043920

39053921
SDValue Cmp;
39063922
AArch64CC::CondCode AArch64CC;
3907-
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
3923+
if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) {
39083924
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
39093925

39103926
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.

llvm/test/CodeGen/AArch64/cmp-chains.ll

Lines changed: 113 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -260,14 +260,22 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) {
260260

261261
; (b > -(d | 1) && a < c)
262262
define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
263-
; CHECK-LABEL: neg_range_int_comp:
264-
; CHECK: // %bb.0:
265-
; CHECK-NEXT: orr w8, w3, #0x1
266-
; CHECK-NEXT: cmp w0, w2
267-
; CHECK-NEXT: neg w8, w8
268-
; CHECK-NEXT: ccmp w1, w8, #4, lt
269-
; CHECK-NEXT: csel w0, w1, w0, gt
270-
; CHECK-NEXT: ret
263+
; SDISEL-LABEL: neg_range_int_comp:
264+
; SDISEL: // %bb.0:
265+
; SDISEL-NEXT: orr w8, w3, #0x1
266+
; SDISEL-NEXT: cmp w0, w2
267+
; SDISEL-NEXT: ccmn w1, w8, #4, lt
268+
; SDISEL-NEXT: csel w0, w1, w0, gt
269+
; SDISEL-NEXT: ret
270+
;
271+
; GISEL-LABEL: neg_range_int_comp:
272+
; GISEL: // %bb.0:
273+
; GISEL-NEXT: orr w8, w3, #0x1
274+
; GISEL-NEXT: cmp w0, w2
275+
; GISEL-NEXT: neg w8, w8
276+
; GISEL-NEXT: ccmp w1, w8, #4, lt
277+
; GISEL-NEXT: csel w0, w1, w0, gt
278+
; GISEL-NEXT: ret
271279
%dor = or i32 %d, 1
272280
%negd = sub i32 0, %dor
273281
%cmp = icmp sgt i32 %b, %negd
@@ -279,14 +287,22 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
279287

280288
; (b >u -(d | 1) && a < c)
281289
define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
282-
; CHECK-LABEL: neg_range_int_comp_u:
283-
; CHECK: // %bb.0:
284-
; CHECK-NEXT: orr w8, w3, #0x1
285-
; CHECK-NEXT: cmp w0, w2
286-
; CHECK-NEXT: neg w8, w8
287-
; CHECK-NEXT: ccmp w1, w8, #0, lt
288-
; CHECK-NEXT: csel w0, w1, w0, hi
289-
; CHECK-NEXT: ret
290+
; SDISEL-LABEL: neg_range_int_comp_u:
291+
; SDISEL: // %bb.0:
292+
; SDISEL-NEXT: orr w8, w3, #0x1
293+
; SDISEL-NEXT: cmp w0, w2
294+
; SDISEL-NEXT: ccmn w1, w8, #0, lt
295+
; SDISEL-NEXT: csel w0, w1, w0, hi
296+
; SDISEL-NEXT: ret
297+
;
298+
; GISEL-LABEL: neg_range_int_comp_u:
299+
; GISEL: // %bb.0:
300+
; GISEL-NEXT: orr w8, w3, #0x1
301+
; GISEL-NEXT: cmp w0, w2
302+
; GISEL-NEXT: neg w8, w8
303+
; GISEL-NEXT: ccmp w1, w8, #0, lt
304+
; GISEL-NEXT: csel w0, w1, w0, hi
305+
; GISEL-NEXT: ret
290306
%dor = or i32 %d, 1
291307
%negd = sub i32 0, %dor
292308
%cmp = icmp ugt i32 %b, %negd
@@ -298,14 +314,22 @@ define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
298314

299315
; (b > -(d | 1) && a u < c)
300316
define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) {
301-
; CHECK-LABEL: neg_range_int_comp_ua:
302-
; CHECK: // %bb.0:
303-
; CHECK-NEXT: orr w8, w3, #0x1
304-
; CHECK-NEXT: cmp w0, w2
305-
; CHECK-NEXT: neg w8, w8
306-
; CHECK-NEXT: ccmp w1, w8, #4, lo
307-
; CHECK-NEXT: csel w0, w1, w0, gt
308-
; CHECK-NEXT: ret
317+
; SDISEL-LABEL: neg_range_int_comp_ua:
318+
; SDISEL: // %bb.0:
319+
; SDISEL-NEXT: orr w8, w3, #0x1
320+
; SDISEL-NEXT: cmp w0, w2
321+
; SDISEL-NEXT: ccmn w1, w8, #4, lo
322+
; SDISEL-NEXT: csel w0, w1, w0, gt
323+
; SDISEL-NEXT: ret
324+
;
325+
; GISEL-LABEL: neg_range_int_comp_ua:
326+
; GISEL: // %bb.0:
327+
; GISEL-NEXT: orr w8, w3, #0x1
328+
; GISEL-NEXT: cmp w0, w2
329+
; GISEL-NEXT: neg w8, w8
330+
; GISEL-NEXT: ccmp w1, w8, #4, lo
331+
; GISEL-NEXT: csel w0, w1, w0, gt
332+
; GISEL-NEXT: ret
309333
%dor = or i32 %d, 1
310334
%negd = sub i32 0, %dor
311335
%cmp = icmp sgt i32 %b, %negd
@@ -339,14 +363,22 @@ define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) {
339363

340364
; (b < -(d | 1) && a >= c)
341365
define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
342-
; CHECK-LABEL: neg_range_int_comp2:
343-
; CHECK: // %bb.0:
344-
; CHECK-NEXT: orr w8, w3, #0x1
345-
; CHECK-NEXT: cmp w0, w2
346-
; CHECK-NEXT: neg w8, w8
347-
; CHECK-NEXT: ccmp w1, w8, #0, ge
348-
; CHECK-NEXT: csel w0, w1, w0, lt
349-
; CHECK-NEXT: ret
366+
; SDISEL-LABEL: neg_range_int_comp2:
367+
; SDISEL: // %bb.0:
368+
; SDISEL-NEXT: orr w8, w3, #0x1
369+
; SDISEL-NEXT: cmp w0, w2
370+
; SDISEL-NEXT: ccmn w1, w8, #0, ge
371+
; SDISEL-NEXT: csel w0, w1, w0, lt
372+
; SDISEL-NEXT: ret
373+
;
374+
; GISEL-LABEL: neg_range_int_comp2:
375+
; GISEL: // %bb.0:
376+
; GISEL-NEXT: orr w8, w3, #0x1
377+
; GISEL-NEXT: cmp w0, w2
378+
; GISEL-NEXT: neg w8, w8
379+
; GISEL-NEXT: ccmp w1, w8, #0, ge
380+
; GISEL-NEXT: csel w0, w1, w0, lt
381+
; GISEL-NEXT: ret
350382
%dor = or i32 %d, 1
351383
%negd = sub i32 0, %dor
352384
%cmp = icmp slt i32 %b, %negd
@@ -358,14 +390,22 @@ define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
358390

359391
; (b <u -(d | 1) && a > c)
360392
define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
361-
; CHECK-LABEL: neg_range_int_comp_u2:
362-
; CHECK: // %bb.0:
363-
; CHECK-NEXT: orr w8, w3, #0x1
364-
; CHECK-NEXT: cmp w0, w2
365-
; CHECK-NEXT: neg w8, w8
366-
; CHECK-NEXT: ccmp w1, w8, #2, gt
367-
; CHECK-NEXT: csel w0, w1, w0, lo
368-
; CHECK-NEXT: ret
393+
; SDISEL-LABEL: neg_range_int_comp_u2:
394+
; SDISEL: // %bb.0:
395+
; SDISEL-NEXT: orr w8, w3, #0x1
396+
; SDISEL-NEXT: cmp w0, w2
397+
; SDISEL-NEXT: ccmn w1, w8, #2, gt
398+
; SDISEL-NEXT: csel w0, w1, w0, lo
399+
; SDISEL-NEXT: ret
400+
;
401+
; GISEL-LABEL: neg_range_int_comp_u2:
402+
; GISEL: // %bb.0:
403+
; GISEL-NEXT: orr w8, w3, #0x1
404+
; GISEL-NEXT: cmp w0, w2
405+
; GISEL-NEXT: neg w8, w8
406+
; GISEL-NEXT: ccmp w1, w8, #2, gt
407+
; GISEL-NEXT: csel w0, w1, w0, lo
408+
; GISEL-NEXT: ret
369409
%dor = or i32 %d, 1
370410
%negd = sub i32 0, %dor
371411
%cmp = icmp ult i32 %b, %negd
@@ -377,14 +417,22 @@ define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
377417

378418
; (b > -(d | 1) && a u > c)
379419
define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {
380-
; CHECK-LABEL: neg_range_int_comp_ua2:
381-
; CHECK: // %bb.0:
382-
; CHECK-NEXT: orr w8, w3, #0x1
383-
; CHECK-NEXT: cmp w0, w2
384-
; CHECK-NEXT: neg w8, w8
385-
; CHECK-NEXT: ccmp w1, w8, #4, hi
386-
; CHECK-NEXT: csel w0, w1, w0, gt
387-
; CHECK-NEXT: ret
420+
; SDISEL-LABEL: neg_range_int_comp_ua2:
421+
; SDISEL: // %bb.0:
422+
; SDISEL-NEXT: orr w8, w3, #0x1
423+
; SDISEL-NEXT: cmp w0, w2
424+
; SDISEL-NEXT: ccmn w1, w8, #4, hi
425+
; SDISEL-NEXT: csel w0, w1, w0, gt
426+
; SDISEL-NEXT: ret
427+
;
428+
; GISEL-LABEL: neg_range_int_comp_ua2:
429+
; GISEL: // %bb.0:
430+
; GISEL-NEXT: orr w8, w3, #0x1
431+
; GISEL-NEXT: cmp w0, w2
432+
; GISEL-NEXT: neg w8, w8
433+
; GISEL-NEXT: ccmp w1, w8, #4, hi
434+
; GISEL-NEXT: csel w0, w1, w0, gt
435+
; GISEL-NEXT: ret
388436
%dor = or i32 %d, 1
389437
%negd = sub i32 0, %dor
390438
%cmp = icmp sgt i32 %b, %negd
@@ -396,14 +444,22 @@ define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {
396444

397445
; (b > -(d | 1) && a u == c)
398446
define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) {
399-
; CHECK-LABEL: neg_range_int_comp_ua3:
400-
; CHECK: // %bb.0:
401-
; CHECK-NEXT: orr w8, w3, #0x1
402-
; CHECK-NEXT: cmp w0, w2
403-
; CHECK-NEXT: neg w8, w8
404-
; CHECK-NEXT: ccmp w1, w8, #4, eq
405-
; CHECK-NEXT: csel w0, w1, w0, gt
406-
; CHECK-NEXT: ret
447+
; SDISEL-LABEL: neg_range_int_comp_ua3:
448+
; SDISEL: // %bb.0:
449+
; SDISEL-NEXT: orr w8, w3, #0x1
450+
; SDISEL-NEXT: cmp w0, w2
451+
; SDISEL-NEXT: ccmn w1, w8, #4, eq
452+
; SDISEL-NEXT: csel w0, w1, w0, gt
453+
; SDISEL-NEXT: ret
454+
;
455+
; GISEL-LABEL: neg_range_int_comp_ua3:
456+
; GISEL: // %bb.0:
457+
; GISEL-NEXT: orr w8, w3, #0x1
458+
; GISEL-NEXT: cmp w0, w2
459+
; GISEL-NEXT: neg w8, w8
460+
; GISEL-NEXT: ccmp w1, w8, #4, eq
461+
; GISEL-NEXT: csel w0, w1, w0, gt
462+
; GISEL-NEXT: ret
407463
%dor = or i32 %d, 1
408464
%negd = sub i32 0, %dor
409465
%cmp = icmp sgt i32 %b, %negd
@@ -419,8 +475,7 @@ define i32 @neg_range_int_c(i32 %a, i32 %b, i32 %c) {
419475
; SDISEL: // %bb.0: // %entry
420476
; SDISEL-NEXT: orr w8, w0, #0x1
421477
; SDISEL-NEXT: orr w9, w1, #0x3
422-
; SDISEL-NEXT: neg w8, w8
423-
; SDISEL-NEXT: cmp w9, w8
478+
; SDISEL-NEXT: cmn w9, w8
424479
; SDISEL-NEXT: ccmp w2, w0, #2, lo
425480
; SDISEL-NEXT: cset w0, lo
426481
; SDISEL-NEXT: ret

0 commit comments

Comments
 (0)