Skip to content

Commit 643e797

Browse files
committed
[AArch64] Use isKnownNonZero to optimize eligible compares to cmn
Turning a cmp into a cmn saves an extra mov and negate instruction, so take that into account when deciding whether to flip the compare operands. Also, do not consider right-hand operands whose absolute value can be encoded into a cmn. Adding 0 and subtracting 0 differ in how they set the carry flag, which matters for unsigned comparisons. The problematic case for unsigned comparisons therefore occurs only when the second argument is zero. Source: https://devblogs.microsoft.com/oldnewthing/20210607-00/?p=105288
1 parent 9a90f60 commit 643e797

File tree

3 files changed

+81
-50
lines changed

3 files changed

+81
-50
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3385,19 +3385,26 @@ static bool isLegalArithImmed(uint64_t C) {
33853385
return IsLegal;
33863386
}
33873387

3388+
static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
3389+
KnownBits KnownSrc = DAG.computeKnownBits(CheckedVal);
3390+
return !KnownSrc.getSignedMinValue().isMinSignedValue();
3391+
}
3392+
33883393
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
33893394
// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
33903395
// can be set differently by this operation. It comes down to whether
33913396
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
33923397
// everything is fine. If not then the optimization is wrong. Thus general
33933398
// comparisons are only valid if op2 != 0.
33943399
//
3395-
// So, finally, the only LLVM-native comparisons that don't mention C and V
3396-
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
3397-
// the absence of information about op2.
3398-
static bool isCMN(SDValue Op, ISD::CondCode CC) {
3400+
// So, finally, the only LLVM-native comparisons that don't mention C or V
3401+
// are SETEQ and SETNE. Unsigned ones (which read C) also need op2 != 0, and
3402+
// signed ones (which read V) need op2 != INT_MIN, before CMN is safe to use.
3403+
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
33993404
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
3400-
(CC == ISD::SETEQ || CC == ISD::SETNE);
3405+
(isIntEqualitySetCC(CC) ||
3406+
(isUnsignedIntSetCC(CC) ? DAG.isKnownNeverZero(Op.getOperand(1))
3407+
: cannotBeIntMin(Op.getOperand(1), DAG)));
34013408
}
34023409

34033410
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
@@ -3442,11 +3449,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
34423449
// register to WZR/XZR if it ends up being unused.
34433450
unsigned Opcode = AArch64ISD::SUBS;
34443451

3445-
if (isCMN(RHS, CC)) {
3452+
if (isCMN(RHS, CC, DAG)) {
34463453
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
34473454
Opcode = AArch64ISD::ADDS;
34483455
RHS = RHS.getOperand(1);
3449-
} else if (isCMN(LHS, CC)) {
3456+
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3457+
isIntEqualitySetCC(CC)) {
34503458
// As we are looking for EQ/NE compares, the operands can be commuted; can
34513459
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
34523460
Opcode = AArch64ISD::ADDS;
@@ -3548,13 +3556,15 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
35483556
Opcode = AArch64ISD::CCMN;
35493557
RHS = DAG.getConstant(Imm.abs(), DL, Const->getValueType(0));
35503558
}
3551-
} else if (RHS.getOpcode() == ISD::SUB) {
3552-
SDValue SubOp0 = RHS.getOperand(0);
3553-
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3554-
// See emitComparison() on why we can only do this for SETEQ and SETNE.
3555-
Opcode = AArch64ISD::CCMN;
3556-
RHS = RHS.getOperand(1);
3557-
}
3559+
} else if (isCMN(RHS, CC, DAG)) {
3560+
Opcode = AArch64ISD::CCMN;
3561+
RHS = RHS.getOperand(1);
3562+
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3563+
isIntEqualitySetCC(CC)) {
3564+
// As we are looking for EQ/NE compares, the operands can be commuted; can
3565+
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
3566+
Opcode = AArch64ISD::CCMN;
3567+
LHS = LHS.getOperand(1);
35583568
}
35593569
if (Opcode == 0)
35603570
Opcode = AArch64ISD::CCMP;
@@ -3872,8 +3882,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
38723882
// cmp w12, w11, lsl #1
38733883
if (!isa<ConstantSDNode>(RHS) ||
38743884
!isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
3875-
bool LHSIsCMN = isCMN(LHS, CC);
3876-
bool RHSIsCMN = isCMN(RHS, CC);
3885+
bool LHSIsCMN = isCMN(LHS, CC, DAG);
3886+
bool RHSIsCMN = isCMN(RHS, CC, DAG);
38773887
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
38783888
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
38793889

@@ -3886,7 +3896,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
38863896

38873897
SDValue Cmp;
38883898
AArch64CC::CondCode AArch64CC;
3889-
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
3899+
if (isIntEqualitySetCC(CC) && isa<ConstantSDNode>(RHS)) {
38903900
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
38913901

38923902
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.

llvm/test/CodeGen/AArch64/cmp-chains.ll

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -260,14 +260,22 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) {
260260

261261
; (b > -(d | 1) && a < c)
262262
define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
263-
; CHECK-LABEL: neg_range_int_comp:
264-
; CHECK: // %bb.0:
265-
; CHECK-NEXT: orr w8, w3, #0x1
266-
; CHECK-NEXT: cmp w0, w2
267-
; CHECK-NEXT: neg w8, w8
268-
; CHECK-NEXT: ccmp w1, w8, #4, lt
269-
; CHECK-NEXT: csel w0, w1, w0, gt
270-
; CHECK-NEXT: ret
263+
; SDISEL-LABEL: neg_range_int_comp:
264+
; SDISEL: // %bb.0:
265+
; SDISEL-NEXT: orr w8, w3, #0x1
266+
; SDISEL-NEXT: cmp w0, w2
267+
; SDISEL-NEXT: ccmn w1, w8, #4, lt
268+
; SDISEL-NEXT: csel w0, w1, w0, gt
269+
; SDISEL-NEXT: ret
270+
;
271+
; GISEL-LABEL: neg_range_int_comp:
272+
; GISEL: // %bb.0:
273+
; GISEL-NEXT: orr w8, w3, #0x1
274+
; GISEL-NEXT: cmp w0, w2
275+
; GISEL-NEXT: neg w8, w8
276+
; GISEL-NEXT: ccmp w1, w8, #4, lt
277+
; GISEL-NEXT: csel w0, w1, w0, gt
278+
; GISEL-NEXT: ret
271279
%dor = or i32 %d, 1
272280
%negd = sub i32 0, %dor
273281
%cmp = icmp sgt i32 %b, %negd
@@ -279,14 +287,22 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
279287

280288
; (b >u -(d | 1) && a < c)
281289
define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
282-
; CHECK-LABEL: neg_range_int_comp_u:
283-
; CHECK: // %bb.0:
284-
; CHECK-NEXT: orr w8, w3, #0x1
285-
; CHECK-NEXT: cmp w0, w2
286-
; CHECK-NEXT: neg w8, w8
287-
; CHECK-NEXT: ccmp w1, w8, #0, lt
288-
; CHECK-NEXT: csel w0, w1, w0, hi
289-
; CHECK-NEXT: ret
290+
; SDISEL-LABEL: neg_range_int_comp_u:
291+
; SDISEL: // %bb.0:
292+
; SDISEL-NEXT: orr w8, w3, #0x1
293+
; SDISEL-NEXT: cmp w0, w2
294+
; SDISEL-NEXT: ccmn w1, w8, #0, lt
295+
; SDISEL-NEXT: csel w0, w1, w0, hi
296+
; SDISEL-NEXT: ret
297+
;
298+
; GISEL-LABEL: neg_range_int_comp_u:
299+
; GISEL: // %bb.0:
300+
; GISEL-NEXT: orr w8, w3, #0x1
301+
; GISEL-NEXT: cmp w0, w2
302+
; GISEL-NEXT: neg w8, w8
303+
; GISEL-NEXT: ccmp w1, w8, #0, lt
304+
; GISEL-NEXT: csel w0, w1, w0, hi
305+
; GISEL-NEXT: ret
290306
%dor = or i32 %d, 1
291307
%negd = sub i32 0, %dor
292308
%cmp = icmp ugt i32 %b, %negd
@@ -298,14 +314,22 @@ define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
298314

299315
; (b > -(d | 1) && a <u c)
300316
define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) {
301-
; CHECK-LABEL: neg_range_int_comp_ua:
302-
; CHECK: // %bb.0:
303-
; CHECK-NEXT: orr w8, w3, #0x1
304-
; CHECK-NEXT: cmp w0, w2
305-
; CHECK-NEXT: neg w8, w8
306-
; CHECK-NEXT: ccmp w1, w8, #4, lo
307-
; CHECK-NEXT: csel w0, w1, w0, gt
308-
; CHECK-NEXT: ret
317+
; SDISEL-LABEL: neg_range_int_comp_ua:
318+
; SDISEL: // %bb.0:
319+
; SDISEL-NEXT: orr w8, w3, #0x1
320+
; SDISEL-NEXT: cmp w0, w2
321+
; SDISEL-NEXT: ccmn w1, w8, #4, lo
322+
; SDISEL-NEXT: csel w0, w1, w0, gt
323+
; SDISEL-NEXT: ret
324+
;
325+
; GISEL-LABEL: neg_range_int_comp_ua:
326+
; GISEL: // %bb.0:
327+
; GISEL-NEXT: orr w8, w3, #0x1
328+
; GISEL-NEXT: cmp w0, w2
329+
; GISEL-NEXT: neg w8, w8
330+
; GISEL-NEXT: ccmp w1, w8, #4, lo
331+
; GISEL-NEXT: csel w0, w1, w0, gt
332+
; GISEL-NEXT: ret
309333
%dor = or i32 %d, 1
310334
%negd = sub i32 0, %dor
311335
%cmp = icmp sgt i32 %b, %negd

llvm/test/CodeGen/AArch64/cmp-select-sign.ll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,8 @@ define i32 @or_neg(i32 %x, i32 %y) {
266266
; CHECK-LABEL: or_neg:
267267
; CHECK: // %bb.0:
268268
; CHECK-NEXT: orr w8, w0, #0x1
269-
; CHECK-NEXT: neg w8, w8
270-
; CHECK-NEXT: cmp w8, w1
271-
; CHECK-NEXT: cset w0, gt
269+
; CHECK-NEXT: cmn w1, w8
270+
; CHECK-NEXT: cset w0, lt
272271
; CHECK-NEXT: ret
273272
%3 = or i32 %x, 1
274273
%4 = sub i32 0, %3
@@ -281,9 +280,8 @@ define i32 @or_neg_ult(i32 %x, i32 %y) {
281280
; CHECK-LABEL: or_neg_ult:
282281
; CHECK: // %bb.0:
283282
; CHECK-NEXT: orr w8, w0, #0x1
284-
; CHECK-NEXT: neg w8, w8
285-
; CHECK-NEXT: cmp w8, w1
286-
; CHECK-NEXT: cset w0, hi
283+
; CHECK-NEXT: cmn w1, w8
284+
; CHECK-NEXT: cset w0, lo
287285
; CHECK-NEXT: ret
288286
%3 = or i32 %x, 1
289287
%4 = sub i32 0, %3
@@ -326,9 +324,8 @@ define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) {
326324
; CHECK-LABEL: or_neg_no_smin_but_zero:
327325
; CHECK: // %bb.0:
328326
; CHECK-NEXT: bic w8, w0, w0, asr #31
329-
; CHECK-NEXT: neg w8, w8
330-
; CHECK-NEXT: cmp w8, w1
331-
; CHECK-NEXT: cset w0, gt
327+
; CHECK-NEXT: cmn w1, w8
328+
; CHECK-NEXT: cset w0, lt
332329
; CHECK-NEXT: ret
333330
%3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
334331
%4 = sub i32 0, %3

0 commit comments

Comments
 (0)