Skip to content

Commit e00366d

Browse files
authored
[AArch64] Check for negative numbers when adjusting icmps (#141151)
This relies on the fact that we can use tst and ands for comparisons as by emitComparison. Also has mitigations for when comparing with -1 and 1 to avoid regressions. Fixes: #141137
1 parent f88a9a3 commit e00366d

12 files changed

+288
-106
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 61 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -3342,6 +3342,12 @@ static bool isLegalArithImmed(uint64_t C) {
33423342
return IsLegal;
33433343
}
33443344

3345+
bool isLegalCmpImmed(APInt C) {
3346+
// Works for negative immediates too, as it can be written as an ADDS
3347+
// instruction with a negated immediate.
3348+
return isLegalArithImmed(C.abs().getZExtValue());
3349+
}
3350+
33453351
static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
33463352
KnownBits KnownSrc = DAG.computeKnownBits(CheckedVal);
33473353
return !KnownSrc.getSignedMinValue().isMinSignedValue();
@@ -3767,58 +3773,82 @@ static unsigned getCmpOperandFoldingProfit(SDValue Op) {
37673773
return 0;
37683774
}
37693775

3776+
// emitComparison() converts comparison with one or negative one to comparison
3777+
// with 0. Note that this only works for signed comparisons because of how ANDS
3778+
// works.
3779+
static bool shouldBeAdjustedToZero(SDValue LHS, APInt C, ISD::CondCode &CC) {
3780+
// Only works for ANDS and AND.
3781+
if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS)
3782+
return false;
3783+
3784+
if (C.isOne() && (CC == ISD::SETLT || CC == ISD::SETGE)) {
3785+
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3786+
return true;
3787+
}
3788+
3789+
if (C.isAllOnes() && (CC == ISD::SETLE || CC == ISD::SETGT)) {
3790+
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3791+
return true;
3792+
}
3793+
3794+
return false;
3795+
}
3796+
37703797
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
37713798
SDValue &AArch64cc, SelectionDAG &DAG,
37723799
const SDLoc &dl) {
37733800
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
37743801
EVT VT = RHS.getValueType();
3775-
uint64_t C = RHSC->getZExtValue();
3776-
if (!isLegalArithImmed(C)) {
3802+
APInt C = RHSC->getAPIntValue();
3803+
// shouldBeAdjustedToZero is a special case to better fold with
3804+
// emitComparison().
3805+
if (shouldBeAdjustedToZero(LHS, C, CC)) {
3806+
// Adjust the constant to zero.
3807+
// CC has already been adjusted.
3808+
RHS = DAG.getConstant(0, dl, VT);
3809+
} else if (!isLegalCmpImmed(C)) {
37773810
// Constant does not fit, try adjusting it by one?
37783811
switch (CC) {
37793812
default:
37803813
break;
37813814
case ISD::SETLT:
37823815
case ISD::SETGE:
3783-
if ((VT == MVT::i32 && C != 0x80000000 &&
3784-
isLegalArithImmed((uint32_t)(C - 1))) ||
3785-
(VT == MVT::i64 && C != 0x80000000ULL &&
3786-
isLegalArithImmed(C - 1ULL))) {
3787-
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3788-
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
3789-
RHS = DAG.getConstant(C, dl, VT);
3816+
if (!C.isMinSignedValue()) {
3817+
APInt CMinusOne = C - 1;
3818+
if (isLegalCmpImmed(CMinusOne)) {
3819+
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3820+
RHS = DAG.getConstant(CMinusOne, dl, VT);
3821+
}
37903822
}
37913823
break;
37923824
case ISD::SETULT:
37933825
case ISD::SETUGE:
3794-
if ((VT == MVT::i32 && C != 0 &&
3795-
isLegalArithImmed((uint32_t)(C - 1))) ||
3796-
(VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
3797-
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3798-
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
3799-
RHS = DAG.getConstant(C, dl, VT);
3826+
if (!C.isZero()) {
3827+
APInt CMinusOne = C - 1;
3828+
if (isLegalCmpImmed(CMinusOne)) {
3829+
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3830+
RHS = DAG.getConstant(CMinusOne, dl, VT);
3831+
}
38003832
}
38013833
break;
38023834
case ISD::SETLE:
38033835
case ISD::SETGT:
3804-
if ((VT == MVT::i32 && C != INT32_MAX &&
3805-
isLegalArithImmed((uint32_t)(C + 1))) ||
3806-
(VT == MVT::i64 && C != INT64_MAX &&
3807-
isLegalArithImmed(C + 1ULL))) {
3808-
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3809-
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
3810-
RHS = DAG.getConstant(C, dl, VT);
3836+
if (!C.isMaxSignedValue()) {
3837+
APInt CPlusOne = C + 1;
3838+
if (isLegalCmpImmed(CPlusOne)) {
3839+
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3840+
RHS = DAG.getConstant(CPlusOne, dl, VT);
3841+
}
38113842
}
38123843
break;
38133844
case ISD::SETULE:
38143845
case ISD::SETUGT:
3815-
if ((VT == MVT::i32 && C != UINT32_MAX &&
3816-
isLegalArithImmed((uint32_t)(C + 1))) ||
3817-
(VT == MVT::i64 && C != UINT64_MAX &&
3818-
isLegalArithImmed(C + 1ULL))) {
3819-
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3820-
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
3821-
RHS = DAG.getConstant(C, dl, VT);
3846+
if (!C.isAllOnes()) {
3847+
APInt CPlusOne = C + 1;
3848+
if (isLegalCmpImmed(CPlusOne)) {
3849+
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3850+
RHS = DAG.getConstant(CPlusOne, dl, VT);
3851+
}
38223852
}
38233853
break;
38243854
}
@@ -3835,8 +3865,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
38353865
// cmp w13, w12
38363866
// can be turned into:
38373867
// cmp w12, w11, lsl #1
3838-
if (!isa<ConstantSDNode>(RHS) ||
3839-
!isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
3868+
if (!isa<ConstantSDNode>(RHS) || !isLegalCmpImmed(RHS->getAsAPIntVal())) {
38403869
bool LHSIsCMN = isCMN(LHS, CC, DAG);
38413870
bool RHSIsCMN = isCMN(RHS, CC, DAG);
38423871
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
@@ -17365,17 +17394,10 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
1736517394
// 12-bit optionally shifted immediates are legal for adds.
1736617395
bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
1736717396
if (Immed == std::numeric_limits<int64_t>::min()) {
17368-
LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
17369-
<< ": avoid UB for INT64_MIN\n");
1737017397
return false;
1737117398
}
1737217399
// Same encoding for add/sub, just flip the sign.
17373-
Immed = std::abs(Immed);
17374-
bool IsLegal = ((Immed >> 12) == 0 ||
17375-
((Immed & 0xfff) == 0 && Immed >> 24 == 0));
17376-
LLVM_DEBUG(dbgs() << "Is " << Immed
17377-
<< " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
17378-
return IsLegal;
17400+
return isLegalArithImmed((uint64_t)std::abs(Immed));
1737917401
}
1738017402

1738117403
bool AArch64TargetLowering::isLegalAddScalableImmediate(int64_t Imm) const {

llvm/test/CodeGen/AArch64/arm64-csel.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,8 @@ define i32 @foo7(i32 %a, i32 %b) nounwind {
100100
; CHECK-NEXT: subs w8, w0, w1
101101
; CHECK-NEXT: cneg w9, w8, mi
102102
; CHECK-NEXT: cmn w8, #1
103-
; CHECK-NEXT: csel w10, w9, w0, lt
104-
; CHECK-NEXT: cmp w8, #0
105-
; CHECK-NEXT: csel w0, w10, w9, ge
103+
; CHECK-NEXT: csel w8, w9, w0, lt
104+
; CHECK-NEXT: csel w0, w8, w9, gt
106105
; CHECK-NEXT: ret
107106
entry:
108107
%sub = sub nsw i32 %a, %b

llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ define i32 @f_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
1414
; CHECK-LABEL: f_i8_sign_extend_inreg:
1515
; CHECK: // %bb.0: // %entry
1616
; CHECK-NEXT: sxtb w8, w0
17-
; CHECK-NEXT: cmp w8, #0
18-
; CHECK-NEXT: csel w8, w1, w2, ge
17+
; CHECK-NEXT: cmn w8, #1
18+
; CHECK-NEXT: csel w8, w1, w2, gt
1919
; CHECK-NEXT: add w0, w8, w0, uxtb
2020
; CHECK-NEXT: ret
2121
entry:
@@ -36,8 +36,8 @@ define i32 @f_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
3636
; CHECK-LABEL: f_i16_sign_extend_inreg:
3737
; CHECK: // %bb.0: // %entry
3838
; CHECK-NEXT: sxth w8, w0
39-
; CHECK-NEXT: cmp w8, #0
40-
; CHECK-NEXT: csel w8, w1, w2, ge
39+
; CHECK-NEXT: cmn w8, #1
40+
; CHECK-NEXT: csel w8, w1, w2, gt
4141
; CHECK-NEXT: add w0, w8, w0, uxth
4242
; CHECK-NEXT: ret
4343
entry:
@@ -57,8 +57,8 @@ B:
5757
define i64 @f_i32_sign_extend_inreg(i32 %in, i64 %a, i64 %b) nounwind {
5858
; CHECK-LABEL: f_i32_sign_extend_inreg:
5959
; CHECK: // %bb.0: // %entry
60-
; CHECK-NEXT: cmp w0, #0
61-
; CHECK-NEXT: csel x8, x1, x2, ge
60+
; CHECK-NEXT: cmn w0, #1
61+
; CHECK-NEXT: csel x8, x1, x2, gt
6262
; CHECK-NEXT: add x0, x8, w0, uxtw
6363
; CHECK-NEXT: ret
6464
entry:
@@ -145,8 +145,8 @@ define i64 @f_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
145145
; CHECK: // %bb.0: // %entry
146146
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
147147
; CHECK-NEXT: sxtw x8, w0
148-
; CHECK-NEXT: cmp x8, #0
149-
; CHECK-NEXT: csel x8, x1, x2, ge
148+
; CHECK-NEXT: cmn x8, #1
149+
; CHECK-NEXT: csel x8, x1, x2, gt
150150
; CHECK-NEXT: add x0, x8, w0, uxtw
151151
; CHECK-NEXT: ret
152152
entry:

llvm/test/CodeGen/AArch64/cmp-to-cmn.ll

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,3 +430,175 @@ entry:
430430
%cmp = icmp ne i32 %conv, %add
431431
ret i1 %cmp
432432
}
433+
434+
define i1 @cmn_large_imm(i32 %a) {
435+
; CHECK-LABEL: cmn_large_imm:
436+
; CHECK: // %bb.0:
437+
; CHECK-NEXT: mov w8, #64765 // =0xfcfd
438+
; CHECK-NEXT: movk w8, #64764, lsl #16
439+
; CHECK-NEXT: cmp w0, w8
440+
; CHECK-NEXT: cset w0, gt
441+
; CHECK-NEXT: ret
442+
%cmp = icmp sgt i32 %a, -50529027
443+
ret i1 %cmp
444+
}
445+
446+
define i1 @almost_immediate_neg_slt(i32 %x) {
447+
; CHECK-LABEL: almost_immediate_neg_slt:
448+
; CHECK: // %bb.0:
449+
; CHECK-NEXT: cmn w0, #4079, lsl #12 // =16707584
450+
; CHECK-NEXT: cset w0, le
451+
; CHECK-NEXT: ret
452+
%cmp = icmp slt i32 %x, -16707583
453+
ret i1 %cmp
454+
}
455+
456+
define i1 @almost_immediate_neg_slt_64(i64 %x) {
457+
; CHECK-LABEL: almost_immediate_neg_slt_64:
458+
; CHECK: // %bb.0:
459+
; CHECK-NEXT: cmn x0, #4079, lsl #12 // =16707584
460+
; CHECK-NEXT: cset w0, le
461+
; CHECK-NEXT: ret
462+
%cmp = icmp slt i64 %x, -16707583
463+
ret i1 %cmp
464+
}
465+
466+
define i1 @almost_immediate_neg_sge(i32 %x) {
467+
; CHECK-LABEL: almost_immediate_neg_sge:
468+
; CHECK: // %bb.0:
469+
; CHECK-NEXT: cmn w0, #4079, lsl #12 // =16707584
470+
; CHECK-NEXT: cset w0, gt
471+
; CHECK-NEXT: ret
472+
%cmp = icmp sge i32 %x, -16707583
473+
ret i1 %cmp
474+
}
475+
476+
define i1 @almost_immediate_neg_sge_64(i64 %x) {
477+
; CHECK-LABEL: almost_immediate_neg_sge_64:
478+
; CHECK: // %bb.0:
479+
; CHECK-NEXT: cmn x0, #4079, lsl #12 // =16707584
480+
; CHECK-NEXT: cset w0, gt
481+
; CHECK-NEXT: ret
482+
%cmp = icmp sge i64 %x, -16707583
483+
ret i1 %cmp
484+
}
485+
486+
define i1 @almost_immediate_neg_uge(i32 %x) {
487+
; CHECK-LABEL: almost_immediate_neg_uge:
488+
; CHECK: // %bb.0:
489+
; CHECK-NEXT: cmn w0, #4079, lsl #12 // =16707584
490+
; CHECK-NEXT: cset w0, hi
491+
; CHECK-NEXT: ret
492+
%cmp = icmp uge i32 %x, -16707583
493+
ret i1 %cmp
494+
}
495+
496+
define i1 @almost_immediate_neg_uge_64(i64 %x) {
497+
; CHECK-LABEL: almost_immediate_neg_uge_64:
498+
; CHECK: // %bb.0:
499+
; CHECK-NEXT: cmn x0, #4079, lsl #12 // =16707584
500+
; CHECK-NEXT: cset w0, hi
501+
; CHECK-NEXT: ret
502+
%cmp = icmp uge i64 %x, -16707583
503+
ret i1 %cmp
504+
}
505+
506+
define i1 @almost_immediate_neg_ult(i32 %x) {
507+
; CHECK-LABEL: almost_immediate_neg_ult:
508+
; CHECK: // %bb.0:
509+
; CHECK-NEXT: cmn w0, #4079, lsl #12 // =16707584
510+
; CHECK-NEXT: cset w0, ls
511+
; CHECK-NEXT: ret
512+
%cmp = icmp ult i32 %x, -16707583
513+
ret i1 %cmp
514+
}
515+
516+
define i1 @almost_immediate_neg_ult_64(i64 %x) {
517+
; CHECK-LABEL: almost_immediate_neg_ult_64:
518+
; CHECK: // %bb.0:
519+
; CHECK-NEXT: cmn x0, #4079, lsl #12 // =16707584
520+
; CHECK-NEXT: cset w0, ls
521+
; CHECK-NEXT: ret
522+
%cmp = icmp ult i64 %x, -16707583
523+
ret i1 %cmp
524+
}
525+
526+
define i1 @almost_immediate_neg_sle(i32 %x) {
527+
; CHECK-LABEL: almost_immediate_neg_sle:
528+
; CHECK: // %bb.0:
529+
; CHECK-NEXT: cmn w0, #4095, lsl #12 // =16773120
530+
; CHECK-NEXT: cset w0, lt
531+
; CHECK-NEXT: ret
532+
%cmp = icmp sle i32 %x, -16773121
533+
ret i1 %cmp
534+
}
535+
536+
define i1 @almost_immediate_neg_sle_64(i64 %x) {
537+
; CHECK-LABEL: almost_immediate_neg_sle_64:
538+
; CHECK: // %bb.0:
539+
; CHECK-NEXT: cmn x0, #4095, lsl #12 // =16773120
540+
; CHECK-NEXT: cset w0, lt
541+
; CHECK-NEXT: ret
542+
%cmp = icmp sle i64 %x, -16773121
543+
ret i1 %cmp
544+
}
545+
546+
define i1 @almost_immediate_neg_sgt(i32 %x) {
547+
; CHECK-LABEL: almost_immediate_neg_sgt:
548+
; CHECK: // %bb.0:
549+
; CHECK-NEXT: cmn w0, #4095, lsl #12 // =16773120
550+
; CHECK-NEXT: cset w0, ge
551+
; CHECK-NEXT: ret
552+
%cmp = icmp sgt i32 %x, -16773121
553+
ret i1 %cmp
554+
}
555+
556+
define i1 @almost_immediate_neg_sgt_64(i64 %x) {
557+
; CHECK-LABEL: almost_immediate_neg_sgt_64:
558+
; CHECK: // %bb.0:
559+
; CHECK-NEXT: cmn x0, #4095, lsl #12 // =16773120
560+
; CHECK-NEXT: cset w0, ge
561+
; CHECK-NEXT: ret
562+
%cmp = icmp sgt i64 %x, -16773121
563+
ret i1 %cmp
564+
}
565+
566+
define i1 @almost_immediate_neg_ule(i32 %x) {
567+
; CHECK-LABEL: almost_immediate_neg_ule:
568+
; CHECK: // %bb.0:
569+
; CHECK-NEXT: cmn w0, #4095, lsl #12 // =16773120
570+
; CHECK-NEXT: cset w0, lo
571+
; CHECK-NEXT: ret
572+
%cmp = icmp ule i32 %x, -16773121
573+
ret i1 %cmp
574+
}
575+
576+
define i1 @almost_immediate_neg_ule_64(i64 %x) {
577+
; CHECK-LABEL: almost_immediate_neg_ule_64:
578+
; CHECK: // %bb.0:
579+
; CHECK-NEXT: cmn x0, #4095, lsl #12 // =16773120
580+
; CHECK-NEXT: cset w0, lo
581+
; CHECK-NEXT: ret
582+
%cmp = icmp ule i64 %x, -16773121
583+
ret i1 %cmp
584+
}
585+
586+
define i1 @almost_immediate_neg_ugt(i32 %x) {
587+
; CHECK-LABEL: almost_immediate_neg_ugt:
588+
; CHECK: // %bb.0:
589+
; CHECK-NEXT: cmn w0, #4095, lsl #12 // =16773120
590+
; CHECK-NEXT: cset w0, hs
591+
; CHECK-NEXT: ret
592+
%cmp = icmp ugt i32 %x, -16773121
593+
ret i1 %cmp
594+
}
595+
596+
define i1 @almost_immediate_neg_ugt_64(i64 %x) {
597+
; CHECK-LABEL: almost_immediate_neg_ugt_64:
598+
; CHECK: // %bb.0:
599+
; CHECK-NEXT: cmn x0, #4095, lsl #12 // =16773120
600+
; CHECK-NEXT: cset w0, hs
601+
; CHECK-NEXT: ret
602+
%cmp = icmp ugt i64 %x, -16773121
603+
ret i1 %cmp
604+
}

0 commit comments

Comments
 (0)