Skip to content

Commit 65538f5

Browse files
committed
[AArch64] Take cmn into account when adjusting compare constants
Turning a cmp into a cmn saves an extra mov and negate instruction, so take that into account when deciding whether to swap the compare operands. Also, do not consider swapping when the right-hand operand is a constant whose absolute value can be encoded as a cmn immediate.
1 parent b81fcd0 commit 65538f5

File tree

3 files changed

+37
-32
lines changed

3 files changed

+37
-32
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3876,10 +3876,15 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
38763876
// cmp w13, w12
38773877
// can be turned into:
38783878
// cmp w12, w11, lsl #1
3879-
if (!isa<ConstantSDNode>(RHS) || !isLegalArithImmed(RHS->getAsZExtVal())) {
3880-
SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
3881-
3882-
if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
3879+
if (!isa<ConstantSDNode>(RHS) ||
3880+
!isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
3881+
bool LHSIsCMN = LHS.getOpcode() == ISD::SUB && isCMN(LHS, CC);
3882+
bool RHSIsCMN = RHS.getOpcode() == ISD::SUB && isCMN(RHS, CC);
3883+
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
3884+
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
3885+
3886+
if (getCmpOperandFoldingProfit(TheLHS) + (LHSIsCMN ? 1 : 0) >
3887+
getCmpOperandFoldingProfit(TheRHS) + (RHSIsCMN ? 1 : 0)) {
38833888
std::swap(LHS, RHS);
38843889
CC = ISD::getSetCCSwappedOperands(CC);
38853890
}

llvm/test/CodeGen/AArch64/cmp-to-cmn.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ target triple = "arm64"
66
define i1 @test_EQ_IllEbT(i64 %a, i64 %b) {
77
; CHECK-LABEL: test_EQ_IllEbT:
88
; CHECK: // %bb.0: // %entry
9-
; CHECK-NEXT: cmn x1, x0
9+
; CHECK-NEXT: cmn x0, x1
1010
; CHECK-NEXT: cset w0, eq
1111
; CHECK-NEXT: ret
1212
entry:
@@ -72,7 +72,7 @@ entry:
7272
define i1 @test_EQ_IiiEbT(i32 %a, i32 %b) {
7373
; CHECK-LABEL: test_EQ_IiiEbT:
7474
; CHECK: // %bb.0: // %entry
75-
; CHECK-NEXT: cmn w1, w0
75+
; CHECK-NEXT: cmn w0, w1
7676
; CHECK-NEXT: cset w0, eq
7777
; CHECK-NEXT: ret
7878
entry:
@@ -137,8 +137,8 @@ entry:
137137
define i1 @test_EQ_IssEbT(i16 %a, i16 %b) {
138138
; CHECK-LABEL: test_EQ_IssEbT:
139139
; CHECK: // %bb.0: // %entry
140-
; CHECK-NEXT: sxth w8, w1
141-
; CHECK-NEXT: cmn w8, w0, sxth
140+
; CHECK-NEXT: sxth w8, w0
141+
; CHECK-NEXT: cmn w8, w1, sxth
142142
; CHECK-NEXT: cset w0, eq
143143
; CHECK-NEXT: ret
144144
entry:
@@ -152,8 +152,8 @@ entry:
152152
define i1 @test_EQ_IscEbT(i16 %a, i8 %b) {
153153
; CHECK-LABEL: test_EQ_IscEbT:
154154
; CHECK: // %bb.0: // %entry
155-
; CHECK-NEXT: and w8, w1, #0xff
156-
; CHECK-NEXT: cmn w8, w0, sxth
155+
; CHECK-NEXT: sxth w8, w0
156+
; CHECK-NEXT: cmn w8, w1, uxtb
157157
; CHECK-NEXT: cset w0, eq
158158
; CHECK-NEXT: ret
159159
entry:
@@ -194,8 +194,8 @@ entry:
194194
define i1 @test_EQ_IcsEbT(i8 %a, i16 %b) {
195195
; CHECK-LABEL: test_EQ_IcsEbT:
196196
; CHECK: // %bb.0: // %entry
197-
; CHECK-NEXT: sxth w8, w1
198-
; CHECK-NEXT: cmn w8, w0, uxtb
197+
; CHECK-NEXT: and w8, w0, #0xff
198+
; CHECK-NEXT: cmn w8, w1, sxth
199199
; CHECK-NEXT: cset w0, eq
200200
; CHECK-NEXT: ret
201201
entry:
@@ -209,8 +209,8 @@ entry:
209209
define i1 @test_EQ_IccEbT(i8 %a, i8 %b) {
210210
; CHECK-LABEL: test_EQ_IccEbT:
211211
; CHECK: // %bb.0: // %entry
212-
; CHECK-NEXT: and w8, w1, #0xff
213-
; CHECK-NEXT: cmn w8, w0, uxtb
212+
; CHECK-NEXT: and w8, w0, #0xff
213+
; CHECK-NEXT: cmn w8, w1, uxtb
214214
; CHECK-NEXT: cset w0, eq
215215
; CHECK-NEXT: ret
216216
entry:
@@ -224,7 +224,7 @@ entry:
224224
define i1 @test_NE_IllEbT(i64 %a, i64 %b) {
225225
; CHECK-LABEL: test_NE_IllEbT:
226226
; CHECK: // %bb.0: // %entry
227-
; CHECK-NEXT: cmn x1, x0
227+
; CHECK-NEXT: cmn x0, x1
228228
; CHECK-NEXT: cset w0, ne
229229
; CHECK-NEXT: ret
230230
entry:
@@ -290,7 +290,7 @@ entry:
290290
define i1 @test_NE_IiiEbT(i32 %a, i32 %b) {
291291
; CHECK-LABEL: test_NE_IiiEbT:
292292
; CHECK: // %bb.0: // %entry
293-
; CHECK-NEXT: cmn w1, w0
293+
; CHECK-NEXT: cmn w0, w1
294294
; CHECK-NEXT: cset w0, ne
295295
; CHECK-NEXT: ret
296296
entry:
@@ -355,8 +355,8 @@ entry:
355355
define i1 @test_NE_IssEbT(i16 %a, i16 %b) {
356356
; CHECK-LABEL: test_NE_IssEbT:
357357
; CHECK: // %bb.0: // %entry
358-
; CHECK-NEXT: sxth w8, w1
359-
; CHECK-NEXT: cmn w8, w0, sxth
358+
; CHECK-NEXT: sxth w8, w0
359+
; CHECK-NEXT: cmn w8, w1, sxth
360360
; CHECK-NEXT: cset w0, ne
361361
; CHECK-NEXT: ret
362362
entry:
@@ -370,8 +370,8 @@ entry:
370370
define i1 @test_NE_IscEbT(i16 %a, i8 %b) {
371371
; CHECK-LABEL: test_NE_IscEbT:
372372
; CHECK: // %bb.0: // %entry
373-
; CHECK-NEXT: and w8, w1, #0xff
374-
; CHECK-NEXT: cmn w8, w0, sxth
373+
; CHECK-NEXT: sxth w8, w0
374+
; CHECK-NEXT: cmn w8, w1, uxtb
375375
; CHECK-NEXT: cset w0, ne
376376
; CHECK-NEXT: ret
377377
entry:
@@ -412,8 +412,8 @@ entry:
412412
define i1 @test_NE_IcsEbT(i8 %a, i16 %b) {
413413
; CHECK-LABEL: test_NE_IcsEbT:
414414
; CHECK: // %bb.0: // %entry
415-
; CHECK-NEXT: sxth w8, w1
416-
; CHECK-NEXT: cmn w8, w0, uxtb
415+
; CHECK-NEXT: and w8, w0, #0xff
416+
; CHECK-NEXT: cmn w8, w1, sxth
417417
; CHECK-NEXT: cset w0, ne
418418
; CHECK-NEXT: ret
419419
entry:
@@ -427,8 +427,8 @@ entry:
427427
define i1 @test_NE_IccEbT(i8 %a, i8 %b) {
428428
; CHECK-LABEL: test_NE_IccEbT:
429429
; CHECK: // %bb.0: // %entry
430-
; CHECK-NEXT: and w8, w1, #0xff
431-
; CHECK-NEXT: cmn w8, w0, uxtb
430+
; CHECK-NEXT: and w8, w0, #0xff
431+
; CHECK-NEXT: cmn w8, w1, uxtb
432432
; CHECK-NEXT: cset w0, ne
433433
; CHECK-NEXT: ret
434434
entry:

llvm/test/CodeGen/AArch64/typepromotion-overflow.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,11 @@ define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {
107107
define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
108108
; CHECK-LABEL: overflow_add_positive_const_limit:
109109
; CHECK: // %bb.0:
110-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
111-
; CHECK-NEXT: mov w9, #8 // =0x8
112-
; CHECK-NEXT: cmp w8, w0, sxtb
110+
; CHECK-NEXT: sxtb w9, w0
113111
; CHECK-NEXT: mov w8, #16 // =0x10
114-
; CHECK-NEXT: csel w0, w9, w8, gt
112+
; CHECK-NEXT: cmn w9, #1
113+
; CHECK-NEXT: mov w9, #8 // =0x8
114+
; CHECK-NEXT: csel w0, w9, w8, lt
115115
; CHECK-NEXT: ret
116116
%cmp = icmp slt i8 %a, -1
117117
%res = select i1 %cmp, i32 8, i32 16
@@ -162,11 +162,11 @@ define i32 @safe_add_underflow_neg(i8 zeroext %a) {
162162
define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
163163
; CHECK-LABEL: overflow_sub_negative_const_limit:
164164
; CHECK: // %bb.0:
165-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
166-
; CHECK-NEXT: mov w9, #8 // =0x8
167-
; CHECK-NEXT: cmp w8, w0, sxtb
165+
; CHECK-NEXT: sxtb w9, w0
168166
; CHECK-NEXT: mov w8, #16 // =0x10
169-
; CHECK-NEXT: csel w0, w9, w8, gt
167+
; CHECK-NEXT: cmn w9, #1
168+
; CHECK-NEXT: mov w9, #8 // =0x8
169+
; CHECK-NEXT: csel w0, w9, w8, lt
170170
; CHECK-NEXT: ret
171171
%cmp = icmp slt i8 %a, -1
172172
%res = select i1 %cmp, i32 8, i32 16

0 commit comments

Comments
 (0)