Skip to content

Commit 922e8f0

Browse files
AZero13 authored and yuxuanchen1997 committed
[AArch64] Take cmn into account when adjusting compare constants (#98634)
Summary: Turning a cmp into a cmn saves an extra mov and negate instruction, so take that into account when deciding whether to flip the compare operands. This will allow further optimizations down the line when we fold more variations of negative compares to cmn. As part of this, do not consider flipping the operands when the right-hand (2nd) operand is a constant whose absolute value is a legal arithmetic immediate, since such a constant can be encoded directly into a cmn.
Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
Differential Revision: https://phabricator.intern.facebook.com/D60251526
1 parent 7d439bd commit 922e8f0

File tree

3 files changed

+37
-32
lines changed

3 files changed

+37
-32
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3870,10 +3870,15 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
38703870
// cmp w13, w12
38713871
// can be turned into:
38723872
// cmp w12, w11, lsl #1
3873-
if (!isa<ConstantSDNode>(RHS) || !isLegalArithImmed(RHS->getAsZExtVal())) {
3874-
SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
3875-
3876-
if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
3873+
if (!isa<ConstantSDNode>(RHS) ||
3874+
!isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
3875+
bool LHSIsCMN = isCMN(LHS, CC);
3876+
bool RHSIsCMN = isCMN(RHS, CC);
3877+
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
3878+
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
3879+
3880+
if (getCmpOperandFoldingProfit(TheLHS) + (LHSIsCMN ? 1 : 0) >
3881+
getCmpOperandFoldingProfit(TheRHS) + (RHSIsCMN ? 1 : 0)) {
38773882
std::swap(LHS, RHS);
38783883
CC = ISD::getSetCCSwappedOperands(CC);
38793884
}

llvm/test/CodeGen/AArch64/cmp-to-cmn.ll

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ target triple = "arm64"
66
define i1 @test_EQ_IllEbT(i64 %a, i64 %b) {
77
; CHECK-LABEL: test_EQ_IllEbT:
88
; CHECK: // %bb.0: // %entry
9-
; CHECK-NEXT: cmn x1, x0
9+
; CHECK-NEXT: cmn x0, x1
1010
; CHECK-NEXT: cset w0, eq
1111
; CHECK-NEXT: ret
1212
entry:
@@ -72,7 +72,7 @@ entry:
7272
define i1 @test_EQ_IiiEbT(i32 %a, i32 %b) {
7373
; CHECK-LABEL: test_EQ_IiiEbT:
7474
; CHECK: // %bb.0: // %entry
75-
; CHECK-NEXT: cmn w1, w0
75+
; CHECK-NEXT: cmn w0, w1
7676
; CHECK-NEXT: cset w0, eq
7777
; CHECK-NEXT: ret
7878
entry:
@@ -137,8 +137,8 @@ entry:
137137
define i1 @test_EQ_IssEbT(i16 %a, i16 %b) {
138138
; CHECK-LABEL: test_EQ_IssEbT:
139139
; CHECK: // %bb.0: // %entry
140-
; CHECK-NEXT: sxth w8, w1
141-
; CHECK-NEXT: cmn w8, w0, sxth
140+
; CHECK-NEXT: sxth w8, w0
141+
; CHECK-NEXT: cmn w8, w1, sxth
142142
; CHECK-NEXT: cset w0, eq
143143
; CHECK-NEXT: ret
144144
entry:
@@ -152,8 +152,8 @@ entry:
152152
define i1 @test_EQ_IscEbT(i16 %a, i8 %b) {
153153
; CHECK-LABEL: test_EQ_IscEbT:
154154
; CHECK: // %bb.0: // %entry
155-
; CHECK-NEXT: and w8, w1, #0xff
156-
; CHECK-NEXT: cmn w8, w0, sxth
155+
; CHECK-NEXT: sxth w8, w0
156+
; CHECK-NEXT: cmn w8, w1, uxtb
157157
; CHECK-NEXT: cset w0, eq
158158
; CHECK-NEXT: ret
159159
entry:
@@ -194,8 +194,8 @@ entry:
194194
define i1 @test_EQ_IcsEbT(i8 %a, i16 %b) {
195195
; CHECK-LABEL: test_EQ_IcsEbT:
196196
; CHECK: // %bb.0: // %entry
197-
; CHECK-NEXT: sxth w8, w1
198-
; CHECK-NEXT: cmn w8, w0, uxtb
197+
; CHECK-NEXT: and w8, w0, #0xff
198+
; CHECK-NEXT: cmn w8, w1, sxth
199199
; CHECK-NEXT: cset w0, eq
200200
; CHECK-NEXT: ret
201201
entry:
@@ -209,8 +209,8 @@ entry:
209209
define i1 @test_EQ_IccEbT(i8 %a, i8 %b) {
210210
; CHECK-LABEL: test_EQ_IccEbT:
211211
; CHECK: // %bb.0: // %entry
212-
; CHECK-NEXT: and w8, w1, #0xff
213-
; CHECK-NEXT: cmn w8, w0, uxtb
212+
; CHECK-NEXT: and w8, w0, #0xff
213+
; CHECK-NEXT: cmn w8, w1, uxtb
214214
; CHECK-NEXT: cset w0, eq
215215
; CHECK-NEXT: ret
216216
entry:
@@ -224,7 +224,7 @@ entry:
224224
define i1 @test_NE_IllEbT(i64 %a, i64 %b) {
225225
; CHECK-LABEL: test_NE_IllEbT:
226226
; CHECK: // %bb.0: // %entry
227-
; CHECK-NEXT: cmn x1, x0
227+
; CHECK-NEXT: cmn x0, x1
228228
; CHECK-NEXT: cset w0, ne
229229
; CHECK-NEXT: ret
230230
entry:
@@ -290,7 +290,7 @@ entry:
290290
define i1 @test_NE_IiiEbT(i32 %a, i32 %b) {
291291
; CHECK-LABEL: test_NE_IiiEbT:
292292
; CHECK: // %bb.0: // %entry
293-
; CHECK-NEXT: cmn w1, w0
293+
; CHECK-NEXT: cmn w0, w1
294294
; CHECK-NEXT: cset w0, ne
295295
; CHECK-NEXT: ret
296296
entry:
@@ -355,8 +355,8 @@ entry:
355355
define i1 @test_NE_IssEbT(i16 %a, i16 %b) {
356356
; CHECK-LABEL: test_NE_IssEbT:
357357
; CHECK: // %bb.0: // %entry
358-
; CHECK-NEXT: sxth w8, w1
359-
; CHECK-NEXT: cmn w8, w0, sxth
358+
; CHECK-NEXT: sxth w8, w0
359+
; CHECK-NEXT: cmn w8, w1, sxth
360360
; CHECK-NEXT: cset w0, ne
361361
; CHECK-NEXT: ret
362362
entry:
@@ -370,8 +370,8 @@ entry:
370370
define i1 @test_NE_IscEbT(i16 %a, i8 %b) {
371371
; CHECK-LABEL: test_NE_IscEbT:
372372
; CHECK: // %bb.0: // %entry
373-
; CHECK-NEXT: and w8, w1, #0xff
374-
; CHECK-NEXT: cmn w8, w0, sxth
373+
; CHECK-NEXT: sxth w8, w0
374+
; CHECK-NEXT: cmn w8, w1, uxtb
375375
; CHECK-NEXT: cset w0, ne
376376
; CHECK-NEXT: ret
377377
entry:
@@ -412,8 +412,8 @@ entry:
412412
define i1 @test_NE_IcsEbT(i8 %a, i16 %b) {
413413
; CHECK-LABEL: test_NE_IcsEbT:
414414
; CHECK: // %bb.0: // %entry
415-
; CHECK-NEXT: sxth w8, w1
416-
; CHECK-NEXT: cmn w8, w0, uxtb
415+
; CHECK-NEXT: and w8, w0, #0xff
416+
; CHECK-NEXT: cmn w8, w1, sxth
417417
; CHECK-NEXT: cset w0, ne
418418
; CHECK-NEXT: ret
419419
entry:
@@ -427,8 +427,8 @@ entry:
427427
define i1 @test_NE_IccEbT(i8 %a, i8 %b) {
428428
; CHECK-LABEL: test_NE_IccEbT:
429429
; CHECK: // %bb.0: // %entry
430-
; CHECK-NEXT: and w8, w1, #0xff
431-
; CHECK-NEXT: cmn w8, w0, uxtb
430+
; CHECK-NEXT: and w8, w0, #0xff
431+
; CHECK-NEXT: cmn w8, w1, uxtb
432432
; CHECK-NEXT: cset w0, ne
433433
; CHECK-NEXT: ret
434434
entry:

llvm/test/CodeGen/AArch64/typepromotion-overflow.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -107,11 +107,11 @@ define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {
107107
define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
108108
; CHECK-LABEL: overflow_add_positive_const_limit:
109109
; CHECK: // %bb.0:
110-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
111-
; CHECK-NEXT: mov w9, #8 // =0x8
112-
; CHECK-NEXT: cmp w8, w0, sxtb
110+
; CHECK-NEXT: sxtb w9, w0
113111
; CHECK-NEXT: mov w8, #16 // =0x10
114-
; CHECK-NEXT: csel w0, w9, w8, gt
112+
; CHECK-NEXT: cmn w9, #1
113+
; CHECK-NEXT: mov w9, #8 // =0x8
114+
; CHECK-NEXT: csel w0, w9, w8, lt
115115
; CHECK-NEXT: ret
116116
%cmp = icmp slt i8 %a, -1
117117
%res = select i1 %cmp, i32 8, i32 16
@@ -162,11 +162,11 @@ define i32 @safe_add_underflow_neg(i8 zeroext %a) {
162162
define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
163163
; CHECK-LABEL: overflow_sub_negative_const_limit:
164164
; CHECK: // %bb.0:
165-
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
166-
; CHECK-NEXT: mov w9, #8 // =0x8
167-
; CHECK-NEXT: cmp w8, w0, sxtb
165+
; CHECK-NEXT: sxtb w9, w0
168166
; CHECK-NEXT: mov w8, #16 // =0x10
169-
; CHECK-NEXT: csel w0, w9, w8, gt
167+
; CHECK-NEXT: cmn w9, #1
168+
; CHECK-NEXT: mov w9, #8 // =0x8
169+
; CHECK-NEXT: csel w0, w9, w8, lt
170170
; CHECK-NEXT: ret
171171
%cmp = icmp slt i8 %a, -1
172172
%res = select i1 %cmp, i32 8, i32 16

0 commit comments

Comments
 (0)