[AArch64] Take cmn into account when adjusting compare constants #98634


Merged — 1 commit merged into llvm:main from AZero13:reg-2 on Jul 16, 2024

Conversation

@AZero13 (Contributor) commented on Jul 12, 2024

Turning a cmp into cmn saves an extra mov and negate instruction, so take that into account when choosing when to flip the compare operands.

This will allow further optimizations down the line when we fold more variations of negative compares to cmn.

As part of this, do not consider swapping when the right-hand operand is a constant whose absolute value can be encoded into a cmn, since it can stay where it is as the second operand.
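
For a concrete picture of the saving (a minimal sketch, not part of the patch; the expected output matches test_EQ_IllEbT in the updated cmp-to-cmn.ll below):

```cpp
// Equality against a negated value: without the cmn fold this needs an
// explicit negate feeding the cmp; with it, a single cmn sets the same
// flags, since "cmn x0, x1" computes the flags of x0 + x1.
bool eq_neg(long a, long b) { return a == -b; }
// Expected AArch64 codegen (cf. test_EQ_IllEbT):
//   cmn x0, x1
//   cset w0, eq
//   ret
```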

@llvmbot (Member) commented on Jul 12, 2024

@llvm/pr-subscribers-backend-aarch64

Author: AtariDreams (AtariDreams)

Changes

Turning a cmp into cmn saves an extra mov and negate instruction, so take that into account when choosing when to flip the compare operands.

Also, do not consider swapping when the right-hand operand is a constant whose absolute value can be encoded into a cmn.


Full diff: https://github.com/llvm/llvm-project/pull/98634.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+9-4)
  • (modified) llvm/test/CodeGen/AArch64/cmp-to-cmn.ll (+20-20)
  • (modified) llvm/test/CodeGen/AArch64/typepromotion-overflow.ll (+8-8)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e5d5c6b6832af..96a7b76d4678a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3876,10 +3876,15 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   //    cmp     w13, w12
   // can be turned into:
   //    cmp     w12, w11, lsl #1
-  if (!isa<ConstantSDNode>(RHS) || !isLegalArithImmed(RHS->getAsZExtVal())) {
-    SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
-
-    if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
+  if (!isa<ConstantSDNode>(RHS) ||
+      !isLegalArithImmed(RHS->getAsAPIntVal().abs().getZExtValue())) {
+    bool LHSIsCMN = LHS.getOpcode() == ISD::SUB && isCMN(LHS, CC);
+    bool RHSIsCMN = RHS.getOpcode() == ISD::SUB && isCMN(RHS, CC);
+    SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
+    SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
+
+    if (getCmpOperandFoldingProfit(TheLHS) + (LHSIsCMN ? 1 : 0) >
+        getCmpOperandFoldingProfit(TheRHS) + (RHSIsCMN ? 1 : 0)) {
       std::swap(LHS, RHS);
       CC = ISD::getSetCCSwappedOperands(CC);
     }
diff --git a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
index cc6bd766eed78..1cc194e77b94b 100644
--- a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
@@ -6,7 +6,7 @@ target triple = "arm64"
 define i1 @test_EQ_IllEbT(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_EQ_IllEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmn x1, x0
+; CHECK-NEXT:    cmn x0, x1
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
 entry:
@@ -72,7 +72,7 @@ entry:
 define i1 @test_EQ_IiiEbT(i32 %a, i32 %b) {
 ; CHECK-LABEL: test_EQ_IiiEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmn w1, w0
+; CHECK-NEXT:    cmn w0, w1
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
 entry:
@@ -137,8 +137,8 @@ entry:
 define i1 @test_EQ_IssEbT(i16 %a, i16 %b) {
 ; CHECK-LABEL: test_EQ_IssEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    cmn w8, w0, sxth
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    cmn w8, w1, sxth
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
 entry:
@@ -152,8 +152,8 @@ entry:
 define i1 @test_EQ_IscEbT(i16 %a, i8 %b) {
 ; CHECK-LABEL: test_EQ_IscEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    cmn w8, w0, sxth
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    cmn w8, w1, uxtb
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
 entry:
@@ -194,8 +194,8 @@ entry:
 define i1 @test_EQ_IcsEbT(i8 %a, i16 %b) {
 ; CHECK-LABEL: test_EQ_IcsEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    cmn w8, w0, uxtb
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    cmn w8, w1, sxth
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
 entry:
@@ -209,8 +209,8 @@ entry:
 define i1 @test_EQ_IccEbT(i8 %a, i8 %b) {
 ; CHECK-LABEL: test_EQ_IccEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    cmn w8, w0, uxtb
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    cmn w8, w1, uxtb
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
 entry:
@@ -224,7 +224,7 @@ entry:
 define i1 @test_NE_IllEbT(i64 %a, i64 %b) {
 ; CHECK-LABEL: test_NE_IllEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmn x1, x0
+; CHECK-NEXT:    cmn x0, x1
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
 entry:
@@ -290,7 +290,7 @@ entry:
 define i1 @test_NE_IiiEbT(i32 %a, i32 %b) {
 ; CHECK-LABEL: test_NE_IiiEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmn w1, w0
+; CHECK-NEXT:    cmn w0, w1
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
 entry:
@@ -355,8 +355,8 @@ entry:
 define i1 @test_NE_IssEbT(i16 %a, i16 %b) {
 ; CHECK-LABEL: test_NE_IssEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    cmn w8, w0, sxth
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    cmn w8, w1, sxth
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
 entry:
@@ -370,8 +370,8 @@ entry:
 define i1 @test_NE_IscEbT(i16 %a, i8 %b) {
 ; CHECK-LABEL: test_NE_IscEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    cmn w8, w0, sxth
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    cmn w8, w1, uxtb
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
 entry:
@@ -412,8 +412,8 @@ entry:
 define i1 @test_NE_IcsEbT(i8 %a, i16 %b) {
 ; CHECK-LABEL: test_NE_IcsEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sxth w8, w1
-; CHECK-NEXT:    cmn w8, w0, uxtb
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    cmn w8, w1, sxth
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
 entry:
@@ -427,8 +427,8 @@ entry:
 define i1 @test_NE_IccEbT(i8 %a, i8 %b) {
 ; CHECK-LABEL: test_NE_IccEbT:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    and w8, w1, #0xff
-; CHECK-NEXT:    cmn w8, w0, uxtb
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    cmn w8, w1, uxtb
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll b/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll
index 39edc03ced442..2451ea478ed71 100644
--- a/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll
+++ b/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll
@@ -107,11 +107,11 @@ define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) {
 define i32 @overflow_add_positive_const_limit(i8 zeroext %a) {
 ; CHECK-LABEL: overflow_add_positive_const_limit:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    mov w9, #8 // =0x8
-; CHECK-NEXT:    cmp w8, w0, sxtb
+; CHECK-NEXT:    sxtb w9, w0
 ; CHECK-NEXT:    mov w8, #16 // =0x10
-; CHECK-NEXT:    csel w0, w9, w8, gt
+; CHECK-NEXT:    cmn w9, #1
+; CHECK-NEXT:    mov w9, #8 // =0x8
+; CHECK-NEXT:    csel w0, w9, w8, lt
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i8 %a, -1
   %res = select i1 %cmp, i32 8, i32 16
@@ -162,11 +162,11 @@ define i32 @safe_add_underflow_neg(i8 zeroext %a) {
 define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) {
 ; CHECK-LABEL: overflow_sub_negative_const_limit:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
-; CHECK-NEXT:    mov w9, #8 // =0x8
-; CHECK-NEXT:    cmp w8, w0, sxtb
+; CHECK-NEXT:    sxtb w9, w0
 ; CHECK-NEXT:    mov w8, #16 // =0x10
-; CHECK-NEXT:    csel w0, w9, w8, gt
+; CHECK-NEXT:    cmn w9, #1
+; CHECK-NEXT:    mov w9, #8 // =0x8
+; CHECK-NEXT:    csel w0, w9, w8, lt
 ; CHECK-NEXT:    ret
   %cmp = icmp slt i8 %a, -1
   %res = select i1 %cmp, i32 8, i32 16
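
For readers skimming the hunk in AArch64ISelLowering.cpp above, here is a minimal standalone sketch of the new swap decision (shouldSwapCmpOperands is an illustrative name, not a function in the patch; Profit models getCmpOperandFoldingProfit):

```cpp
// An operand gains profit from sitting in the *second* compare-operand
// slot, where shifts and extends fold into the cmp. The patch adds a
// +1 bonus for an operand that is a subtraction from zero, because as
// the second operand its negation folds away too: cmp a, -b becomes
// cmn a, b, saving a mov+negate pair.
static bool shouldSwapCmpOperands(unsigned LHSProfit, bool LHSIsCMN,
                                  unsigned RHSProfit, bool RHSIsCMN) {
  // Swap when the LHS would benefit more than the RHS from being the
  // second operand.
  return LHSProfit + (LHSIsCMN ? 1u : 0u) >
         RHSProfit + (RHSIsCMN ? 1u : 0u);
}
```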

@AZero13 (Contributor, Author) commented on Jul 12, 2024

@davemgreen What do you think about this?

@davemgreen (Collaborator) left a comment

Sorry for the delay. I have been busy with some other things and this code isn't something I know super well. It sounds OK to me. Do you have any example of where it does reduce the number of instructions?

@AZero13 (Contributor, Author) commented on Jul 14, 2024

> Sorry for the delay. I have been busy with some other things and this code isn't something I know super well. It sounds OK to me. Do you have any example of where it does reduce the number of instructions?

Yes, but it can't be part of this PR, because it depends on another PR that has to be merged after this one.

That PR alone doesn't change anything without this one. It is a separate PR because it adds more cmn folds, whereas this one only makes the profitability checks more accurate, reflecting the changes to come.

@AZero13 (Contributor, Author) commented on Jul 14, 2024

This one, in fact: #96349

@david-arm (Contributor) commented

> That PR alone doesn't change anything without this one. It is a separate PR because it adds more cmn folds, whereas this one only makes the profitability checks more accurate, reflecting the changes to come.

Perhaps it's worth explaining a bit more in the commit message that while this change doesn't directly reduce the number of instructions, it will enable further optimisations to be made in a subsequent patch.

With some of the tests in this patch it seems at first glance to increase register dependency between instructions, but I can believe those tests are a bit artificial and not representative of the cases that you actually want to optimise.

Turning a cmp into cmn saves an extra mov and negate instruction, so take that into account when choosing when to flip the compare operands.

This will allow further optimizations down the line when we fold more variations of negative compares to cmn.

As part of this, do not consider swapping when the right-hand operand is a constant whose absolute value can be encoded into a cmn, since it can stay where it is as the second operand.
@AZero13 (Contributor, Author) commented on Jul 15, 2024

Done!

@davemgreen (Collaborator) left a comment

Thanks. LGTM. I think this can reduce instruction count, but only from second-order effects.

@davemgreen merged commit 331ba43 into llvm:main on Jul 16, 2024
5 of 6 checks passed
@AZero13 deleted the reg-2 branch on July 16, 2024 at 10:28
yuxuanchen1997 pushed a commit that referenced this pull request on Jul 25, 2024: [AArch64] Take cmn into account when adjusting compare constants (#98634)

Summary:
Turning a cmp into cmn saves an extra mov and negate instruction, so
take that into account when choosing when to flip the compare operands.

This will allow further optimizations down the line when we fold more
variations of negative compares to cmn.

As part of this, do not consider swapping when the right-hand operand is a constant whose absolute value can be encoded into a cmn, since it can stay where it is as the second operand.

Differential Revision: https://phabricator.intern.facebook.com/D60251526