-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[RISCV] Lower SELECT's with one constant more efficiently using Zicond #143581
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Ryan Buchner (bababuck) ChangesSee #143580 for MR with the test commit. Performs the following transformations: @mgudim Full diff: https://github.com/llvm/llvm-project/pull/143581.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab8b36df44d3f..b1e1243c52b17 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9043,15 +9043,20 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
// (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
- if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
- const APInt &TrueVal = TrueV->getAsAPIntVal();
- const APInt &FalseVal = FalseV->getAsAPIntVal();
+ // (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
+ // (select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
+ if (isa<ConstantSDNode>(TrueV) || isa<ConstantSDNode>(FalseV)) {
+ bool BothAreConstant =
+ isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV);
// Prefer these over Zicond to avoid materializing an immediate:
// (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
// (select (x > -1), z, y) -> x >> (XLEN - 1) & (y - z) + z
if (CondV.getOpcode() == ISD::SETCC &&
- CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse()) {
+ CondV.getOperand(0).getValueType() == VT && CondV.hasOneUse() &&
+ BothAreConstant) {
+ const APInt &TrueVal = TrueV->getAsAPIntVal();
+ const APInt &FalseVal = FalseV->getAsAPIntVal();
ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
if ((CCVal == ISD::SETLT && isNullConstant(CondV.getOperand(1))) ||
(CCVal == ISD::SETGT && isAllOnesConstant(CondV.getOperand(1)))) {
@@ -9073,19 +9078,36 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- const int TrueValCost = RISCVMatInt::getIntMatCost(
- TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
- const int FalseValCost = RISCVMatInt::getIntMatCost(
- FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
- bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
- SDValue LHSVal = DAG.getConstant(
- IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
- SDValue RHSVal =
- DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
- SDValue CMOV =
- DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
- DL, VT, LHSVal, CondV);
- return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
+ bool IsCZERO_NEZ;
+ SDValue ConstVal, SubOp;
+ if (BothAreConstant) {
+ const APInt &TrueVal = TrueV->getAsAPIntVal();
+ const APInt &FalseVal = FalseV->getAsAPIntVal();
+ const int TrueValCost = RISCVMatInt::getIntMatCost(
+ TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
+ const int FalseValCost = RISCVMatInt::getIntMatCost(
+ FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
+ IsCZERO_NEZ = TrueValCost <= FalseValCost;
+
+ SubOp = DAG.getConstant(
+ IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
+ ConstVal = DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
+ } else {
+ IsCZERO_NEZ = isa<ConstantSDNode>(TrueV);
+ ConstVal = IsCZERO_NEZ ? TrueV : FalseV;
+
+ SDValue &RegV = IsCZERO_NEZ ? FalseV : TrueV;
+ SubOp = DAG.getNode(ISD::SUB, DL, VT, RegV, ConstVal);
+ }
+ // The one constant case is only efficient is the constant fits into
+ // `ADDI`
+ if (BothAreConstant ||
+ isInt<12>(dyn_cast<ConstantSDNode>(ConstVal)->getSExtValue())) {
+ SDValue CMOV =
+ DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
+ DL, VT, SubOp, CondV);
+ return DAG.getNode(ISD::ADD, DL, VT, CMOV, ConstVal);
+ }
}
// (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index b7b88584f3bdb..13c43a3875a08 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -173,14 +173,21 @@ define signext i32 @test6(i32 signext %x, i32 signext %z) {
; NOSFB-NEXT: or a0, a0, a1
; NOSFB-NEXT: ret
;
-; SFB-LABEL: test6:
-; SFB: # %bb.0:
-; SFB-NEXT: li a2, -1
-; SFB-NEXT: beqz a1, .LBB5_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: .LBB5_2:
-; SFB-NEXT: ret
+; NOZICOND-LABEL: test6:
+; NOZICOND: # %bb.0:
+; NOZICOND-NEXT: li a2, -1
+; NOZICOND-NEXT: beqz a1, .LBB5_2
+; NOZICOND-NEXT: # %bb.1:
+; NOZICOND-NEXT: mv a0, a2
+; NOZICOND-NEXT: .LBB5_2:
+; NOZICOND-NEXT: ret
+;
+; ZICOND-LABEL: test6:
+; ZICOND: # %bb.0:
+; ZICOND-NEXT: addi a0, a0, 1
+; ZICOND-NEXT: czero.nez a0, a0, a1
+; ZICOND-NEXT: addi a0, a0, -1
+; ZICOND-NEXT: ret
%c = icmp eq i32 %z, 0
%b = select i1 %c, i32 %x, i32 -1
ret i32 %b
@@ -195,14 +202,21 @@ define signext i32 @test7(i32 signext %x, i32 signext %z) {
; NOSFB-NEXT: or a0, a0, a1
; NOSFB-NEXT: ret
;
-; SFB-LABEL: test7:
-; SFB: # %bb.0:
-; SFB-NEXT: li a2, -1
-; SFB-NEXT: bnez a1, .LBB6_2
-; SFB-NEXT: # %bb.1:
-; SFB-NEXT: mv a0, a2
-; SFB-NEXT: .LBB6_2:
-; SFB-NEXT: ret
+; NOZICOND-LABEL: test7:
+; NOZICOND: # %bb.0:
+; NOZICOND-NEXT: li a2, -1
+; NOZICOND-NEXT: bnez a1, .LBB6_2
+; NOZICOND-NEXT: # %bb.1:
+; NOZICOND-NEXT: mv a0, a2
+; NOZICOND-NEXT: .LBB6_2:
+; NOZICOND-NEXT: ret
+;
+; ZICOND-LABEL: test7:
+; ZICOND: # %bb.0:
+; ZICOND-NEXT: addi a0, a0, 1
+; ZICOND-NEXT: czero.eqz a0, a0, a1
+; ZICOND-NEXT: addi a0, a0, -1
+; ZICOND-NEXT: ret
%c = icmp eq i32 %z, 0
%b = select i1 %c, i32 -1, i32 %x
ret i32 %b
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
new file mode 100644
index 0000000000000..77588adbd63f3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -0,0 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -O2 -verify-machineinstrs -mattr=+b,+zicond < %s | FileCheck %s -check-prefix=RV32ZICOND
+; RUN: llc -mtriple=riscv64 -O2 -verify-machineinstrs -mattr=+b,+zicond < %s | FileCheck %s -check-prefix=RV64ZICOND
+
+define dso_local signext i32 @icmp_and(i64 noundef %0, i64 noundef %1) #0 {
+; RV32ZICOND-LABEL: icmp_and:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: snez a1, a2
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: and a0, a0, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: and a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %1, 0
+ %4 = icmp ne i64 %0, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+define dso_local signext i32 @icmp_and_and(i64 noundef %0, i64 noundef %1, i64 noundef %2) #0 {
+; RV32ZICOND-LABEL: icmp_and_and:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: snez a1, a2
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: snez a1, a4
+; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_and:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: snez a1, a2
+; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %4 = icmp ne i64 %1, 0
+ %5 = icmp ne i64 %0, 0
+ %6 = and i1 %4, %5
+ %7 = icmp ne i64 %2, 0
+ %8 = and i1 %7, %6
+ %9 = zext i1 %8 to i32
+ ret i32 %9
+}
+
+define dso_local i64 @RotateL_eqz(i64 noundef %0, i64 noundef %1, i64 noundef %2, i64 noundef %3) local_unnamed_addr #0 {
+; RV32ZICOND-LABEL: RotateL_eqz:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a0, a6, a7
+; RV32ZICOND-NEXT: bexti a1, a4, 5
+; RV32ZICOND-NEXT: not a5, a4
+; RV32ZICOND-NEXT: czero.nez a6, a3, a1
+; RV32ZICOND-NEXT: czero.eqz a7, a2, a1
+; RV32ZICOND-NEXT: czero.nez t0, a2, a1
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
+; RV32ZICOND-NEXT: czero.nez a2, a2, a0
+; RV32ZICOND-NEXT: czero.nez a3, a3, a0
+; RV32ZICOND-NEXT: or a6, a7, a6
+; RV32ZICOND-NEXT: or a1, a1, t0
+; RV32ZICOND-NEXT: sll a7, a6, a4
+; RV32ZICOND-NEXT: srli t0, a1, 1
+; RV32ZICOND-NEXT: sll a1, a1, a4
+; RV32ZICOND-NEXT: srli a4, a6, 1
+; RV32ZICOND-NEXT: srl a6, t0, a5
+; RV32ZICOND-NEXT: srl a4, a4, a5
+; RV32ZICOND-NEXT: or a5, a7, a6
+; RV32ZICOND-NEXT: or a1, a1, a4
+; RV32ZICOND-NEXT: czero.eqz a1, a1, a0
+; RV32ZICOND-NEXT: czero.eqz a4, a5, a0
+; RV32ZICOND-NEXT: or a0, a2, a1
+; RV32ZICOND-NEXT: or a1, a3, a4
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: RotateL_eqz:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: rol a0, a1, a2
+; RV64ZICOND-NEXT: czero.nez a1, a1, a3
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a3
+; RV64ZICOND-NEXT: or a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %5 = icmp eq i64 %3, 0
+ %6 = call i64 @llvm.fshl.i64(i64 %1, i64 %1, i64 %2)
+ %7 = select i1 %5, i64 %1, i64 %6
+ ret i64 %7
+}
+
+define dso_local noundef i64 @select_imm_reg(i64 noundef %0, i64 noundef %1) local_unnamed_addr #0 {
+; RV32ZICOND-LABEL: select_imm_reg:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: xori a0, a0, 2
+; RV32ZICOND-NEXT: addi a2, a2, -3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: addi a0, a0, 3
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: select_imm_reg:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: addi a0, a0, -2
+; RV64ZICOND-NEXT: addi a1, a1, -3
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: addi a0, a0, 3
+; RV64ZICOND-NEXT: ret
+ %3 = icmp eq i64 %0, 2
+ %4 = select i1 %3, i64 3, i64 %1
+ ret i64 %4
+}
+
+define dso_local noundef i64 @test_InvAnd_eqz_001(i64 noundef %1, i64 noundef %2, i64 noundef %3) local_unnamed_addr #0 {
+; RV32ZICOND-LABEL: test_InvAnd_eqz_001:
+; RV32ZICOND: # %bb.0: # %entry
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: snez a4, a4
+; RV32ZICOND-NEXT: addi a4, a4, -1
+; RV32ZICOND-NEXT: orn a3, a4, a3
+; RV32ZICOND-NEXT: orn a2, a4, a2
+; RV32ZICOND-NEXT: and a0, a2, a0
+; RV32ZICOND-NEXT: and a1, a3, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: test_InvAnd_eqz_001:
+; RV64ZICOND: # %bb.0: # %entry
+; RV64ZICOND-NEXT: czero.nez a2, a0, a2
+; RV64ZICOND-NEXT: andn a0, a0, a1
+; RV64ZICOND-NEXT: or a0, a0, a2
+; RV64ZICOND-NEXT: ret
+entry:
+ %4 = icmp ne i64 %3, 0
+ %5 = xor i64 %2, -1
+ %6 = select i1 %4, i64 %5, i64 -1
+ %7 = and i64 %6, %1
+ ret i64 %7
+}
|
// The one constant case is only efficient is the constant fits into | ||
// `ADDI` | ||
if (BothAreConstant || | ||
isInt<12>(dyn_cast<ConstantSDNode>(ConstVal)->getSExtValue())) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about the corner case where ConstVal is -2048 and -ConstVal is 2048 which isn't a 12-bit constant.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should have a test for this case
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added a test for -2048, would you want one for 2048
as well? I am concerned with adding too many tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ended up adding a test for 2048 as well. How do you find the balance of not enough tests and too many tests (where it becomes impossible to manually verify the changes are expected).
31c5ca9
to
af9bf83
Compare
Updated for the above suggestions. Slight refactoring to address the "It's bad practice to create a SDNode that isn't guaranteed to be used." |
af9bf83
to
5b9a5e2
Compare
5b9a5e2
to
c2e7487
Compare
c2e7487
to
dfd8c4c
Compare
Updated to include the latest version of #143580 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…cond extension Provides the following lowerings, which reduces instruction count by 1 for rv64: (select cond, const, t) -> (add (czero_nez t - c1, cond), const) (select cond, t, const) -> (add (czero_eqz t - c1, cond), const) For the special case of c1 == -2048 (select cond, -2048, t) -> (xor (czero_nez (xor t, -2048), cond), -2048) (select cond, t, -2048) -> (xor (czero_eqz (xor t, -2048), cond), -2048)
dfd8c4c
to
b04addc
Compare
Rebased to latest |
@mshockwave am I good to merge? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/157/builds/30707 Here is the relevant piece of the build log for the reference
|
llvm#143581) See llvm#143580 for MR with the test commit. Performs the following transformations: (select c, c1, t) -> (add (czero_nez t - c1, c), c1) (select c, t, c1) -> (add (czero_eqz t - c1, c), c1) @mgudim
llvm#143581) See llvm#143580 for MR with the test commit. Performs the following transformations: (select c, c1, t) -> (add (czero_nez t - c1, c), c1) (select c, t, c1) -> (add (czero_eqz t - c1, c), c1) @mgudim
See #143580 for MR with the test commit.
Performs the following transformations:
(select c, c1, t) -> (add (czero_nez t - c1, c), c1)
(select c, t, c1) -> (add (czero_eqz t - c1, c), c1)
@mgudim