[RISCV] Efficiently lower (select cond, u, rot[r/l](u, rot.amt)) using zicond extension #143768
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: Ryan Buchner (bababuck)

Changes

The following lowerings now occur:
(select cond, u, rotr(u, rot.amt)) -> (rotr u, (czero_nez rot.amt, cond))
(select cond, rotr(u, rot.amt), u) -> (rotr u, (czero_eqz rot.amt, cond))
(select cond, u, rotl(u, rot.amt)) -> (rotl u, (czero_nez rot.amt, cond))
(select cond, rotl(u, rot.amt), u) -> (rotl u, (czero_eqz rot.amt, cond))

Contains the test commit from #143580 temporarily, as that is not yet merged. @mgudim

Full diff: https://github.com/llvm/llvm-project/pull/143768.diff

2 Files Affected:
- llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+4)
- llvm/test/CodeGen/RISCV/zicond-opts.ll (+275)
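The test excerpt below exercises the rotl forms; as a reading aid, here is a minimal hypothetical sketch of the rotr/nez form of the same fold. The function name and the expected output are illustrative, inferred by analogy with the rotate_l_nez test in the diff, not taken from the patch itself:

declare i64 @llvm.fshr.i64(i64, i64, i64)

; fshr(x, x, amt) is rotr(x, amt); selecting the unrotated value in the
; true arm should fold the select into the rotate amount via czero.nez.
define i64 @rotate_r_nez(i64 %x, i64 %rot.amt, i1 %cond) {
  %rot = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %rot.amt)
  %res = select i1 %cond, i64 %x, i64 %rot
  ret i64 %res
}

Expected RV64 codegen (a sketch, substituting ror for the rol seen in rotate_l_nez):
  andi      a2, a2, 1
  czero.nez a1, a1, a2
  ror       a0, a0, a1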
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index ab8b36df44d3f..eb04ff9588b92 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18104,6 +18104,8 @@ static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
+// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
+// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
SDValue TrueVal, SDValue FalseVal,
bool Swapped) {
@@ -18116,6 +18118,8 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
case ISD::SRA:
case ISD::SRL:
case ISD::SUB:
+ case ISD::ROTL:
+ case ISD::ROTR:
Commutative = false;
break;
case ISD::ADD:
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
new file mode 100644
index 0000000000000..3efb594e4b882
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -0,0 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -O2 -verify-machineinstrs -mattr=+b,+zicond < %s | FileCheck %s -check-prefix=RV32ZICOND
+; RUN: llc -mtriple=riscv64 -O2 -verify-machineinstrs -mattr=+b,+zicond < %s | FileCheck %s -check-prefix=RV64ZICOND
+
+; (and (icmp x, 0, ne), (icmp y, 0, ne)) -> (czero.eqz (icmp x, 0, ne), y)
+define i32 @icmp_and(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: snez a1, a2
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: and a0, a0, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: and a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ ret i32 %6
+}
+
+; (and (and (icmp x, 0, ne), (icmp y, 0, ne)), (icmp z, 0, ne)) -> (czero.eqz (czero.eqz (icmp x, 0, ne), y), z)
+define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) {
+; RV32ZICOND-LABEL: icmp_and_and:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: snez a1, a2
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: snez a1, a4
+; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_and:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: snez a1, a2
+; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %4 = icmp ne i64 %y, 0
+ %5 = icmp ne i64 %x, 0
+ %6 = and i1 %4, %5
+ %7 = icmp ne i64 %z, 0
+ %8 = and i1 %7, %6
+ %9 = zext i1 %8 to i32
+ ret i32 %9
+}
+
+; (select cond, x, rotl(x, rot.amt)) -> (rotl x, (czero_nez rot.amt, cond))
+define i64 @rotate_l_nez(i64 %x, i64 %rot.amt, i1 %cond) {
+; RV32ZICOND-LABEL: rotate_l_nez:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: andi a4, a4, 1
+; RV32ZICOND-NEXT: czero.nez a2, a2, a4
+; RV32ZICOND-NEXT: bexti a3, a2, 5
+; RV32ZICOND-NEXT: czero.nez a4, a0, a3
+; RV32ZICOND-NEXT: czero.eqz a5, a1, a3
+; RV32ZICOND-NEXT: czero.nez a1, a1, a3
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a3
+; RV32ZICOND-NEXT: not a3, a2
+; RV32ZICOND-NEXT: or a4, a5, a4
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: sll a1, a4, a2
+; RV32ZICOND-NEXT: srli a5, a0, 1
+; RV32ZICOND-NEXT: sll a2, a0, a2
+; RV32ZICOND-NEXT: srli a4, a4, 1
+; RV32ZICOND-NEXT: srl a0, a5, a3
+; RV32ZICOND-NEXT: srl a3, a4, a3
+; RV32ZICOND-NEXT: or a0, a1, a0
+; RV32ZICOND-NEXT: or a1, a2, a3
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: rotate_l_nez:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: andi a2, a2, 1
+; RV64ZICOND-NEXT: czero.nez a1, a1, a2
+; RV64ZICOND-NEXT: rol a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %6 = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %rot.amt)
+ %7 = select i1 %cond, i64 %x, i64 %6
+ ret i64 %7
+}
+
+; (select cond, rotl(x, rot.amt), x) -> (rotl x, (czero_eqz rot.amt, cond))
+define i64 @rotate_l_eqz(i64 %x, i64 %rot.amt, i1 %cond) {
+; RV32ZICOND-LABEL: rotate_l_eqz:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: andi a4, a4, 1
+; RV32ZICOND-NEXT: czero.eqz a2, a2, a4
+; RV32ZICOND-NEXT: bexti a3, a2, 5
+; RV32ZICOND-NEXT: czero.nez a4, a0, a3
+; RV32ZICOND-NEXT: czero.eqz a5, a1, a3
+; RV32ZICOND-NEXT: czero.nez a1, a1, a3
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a3
+; RV32ZICOND-NEXT: not a3, a2
+; RV32ZICOND-NEXT: or a4, a5, a4
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: sll a1, a4, a2
+; RV32ZICOND-NEXT: srli a5, a0, 1
+; RV32ZICOND-NEXT: sll a2, a0, a2
+; RV32ZICOND-NEXT: srli a4, a4, 1
+; RV32ZICOND-NEXT: srl a0, a5, a3
+; RV32ZICOND-NEXT: srl a3, a4, a3
+; RV32ZICOND-NEXT: or a0, a1, a0
+; RV32ZICOND-NEXT: or a1, a2, a3
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: rotate_l_eqz:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: andi a2, a2, 1
+; RV64ZICOND-NEXT: czero.eqz a1, a1, a2
+; RV64ZICOND-NEXT: rol a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %6 = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %rot.amt)
+ %7 = select i1 %cond, i64 %6, i64 %x
+ ret i64 %7
+}
+
+; (select cond, const, t) -> (add (czero_nez t - const, cond), const)
+define i64 @select_imm_reg(i64 %t, i1 %cond) {
+; RV32ZICOND-LABEL: select_imm_reg:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: andi a2, a2, 1
+; RV32ZICOND-NEXT: li a3, 3
+; RV32ZICOND-NEXT: czero.nez a0, a0, a2
+; RV32ZICOND-NEXT: czero.eqz a3, a3, a2
+; RV32ZICOND-NEXT: or a0, a3, a0
+; RV32ZICOND-NEXT: czero.nez a1, a1, a2
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: select_imm_reg:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: andi a1, a1, 1
+; RV64ZICOND-NEXT: li a2, 3
+; RV64ZICOND-NEXT: czero.nez a0, a0, a1
+; RV64ZICOND-NEXT: czero.eqz a1, a2, a1
+; RV64ZICOND-NEXT: or a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %4 = select i1 %cond, i64 3, i64 %t
+ ret i64 %4
+}
+
+; (select cond, t, const) -> (add (czero_eqz t - const, cond), const)
+define i64 @select_reg_imm(i64 %t, i1 %cond) {
+; RV32ZICOND-LABEL: select_reg_imm:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: andi a2, a2, 1
+; RV32ZICOND-NEXT: li a3, 3
+; RV32ZICOND-NEXT: czero.nez a3, a3, a2
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
+; RV32ZICOND-NEXT: or a0, a0, a3
+; RV32ZICOND-NEXT: czero.eqz a1, a1, a2
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: select_reg_imm:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: andi a1, a1, 1
+; RV64ZICOND-NEXT: li a2, 3
+; RV64ZICOND-NEXT: czero.nez a2, a2, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
+; RV64ZICOND-NEXT: or a0, a0, a2
+; RV64ZICOND-NEXT: ret
+ %4 = select i1 %cond, i64 %t, i64 3
+ ret i64 %4
+}
+
+; (select cond, -2048, t) -> (xor (czero_nez (xor t, -2048), cond), -2048)
+define i64 @select_imm_reg_neg_2048(i64 %t, i1 %cond) {
+; RV32ZICOND-LABEL: select_imm_reg_neg_2048:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: andi a2, a2, 1
+; RV32ZICOND-NEXT: li a3, -2048
+; RV32ZICOND-NEXT: czero.nez a0, a0, a2
+; RV32ZICOND-NEXT: czero.eqz a3, a3, a2
+; RV32ZICOND-NEXT: neg a2, a2
+; RV32ZICOND-NEXT: or a0, a3, a0
+; RV32ZICOND-NEXT: or a1, a2, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: select_imm_reg_neg_2048:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: andi a1, a1, 1
+; RV64ZICOND-NEXT: li a2, -2048
+; RV64ZICOND-NEXT: czero.nez a0, a0, a1
+; RV64ZICOND-NEXT: czero.eqz a1, a2, a1
+; RV64ZICOND-NEXT: or a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %4 = select i1 %cond, i64 -2048, i64 %t
+ ret i64 %4
+}
+
+; (select cond, 2048, t) -> no transform
+define i64 @select_imm_reg_2048(i64 %t, i1 %cond) {
+; RV32ZICOND-LABEL: select_imm_reg_2048:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: andi a2, a2, 1
+; RV32ZICOND-NEXT: bseti a3, zero, 11
+; RV32ZICOND-NEXT: czero.nez a0, a0, a2
+; RV32ZICOND-NEXT: czero.eqz a3, a3, a2
+; RV32ZICOND-NEXT: or a0, a3, a0
+; RV32ZICOND-NEXT: czero.nez a1, a1, a2
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: select_imm_reg_2048:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: andi a1, a1, 1
+; RV64ZICOND-NEXT: bseti a2, zero, 11
+; RV64ZICOND-NEXT: czero.nez a0, a0, a1
+; RV64ZICOND-NEXT: czero.eqz a1, a2, a1
+; RV64ZICOND-NEXT: or a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %4 = select i1 %cond, i64 2048, i64 %t
+ ret i64 %4
+}
+
+; (select cond, (and f, ~x), f) -> (andn f, (czero_eqz x, cond))
+define i64 @test_inv_and_nez(i64 %f, i64 %x, i1 %cond) {
+; RV32ZICOND-LABEL: test_inv_and_nez:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: andi a4, a4, 1
+; RV32ZICOND-NEXT: addi a4, a4, -1
+; RV32ZICOND-NEXT: orn a3, a4, a3
+; RV32ZICOND-NEXT: orn a2, a4, a2
+; RV32ZICOND-NEXT: and a0, a2, a0
+; RV32ZICOND-NEXT: and a1, a3, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: test_inv_and_nez:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: andi a2, a2, 1
+; RV64ZICOND-NEXT: andn a1, a0, a1
+; RV64ZICOND-NEXT: czero.nez a0, a0, a2
+; RV64ZICOND-NEXT: or a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %5 = xor i64 %x, -1
+ %6 = select i1 %cond, i64 %5, i64 -1
+ %7 = and i64 %6, %f
+ ret i64 %7
+}
+
+; (select cond, f, (and f, ~x)) -> (andn f, (czero_nez x, cond))
+define i64 @test_inv_and_eqz(i64 %f, i64 %x, i1 %cond) {
+; RV32ZICOND-LABEL: test_inv_and_eqz:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: slli a4, a4, 31
+; RV32ZICOND-NEXT: srai a4, a4, 31
+; RV32ZICOND-NEXT: orn a3, a4, a3
+; RV32ZICOND-NEXT: orn a2, a4, a2
+; RV32ZICOND-NEXT: and a0, a2, a0
+; RV32ZICOND-NEXT: and a1, a3, a1
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: test_inv_and_eqz:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: andi a2, a2, 1
+; RV64ZICOND-NEXT: andn a1, a0, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a2
+; RV64ZICOND-NEXT: or a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %5 = xor i64 %x, -1
+ %6 = select i1 %cond, i64 -1, i64 %5
+ %7 = and i64 %6, %f
+ ret i64 %7
+}
Are the u and y in the title supposed to be the same letter?
Force-pushed from f7f04ad to 52afe63, updating the commit message:

…g zicond extension

The following lowering now occurs:
(select cond, u, rotr(u, rot.amt)) -> (rotr u, (czero_nez rot.amt, cond))

This same pattern holds for any other operation with an identity of 0 (i.e. op(x, 0) = x).

Fixed the same typo in the commit message.
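The identity-of-0 observation is the same one tryFoldSelectIntoOp already relies on for sub, or, and xor (see the comment block in the RISCVISelLowering.cpp hunk above). A hypothetical sub example to make the rule concrete, with the expected output sketched by analogy with the rotate tests:

; sub has a right identity of 0 (sub(y, 0) = y), so
; (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)),
; and the inner select maps directly onto czero.eqz.
define i64 @sub_fold(i64 %y, i64 %x, i1 %cond) {
  %d = sub i64 %y, %x
  %r = select i1 %cond, i64 %d, i64 %y
  ret i64 %r
}

Expected RV64 codegen (sketch):
  andi      a2, a2, 1
  czero.eqz a1, a1, a2
  sub       a0, a0, a1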
LGTM
The following lowerings now occur:
(select cond, u, rotr(u, rot.amt)) -> (rotr u, (czero_nez rot.amt, cond))
(select cond, rotr(u, rot.amt), u) -> (rotr u, (czero_eqz rot.amt, cond))
(select cond, u, rotl(u, rot.amt)) -> (rotl u, (czero_nez rot.amt, cond))
(select cond, rotl(u, rot.amt), u) -> (rotl u, (czero_eqz rot.amt, cond))
Contains the test commit from #143580 temporarily, as that is not yet merged. @mgudim
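For the czero_eqz orientation, a hypothetical rotr counterpart of the rotate_l_eqz test above (function name and expected output are illustrative, not from the patch):

declare i64 @llvm.fshr.i64(i64, i64, i64)

; When the rotated value sits in the true arm, the rotate amount must be
; zeroed when the condition is zero, hence czero.eqz rather than czero.nez.
define i64 @rotate_r_eqz(i64 %x, i64 %rot.amt, i1 %cond) {
  %rot = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %rot.amt)
  %res = select i1 %cond, i64 %rot, i64 %x
  ret i64 %res
}

Expected RV64 codegen (sketch, by analogy with rotate_l_eqz):
  andi      a2, a2, 1
  czero.eqz a1, a1, a2
  ror       a0, a0, a1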