Skip to content

Commit f7f04ad

Browse files
bababuck (Ryan Buchner)
authored and committed
[RISCV] Efficiently lower (select cond, u, rot[r/l](u, rot.amt)) using zicond extension
The following lowering now occurs: (select cond, u, rotr(u, rot.amt)) -> (rotr u, (czero_nez rot.amt, cond)). The same pattern holds for any other operation whose second operand has an identity of 0 (i.e. op(x, 0) = x).
1 parent d3f5efa commit f7f04ad

File tree

2 files changed

+38
-48
lines changed

2 files changed

+38
-48
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18104,6 +18104,8 @@ static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
1810418104
// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
1810518105
// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
1810618106
// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
18107+
// (select C, (rotl Y, X), Y) -> (rotl Y, (select C, X, 0)).
18108+
// (select C, (rotr Y, X), Y) -> (rotr Y, (select C, X, 0)).
1810718109
static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
1810818110
SDValue TrueVal, SDValue FalseVal,
1810918111
bool Swapped) {
@@ -18116,6 +18118,8 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
1811618118
case ISD::SRA:
1811718119
case ISD::SRL:
1811818120
case ISD::SUB:
18121+
case ISD::ROTL:
18122+
case ISD::ROTR:
1811918123
Commutative = false;
1812018124
break;
1812118125
case ISD::ADD:

llvm/test/CodeGen/RISCV/zicond-opts.ll

Lines changed: 34 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -62,37 +62,30 @@ define i64 @rotate_l_nez(i64 %x, i64 %rot.amt, i1 %cond) {
6262
; RV32ZICOND-LABEL: rotate_l_nez:
6363
; RV32ZICOND: # %bb.0:
6464
; RV32ZICOND-NEXT: andi a4, a4, 1
65-
; RV32ZICOND-NEXT: bexti a3, a2, 5
66-
; RV32ZICOND-NEXT: not a5, a2
67-
; RV32ZICOND-NEXT: czero.nez a6, a1, a3
68-
; RV32ZICOND-NEXT: czero.eqz a7, a0, a3
69-
; RV32ZICOND-NEXT: czero.nez t0, a0, a3
70-
; RV32ZICOND-NEXT: czero.eqz a3, a1, a3
71-
; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
72-
; RV32ZICOND-NEXT: czero.eqz a1, a1, a4
73-
; RV32ZICOND-NEXT: or a6, a7, a6
74-
; RV32ZICOND-NEXT: or a3, a3, t0
75-
; RV32ZICOND-NEXT: sll a7, a6, a2
76-
; RV32ZICOND-NEXT: srli t0, a3, 1
77-
; RV32ZICOND-NEXT: sll a2, a3, a2
78-
; RV32ZICOND-NEXT: srli a3, a6, 1
79-
; RV32ZICOND-NEXT: srl a6, t0, a5
80-
; RV32ZICOND-NEXT: srl a3, a3, a5
81-
; RV32ZICOND-NEXT: or a5, a7, a6
82-
; RV32ZICOND-NEXT: or a2, a2, a3
8365
; RV32ZICOND-NEXT: czero.nez a2, a2, a4
84-
; RV32ZICOND-NEXT: czero.nez a3, a5, a4
85-
; RV32ZICOND-NEXT: or a0, a0, a2
86-
; RV32ZICOND-NEXT: or a1, a1, a3
66+
; RV32ZICOND-NEXT: bexti a3, a2, 5
67+
; RV32ZICOND-NEXT: czero.nez a4, a0, a3
68+
; RV32ZICOND-NEXT: czero.eqz a5, a1, a3
69+
; RV32ZICOND-NEXT: czero.nez a1, a1, a3
70+
; RV32ZICOND-NEXT: czero.eqz a0, a0, a3
71+
; RV32ZICOND-NEXT: not a3, a2
72+
; RV32ZICOND-NEXT: or a4, a5, a4
73+
; RV32ZICOND-NEXT: or a0, a0, a1
74+
; RV32ZICOND-NEXT: sll a1, a4, a2
75+
; RV32ZICOND-NEXT: srli a5, a0, 1
76+
; RV32ZICOND-NEXT: sll a2, a0, a2
77+
; RV32ZICOND-NEXT: srli a4, a4, 1
78+
; RV32ZICOND-NEXT: srl a0, a5, a3
79+
; RV32ZICOND-NEXT: srl a3, a4, a3
80+
; RV32ZICOND-NEXT: or a0, a1, a0
81+
; RV32ZICOND-NEXT: or a1, a2, a3
8782
; RV32ZICOND-NEXT: ret
8883
;
8984
; RV64ZICOND-LABEL: rotate_l_nez:
9085
; RV64ZICOND: # %bb.0:
9186
; RV64ZICOND-NEXT: andi a2, a2, 1
92-
; RV64ZICOND-NEXT: rol a1, a0, a1
9387
; RV64ZICOND-NEXT: czero.nez a1, a1, a2
94-
; RV64ZICOND-NEXT: czero.eqz a0, a0, a2
95-
; RV64ZICOND-NEXT: or a0, a0, a1
88+
; RV64ZICOND-NEXT: rol a0, a0, a1
9689
; RV64ZICOND-NEXT: ret
9790
%6 = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %rot.amt)
9891
%7 = select i1 %cond, i64 %x, i64 %6
@@ -104,37 +97,30 @@ define i64 @rotate_l_eqz(i64 %x, i64 %rot.amt, i1 %cond) {
10497
; RV32ZICOND-LABEL: rotate_l_eqz:
10598
; RV32ZICOND: # %bb.0:
10699
; RV32ZICOND-NEXT: andi a4, a4, 1
107-
; RV32ZICOND-NEXT: bexti a3, a2, 5
108-
; RV32ZICOND-NEXT: not a5, a2
109-
; RV32ZICOND-NEXT: czero.nez a6, a1, a3
110-
; RV32ZICOND-NEXT: czero.eqz a7, a0, a3
111-
; RV32ZICOND-NEXT: czero.nez t0, a0, a3
112-
; RV32ZICOND-NEXT: czero.eqz a3, a1, a3
113-
; RV32ZICOND-NEXT: czero.nez a0, a0, a4
114-
; RV32ZICOND-NEXT: czero.nez a1, a1, a4
115-
; RV32ZICOND-NEXT: or a6, a7, a6
116-
; RV32ZICOND-NEXT: or a3, a3, t0
117-
; RV32ZICOND-NEXT: sll a7, a6, a2
118-
; RV32ZICOND-NEXT: srli t0, a3, 1
119-
; RV32ZICOND-NEXT: sll a2, a3, a2
120-
; RV32ZICOND-NEXT: srli a3, a6, 1
121-
; RV32ZICOND-NEXT: srl a6, t0, a5
122-
; RV32ZICOND-NEXT: srl a3, a3, a5
123-
; RV32ZICOND-NEXT: or a5, a7, a6
124-
; RV32ZICOND-NEXT: or a2, a2, a3
125100
; RV32ZICOND-NEXT: czero.eqz a2, a2, a4
126-
; RV32ZICOND-NEXT: czero.eqz a3, a5, a4
127-
; RV32ZICOND-NEXT: or a0, a2, a0
128-
; RV32ZICOND-NEXT: or a1, a3, a1
101+
; RV32ZICOND-NEXT: bexti a3, a2, 5
102+
; RV32ZICOND-NEXT: czero.nez a4, a0, a3
103+
; RV32ZICOND-NEXT: czero.eqz a5, a1, a3
104+
; RV32ZICOND-NEXT: czero.nez a1, a1, a3
105+
; RV32ZICOND-NEXT: czero.eqz a0, a0, a3
106+
; RV32ZICOND-NEXT: not a3, a2
107+
; RV32ZICOND-NEXT: or a4, a5, a4
108+
; RV32ZICOND-NEXT: or a0, a0, a1
109+
; RV32ZICOND-NEXT: sll a1, a4, a2
110+
; RV32ZICOND-NEXT: srli a5, a0, 1
111+
; RV32ZICOND-NEXT: sll a2, a0, a2
112+
; RV32ZICOND-NEXT: srli a4, a4, 1
113+
; RV32ZICOND-NEXT: srl a0, a5, a3
114+
; RV32ZICOND-NEXT: srl a3, a4, a3
115+
; RV32ZICOND-NEXT: or a0, a1, a0
116+
; RV32ZICOND-NEXT: or a1, a2, a3
129117
; RV32ZICOND-NEXT: ret
130118
;
131119
; RV64ZICOND-LABEL: rotate_l_eqz:
132120
; RV64ZICOND: # %bb.0:
133121
; RV64ZICOND-NEXT: andi a2, a2, 1
134-
; RV64ZICOND-NEXT: rol a1, a0, a1
135-
; RV64ZICOND-NEXT: czero.nez a0, a0, a2
136122
; RV64ZICOND-NEXT: czero.eqz a1, a1, a2
137-
; RV64ZICOND-NEXT: or a0, a1, a0
123+
; RV64ZICOND-NEXT: rol a0, a0, a1
138124
; RV64ZICOND-NEXT: ret
139125
%6 = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %rot.amt)
140126
%7 = select i1 %cond, i64 %6, i64 %x

0 commit comments

Comments
 (0)