Skip to content

Commit 2bc9f43

Browse files
authored
[DAGCombiner] Fold pattern for srl-shl-zext (REAPPLIED) (#140038)
Fold (srl (lop x, (shl (zext y), c1)), c1) -> (lop (srl x, c1), (zext y)), where lop is a bitwise logic op and c1 <= leadingzeros(zext(y)). This is equivalent to the existing fold chain (srl (shl (zext y), c1), c1) -> (and (zext y), mask) -> (zext y), but the logic op in the middle prevents that chain from combining. Benefit: reduces the number of instructions. Original commit: #138290 / bbc5221. It was previously reverted due to a conflict in a LIT test: mainline changed the default load instruction to the untyped version in #137698. The updated test uses `ld.param.b64` instead of `ld.param.u64`.
1 parent 15b20a1 commit 2bc9f43

File tree

2 files changed

+32
-26
lines changed

2 files changed

+32
-26
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10972,6 +10972,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
1097210972
return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
1097310973
}
1097410974

10975+
// fold (srl (logic_op x, (shl (zext y), c1)), c1)
10976+
// -> (logic_op (srl x, c1), (zext y))
10977+
// c1 <= leadingzeros(zext(y))
10978+
SDValue X, ZExtY;
10979+
if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
10980+
m_Value(X),
10981+
m_OneUse(m_Shl(m_AllOf(m_Value(ZExtY),
10982+
m_Opc(ISD::ZERO_EXTEND)),
10983+
m_Specific(N1))))))) {
10984+
unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
10985+
ZExtY.getOperand(0).getScalarValueSizeInBits();
10986+
if (N1C->getZExtValue() <= NumLeadingZeros)
10987+
return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
10988+
DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
10989+
}
10990+
1097510991
// fold operands of srl based on knowledge that the low bits are not
1097610992
// demanded.
1097710993
if (SimplifyDemandedBits(SDValue(N, 0)))

llvm/test/CodeGen/NVPTX/shift-opt.ll

Lines changed: 16 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,13 @@
66
define i64 @test_or(i64 %x, i32 %y) {
77
; CHECK-LABEL: test_or(
88
; CHECK: {
9-
; CHECK-NEXT: .reg .b32 %r<2>;
109
; CHECK-NEXT: .reg .b64 %rd<5>;
1110
; CHECK-EMPTY:
1211
; CHECK-NEXT: // %bb.0:
1312
; CHECK-NEXT: ld.param.b64 %rd1, [test_or_param_0];
14-
; CHECK-NEXT: ld.param.b32 %r1, [test_or_param_1];
15-
; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32;
16-
; CHECK-NEXT: or.b64 %rd3, %rd1, %rd2;
17-
; CHECK-NEXT: shr.u64 %rd4, %rd3, 5;
13+
; CHECK-NEXT: ld.param.b32 %rd2, [test_or_param_1];
14+
; CHECK-NEXT: shr.u64 %rd3, %rd1, 5;
15+
; CHECK-NEXT: or.b64 %rd4, %rd3, %rd2;
1816
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
1917
; CHECK-NEXT: ret;
2018
%ext = zext i32 %y to i64
@@ -29,15 +27,13 @@ define i64 @test_or(i64 %x, i32 %y) {
2927
define i64 @test_xor(i64 %x, i32 %y) {
3028
; CHECK-LABEL: test_xor(
3129
; CHECK: {
32-
; CHECK-NEXT: .reg .b32 %r<2>;
3330
; CHECK-NEXT: .reg .b64 %rd<5>;
3431
; CHECK-EMPTY:
3532
; CHECK-NEXT: // %bb.0:
3633
; CHECK-NEXT: ld.param.b64 %rd1, [test_xor_param_0];
37-
; CHECK-NEXT: ld.param.b32 %r1, [test_xor_param_1];
38-
; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32;
39-
; CHECK-NEXT: xor.b64 %rd3, %rd1, %rd2;
40-
; CHECK-NEXT: shr.u64 %rd4, %rd3, 5;
34+
; CHECK-NEXT: ld.param.b32 %rd2, [test_xor_param_1];
35+
; CHECK-NEXT: shr.u64 %rd3, %rd1, 5;
36+
; CHECK-NEXT: xor.b64 %rd4, %rd3, %rd2;
4137
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
4238
; CHECK-NEXT: ret;
4339
%ext = zext i32 %y to i64
@@ -52,15 +48,13 @@ define i64 @test_xor(i64 %x, i32 %y) {
5248
define i64 @test_and(i64 %x, i32 %y) {
5349
; CHECK-LABEL: test_and(
5450
; CHECK: {
55-
; CHECK-NEXT: .reg .b32 %r<2>;
5651
; CHECK-NEXT: .reg .b64 %rd<5>;
5752
; CHECK-EMPTY:
5853
; CHECK-NEXT: // %bb.0:
5954
; CHECK-NEXT: ld.param.b64 %rd1, [test_and_param_0];
60-
; CHECK-NEXT: ld.param.b32 %r1, [test_and_param_1];
61-
; CHECK-NEXT: mul.wide.u32 %rd2, %r1, 32;
62-
; CHECK-NEXT: and.b64 %rd3, %rd1, %rd2;
63-
; CHECK-NEXT: shr.u64 %rd4, %rd3, 5;
55+
; CHECK-NEXT: ld.param.b32 %rd2, [test_and_param_1];
56+
; CHECK-NEXT: shr.u64 %rd3, %rd1, 5;
57+
; CHECK-NEXT: and.b64 %rd4, %rd3, %rd2;
6458
; CHECK-NEXT: st.param.b64 [func_retval0], %rd4;
6559
; CHECK-NEXT: ret;
6660
%ext = zext i32 %y to i64
@@ -76,23 +70,19 @@ define i64 @test_and(i64 %x, i32 %y) {
7670
define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
7771
; CHECK-LABEL: test_vec(
7872
; CHECK: {
79-
; CHECK-NEXT: .reg .b16 %rs<9>;
80-
; CHECK-NEXT: .reg .b32 %r<7>;
73+
; CHECK-NEXT: .reg .b16 %rs<5>;
74+
; CHECK-NEXT: .reg .b32 %r<6>;
8175
; CHECK-EMPTY:
8276
; CHECK-NEXT: // %bb.0:
8377
; CHECK-NEXT: ld.param.b32 %r1, [test_vec_param_0];
8478
; CHECK-NEXT: ld.param.b32 %r2, [test_vec_param_1];
8579
; CHECK-NEXT: and.b32 %r3, %r2, 16711935;
86-
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r3;
87-
; CHECK-NEXT: shl.b16 %rs3, %rs2, 5;
88-
; CHECK-NEXT: shl.b16 %rs4, %rs1, 5;
80+
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
81+
; CHECK-NEXT: shr.u16 %rs3, %rs2, 5;
82+
; CHECK-NEXT: shr.u16 %rs4, %rs1, 5;
8983
; CHECK-NEXT: mov.b32 %r4, {%rs4, %rs3};
90-
; CHECK-NEXT: or.b32 %r5, %r1, %r4;
91-
; CHECK-NEXT: mov.b32 {%rs5, %rs6}, %r5;
92-
; CHECK-NEXT: shr.u16 %rs7, %rs6, 5;
93-
; CHECK-NEXT: shr.u16 %rs8, %rs5, 5;
94-
; CHECK-NEXT: mov.b32 %r6, {%rs8, %rs7};
95-
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
84+
; CHECK-NEXT: or.b32 %r5, %r4, %r3;
85+
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
9686
; CHECK-NEXT: ret;
9787
%ext = zext <2 x i8> %y to <2 x i16>
9888
%shl = shl <2 x i16> %ext, splat(i16 5)

0 commit comments

Comments
 (0)