Skip to content

Commit bbc5221

Browse files
authored
[DAGCombiner] Fold pattern for srl-shl-zext (#138290)
Fold `(srl (lop x, (shl (zext y), c1)), c1) -> (lop (srl x, c1), (zext y))` where c1 <= leadingzeros(zext(y)). This is equivalent to the existing fold chain `(srl (shl (zext y), c1), c1) -> (and (zext y), mask) -> (zext y)`, which a logical op in the middle otherwise prevents from combining. Benefit: reduces the number of instructions. --------- Signed-off-by: Alexander Peskov <[email protected]>
1 parent 98683b0 commit bbc5221

File tree

2 files changed

+168
-0
lines changed

2 files changed

+168
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10972,6 +10972,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
1097210972
return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
1097310973
}
1097410974

10975+
// fold (srl (logic_op x, (shl (zext y), c1)), c1)
10976+
// -> (logic_op (srl x, c1), (zext y))
10977+
// iff c1 <= bitwidth(zext(y)) - bitwidth(y), the zeros the zext guarantees
10978+
SDValue X, ZExtY;
10979+
if (N1C && sd_match(N0, m_OneUse(m_BitwiseLogic(
10980+
m_Value(X),
10981+
m_OneUse(m_Shl(m_AllOf(m_Value(ZExtY),
10982+
m_Opc(ISD::ZERO_EXTEND)),
10983+
m_Specific(N1))))))) {
10984+
unsigned NumLeadingZeros = ZExtY.getScalarValueSizeInBits() -
10985+
ZExtY.getOperand(0).getScalarValueSizeInBits();
10986+
if (N1C->getZExtValue() <= NumLeadingZeros)
10987+
return DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
10988+
DAG.getNode(ISD::SRL, SDLoc(N0), VT, X, N1), ZExtY);
10989+
}
10990+
1097510991
// fold operands of srl based on knowledge that the low bits are not
1097610992
// demanded.
1097710993
if (SimplifyDemandedBits(SDValue(N, 0)))

llvm/test/CodeGen/NVPTX/shift-opt.ll

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
2+
3+
define i64 @test_or(i64 %x, i32 %y) {
4+
;
5+
; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
6+
; Valid when c1 <= leadingzeros(zext(y))
7+
;
8+
; CHECK-LABEL: test_or
9+
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_or_param_0];
10+
; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_or_param_1];
11+
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
12+
; CHECK: or.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
13+
; CHECK: st.param.b64 [func_retval0], %[[LOP]];
14+
;
15+
%ext = zext i32 %y to i64
16+
%shl = shl i64 %ext, 5
17+
%or = or i64 %x, %shl
18+
%srl = lshr i64 %or, 5
19+
ret i64 %srl
20+
}
21+
22+
define i64 @test_xor(i64 %x, i32 %y) {
23+
;
24+
; Fold: srl (xor (x, shl(zext(y),c1)),c1) -> xor(srl(x,c1), zext(y))
25+
; Valid when c1 <= leadingzeros(zext(y))
26+
;
27+
; CHECK-LABEL: test_xor
28+
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_xor_param_0];
29+
; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_xor_param_1];
30+
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
31+
; CHECK: xor.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
32+
; CHECK: st.param.b64 [func_retval0], %[[LOP]];
33+
;
34+
%ext = zext i32 %y to i64
35+
%shl = shl i64 %ext, 5
36+
%or = xor i64 %x, %shl
37+
%srl = lshr i64 %or, 5
38+
ret i64 %srl
39+
}
40+
41+
define i64 @test_and(i64 %x, i32 %y) {
42+
;
43+
; Fold: srl (and (x, shl(zext(y),c1)),c1) -> and(srl(x,c1), zext(y))
44+
; Valid when c1 <= leadingzeros(zext(y))
45+
;
46+
; CHECK-LABEL: test_and
47+
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_and_param_0];
48+
; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_and_param_1];
49+
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[X]], 5;
50+
; CHECK: and.b64 %[[LOP:rd[0-9]+]], %[[SHR]], %[[Y]];
51+
; CHECK: st.param.b64 [func_retval0], %[[LOP]];
52+
;
53+
%ext = zext i32 %y to i64
54+
%shl = shl i64 %ext, 5
55+
%or = and i64 %x, %shl
56+
%srl = lshr i64 %or, 5
57+
ret i64 %srl
58+
}
59+
60+
define <2 x i16> @test_vec(<2 x i16> %x, <2 x i8> %y) {
61+
;
62+
; Fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
63+
; Valid when c1 <= leadingzeros(zext(y))
64+
; Here x and y are vectors.
65+
;
66+
; CHECK-LABEL: test_vec
67+
; CHECK: ld.param.u32 %[[X:r[0-9]+]], [test_vec_param_0];
68+
; CHECK: ld.param.u32 %[[P1:r[0-9]+]], [test_vec_param_1];
69+
; CHECK: and.b32 %[[Y:r[0-9]+]], %[[P1]], 16711935;
70+
; CHECK: mov.b32 {%[[X1:rs[0-9]+]], %[[X2:rs[0-9]+]]}, %[[X]];
71+
; CHECK: shr.u16 %[[SHR2:rs[0-9]+]], %[[X2]], 5;
72+
; CHECK: shr.u16 %[[SHR1:rs[0-9]+]], %[[X1]], 5;
73+
; CHECK: mov.b32 %[[SHR:r[0-9]+]], {%[[SHR1]], %[[SHR2]]};
74+
; CHECK: or.b32 %[[LOP:r[0-9]+]], %[[SHR]], %[[Y]];
75+
; CHECK: st.param.b32 [func_retval0], %[[LOP]];
76+
;
77+
%ext = zext <2 x i8> %y to <2 x i16>
78+
%shl = shl <2 x i16> %ext, splat(i16 5)
79+
%or = or <2 x i16> %x, %shl
80+
%srl = lshr <2 x i16> %or, splat(i16 5)
81+
ret <2 x i16> %srl
82+
}
83+
84+
define i64 @test_negative_c(i64 %x, i32 %y) {
85+
;
86+
; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
87+
; Reason: c1 > leadingzeros(zext(y)).
88+
;
89+
; CHECK-LABEL: test_negative_c
90+
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_c_param_0];
91+
; CHECK: ld.param.u32 %[[Y:rd[0-9]+]], [test_negative_c_param_1];
92+
; CHECK: shl.b64 %[[SHL:rd[0-9]+]], %[[Y]], 33;
93+
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
94+
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 33;
95+
; CHECK: st.param.b64 [func_retval0], %[[SHR]];
96+
;
97+
%ext = zext i32 %y to i64
98+
%shl = shl i64 %ext, 33
99+
%or = or i64 %x, %shl
100+
%srl = lshr i64 %or, 33
101+
ret i64 %srl
102+
}
103+
104+
declare void @use(i64)
105+
106+
define i64 @test_negative_use_lop(i64 %x, i32 %y) {
107+
;
108+
; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
109+
; Reason: multiple usage of "or"
110+
;
111+
; CHECK-LABEL: test_negative_use_lop
112+
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_lop_param_0];
113+
; CHECK: ld.param.u32 %[[Y:r[0-9]+]], [test_negative_use_lop_param_1];
114+
; CHECK: mul.wide.u32 %[[SHL:rd[0-9]+]], %[[Y]], 32;
115+
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
116+
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 5;
117+
; CHECK: { // callseq
118+
; CHECK: st.param.b64 [param0], %[[OR]];
119+
; CHECK: } // callseq
120+
; CHECK: st.param.b64 [func_retval0], %[[SHR]];
121+
;
122+
%ext = zext i32 %y to i64
123+
%shl = shl i64 %ext, 5
124+
%or = or i64 %x, %shl
125+
%srl = lshr i64 %or, 5
126+
call void @use(i64 %or)
127+
ret i64 %srl
128+
}
129+
130+
define i64 @test_negative_use_shl(i64 %x, i32 %y) {
131+
;
132+
; Do not fold: srl (or (x, shl(zext(y),c1)),c1) -> or(srl(x,c1), zext(y))
133+
; Reason: multiple usage of "shl"
134+
;
135+
; CHECK-LABEL: test_negative_use_shl
136+
; CHECK: ld.param.u64 %[[X:rd[0-9]+]], [test_negative_use_shl_param_0];
137+
; CHECK: ld.param.u32 %[[Y:r[0-9]+]], [test_negative_use_shl_param_1];
138+
; CHECK: mul.wide.u32 %[[SHL:rd[0-9]+]], %[[Y]], 32;
139+
; CHECK: or.b64 %[[OR:rd[0-9]+]], %[[X]], %[[SHL]];
140+
; CHECK: shr.u64 %[[SHR:rd[0-9]+]], %[[OR]], 5;
141+
; CHECK: { // callseq
142+
; CHECK: st.param.b64 [param0], %[[SHL]];
143+
; CHECK: } // callseq
144+
; CHECK: st.param.b64 [func_retval0], %[[SHR]];
145+
;
146+
%ext = zext i32 %y to i64
147+
%shl = shl i64 %ext, 5
148+
%or = or i64 %x, %shl
149+
%srl = lshr i64 %or, 5
150+
call void @use(i64 %shl)
151+
ret i64 %srl
152+
}

0 commit comments

Comments
 (0)