Skip to content

Commit c55899f

Browse files
fzhinkin authored and rotateright committed
[DAGCombiner] Hoist funnel shifts from logic operation
Hoist funnel shift from logic op:

    logic_op (FSH x0, x1, s), (FSH y0, y1, s) -->
        FSH (logic_op x0, y0), (logic_op x1, y1), s

The transformation improves the code generated for some cases related to issue llvm#49541.

A reduced number of funnel shifts can also improve throughput on x86 CPUs by utilizing more of the available ports: https://quick-bench.com/q/gC7AKkJJsDZzRrs_JWDzm9t_iDM

Transformation correctness checks:
https://alive2.llvm.org/ce/z/TKPULH
https://alive2.llvm.org/ce/z/UvTd_9
https://alive2.llvm.org/ce/z/j8qW3_
https://alive2.llvm.org/ce/z/7Wq7gE
https://alive2.llvm.org/ce/z/Xr5w8R
https://alive2.llvm.org/ce/z/D5xe_E
https://alive2.llvm.org/ce/z/2yBZiy

Differential Revision: https://reviews.llvm.org/D130994
1 parent 8ac015c commit c55899f

File tree

3 files changed

+71
-82
lines changed

3 files changed

+71
-82
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5326,6 +5326,21 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
53265326
return DAG.getNode(HandOpcode, DL, VT, Logic);
53275327
}
53285328

5329+
// For funnel shifts FSHL/FSHR:
5330+
// logic_op (OP x, x1, s), (OP y, y1, s) -->
5331+
//   OP (logic_op x, y), (logic_op x1, y1), s
5332+
if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5333+
N0.getOperand(2) == N1.getOperand(2)) {
5334+
if (!N0.hasOneUse() || !N1.hasOneUse())
5335+
return SDValue();
5336+
SDValue X1 = N0.getOperand(1);
5337+
SDValue Y1 = N1.getOperand(1);
5338+
SDValue S = N0.getOperand(2);
5339+
SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5340+
SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5341+
return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5342+
}
5343+
53295344
// Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
53305345
// Only perform this optimization up until type legalization, before
53315346
// LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by

llvm/test/CodeGen/X86/funnel-shift-logic-fold.ll

Lines changed: 32 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@ declare i64 @llvm.fshr.i64(i64, i64, i64) nounwind readnone
77
define i64 @hoist_fshl_from_or(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
88
; X64-LABEL: hoist_fshl_from_or:
99
; X64: # %bb.0:
10-
; X64-NEXT: movq %rcx, %rax
10+
; X64-NEXT: movq %rdi, %rax
11+
; X64-NEXT: orq %rcx, %rsi
12+
; X64-NEXT: orq %rdx, %rax
1113
; X64-NEXT: movl %r8d, %ecx
12-
; X64-NEXT: shldq %cl, %rsi, %rdi
13-
; X64-NEXT: shldq %cl, %rax, %rdx
14-
; X64-NEXT: orq %rdi, %rdx
15-
; X64-NEXT: movq %rdx, %rax
14+
; X64-NEXT: shldq %cl, %rsi, %rax
1615
; X64-NEXT: retq
1716
%fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s)
1817
%fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s)
@@ -23,12 +22,11 @@ define i64 @hoist_fshl_from_or(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind
2322
define i64 @hoist_fshl_from_and(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
2423
; X64-LABEL: hoist_fshl_from_and:
2524
; X64: # %bb.0:
26-
; X64-NEXT: movq %rcx, %rax
25+
; X64-NEXT: movq %rdi, %rax
26+
; X64-NEXT: andq %rcx, %rsi
27+
; X64-NEXT: andq %rdx, %rax
2728
; X64-NEXT: movl %r8d, %ecx
28-
; X64-NEXT: shldq %cl, %rsi, %rdi
29-
; X64-NEXT: shldq %cl, %rax, %rdx
30-
; X64-NEXT: andq %rdi, %rdx
31-
; X64-NEXT: movq %rdx, %rax
29+
; X64-NEXT: shldq %cl, %rsi, %rax
3230
; X64-NEXT: retq
3331
%fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s)
3432
%fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s)
@@ -39,12 +37,11 @@ define i64 @hoist_fshl_from_and(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind
3937
define i64 @hoist_fshl_from_xor(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
4038
; X64-LABEL: hoist_fshl_from_xor:
4139
; X64: # %bb.0:
42-
; X64-NEXT: movq %rcx, %rax
40+
; X64-NEXT: movq %rdi, %rax
41+
; X64-NEXT: xorq %rcx, %rsi
42+
; X64-NEXT: xorq %rdx, %rax
4343
; X64-NEXT: movl %r8d, %ecx
44-
; X64-NEXT: shldq %cl, %rsi, %rdi
45-
; X64-NEXT: shldq %cl, %rax, %rdx
46-
; X64-NEXT: xorq %rdi, %rdx
47-
; X64-NEXT: movq %rdx, %rax
44+
; X64-NEXT: shldq %cl, %rsi, %rax
4845
; X64-NEXT: retq
4946
%fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %s)
5047
%fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 %s)
@@ -69,10 +66,10 @@ define i64 @fshl_or_with_different_shift_value(i64 %a, i64 %b, i64 %c, i64 %d) n
6966
define i64 @hoist_fshl_from_or_const_shift(i64 %a, i64 %b, i64 %c, i64 %d) nounwind {
7067
; X64-LABEL: hoist_fshl_from_or_const_shift:
7168
; X64: # %bb.0:
72-
; X64-NEXT: movq %rdx, %rax
73-
; X64-NEXT: shldq $15, %rsi, %rdi
74-
; X64-NEXT: shldq $15, %rcx, %rax
75-
; X64-NEXT: orq %rdi, %rax
69+
; X64-NEXT: movq %rdi, %rax
70+
; X64-NEXT: orq %rcx, %rsi
71+
; X64-NEXT: orq %rdx, %rax
72+
; X64-NEXT: shldq $15, %rsi, %rax
7673
; X64-NEXT: retq
7774
%fshl.0 = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 15)
7875
%fshl.1 = call i64 @llvm.fshl.i64(i64 %c, i64 %d, i64 15)
@@ -83,11 +80,11 @@ define i64 @hoist_fshl_from_or_const_shift(i64 %a, i64 %b, i64 %c, i64 %d) nounw
8380
define i64 @hoist_fshr_from_or(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
8481
; X64-LABEL: hoist_fshr_from_or:
8582
; X64: # %bb.0:
86-
; X64-NEXT: movq %rcx, %rax
83+
; X64-NEXT: movq %rsi, %rax
84+
; X64-NEXT: orq %rdx, %rdi
85+
; X64-NEXT: orq %rcx, %rax
8786
; X64-NEXT: movl %r8d, %ecx
88-
; X64-NEXT: shrdq %cl, %rdi, %rsi
89-
; X64-NEXT: shrdq %cl, %rdx, %rax
90-
; X64-NEXT: orq %rsi, %rax
87+
; X64-NEXT: shrdq %cl, %rdi, %rax
9188
; X64-NEXT: retq
9289
%fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s)
9390
%fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s)
@@ -98,11 +95,11 @@ define i64 @hoist_fshr_from_or(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind
9895
define i64 @hoist_fshr_from_and(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
9996
; X64-LABEL: hoist_fshr_from_and:
10097
; X64: # %bb.0:
101-
; X64-NEXT: movq %rcx, %rax
98+
; X64-NEXT: movq %rsi, %rax
99+
; X64-NEXT: andq %rdx, %rdi
100+
; X64-NEXT: andq %rcx, %rax
102101
; X64-NEXT: movl %r8d, %ecx
103-
; X64-NEXT: shrdq %cl, %rdi, %rsi
104-
; X64-NEXT: shrdq %cl, %rdx, %rax
105-
; X64-NEXT: andq %rsi, %rax
102+
; X64-NEXT: shrdq %cl, %rdi, %rax
106103
; X64-NEXT: retq
107104
%fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s)
108105
%fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s)
@@ -113,11 +110,11 @@ define i64 @hoist_fshr_from_and(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind
113110
define i64 @hoist_fshr_from_xor(i64 %a, i64 %b, i64 %c, i64 %d, i64 %s) nounwind {
114111
; X64-LABEL: hoist_fshr_from_xor:
115112
; X64: # %bb.0:
116-
; X64-NEXT: movq %rcx, %rax
113+
; X64-NEXT: movq %rsi, %rax
114+
; X64-NEXT: xorq %rdx, %rdi
115+
; X64-NEXT: xorq %rcx, %rax
117116
; X64-NEXT: movl %r8d, %ecx
118-
; X64-NEXT: shrdq %cl, %rdi, %rsi
119-
; X64-NEXT: shrdq %cl, %rdx, %rax
120-
; X64-NEXT: xorq %rsi, %rax
117+
; X64-NEXT: shrdq %cl, %rdi, %rax
121118
; X64-NEXT: retq
122119
%fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %s)
123120
%fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 %s)
@@ -142,10 +139,10 @@ define i64 @fshr_or_with_different_shift_value(i64 %a, i64 %b, i64 %c, i64 %d) n
142139
define i64 @hoist_fshr_from_or_const_shift(i64 %a, i64 %b, i64 %c, i64 %d) nounwind {
143140
; X64-LABEL: hoist_fshr_from_or_const_shift:
144141
; X64: # %bb.0:
145-
; X64-NEXT: movq %rdx, %rax
146-
; X64-NEXT: shldq $49, %rsi, %rdi
147-
; X64-NEXT: shldq $49, %rcx, %rax
148-
; X64-NEXT: orq %rdi, %rax
142+
; X64-NEXT: movq %rdi, %rax
143+
; X64-NEXT: orq %rcx, %rsi
144+
; X64-NEXT: orl %edx, %eax
145+
; X64-NEXT: shldq $49, %rsi, %rax
149146
; X64-NEXT: retq
150147
%fshr.0 = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 15)
151148
%fshr.1 = call i64 @llvm.fshr.i64(i64 %c, i64 %d, i64 15)

llvm/test/CodeGen/X86/icmp-shift-opt.ll

Lines changed: 24 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,11 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
2525
; X86-NEXT: adcl $0, %esi
2626
; X86-NEXT: adcl $0, %edx
2727
; X86-NEXT: adcl $0, %ecx
28-
; X86-NEXT: movl %ecx, %ebx
29-
; X86-NEXT: shldl $4, %edx, %ebx
28+
; X86-NEXT: movl %edx, %ebx
29+
; X86-NEXT: orl %ecx, %ebx
3030
; X86-NEXT: movl %esi, %ebp
31-
; X86-NEXT: orl %ecx, %ebp
32-
; X86-NEXT: shrdl $28, %edx, %ebp
3331
; X86-NEXT: orl %ebx, %ebp
32+
; X86-NEXT: shrdl $28, %ebx, %ebp
3433
; X86-NEXT: jne .LBB0_1
3534
; X86-NEXT: # %bb.2: # %exit
3635
; X86-NEXT: movl %edi, (%eax)
@@ -73,19 +72,15 @@ exit:
7372
define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
7473
; X86-LABEL: opt_setcc_srl_eq_zero:
7574
; X86: # %bb.0:
76-
; X86-NEXT: pushl %esi
7775
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
7876
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
77+
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
7978
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
80-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
81-
; X86-NEXT: shrdl $17, %ecx, %eax
82-
; X86-NEXT: orl %esi, %ecx
83-
; X86-NEXT: shldl $15, %edx, %esi
84-
; X86-NEXT: orl %esi, %eax
85-
; X86-NEXT: shrdl $17, %edx, %ecx
86-
; X86-NEXT: orl %eax, %ecx
79+
; X86-NEXT: orl %ecx, %edx
80+
; X86-NEXT: orl %eax, %edx
81+
; X86-NEXT: orl %ecx, %eax
82+
; X86-NEXT: shldl $15, %edx, %eax
8783
; X86-NEXT: sete %al
88-
; X86-NEXT: popl %esi
8984
; X86-NEXT: retl
9085
;
9186
; X64-LABEL: opt_setcc_srl_eq_zero:
@@ -102,19 +97,15 @@ define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
10297
define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
10398
; X86-LABEL: opt_setcc_srl_ne_zero:
10499
; X86: # %bb.0:
105-
; X86-NEXT: pushl %esi
106100
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
107101
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
102+
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
108103
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
109-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
110-
; X86-NEXT: shrdl $17, %ecx, %eax
111-
; X86-NEXT: orl %esi, %ecx
112-
; X86-NEXT: shldl $15, %edx, %esi
113-
; X86-NEXT: orl %esi, %eax
114-
; X86-NEXT: shrdl $17, %edx, %ecx
115-
; X86-NEXT: orl %eax, %ecx
104+
; X86-NEXT: orl %ecx, %edx
105+
; X86-NEXT: orl %eax, %edx
106+
; X86-NEXT: orl %ecx, %eax
107+
; X86-NEXT: shldl $15, %edx, %eax
116108
; X86-NEXT: setne %al
117-
; X86-NEXT: popl %esi
118109
; X86-NEXT: retl
119110
;
120111
; X64-LABEL: opt_setcc_srl_ne_zero:
@@ -131,19 +122,13 @@ define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
131122
define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
132123
; X86-LABEL: opt_setcc_shl_eq_zero:
133124
; X86: # %bb.0:
134-
; X86-NEXT: pushl %esi
135125
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
136126
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
137-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
138-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
139-
; X86-NEXT: shldl $17, %edx, %esi
140-
; X86-NEXT: orl %eax, %edx
141-
; X86-NEXT: shldl $17, %ecx, %edx
142-
; X86-NEXT: shldl $17, %eax, %ecx
143-
; X86-NEXT: orl %esi, %ecx
144-
; X86-NEXT: orl %ecx, %edx
127+
; X86-NEXT: shll $17, %ecx
128+
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
129+
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
130+
; X86-NEXT: orl %ecx, %eax
145131
; X86-NEXT: sete %al
146-
; X86-NEXT: popl %esi
147132
; X86-NEXT: retl
148133
;
149134
; X64-LABEL: opt_setcc_shl_eq_zero:
@@ -160,19 +145,13 @@ define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
160145
define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
161146
; X86-LABEL: opt_setcc_shl_ne_zero:
162147
; X86: # %bb.0:
163-
; X86-NEXT: pushl %esi
164148
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
165149
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
166-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
167-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
168-
; X86-NEXT: shldl $17, %edx, %esi
169-
; X86-NEXT: orl %eax, %edx
170-
; X86-NEXT: shldl $17, %ecx, %edx
171-
; X86-NEXT: shldl $17, %eax, %ecx
172-
; X86-NEXT: orl %esi, %ecx
173-
; X86-NEXT: orl %ecx, %edx
150+
; X86-NEXT: shll $17, %ecx
151+
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
152+
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
153+
; X86-NEXT: orl %ecx, %eax
174154
; X86-NEXT: setne %al
175-
; X86-NEXT: popl %esi
176155
; X86-NEXT: retl
177156
;
178157
; X64-LABEL: opt_setcc_shl_ne_zero:
@@ -243,13 +222,11 @@ define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
243222
; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
244223
; X86: # %bb.0:
245224
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
225+
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
226+
; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
246227
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
247-
; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
248-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
249-
; X86-NEXT: orl %eax, %edx
250-
; X86-NEXT: shldl $17, %ecx, %edx
228+
; X86-NEXT: orl %eax, %ecx
251229
; X86-NEXT: shldl $17, %eax, %ecx
252-
; X86-NEXT: orl %edx, %ecx
253230
; X86-NEXT: sete %al
254231
; X86-NEXT: retl
255232
;

0 commit comments

Comments
 (0)