Skip to content

Commit e9302bf

Browse files
committed
[SDAG] try harder to remove a rotate from X == 0
https://alive2.llvm.org/ce/z/mJP7XP

This can be viewed as expanding the compare into and/or-of-compares:
https://alive2.llvm.org/ce/z/bkZYWE
followed by reduction of each compare.

This could be extended in several ways:
1. There's a (X & Y) == -1 sibling.
2. We can recurse through more than 1 'or'.
3. The fold could be generalized beyond rotates - any operation that only changes the order of bits (bswap, bitreverse).

This is a transform noted in D111530.
1 parent d3c16be commit e9302bf

File tree

3 files changed

+37
-36
lines changed

3 files changed

+37
-36
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3835,6 +3835,24 @@ static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
38353835
if (SDValue R = getRotateSource(N0))
38363836
return DAG.getSetCC(dl, VT, R, N1, Cond);
38373837

3838+
// Peek through an 'or' of a rotated value compared against 0:
3839+
// or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
3840+
// or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
3841+
//
3842+
// TODO: Add the 'and' with -1 sibling.
3843+
// TODO: Recurse through a series of 'or' ops to find the rotate.
3844+
EVT OpVT = N0.getValueType();
3845+
if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
3846+
if (SDValue R = getRotateSource(N0.getOperand(0))) {
3847+
SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
3848+
return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3849+
}
3850+
if (SDValue R = getRotateSource(N0.getOperand(1))) {
3851+
SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
3852+
return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
3853+
}
3854+
}
3855+
38383856
return SDValue();
38393857
}
38403858

llvm/test/CodeGen/X86/legalize-shift.ll

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,29 @@
55
define void @PR36250() nounwind {
66
; X86-LABEL: PR36250:
77
; X86: # %bb.0:
8+
; X86-NEXT: pushl %esi
89
; X86-NEXT: movl (%eax), %eax
910
; X86-NEXT: movl %eax, %ecx
1011
; X86-NEXT: roll %ecx
11-
; X86-NEXT: addl %eax, %eax
12-
; X86-NEXT: movl %ecx, %edx
13-
; X86-NEXT: orl %ecx, %edx
14-
; X86-NEXT: orl %ecx, %edx
15-
; X86-NEXT: orl %eax, %edx
16-
; X86-NEXT: orl %ecx, %edx
12+
; X86-NEXT: leal (%eax,%eax), %edx
13+
; X86-NEXT: movl %ecx, %esi
14+
; X86-NEXT: orl %ecx, %esi
15+
; X86-NEXT: orl %ecx, %esi
16+
; X86-NEXT: orl %edx, %esi
17+
; X86-NEXT: orl %eax, %esi
1718
; X86-NEXT: sete (%eax)
19+
; X86-NEXT: popl %esi
1820
; X86-NEXT: retl
1921
;
2022
; X64-LABEL: PR36250:
2123
; X64: # %bb.0:
2224
; X64-NEXT: movq (%rax), %rax
2325
; X64-NEXT: movq %rax, %rcx
2426
; X64-NEXT: rolq %rcx
25-
; X64-NEXT: addq %rax, %rax
26-
; X64-NEXT: movq %rcx, %rdx
27-
; X64-NEXT: orq %rcx, %rdx
28-
; X64-NEXT: orq %rax, %rdx
29-
; X64-NEXT: orq %rcx, %rdx
27+
; X64-NEXT: leaq (%rax,%rax), %rdx
28+
; X64-NEXT: orq %rcx, %rcx
29+
; X64-NEXT: orq %rdx, %rcx
30+
; X64-NEXT: orq %rax, %rcx
3031
; X64-NEXT: sete (%rax)
3132
; X64-NEXT: retq
3233
%1 = load i448, i448* undef

llvm/test/CodeGen/X86/setcc-fsh.ll

Lines changed: 7 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,6 @@ define i1 @fshl_eq_n1(i8 %x, i8 %y, i8 %z) nounwind {
188188
define i1 @or_rotl_eq_0(i8 %x, i8 %y, i8 %z) nounwind {
189189
; CHECK-LABEL: or_rotl_eq_0:
190190
; CHECK: # %bb.0:
191-
; CHECK-NEXT: movl %edx, %ecx
192-
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
193-
; CHECK-NEXT: rolb %cl, %dil
194191
; CHECK-NEXT: orb %sil, %dil
195192
; CHECK-NEXT: sete %al
196193
; CHECK-NEXT: retq
@@ -203,9 +200,6 @@ define i1 @or_rotl_eq_0(i8 %x, i8 %y, i8 %z) nounwind {
203200
define i1 @or_rotr_ne_0(i64 %x, i64 %y, i64 %z) nounwind {
204201
; CHECK-LABEL: or_rotr_ne_0:
205202
; CHECK: # %bb.0:
206-
; CHECK-NEXT: movq %rdx, %rcx
207-
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
208-
; CHECK-NEXT: rorq %cl, %rdi
209203
; CHECK-NEXT: orq %rsi, %rdi
210204
; CHECK-NEXT: setne %al
211205
; CHECK-NEXT: retq
@@ -215,6 +209,8 @@ define i1 @or_rotr_ne_0(i64 %x, i64 %y, i64 %z) nounwind {
215209
ret i1 %r
216210
}
217211

212+
; negative test - wrong constant
213+
218214
define i1 @or_rotl_ne_n1(i32 %x, i32 %y, i32 %z) nounwind {
219215
; CHECK-LABEL: or_rotl_ne_n1:
220216
; CHECK: # %bb.0:
@@ -231,6 +227,8 @@ define i1 @or_rotl_ne_n1(i32 %x, i32 %y, i32 %z) nounwind {
231227
ret i1 %r
232228
}
233229

230+
; negative test - extra use
231+
234232
define i1 @or_rotl_ne_0_use(i32 %x, i32 %y, i32 %z) nounwind {
235233
; CHECK-LABEL: or_rotl_ne_0_use:
236234
; CHECK: # %bb.0:
@@ -254,25 +252,9 @@ define i1 @or_rotl_ne_0_use(i32 %x, i32 %y, i32 %z) nounwind {
254252
define <4 x i1> @or_rotl_ne_eq0(<4 x i32> %x, <4 x i32> %y) nounwind {
255253
; CHECK-LABEL: or_rotl_ne_eq0:
256254
; CHECK: # %bb.0:
257-
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [31,31,31,31]
258-
; CHECK-NEXT: pand %xmm1, %xmm2
259-
; CHECK-NEXT: pslld $23, %xmm2
260-
; CHECK-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
261-
; CHECK-NEXT: cvttps2dq %xmm2, %xmm2
262-
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
263-
; CHECK-NEXT: pmuludq %xmm2, %xmm0
264-
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
265-
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
266-
; CHECK-NEXT: pmuludq %xmm3, %xmm2
267-
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
268-
; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
269-
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
270-
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
271-
; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
272-
; CHECK-NEXT: por %xmm1, %xmm4
273-
; CHECK-NEXT: por %xmm0, %xmm4
274-
; CHECK-NEXT: pxor %xmm0, %xmm0
275-
; CHECK-NEXT: pcmpeqd %xmm4, %xmm0
255+
; CHECK-NEXT: pxor %xmm2, %xmm2
256+
; CHECK-NEXT: por %xmm1, %xmm0
257+
; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
276258
; CHECK-NEXT: retq
277259
%rot = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32>%x, <4 x i32> %x, <4 x i32> %y)
278260
%or = or <4 x i32> %y, %rot

0 commit comments

Comments
 (0)