Skip to content

Commit 8dab0a4

Browse files
committed
[DAGCombine][X86][AArch64] 'A - (A & (B - 1))' -> 'A & (0 - B)' fold (PR44448)
While we do manage to fold integer-typed IR in the middle-end, we can't do that for the main motivational case of pointers. There is the @llvm.ptrmask() intrinsic, which may or may not be helpful, but I'm not sure it is considered fully canonical yet; likely not everything is aware of it. https://rise4fun.com/Alive/ZVdp Name: ptr - (ptr & (alignment-1)) -> ptr & (0 - alignment) %mask = add i64 %alignment, -1 %bias = and i64 %ptr, %mask %r = sub i64 %ptr, %bias => %highbitmask = sub i64 0, %alignment %r = and i64 %ptr, %highbitmask See https://bugs.llvm.org/show_bug.cgi?id=44448 https://reviews.llvm.org/D71499
1 parent c0cbe3f commit 8dab0a4

File tree

3 files changed

+54
-55
lines changed

3 files changed

+54
-55
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3104,6 +3104,21 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
31043104
DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
31053105
N1.getOperand(0)));
31063106

3107+
// A - (A & (B - 1)) -> A & (0 - B)
3108+
if (N1.getOpcode() == ISD::AND && N1.hasOneUse()) {
3109+
SDValue A = N1.getOperand(0);
3110+
SDValue BDec = N1.getOperand(1);
3111+
if (A != N0)
3112+
std::swap(A, BDec);
3113+
if (A == N0 && BDec.getOpcode() == ISD::ADD &&
3114+
isAllOnesOrAllOnesSplat(BDec->getOperand(1))) {
3115+
SDValue B = BDec.getOperand(0);
3116+
SDValue NegB =
3117+
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), B);
3118+
return DAG.getNode(ISD::AND, DL, VT, A, NegB);
3119+
}
3120+
}
3121+
31073122
// fold (X - (-Y * Z)) -> (X + (Y * Z))
31083123
if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
31093124
if (N1.getOperand(0).getOpcode() == ISD::SUB &&

llvm/test/CodeGen/AArch64/align-down.ll

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@
1717
define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
1818
; CHECK-LABEL: t0_32:
1919
; CHECK: // %bb.0:
20-
; CHECK-NEXT: sub w8, w1, #1 // =1
21-
; CHECK-NEXT: and w8, w0, w8
22-
; CHECK-NEXT: sub w0, w0, w8
20+
; CHECK-NEXT: neg w8, w1
21+
; CHECK-NEXT: and w0, w0, w8
2322
; CHECK-NEXT: ret
2423
%mask = add i32 %alignment, -1
2524
%bias = and i32 %ptr, %mask
@@ -29,9 +28,8 @@ define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
2928
define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
3029
; CHECK-LABEL: t1_64:
3130
; CHECK: // %bb.0:
32-
; CHECK-NEXT: sub x8, x1, #1 // =1
33-
; CHECK-NEXT: and x8, x0, x8
34-
; CHECK-NEXT: sub x0, x0, x8
31+
; CHECK-NEXT: neg x8, x1
32+
; CHECK-NEXT: and x0, x0, x8
3533
; CHECK-NEXT: ret
3634
%mask = add i64 %alignment, -1
3735
%bias = and i64 %ptr, %mask
@@ -42,9 +40,8 @@ define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
4240
define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
4341
; CHECK-LABEL: t2_commutative:
4442
; CHECK: // %bb.0:
45-
; CHECK-NEXT: sub w8, w1, #1 // =1
46-
; CHECK-NEXT: and w8, w8, w0
47-
; CHECK-NEXT: sub w0, w0, w8
43+
; CHECK-NEXT: neg w8, w1
44+
; CHECK-NEXT: and w0, w0, w8
4845
; CHECK-NEXT: ret
4946
%mask = add i32 %alignment, -1
5047
%bias = and i32 %mask, %ptr ; swapped
@@ -57,9 +54,9 @@ define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
5754
define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
5855
; CHECK-LABEL: t3_extrause0:
5956
; CHECK: // %bb.0:
57+
; CHECK-NEXT: neg w9, w1
6058
; CHECK-NEXT: sub w8, w1, #1 // =1
61-
; CHECK-NEXT: and w9, w0, w8
62-
; CHECK-NEXT: sub w0, w0, w9
59+
; CHECK-NEXT: and w0, w0, w9
6360
; CHECK-NEXT: str w8, [x2]
6461
; CHECK-NEXT: ret
6562
%mask = add i32 %alignment, -1

llvm/test/CodeGen/X86/align-down.ll

Lines changed: 31 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,16 @@
1818
define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
1919
; X86-LABEL: t0_32:
2020
; X86: # %bb.0:
21-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
22-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
23-
; X86-NEXT: decl %ecx
24-
; X86-NEXT: andl %eax, %ecx
25-
; X86-NEXT: subl %ecx, %eax
21+
; X86-NEXT: xorl %eax, %eax
22+
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
23+
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
2624
; X86-NEXT: retl
2725
;
2826
; X64-LABEL: t0_32:
2927
; X64: # %bb.0:
30-
; X64-NEXT: movl %edi, %eax
31-
; X64-NEXT: decl %esi
32-
; X64-NEXT: andl %edi, %esi
33-
; X64-NEXT: subl %esi, %eax
28+
; X64-NEXT: movl %esi, %eax
29+
; X64-NEXT: negl %eax
30+
; X64-NEXT: andl %edi, %eax
3431
; X64-NEXT: retq
3532
%mask = add i32 %alignment, -1
3633
%bias = and i32 %ptr, %mask
@@ -40,26 +37,19 @@ define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
4037
define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
4138
; X86-LABEL: t1_64:
4239
; X86: # %bb.0:
43-
; X86-NEXT: pushl %esi
44-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
45-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
46-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
47-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
48-
; X86-NEXT: addl $-1, %ecx
49-
; X86-NEXT: adcl $-1, %esi
50-
; X86-NEXT: andl %edx, %esi
51-
; X86-NEXT: andl %eax, %ecx
52-
; X86-NEXT: subl %ecx, %eax
53-
; X86-NEXT: sbbl %esi, %edx
54-
; X86-NEXT: popl %esi
40+
; X86-NEXT: xorl %edx, %edx
41+
; X86-NEXT: xorl %eax, %eax
42+
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
43+
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
44+
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
45+
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
5546
; X86-NEXT: retl
5647
;
5748
; X64-LABEL: t1_64:
5849
; X64: # %bb.0:
59-
; X64-NEXT: movq %rdi, %rax
60-
; X64-NEXT: decq %rsi
61-
; X64-NEXT: andq %rdi, %rsi
62-
; X64-NEXT: subq %rsi, %rax
50+
; X64-NEXT: movq %rsi, %rax
51+
; X64-NEXT: negq %rax
52+
; X64-NEXT: andq %rdi, %rax
6353
; X64-NEXT: retq
6454
%mask = add i64 %alignment, -1
6555
%bias = and i64 %ptr, %mask
@@ -70,19 +60,16 @@ define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
7060
define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
7161
; X86-LABEL: t2_commutative:
7262
; X86: # %bb.0:
73-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
74-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
75-
; X86-NEXT: decl %ecx
76-
; X86-NEXT: andl %eax, %ecx
77-
; X86-NEXT: subl %ecx, %eax
63+
; X86-NEXT: xorl %eax, %eax
64+
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
65+
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
7866
; X86-NEXT: retl
7967
;
8068
; X64-LABEL: t2_commutative:
8169
; X64: # %bb.0:
82-
; X64-NEXT: movl %edi, %eax
83-
; X64-NEXT: decl %esi
84-
; X64-NEXT: andl %edi, %esi
85-
; X64-NEXT: subl %esi, %eax
70+
; X64-NEXT: movl %esi, %eax
71+
; X64-NEXT: negl %eax
72+
; X64-NEXT: andl %edi, %eax
8673
; X64-NEXT: retq
8774
%mask = add i32 %alignment, -1
8875
%bias = and i32 %mask, %ptr ; swapped
@@ -95,22 +82,22 @@ define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
9582
define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
9683
; X86-LABEL: t3_extrause0:
9784
; X86: # %bb.0:
98-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
9985
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
100-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
101-
; X86-NEXT: decl %edx
86+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
87+
; X86-NEXT: leal -1(%eax), %edx
10288
; X86-NEXT: movl %edx, (%ecx)
103-
; X86-NEXT: andl %eax, %edx
104-
; X86-NEXT: subl %edx, %eax
89+
; X86-NEXT: negl %eax
90+
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
10591
; X86-NEXT: retl
10692
;
10793
; X64-LABEL: t3_extrause0:
10894
; X64: # %bb.0:
109-
; X64-NEXT: movl %edi, %eax
110-
; X64-NEXT: decl %esi
111-
; X64-NEXT: movl %esi, (%rdx)
112-
; X64-NEXT: andl %edi, %esi
113-
; X64-NEXT: subl %esi, %eax
95+
; X64-NEXT: movl %esi, %eax
96+
; X64-NEXT: leal -1(%rax), %ecx
97+
; X64-NEXT: movl %ecx, (%rdx)
98+
; X64-NEXT: negl %eax
99+
; X64-NEXT: andl %edi, %eax
100+
; X64-NEXT: # kill: def $eax killed $eax killed $rax
114101
; X64-NEXT: retq
115102
%mask = add i32 %alignment, -1
116103
store i32 %mask, i32* %mask_storage

0 commit comments

Comments (0)