Skip to content

Commit dcea5f1

Browse files
AZero13 and RKSimon authored
[TargetLowering] Fold (a | b) ==/!= b -> (a & ~b) ==/!= 0 when and-not exists (#145368)
This is especially helpful for AArch64, which simplifies ands + cmp to tst.
Alive2: https://alive2.llvm.org/ce/z/LLgcJJ
Co-authored-by: Simon Pilgrim <[email protected]>
1 parent f329689 commit dcea5f1

File tree

4 files changed

+92
-2
lines changed

4 files changed

+92
-2
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5795,6 +5795,8 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
57955795
private:
57965796
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
57975797
const SDLoc &DL, DAGCombinerInfo &DCI) const;
5798+
SDValue foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
5799+
const SDLoc &DL, DAGCombinerInfo &DCI) const;
57985800
SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
57995801
const SDLoc &DL, DAGCombinerInfo &DCI) const;
58005802

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/CodeGen/MachineFunction.h"
2222
#include "llvm/CodeGen/MachineJumpTableInfo.h"
2323
#include "llvm/CodeGen/MachineRegisterInfo.h"
24+
#include "llvm/CodeGen/SDPatternMatch.h"
2425
#include "llvm/CodeGen/SelectionDAG.h"
2526
#include "llvm/CodeGen/TargetRegisterInfo.h"
2627
#include "llvm/IR/DataLayout.h"
@@ -37,6 +38,7 @@
3738
#include <cctype>
3839
#include <deque>
3940
using namespace llvm;
41+
using namespace llvm::SDPatternMatch;
4042

4143
/// NOTE: The TargetMachine owns TLOF.
4244
TargetLowering::TargetLowering(const TargetMachine &tm)
@@ -4227,6 +4229,42 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
42274229
return SDValue();
42284230
}
42294231

4232+
/// This helper function of SimplifySetCC tries to optimize the comparison when
4233+
/// either operand of the SetCC node is a bitwise-or instruction.
4234+
/// For now, this just transforms (X | Y) ==/!= Y into (X & ~Y) ==/!= 0.
///
/// \param VT   Result type passed to the rebuilt setcc.
/// \param N0   First setcc operand (taken by value; may be swapped with N1).
/// \param N1   Second setcc operand (taken by value; may be swapped with N0).
/// \param Cond Condition code; only SETEQ and SETNE are handled.
/// \param DL   Debug location used for the new setcc and its zero constant.
/// \param DCI  Combiner info; supplies the SelectionDAG used to build nodes.
/// \returns The rebuilt setcc on success, or an empty SDValue if no fold
///          applies.
4235+
SDValue TargetLowering::foldSetCCWithOr(EVT VT, SDValue N0, SDValue N1,
4236+
ISD::CondCode Cond, const SDLoc &DL,
4237+
DAGCombinerInfo &DCI) const {
4238+
// Canonicalize: when only N1 is an OR, swap so the OR ends up in N0. The
// condition code is left as-is (only EQ/NE are accepted below).
if (N1.getOpcode() == ISD::OR && N0.getOpcode() != ISD::OR)
4239+
std::swap(N0, N1);
4240+
4241+
SelectionDAG &DAG = DCI.DAG;
4242+
EVT OpVT = N0.getValueType();
4243+
// Give up unless N0 has exactly one use, the operand type is an integer type,
// and the comparison is an equality or inequality.
if (!N0.hasOneUse() || !OpVT.isInteger() ||
4244+
(Cond != ISD::SETEQ && Cond != ISD::SETNE))
4245+
return SDValue();
4246+
4247+
// (X | Y) == Y
4248+
// (X | Y) != Y
4249+
// Here Y is N1 and X is the other operand of the OR in N0.
// NOTE(review): this presumably relies on m_Or matching its operands in
// either order -- confirm against SDPatternMatch.h.
SDValue X;
4250+
if (sd_match(N0, m_Or(m_Value(X), m_Specific(N1))) && hasAndNotCompare(N1)) {
4251+
// If the target supports an 'and-not' or 'and-complement' logic operation,
4252+
// try to use that to make a comparison operation more efficient.
4253+
4254+
// Bail out if the compare operand that we want to turn into a zero is
4255+
// already a zero (otherwise, infinite loop).
4256+
if (isNullConstant(N1))
4257+
return SDValue();
4258+
4259+
// Transform this into: (X & ~Y) ==/!= 0.
// The NOT takes its location from N1 and the AND from N0; the final setcc
// keeps the caller's DL, VT and Cond.
4260+
SDValue NotY = DAG.getNOT(SDLoc(N1), N1, OpVT);
4261+
SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, X, NotY);
4262+
return DAG.getSetCC(DL, VT, NewAnd, DAG.getConstant(0, DL, OpVT), Cond);
4263+
}
4264+
4265+
// No (X | Y) ==/!= Y pattern was matched, or the target has no and-not
// compare for Y.
return SDValue();
4266+
}
4267+
42304268
/// There are multiple IR patterns that could be checking whether certain
42314269
/// truncation of a signed number would be lossy or not. The pattern which is
42324270
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
@@ -5522,6 +5560,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
55225560

55235561
if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
55245562
return V;
5563+
5564+
if (SDValue V = foldSetCCWithOr(VT, N0, N1, Cond, dl, DCI))
5565+
return V;
55255566
}
55265567

55275568
// Fold remainder of division by a constant.

llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,51 @@ define i64 @andnot_sub_with_neg_i64(i64 %a0, i64 %a1) {
9696
%and = and i64 %diff, %a0
9797
ret i64 %and
9898
}
99+
100+
; (b | a) == a, with the OR as the left icmp operand, feeding a select.
; The CHECK lines expect just a bics + csel (eq) pair before the ret, with no
; separate orr or cmp.
define i32 @and_not_select_eq(i32 %a, i32 %b, i32 %c) {
101+
; CHECK-LABEL: and_not_select_eq:
102+
; CHECK: // %bb.0:
103+
; CHECK-NEXT: bics wzr, w1, w0
104+
; CHECK-NEXT: csel w0, w0, w2, eq
105+
; CHECK-NEXT: ret
106+
%or = or i32 %b, %a
107+
%cmp = icmp eq i32 %or, %a
108+
%a.c = select i1 %cmp, i32 %a, i32 %c
109+
ret i32 %a.c
110+
}
111+
112+
; (b | a) != a, with the OR as the left icmp operand, feeding a select.
; The CHECK lines expect just a bics + csel (ne) pair before the ret, with no
; separate orr or cmp.
define i32 @and_not_select_ne(i32 %a, i32 %b, i32 %c) {
113+
; CHECK-LABEL: and_not_select_ne:
114+
; CHECK: // %bb.0:
115+
; CHECK-NEXT: bics wzr, w1, w0
116+
; CHECK-NEXT: csel w0, w0, w2, ne
117+
; CHECK-NEXT: ret
118+
%or = or i32 %b, %a
119+
%cmp = icmp ne i32 %or, %a
120+
%a.c = select i1 %cmp, i32 %a, i32 %c
121+
ret i32 %a.c
122+
}
123+
124+
; a == (b | a): same as and_not_select_eq but with the OR as the right icmp
; operand. The CHECK lines expect the same bics + csel (eq) sequence.
define i32 @and_not_select_eq_swap(i32 %a, i32 %b, i32 %c) {
125+
; CHECK-LABEL: and_not_select_eq_swap:
126+
; CHECK: // %bb.0:
127+
; CHECK-NEXT: bics wzr, w1, w0
128+
; CHECK-NEXT: csel w0, w0, w2, eq
129+
; CHECK-NEXT: ret
130+
%or = or i32 %b, %a
131+
%cmp = icmp eq i32 %a, %or
132+
%a.c = select i1 %cmp, i32 %a, i32 %c
133+
ret i32 %a.c
134+
}
135+
136+
; a != (a | b): the OR is the right icmp operand and, unlike the other tests
; here, the compared value %a is the OR's first operand. The CHECK lines
; expect the same bics + csel (ne) sequence.
define i32 @and_not_select_ne_swap(i32 %a, i32 %b, i32 %c) {
137+
; CHECK-LABEL: and_not_select_ne_swap:
138+
; CHECK: // %bb.0:
139+
; CHECK-NEXT: bics wzr, w1, w0
140+
; CHECK-NEXT: csel w0, w0, w2, ne
141+
; CHECK-NEXT: ret
142+
%or = or i32 %a, %b
143+
%cmp = icmp ne i32 %a, %or
144+
%a.c = select i1 %cmp, i32 %a, i32 %c
145+
ret i32 %a.c
146+
}

llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -569,8 +569,7 @@ define void @test_successor_with_loop_phi(ptr %A, ptr %B) {
569569
; CHECK-NEXT: ldr w8, [x0]
570570
; CHECK-NEXT: str wzr, [x0]
571571
; CHECK-NEXT: mov x0, x1
572-
; CHECK-NEXT: orr w8, w8, #0x4
573-
; CHECK-NEXT: cmp w8, #4
572+
; CHECK-NEXT: tst w8, #0xfffffffb
574573
; CHECK-NEXT: b.eq LBB7_1
575574
; CHECK-NEXT: ; %bb.2: ; %exit
576575
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)