Skip to content

Commit 4591303

Browse files
committed
[SelectionDAG] Make (a & x) | (~a & y) -> (a & (x ^ y)) ^ y available for all targets
1 parent f800576 commit 4591303

File tree

8 files changed

+194
-237
lines changed

8 files changed

+194
-237
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8108,6 +8108,57 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
81088108
return SDValue();
81098109
}
81108110

8111+
/// Try to rewrite one operand ordering of a masked merge.
///
/// The pattern handled is `(AndL0 & AndR0) | (AndL1 & AndR1)` where \p AndL0
/// is a single-use bitwise-not `~M` and `M` is one of the operands of the
/// second AND (\p AndL1 or \p AndR1, in either order). With `X` being the
/// other operand of the second AND, the result is `((X ^ AndR0) & M) ^ AndR0`.
///
/// \param AndL0 Candidate inverted mask (first AND operand being tested).
/// \param AndR0 The value selected where the mask bits are clear.
/// \param AndL1 One operand of the other AND.
/// \param AndR1 The remaining operand of the other AND.
/// \param DL    Location attached to every node created here.
/// \param DAG   DAG in which the replacement nodes are created.
/// \returns the replacement value, or an empty SDValue when the operands do
/// not form the pattern.
static SDValue foldMaskedMergeImpl(SDValue AndL0, SDValue AndR0, SDValue AndL1,
                                   SDValue AndR1, const SDLoc &DL,
                                   SelectionDAG &DAG) {
  // The candidate mask must be a NOT that nobody else uses; otherwise the NOT
  // would survive the rewrite and nothing would be saved.
  if (!isBitwiseNot(AndL0, /*AllowUndefs=*/true) || !AndL0->hasOneUse())
    return SDValue();

  // Canonicalize so that the un-inverted mask, if present, sits in AndL1.
  SDValue NotOp = AndL0->getOperand(0);
  if (NotOp == AndR1)
    std::swap(AndR1, AndL1);
  if (NotOp != AndL1)
    return SDValue();

  // (~NotOp & AndR0) | (NotOp & AndR1)
  //   --> ((AndR1 ^ AndR0) & NotOp) ^ AndR0
  //
  // Query the type of the referenced value itself rather than result #0 of
  // its defining node.
  EVT VT = AndL1.getValueType();

  // AndR0 appears twice in the rewritten expression, so both XORs consume a
  // single frozen copy of it.
  // NOTE(review): presumably this keeps the two uses consistent if AndR0 is
  // undef/poison - confirm against the ISD::FREEZE semantics.
  SDValue FreezeAndR0 = DAG.getNode(ISD::FREEZE, DL, VT, AndR0);
  SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, AndR1, FreezeAndR0);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
  return DAG.getNode(ISD::XOR, DL, VT, And, FreezeAndR0);
}
8131+
8132+
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
8133+
/// equivalent `((x ^ y) & m) ^ y)` pattern.
8134+
/// This is typically a better representation for targets without a fused
8135+
/// "and-not" operation.
8136+
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG,
8137+
const SDLoc &DL) {
8138+
// Note that masked-merge variants using XOR or ADD expressions are
8139+
// normalized to OR by InstCombine so we only check for OR.
8140+
assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
8141+
SDValue N0 = Node->getOperand(0);
8142+
if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
8143+
return SDValue();
8144+
SDValue N1 = Node->getOperand(1);
8145+
if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
8146+
return SDValue();
8147+
SDValue N00 = N0->getOperand(0);
8148+
SDValue N01 = N0->getOperand(1);
8149+
SDValue N10 = N1->getOperand(0);
8150+
SDValue N11 = N1->getOperand(1);
8151+
if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
8152+
return Result;
8153+
if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
8154+
return Result;
8155+
if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
8156+
return Result;
8157+
if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
8158+
return Result;
8159+
return SDValue();
8160+
}
8161+
81118162
SDValue DAGCombiner::visitOR(SDNode *N) {
81128163
SDValue N0 = N->getOperand(0);
81138164
SDValue N1 = N->getOperand(1);
@@ -8286,6 +8337,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
82868337
if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
82878338
return R;
82888339

8340+
if (!TLI.hasAndNot(SDValue(N, 0)) && VT.isScalarInteger() && VT != MVT::i1)
8341+
if (SDValue R = foldMaskedMerge(N, DAG, DL))
8342+
return R;
8343+
82898344
return SDValue();
82908345
}
82918346

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -52005,59 +52005,6 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
5200552005
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
5200652006
}
5200752007

52008-
static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
52009-
SDValue And1_L, SDValue And1_R,
52010-
const SDLoc &DL, SelectionDAG &DAG) {
52011-
if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
52012-
return SDValue();
52013-
SDValue NotOp = And0_L->getOperand(0);
52014-
if (NotOp == And1_R)
52015-
std::swap(And1_R, And1_L);
52016-
if (NotOp != And1_L)
52017-
return SDValue();
52018-
52019-
// (~(NotOp) & And0_R) | (NotOp & And1_R)
52020-
// --> ((And0_R ^ And1_R) & NotOp) ^ And1_R
52021-
EVT VT = And1_L->getValueType(0);
52022-
SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
52023-
SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
52024-
SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
52025-
SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
52026-
return Xor1;
52027-
}
52028-
52029-
/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
52030-
/// equivalent `((x ^ y) & m) ^ y)` pattern.
52031-
/// This is typically a better representation for targets without a fused
52032-
/// "and-not" operation. This function is intended to be called from a
52033-
/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
52034-
static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
52035-
// Note that masked-merge variants using XOR or ADD expressions are
52036-
// normalized to OR by InstCombine so we only check for OR.
52037-
assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
52038-
SDValue N0 = Node->getOperand(0);
52039-
if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
52040-
return SDValue();
52041-
SDValue N1 = Node->getOperand(1);
52042-
if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
52043-
return SDValue();
52044-
52045-
SDLoc DL(Node);
52046-
SDValue N00 = N0->getOperand(0);
52047-
SDValue N01 = N0->getOperand(1);
52048-
SDValue N10 = N1->getOperand(0);
52049-
SDValue N11 = N1->getOperand(1);
52050-
if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
52051-
return Result;
52052-
if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
52053-
return Result;
52054-
if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
52055-
return Result;
52056-
if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
52057-
return Result;
52058-
return SDValue();
52059-
}
52060-
5206152008
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
5206252009
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
5206352010
/// with CMP+{ADC, SBB}.
@@ -52461,11 +52408,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
5246152408
}
5246252409
}
5246352410

52464-
// We should fold "masked merge" patterns when `andn` is not available.
52465-
if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
52466-
if (SDValue R = foldMaskedMerge(N, DAG))
52467-
return R;
52468-
5246952411
if (SDValue R = combineOrXorWithSETCC(N->getOpcode(), dl, VT, N0, N1, DAG))
5247052412
return R;
5247152413

llvm/test/CodeGen/RISCV/fold-masked-merge.ll

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,9 @@
1414
define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
1515
; CHECK-I-LABEL: masked_merge0:
1616
; CHECK-I: # %bb.0:
17-
; CHECK-I-NEXT: and a1, a0, a1
18-
; CHECK-I-NEXT: not a0, a0
19-
; CHECK-I-NEXT: and a0, a0, a2
20-
; CHECK-I-NEXT: or a0, a1, a0
17+
; CHECK-I-NEXT: xor a1, a1, a2
18+
; CHECK-I-NEXT: and a0, a1, a0
19+
; CHECK-I-NEXT: xor a0, a0, a2
2120
; CHECK-I-NEXT: ret
2221
;
2322
; CHECK-ZBB-LABEL: masked_merge0:
@@ -36,10 +35,9 @@ define i32 @masked_merge0(i32 %a0, i32 %a1, i32 %a2) {
3635
define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
3736
; CHECK-I-LABEL: masked_merge1:
3837
; CHECK-I: # %bb.0:
39-
; CHECK-I-NEXT: and a1, a0, a1
40-
; CHECK-I-NEXT: not a0, a0
41-
; CHECK-I-NEXT: and a0, a2, a0
42-
; CHECK-I-NEXT: or a0, a1, a0
38+
; CHECK-I-NEXT: xor a1, a1, a2
39+
; CHECK-I-NEXT: and a0, a1, a0
40+
; CHECK-I-NEXT: xor a0, a0, a2
4341
; CHECK-I-NEXT: ret
4442
;
4543
; CHECK-ZBB-LABEL: masked_merge1:
@@ -58,10 +56,7 @@ define i16 @masked_merge1(i16 %a0, i16 %a1, i16 %a2) {
5856
define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
5957
; CHECK-I-LABEL: masked_merge2:
6058
; CHECK-I: # %bb.0:
61-
; CHECK-I-NEXT: not a2, a0
62-
; CHECK-I-NEXT: and a2, a2, a1
63-
; CHECK-I-NEXT: and a0, a1, a0
64-
; CHECK-I-NEXT: or a0, a2, a0
59+
; CHECK-I-NEXT: mv a0, a1
6560
; CHECK-I-NEXT: ret
6661
;
6762
; CHECK-ZBB-LABEL: masked_merge2:
@@ -80,28 +75,25 @@ define i8 @masked_merge2(i8 %a0, i8 %a1, i8 %a2) {
8075
define i64 @masked_merge3(i64 %a0, i64 %a1, i64 %a2) {
8176
; RV32I-LABEL: masked_merge3:
8277
; RV32I: # %bb.0:
78+
; RV32I-NEXT: not a5, a5
79+
; RV32I-NEXT: not a4, a4
80+
; RV32I-NEXT: xor a3, a3, a5
81+
; RV32I-NEXT: xor a2, a2, a4
8382
; RV32I-NEXT: not a2, a2
8483
; RV32I-NEXT: not a3, a3
85-
; RV32I-NEXT: not a4, a4
86-
; RV32I-NEXT: not a5, a5
87-
; RV32I-NEXT: not a6, a0
88-
; RV32I-NEXT: not a7, a1
89-
; RV32I-NEXT: and a5, a7, a5
90-
; RV32I-NEXT: and a4, a6, a4
91-
; RV32I-NEXT: and a1, a3, a1
9284
; RV32I-NEXT: and a0, a2, a0
93-
; RV32I-NEXT: or a0, a4, a0
94-
; RV32I-NEXT: or a1, a5, a1
85+
; RV32I-NEXT: and a1, a3, a1
86+
; RV32I-NEXT: xor a0, a0, a4
87+
; RV32I-NEXT: xor a1, a1, a5
9588
; RV32I-NEXT: ret
9689
;
9790
; RV64I-LABEL: masked_merge3:
9891
; RV64I: # %bb.0:
99-
; RV64I-NEXT: not a1, a1
10092
; RV64I-NEXT: not a2, a2
101-
; RV64I-NEXT: not a3, a0
102-
; RV64I-NEXT: and a2, a3, a2
93+
; RV64I-NEXT: xor a1, a1, a2
94+
; RV64I-NEXT: not a1, a1
10395
; RV64I-NEXT: and a0, a1, a0
104-
; RV64I-NEXT: or a0, a2, a0
96+
; RV64I-NEXT: xor a0, a0, a2
10597
; RV64I-NEXT: ret
10698
;
10799
; RV32ZBB-LABEL: masked_merge3:

llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll

Lines changed: 25 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,13 @@
88
; RUN: llc -mtriple=riscv64 -mattr=+zbb < %s \
99
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-ZBB,RV64,RV64ZBB
1010

11-
; TODO: Should we convert these to X ^ ((X ^ Y) & M) form when Zbb isn't
12-
; present?
1311

1412
define i8 @out8(i8 %x, i8 %y, i8 %mask) {
1513
; CHECK-I-LABEL: out8:
1614
; CHECK-I: # %bb.0:
15+
; CHECK-I-NEXT: xor a0, a0, a1
1716
; CHECK-I-NEXT: and a0, a0, a2
18-
; CHECK-I-NEXT: not a2, a2
19-
; CHECK-I-NEXT: and a1, a1, a2
20-
; CHECK-I-NEXT: or a0, a0, a1
17+
; CHECK-I-NEXT: xor a0, a0, a1
2118
; CHECK-I-NEXT: ret
2219
;
2320
; CHECK-ZBB-LABEL: out8:
@@ -36,10 +33,9 @@ define i8 @out8(i8 %x, i8 %y, i8 %mask) {
3633
define i16 @out16(i16 %x, i16 %y, i16 %mask) {
3734
; CHECK-I-LABEL: out16:
3835
; CHECK-I: # %bb.0:
36+
; CHECK-I-NEXT: xor a0, a0, a1
3937
; CHECK-I-NEXT: and a0, a0, a2
40-
; CHECK-I-NEXT: not a2, a2
41-
; CHECK-I-NEXT: and a1, a1, a2
42-
; CHECK-I-NEXT: or a0, a0, a1
38+
; CHECK-I-NEXT: xor a0, a0, a1
4339
; CHECK-I-NEXT: ret
4440
;
4541
; CHECK-ZBB-LABEL: out16:
@@ -58,10 +54,9 @@ define i16 @out16(i16 %x, i16 %y, i16 %mask) {
5854
define i32 @out32(i32 %x, i32 %y, i32 %mask) {
5955
; CHECK-I-LABEL: out32:
6056
; CHECK-I: # %bb.0:
57+
; CHECK-I-NEXT: xor a0, a0, a1
6158
; CHECK-I-NEXT: and a0, a0, a2
62-
; CHECK-I-NEXT: not a2, a2
63-
; CHECK-I-NEXT: and a1, a1, a2
64-
; CHECK-I-NEXT: or a0, a0, a1
59+
; CHECK-I-NEXT: xor a0, a0, a1
6560
; CHECK-I-NEXT: ret
6661
;
6762
; CHECK-ZBB-LABEL: out32:
@@ -80,22 +75,19 @@ define i32 @out32(i32 %x, i32 %y, i32 %mask) {
8075
define i64 @out64(i64 %x, i64 %y, i64 %mask) {
8176
; RV32I-LABEL: out64:
8277
; RV32I: # %bb.0:
83-
; RV32I-NEXT: and a1, a1, a5
78+
; RV32I-NEXT: xor a0, a0, a2
79+
; RV32I-NEXT: xor a1, a1, a3
8480
; RV32I-NEXT: and a0, a0, a4
85-
; RV32I-NEXT: not a4, a4
86-
; RV32I-NEXT: not a5, a5
87-
; RV32I-NEXT: and a3, a3, a5
88-
; RV32I-NEXT: and a2, a2, a4
89-
; RV32I-NEXT: or a0, a0, a2
90-
; RV32I-NEXT: or a1, a1, a3
81+
; RV32I-NEXT: and a1, a1, a5
82+
; RV32I-NEXT: xor a0, a0, a2
83+
; RV32I-NEXT: xor a1, a1, a3
9184
; RV32I-NEXT: ret
9285
;
9386
; RV64I-LABEL: out64:
9487
; RV64I: # %bb.0:
88+
; RV64I-NEXT: xor a0, a0, a1
9589
; RV64I-NEXT: and a0, a0, a2
96-
; RV64I-NEXT: not a2, a2
97-
; RV64I-NEXT: and a1, a1, a2
98-
; RV64I-NEXT: or a0, a0, a1
90+
; RV64I-NEXT: xor a0, a0, a1
9991
; RV64I-NEXT: ret
10092
;
10193
; RV32ZBB-LABEL: out64:
@@ -660,10 +652,9 @@ define i32 @in_constant_varx_mone_invmask(i32 %x, i32 %y, i32 %mask) {
660652
define i32 @out_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
661653
; CHECK-I-LABEL: out_constant_varx_42:
662654
; CHECK-I: # %bb.0:
663-
; CHECK-I-NEXT: not a1, a2
664-
; CHECK-I-NEXT: and a0, a2, a0
665-
; CHECK-I-NEXT: andi a1, a1, 42
666-
; CHECK-I-NEXT: or a0, a0, a1
655+
; CHECK-I-NEXT: xori a0, a0, 42
656+
; CHECK-I-NEXT: and a0, a0, a2
657+
; CHECK-I-NEXT: xori a0, a0, 42
667658
; CHECK-I-NEXT: ret
668659
;
669660
; CHECK-ZBB-LABEL: out_constant_varx_42:
@@ -704,10 +695,9 @@ define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) {
704695
define i32 @out_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) {
705696
; CHECK-I-LABEL: out_constant_varx_42_invmask:
706697
; CHECK-I: # %bb.0:
707-
; CHECK-I-NEXT: not a1, a2
708-
; CHECK-I-NEXT: and a0, a1, a0
709-
; CHECK-I-NEXT: andi a1, a2, 42
710-
; CHECK-I-NEXT: or a0, a0, a1
698+
; CHECK-I-NEXT: xori a1, a0, 42
699+
; CHECK-I-NEXT: and a1, a1, a2
700+
; CHECK-I-NEXT: xor a0, a1, a0
711701
; CHECK-I-NEXT: ret
712702
;
713703
; CHECK-ZBB-LABEL: out_constant_varx_42_invmask:
@@ -812,10 +802,9 @@ define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) {
812802
define i32 @out_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
813803
; CHECK-I-LABEL: out_constant_42_vary:
814804
; CHECK-I: # %bb.0:
815-
; CHECK-I-NEXT: not a0, a2
816-
; CHECK-I-NEXT: andi a2, a2, 42
817-
; CHECK-I-NEXT: and a0, a0, a1
818-
; CHECK-I-NEXT: or a0, a2, a0
805+
; CHECK-I-NEXT: xori a0, a1, 42
806+
; CHECK-I-NEXT: and a0, a0, a2
807+
; CHECK-I-NEXT: xor a0, a0, a1
819808
; CHECK-I-NEXT: ret
820809
;
821810
; CHECK-ZBB-LABEL: out_constant_42_vary:
@@ -855,10 +844,9 @@ define i32 @in_constant_42_vary(i32 %x, i32 %y, i32 %mask) {
855844
define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) {
856845
; CHECK-I-LABEL: out_constant_42_vary_invmask:
857846
; CHECK-I: # %bb.0:
858-
; CHECK-I-NEXT: not a0, a2
859-
; CHECK-I-NEXT: andi a0, a0, 42
860-
; CHECK-I-NEXT: and a1, a2, a1
861-
; CHECK-I-NEXT: or a0, a0, a1
847+
; CHECK-I-NEXT: xori a0, a1, 42
848+
; CHECK-I-NEXT: and a0, a0, a2
849+
; CHECK-I-NEXT: xori a0, a0, 42
862850
; CHECK-I-NEXT: ret
863851
;
864852
; CHECK-ZBB-LABEL: out_constant_42_vary_invmask:

0 commit comments

Comments
 (0)