Skip to content

Commit c9d6c84

Browse files
committed
[DAGCombiner] Attempt to fold 'add' nodes to funnel-shift or rotate
1 parent fac9f83 commit c9d6c84

File tree

2 files changed

+57
-71
lines changed

2 files changed

+57
-71
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -649,14 +649,15 @@ namespace {
649649
bool DemandHighBits = true);
650650
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
651651
SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
652-
SDValue InnerPos, SDValue InnerNeg, bool HasPos,
653-
unsigned PosOpcode, unsigned NegOpcode,
654-
const SDLoc &DL);
652+
SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
653+
bool HasPos, unsigned PosOpcode,
654+
unsigned NegOpcode, const SDLoc &DL);
655655
SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
656-
SDValue InnerPos, SDValue InnerNeg, bool HasPos,
657-
unsigned PosOpcode, unsigned NegOpcode,
658-
const SDLoc &DL);
659-
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
656+
SDValue InnerPos, SDValue InnerNeg, bool FromAdd,
657+
bool HasPos, unsigned PosOpcode,
658+
unsigned NegOpcode, const SDLoc &DL);
659+
SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
660+
bool FromAdd);
660661
SDValue MatchLoadCombine(SDNode *N);
661662
SDValue mergeTruncStores(StoreSDNode *N);
662663
SDValue reduceLoadWidth(SDNode *N);
@@ -2982,6 +2983,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
29822983
if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
29832984
return V;
29842985

2986+
if (SDValue V = MatchRotate(N0, N1, SDLoc(N), /*FromAdd=*/true))
2987+
return V;
2988+
29852989
// Try to match AVGFLOOR fixed-width pattern
29862990
if (SDValue V = foldAddToAvg(N, DL))
29872991
return V;
@@ -8151,7 +8155,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
81518155
return V;
81528156

81538157
// See if this is some rotate idiom.
8154-
if (SDValue Rot = MatchRotate(N0, N1, DL))
8158+
if (SDValue Rot = MatchRotate(N0, N1, DL, /*FromAdd=*/false))
81558159
return Rot;
81568160

81578161
if (SDValue Load = MatchLoadCombine(N))
@@ -8340,7 +8344,7 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
83408344
// The IsRotate flag should be set when the LHS of both shifts is the same.
83418345
// Otherwise if matching a general funnel shift, it should be clear.
83428346
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8343-
SelectionDAG &DAG, bool IsRotate) {
8347+
SelectionDAG &DAG, bool IsRotate, bool FromAdd) {
83448348
const auto &TLI = DAG.getTargetLoweringInfo();
83458349
// If EltSize is a power of 2 then:
83468350
//
@@ -8379,7 +8383,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
83798383
// NOTE: We can only do this when matching operations which won't modify the
83808384
// least Log2(EltSize) significant bits and not a general funnel shift.
83818385
unsigned MaskLoBits = 0;
8382-
if (IsRotate && isPowerOf2_64(EltSize)) {
8386+
if (IsRotate && !FromAdd && isPowerOf2_64(EltSize)) {
83838387
unsigned Bits = Log2_64(EltSize);
83848388
unsigned NegBits = Neg.getScalarValueSizeInBits();
83858389
if (NegBits >= Bits) {
@@ -8462,9 +8466,9 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
84628466
// Neg with outer conversions stripped away.
84638467
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
84648468
SDValue Neg, SDValue InnerPos,
8465-
SDValue InnerNeg, bool HasPos,
8466-
unsigned PosOpcode, unsigned NegOpcode,
8467-
const SDLoc &DL) {
8469+
SDValue InnerNeg, bool FromAdd,
8470+
bool HasPos, unsigned PosOpcode,
8471+
unsigned NegOpcode, const SDLoc &DL) {
84688472
// fold (or (shl x, (*ext y)),
84698473
// (srl x, (*ext (sub 32, y)))) ->
84708474
// (rotl x, y) or (rotr x, (sub 32, y))
@@ -8474,10 +8478,9 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
84748478
// (rotr x, y) or (rotl x, (sub 32, y))
84758479
EVT VT = Shifted.getValueType();
84768480
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8477-
/*IsRotate*/ true)) {
8481+
/*IsRotate*/ true, FromAdd))
84788482
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
84798483
HasPos ? Pos : Neg);
8480-
}
84818484

84828485
return SDValue();
84838486
}
@@ -8490,9 +8493,9 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
84908493
// TODO: Merge with MatchRotatePosNeg.
84918494
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
84928495
SDValue Neg, SDValue InnerPos,
8493-
SDValue InnerNeg, bool HasPos,
8494-
unsigned PosOpcode, unsigned NegOpcode,
8495-
const SDLoc &DL) {
8496+
SDValue InnerNeg, bool FromAdd,
8497+
bool HasPos, unsigned PosOpcode,
8498+
unsigned NegOpcode, const SDLoc &DL) {
84968499
EVT VT = N0.getValueType();
84978500
unsigned EltBits = VT.getScalarSizeInBits();
84988501

@@ -8503,10 +8506,10 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
85038506
// fold (or (shl x0, (*ext (sub 32, y))),
85048507
// (srl x1, (*ext y))) ->
85058508
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8506-
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8509+
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1,
8510+
FromAdd))
85078511
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
85088512
HasPos ? Pos : Neg);
8509-
}
85108513

85118514
// Matching the shift+xor cases, we can't easily use the xor'd shift amount
85128515
// so for now just use the PosOpcode case if it's legal.
@@ -8545,11 +8548,12 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
85458548
return SDValue();
85468549
}
85478550

8548-
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8549-
// idioms for rotate, and if the target supports rotation instructions, generate
8550-
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8551-
// with different shifted sources.
8552-
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8551+
// MatchRotate - Handle an 'or' or 'add' of two operands. If this is one of the
8552+
// many idioms for rotate, and if the target supports rotation instructions,
8553+
// generate a rot[lr]. This also matches funnel shift patterns, similar to
8554+
// rotation but with different shifted sources.
8555+
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL,
8556+
bool FromAdd) {
85538557
EVT VT = LHS.getValueType();
85548558

85558559
// The target must have at least one rotate/funnel flavor.
@@ -8576,9 +8580,9 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
85768580
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
85778581
LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
85788582
assert(LHS.getValueType() == RHS.getValueType());
8579-
if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8583+
if (SDValue Rot =
8584+
MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL, FromAdd))
85808585
return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8581-
}
85828586
}
85838587

85848588
// Match "(X shl/srl V1) & V2" where V2 may not be present.
@@ -8758,29 +8762,25 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
87588762
}
87598763

87608764
if (IsRotate && (HasROTL || HasROTR)) {
8761-
SDValue TryL =
8762-
MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8763-
RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8764-
if (TryL)
8765+
if (SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8766+
LExtOp0, RExtOp0, FromAdd, HasROTL,
8767+
ISD::ROTL, ISD::ROTR, DL))
87658768
return TryL;
87668769

8767-
SDValue TryR =
8768-
MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8769-
LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8770-
if (TryR)
8770+
if (SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8771+
RExtOp0, LExtOp0, FromAdd, HasROTR,
8772+
ISD::ROTR, ISD::ROTL, DL))
87718773
return TryR;
87728774
}
87738775

8774-
SDValue TryL =
8775-
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8776-
LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8777-
if (TryL)
8776+
if (SDValue TryL = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt,
8777+
RHSShiftAmt, LExtOp0, RExtOp0, FromAdd,
8778+
HasFSHL, ISD::FSHL, ISD::FSHR, DL))
87788779
return TryL;
87798780

8780-
SDValue TryR =
8781-
MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8782-
RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8783-
if (TryR)
8781+
if (SDValue TryR = MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt,
8782+
LHSShiftAmt, RExtOp0, LExtOp0, FromAdd,
8783+
HasFSHR, ISD::FSHR, ISD::FSHL, DL))
87848784
return TryR;
87858785

87868786
return SDValue();

llvm/test/CodeGen/NVPTX/add-rotate.ll

Lines changed: 14 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,13 @@ define i32 @test_rotr(i32 %x) {
3838
define i32 @test_rotl_var(i32 %x, i32 %y) {
3939
; CHECK-LABEL: test_rotl_var(
4040
; CHECK: {
41-
; CHECK-NEXT: .reg .b32 %r<7>;
41+
; CHECK-NEXT: .reg .b32 %r<4>;
4242
; CHECK-EMPTY:
4343
; CHECK-NEXT: // %bb.0:
4444
; CHECK-NEXT: ld.param.u32 %r1, [test_rotl_var_param_0];
4545
; CHECK-NEXT: ld.param.u32 %r2, [test_rotl_var_param_1];
46-
; CHECK-NEXT: shl.b32 %r3, %r1, %r2;
47-
; CHECK-NEXT: sub.s32 %r4, 32, %r2;
48-
; CHECK-NEXT: shr.u32 %r5, %r1, %r4;
49-
; CHECK-NEXT: add.s32 %r6, %r3, %r5;
50-
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
46+
; CHECK-NEXT: shf.l.wrap.b32 %r3, %r1, %r1, %r2;
47+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
5148
; CHECK-NEXT: ret;
5249
%shl = shl i32 %x, %y
5350
%sub = sub i32 32, %y
@@ -59,16 +56,13 @@ define i32 @test_rotl_var(i32 %x, i32 %y) {
5956
define i32 @test_rotr_var(i32 %x, i32 %y) {
6057
; CHECK-LABEL: test_rotr_var(
6158
; CHECK: {
62-
; CHECK-NEXT: .reg .b32 %r<7>;
59+
; CHECK-NEXT: .reg .b32 %r<4>;
6360
; CHECK-EMPTY:
6461
; CHECK-NEXT: // %bb.0:
6562
; CHECK-NEXT: ld.param.u32 %r1, [test_rotr_var_param_0];
6663
; CHECK-NEXT: ld.param.u32 %r2, [test_rotr_var_param_1];
67-
; CHECK-NEXT: shr.u32 %r3, %r1, %r2;
68-
; CHECK-NEXT: sub.s32 %r4, 32, %r2;
69-
; CHECK-NEXT: shl.b32 %r5, %r1, %r4;
70-
; CHECK-NEXT: add.s32 %r6, %r3, %r5;
71-
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
64+
; CHECK-NEXT: shf.r.wrap.b32 %r3, %r1, %r1, %r2;
65+
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
7266
; CHECK-NEXT: ret;
7367
%shr = lshr i32 %x, %y
7468
%sub = sub i32 32, %y
@@ -126,18 +120,14 @@ define i32 @test_rotr_var_and(i32 %x, i32 %y) {
126120
define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) {
127121
; CHECK-LABEL: test_fshl_special_case(
128122
; CHECK: {
129-
; CHECK-NEXT: .reg .b32 %r<9>;
123+
; CHECK-NEXT: .reg .b32 %r<5>;
130124
; CHECK-EMPTY:
131125
; CHECK-NEXT: // %bb.0:
132126
; CHECK-NEXT: ld.param.u32 %r1, [test_fshl_special_case_param_0];
133-
; CHECK-NEXT: ld.param.u32 %r2, [test_fshl_special_case_param_2];
134-
; CHECK-NEXT: shl.b32 %r3, %r1, %r2;
135-
; CHECK-NEXT: ld.param.u32 %r4, [test_fshl_special_case_param_1];
136-
; CHECK-NEXT: shr.u32 %r5, %r4, 1;
137-
; CHECK-NEXT: xor.b32 %r6, %r2, 31;
138-
; CHECK-NEXT: shr.u32 %r7, %r5, %r6;
139-
; CHECK-NEXT: add.s32 %r8, %r3, %r7;
140-
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
127+
; CHECK-NEXT: ld.param.u32 %r2, [test_fshl_special_case_param_1];
128+
; CHECK-NEXT: ld.param.u32 %r3, [test_fshl_special_case_param_2];
129+
; CHECK-NEXT: shf.l.wrap.b32 %r4, %r2, %r1, %r3;
130+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
141131
; CHECK-NEXT: ret;
142132
%shl = shl i32 %x0, %y
143133
%srli = lshr i32 %x1, 1
@@ -150,18 +140,14 @@ define i32 @test_fshl_special_case(i32 %x0, i32 %x1, i32 %y) {
150140
define i32 @test_fshr_special_case(i32 %x0, i32 %x1, i32 %y) {
151141
; CHECK-LABEL: test_fshr_special_case(
152142
; CHECK: {
153-
; CHECK-NEXT: .reg .b32 %r<9>;
143+
; CHECK-NEXT: .reg .b32 %r<5>;
154144
; CHECK-EMPTY:
155145
; CHECK-NEXT: // %bb.0:
156146
; CHECK-NEXT: ld.param.u32 %r1, [test_fshr_special_case_param_0];
157147
; CHECK-NEXT: ld.param.u32 %r2, [test_fshr_special_case_param_1];
158148
; CHECK-NEXT: ld.param.u32 %r3, [test_fshr_special_case_param_2];
159-
; CHECK-NEXT: shr.u32 %r4, %r2, %r3;
160-
; CHECK-NEXT: shl.b32 %r5, %r1, 1;
161-
; CHECK-NEXT: xor.b32 %r6, %r3, 31;
162-
; CHECK-NEXT: shl.b32 %r7, %r5, %r6;
163-
; CHECK-NEXT: add.s32 %r8, %r4, %r7;
164-
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
149+
; CHECK-NEXT: shf.r.wrap.b32 %r4, %r2, %r1, %r3;
150+
; CHECK-NEXT: st.param.b32 [func_retval0], %r4;
165151
; CHECK-NEXT: ret;
166152
%shl = lshr i32 %x1, %y
167153
%srli = shl i32 %x0, 1

0 commit comments

Comments
 (0)