Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 80da74b

Browse files
committed
[X86] Remove SDIVREM8_SEXT_HREG/UDIVREM8_ZEXT_HREG and their associated DAG combine and target bits support. Use a post isel peephole instead.
Summary: These nodes exist to overcome an isel problem where we can generate a zero extend of the AH register followed by an extract subreg, and another zero extend. The first zero extend exists to avoid a partial register update copying the AH register into the low 8 bits. The second zero extend exists if the user wanted the remainder zero extended. To make this work we had a DAG combine to morph the DIVREM opcode to a special opcode that included the extend. But then we had to add the new node to computeKnownBits and computeNumSignBits to process the extension portion. This patch instead removes all of that and adds a late peephole to detect the two extends.
Reviewers: RKSimon, spatel
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D53449
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@344874 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 75cb0ad commit 80da74b

File tree

3 files changed

+54
-73
lines changed

3 files changed

+54
-73
lines changed

lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,8 @@ namespace {
470470
MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
471471
const SDLoc &dl, MVT VT, SDNode *Node,
472472
SDValue &InFlag);
473+
474+
bool tryOptimizeRem8Extend(SDNode *N);
473475
};
474476
}
475477

@@ -841,22 +843,63 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
841843
}
842844
}
843845

846+
// Look for a redundant movzx/movsx that can occur after an 8-bit divrem.
//
// Pattern matched (N must already be a machine node, since getMachineOpcode()
// is called unconditionally):
//   N   = MOVZX32rr8 / MOVSX32rr8 / MOVSX64rr8
//   N0  = EXTRACT_SUBREG ..., sub_8bit        (operand 0 of N)
//   N00 = MOVZX32rr8_NOREX (movzx case) or
//         MOVSX32rr8_NOREX (both movsx cases) (operand 0 of N0)
//
// On a match, every use of N is redirected to N00 (wrapped in a MOVSX64rr32
// when N was the 8->64 sign extend) and true is returned. Otherwise nothing
// is changed and false is returned. This function does not remove N itself;
// the caller visible in this diff runs RemoveDeadNodes() once a change has
// been made.
847+
bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) {
848+
unsigned Opc = N->getMachineOpcode();
849+
if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 &&
850+
Opc != X86::MOVSX64rr8)
851+
return false;
852+
853+
SDValue N0 = N->getOperand(0);
854+
855+
// We need to be extracting the low 8 bits (sub_8bit) of an extend.
856+
if (!N0.isMachineOpcode() ||
857+
N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG ||
858+
N0.getConstantOperandVal(1) != X86::sub_8bit)
859+
return false;
860+
861+
// We're looking for either a movsx or movzx to match the original opcode.
// Note that MOVSX64rr8 also expects the 32-bit MOVSX32rr8_NOREX here.
862+
unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX
863+
: X86::MOVSX32rr8_NOREX;
864+
SDValue N00 = N0.getOperand(0);
865+
if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc)
866+
return false;
867+
868+
if (Opc == X86::MOVSX64rr8) {
869+
// The original node sign extended from 8 to 64 bits, so we still need to
870+
// go from 32 to 64 on top of the inner 32-bit extend.
871+
MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N),
872+
MVT::i64, N00);
873+
ReplaceUses(N, Extend);
874+
} else {
875+
// Ok, we can drop this extend and just use the original (inner) extend.
876+
ReplaceUses(N, N00.getNode());
877+
}
878+
879+
return true;
880+
}
844881

845882
void X86DAGToDAGISel::PostprocessISelDAG() {
846883
// Skip peepholes at -O0.
847884
if (TM.getOptLevel() == CodeGenOpt::None)
848885
return;
849886

850-
// Attempt to remove vectors moves that were inserted to zero upper bits.
851-
852887
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
853888

889+
bool MadeChange = false;
854890
while (Position != CurDAG->allnodes_begin()) {
855891
SDNode *N = &*--Position;
856892
// Skip dead nodes and any non-machine opcodes.
857893
if (N->use_empty() || !N->isMachineOpcode())
858894
continue;
859895

896+
if (tryOptimizeRem8Extend(N)) {
897+
MadeChange = true;
898+
continue;
899+
}
900+
901+
// Attempt to remove vector moves that were inserted to zero upper bits.
902+
860903
if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG)
861904
continue;
862905

@@ -905,11 +948,11 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
905948
// Producing instruction is another vector instruction. We can drop the
906949
// move.
907950
CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2));
908-
909-
// If the move is now dead, delete it.
910-
if (Move.getNode()->use_empty())
911-
CurDAG->RemoveDeadNode(Move.getNode());
951+
MadeChange = true;
912952
}
953+
954+
if (MadeChange)
955+
CurDAG->RemoveDeadNodes();
913956
}
914957

915958

@@ -3370,15 +3413,12 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
33703413
}
33713414

33723415
case ISD::SDIVREM:
3373-
case ISD::UDIVREM:
3374-
case X86ISD::SDIVREM8_SEXT_HREG:
3375-
case X86ISD::UDIVREM8_ZEXT_HREG: {
3416+
case ISD::UDIVREM: {
33763417
SDValue N0 = Node->getOperand(0);
33773418
SDValue N1 = Node->getOperand(1);
33783419

33793420
unsigned Opc, MOpc;
3380-
bool isSigned = (Opcode == ISD::SDIVREM ||
3381-
Opcode == X86ISD::SDIVREM8_SEXT_HREG);
3421+
bool isSigned = Opcode == ISD::SDIVREM;
33823422
if (!isSigned) {
33833423
switch (NVT.SimpleTy) {
33843424
default: llvm_unreachable("Unsupported VT!");
@@ -3517,13 +3557,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
35173557
SDValue Result(RNode, 0);
35183558
InFlag = SDValue(RNode, 1);
35193559

3520-
if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
3521-
Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
3522-
assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!");
3523-
} else {
3524-
Result =
3525-
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
3526-
}
3560+
Result =
3561+
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
3562+
35273563
ReplaceUses(SDValue(Node, 1), Result);
35283564
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
35293565
dbgs() << '\n');

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -26638,8 +26638,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
2663826638
case X86ISD::UMUL: return "X86ISD::UMUL";
2663926639
case X86ISD::SMUL8: return "X86ISD::SMUL8";
2664026640
case X86ISD::UMUL8: return "X86ISD::UMUL8";
26641-
case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG";
26642-
case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG";
2664326641
case X86ISD::INC: return "X86ISD::INC";
2664426642
case X86ISD::DEC: return "X86ISD::DEC";
2664526643
case X86ISD::OR: return "X86ISD::OR";
@@ -29583,13 +29581,6 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2958329581
Known.Zero &= Known2.Zero;
2958429582
break;
2958529583
}
29586-
case X86ISD::UDIVREM8_ZEXT_HREG:
29587-
// TODO: Support more than just the zero extended bits?
29588-
if (Op.getResNo() != 1)
29589-
break;
29590-
// The remainder is zero extended.
29591-
Known.Zero.setBitsFrom(8);
29592-
break;
2959329584
}
2959429585

2959529586
// Handle target shuffles.
@@ -29720,12 +29711,6 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
2972029711
unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
2972129712
return std::min(Tmp0, Tmp1);
2972229713
}
29723-
case X86ISD::SDIVREM8_SEXT_HREG:
29724-
// TODO: Support more than just the sign extended bits?
29725-
if (Op.getResNo() != 1)
29726-
break;
29727-
// The remainder is sign extended.
29728-
return VTBits - 7;
2972929714
}
2973029715

2973129716
// Fallback case.
@@ -38242,36 +38227,6 @@ static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
3824238227
return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags);
3824338228
}
3824438229

38245-
/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) ->
38246-
/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y)
38247-
/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly
38248-
/// extends from AH (which we otherwise need to do contortions to access).
38249-
static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) {
38250-
SDValue N0 = N->getOperand(0);
38251-
auto OpcodeN = N->getOpcode();
38252-
auto OpcodeN0 = N0.getOpcode();
38253-
if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) ||
38254-
(OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM)))
38255-
return SDValue();
38256-
38257-
EVT VT = N->getValueType(0);
38258-
EVT InVT = N0.getValueType();
38259-
if (N0.getResNo() != 1 || InVT != MVT::i8 ||
38260-
!(VT == MVT::i32 || VT == MVT::i64))
38261-
return SDValue();
38262-
38263-
SDVTList NodeTys = DAG.getVTList(MVT::i8, MVT::i32);
38264-
auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG
38265-
: X86ISD::UDIVREM8_ZEXT_HREG;
38266-
SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0),
38267-
N0.getOperand(1));
38268-
DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
38269-
// If this was a 64-bit extend, complete it.
38270-
if (VT == MVT::i64)
38271-
return DAG.getNode(OpcodeN, SDLoc(N), VT, R.getValue(1));
38272-
return R.getValue(1);
38273-
}
38274-
3827538230
// If we face {ANY,SIGN,ZERO}_EXTEND that is applied to a CMOV with constant
3827638231
// operands and the result of CMOV is not used anywhere else - promote CMOV
3827738232
// itself instead of promoting its result. This could be beneficial, because:
@@ -38572,9 +38527,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
3857238527
EVT InVT = N0.getValueType();
3857338528
SDLoc DL(N);
3857438529

38575-
if (SDValue DivRem8 = getDivRem8(N, DAG))
38576-
return DivRem8;
38577-
3857838530
if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
3857938531
return NewCMov;
3858038532

@@ -38775,9 +38727,6 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
3877538727
if (SDValue R = WidenMaskArithmetic(N, DAG, Subtarget))
3877638728
return R;
3877738729

38778-
if (SDValue DivRem8 = getDivRem8(N, DAG))
38779-
return DivRem8;
38780-
3878138730
if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
3878238731
return NewAdd;
3878338732

lib/Target/X86/X86ISelLowering.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -361,10 +361,6 @@ namespace llvm {
361361
// 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS.
362362
SMUL8, UMUL8,
363363

364-
// 8-bit divrem that zero-extend the high result (AH).
365-
UDIVREM8_ZEXT_HREG,
366-
SDIVREM8_SEXT_HREG,
367-
368364
// X86-specific multiply by immediate.
369365
MUL_IMM,
370366

0 commit comments

Comments
 (0)