Skip to content

Commit 6c5d5ce

Browse files
committed
Allow target to handle STRICT floating-point nodes
The ISD::STRICT_ nodes used to implement the constrained floating-point intrinsics are currently never passed to the target back-end, which makes it impossible to handle them correctly (e.g. mark instructions as depending on a floating-point status and control register, or mark instructions as possibly trapping). This patch allows the target to use setOperationAction to switch the action on ISD::STRICT_ nodes to Legal. If this is done, the SelectionDAG common code will stop converting the STRICT nodes to regular floating-point nodes, and instead pass the STRICT nodes to the target using normal SelectionDAG matching rules. To avoid having the back-end duplicate all the floating-point instruction patterns to handle both strict and non-strict variants, we make the MI codegen explicitly aware of the floating-point exceptions by introducing two new concepts: - A new MCID flag "mayRaiseFPException" that the target should set on any instruction that can possibly raise an FP exception according to the architecture definition. - A new MI flag FPExcept that CodeGen/SelectionDAG will set on any MI instruction resulting from expansion of any constrained FP intrinsic. Any MI instruction that is *both* marked as mayRaiseFPException *and* FPExcept then needs to be considered as raising exceptions by MI-level codegen (e.g. scheduling). Setting those two new flags is straightforward. The mayRaiseFPException flag is simply set via TableGen by marking all relevant instruction patterns in the .td files. The FPExcept flag is set in SDNodeFlags when creating the STRICT_ nodes in the SelectionDAG, and gets inherited in the MachineSDNode nodes created from it during instruction selection. The flag is then transferred to an MIFlag when creating the MI from the MachineSDNode. This is handled just like fast-math flags such as no-nans are handled today. This patch includes both common code changes required to implement the new features, and the SystemZ implementation.
Reviewed By: andrew.w.kaylor Differential Revision: https://reviews.llvm.org/D55506 llvm-svn: 362663
1 parent 2f94203 commit 6c5d5ce

File tree

82 files changed

+5788
-372
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+5788
-372
lines changed

llvm/include/llvm/CodeGen/MachineInstr.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,8 +102,10 @@ class MachineInstr
102102
// no unsigned wrap.
103103
NoSWrap = 1 << 12, // Instruction supports binary operator
104104
// no signed wrap.
105-
IsExact = 1 << 13 // Instruction supports division is
105+
IsExact = 1 << 13, // Instruction supports division is
106106
// known to be exact.
107+
FPExcept = 1 << 14, // Instruction may raise floating-point
108+
// exceptions.
107109
};
108110

109111
private:
@@ -830,6 +832,17 @@ class MachineInstr
830832
return mayLoad(Type) || mayStore(Type);
831833
}
832834

835+
/// Return true if this instruction could possibly raise a floating-point
836+
/// exception. This is the case if the instruction is a floating-point
837+
/// instruction that can in principle raise an exception, as indicated
838+
/// by the MCID::MayRaiseFPException property, *and* at the same time,
839+
/// the instruction is used in a context where we expect floating-point
840+
/// exceptions might be enabled, as indicated by the FPExcept MI flag.
841+
bool mayRaiseFPException() const {
842+
return hasProperty(MCID::MayRaiseFPException) &&
843+
getFlag(MachineInstr::MIFlag::FPExcept);
844+
}
845+
833846
//===--------------------------------------------------------------------===//
834847
// Flags that indicate whether an instruction can be modified by a method.
835848
//===--------------------------------------------------------------------===//

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -368,14 +368,21 @@ struct SDNodeFlags {
368368
bool ApproximateFuncs : 1;
369369
bool AllowReassociation : 1;
370370

371+
// We assume instructions do not raise floating-point exceptions by default,
372+
// and only those marked explicitly may do so. We could choose to represent
373+
// this via a positive "FPExcept" flag like on the MI level, but having a
374+
// negative "NoFPExcept" flag here (that defaults to true) makes the flag
375+
// intersection logic more straightforward.
376+
bool NoFPExcept : 1;
377+
371378
public:
372379
/// Default constructor turns off all optimization flags.
373380
SDNodeFlags()
374381
: AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false),
375382
Exact(false), NoNaNs(false), NoInfs(false),
376383
NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
377384
AllowContract(false), ApproximateFuncs(false),
378-
AllowReassociation(false) {}
385+
AllowReassociation(false), NoFPExcept(true) {}
379386

380387
/// Propagate the fast-math-flags from an IR FPMathOperator.
381388
void copyFMF(const FPMathOperator &FPMO) {
@@ -438,6 +445,10 @@ struct SDNodeFlags {
438445
setDefined();
439446
AllowReassociation = b;
440447
}
448+
void setFPExcept(bool b) {
449+
setDefined();
450+
NoFPExcept = !b;
451+
}
441452

442453
// These are accessors for each flag.
443454
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
@@ -451,9 +462,10 @@ struct SDNodeFlags {
451462
bool hasAllowContract() const { return AllowContract; }
452463
bool hasApproximateFuncs() const { return ApproximateFuncs; }
453464
bool hasAllowReassociation() const { return AllowReassociation; }
465+
bool hasFPExcept() const { return !NoFPExcept; }
454466

455467
bool isFast() const {
456-
return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs &&
468+
return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept &&
457469
AllowContract && ApproximateFuncs && AllowReassociation;
458470
}
459471

@@ -473,6 +485,7 @@ struct SDNodeFlags {
473485
AllowContract &= Flags.AllowContract;
474486
ApproximateFuncs &= Flags.ApproximateFuncs;
475487
AllowReassociation &= Flags.AllowReassociation;
488+
NoFPExcept &= Flags.NoFPExcept;
476489
}
477490
};
478491

llvm/include/llvm/MC/MCInstrDesc.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ enum Flag {
134134
FoldableAsLoad,
135135
MayLoad,
136136
MayStore,
137+
MayRaiseFPException,
137138
Predicable,
138139
NotDuplicable,
139140
UnmodeledSideEffects,
@@ -403,6 +404,11 @@ class MCInstrDesc {
403404
/// may not actually modify anything, for example.
404405
bool mayStore() const { return Flags & (1ULL << MCID::MayStore); }
405406

407+
/// Return true if this instruction may raise a floating-point exception.
408+
bool mayRaiseFPException() const {
409+
return Flags & (1ULL << MCID::MayRaiseFPException);
410+
}
411+
406412
/// Return true if this instruction has side
407413
/// effects that are not modeled by other flags. This does not return true
408414
/// for instructions whose effects are captured by:

llvm/include/llvm/Target/Target.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,7 @@ class Instruction {
456456
bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?
457457
bit mayLoad = ?; // Is it possible for this inst to read memory?
458458
bit mayStore = ?; // Is it possible for this inst to write memory?
459+
bit mayRaiseFPException = 0; // Can this raise a floating-point exception?
459460
bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote?
460461
bit isCommutable = 0; // Is this 3 operand instruction commutable?
461462
bit isTerminator = 0; // Is this part of the terminator for a basic block?

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,53 @@ def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
467467
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
468468
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
469469

470+
def strict_fadd : SDNode<"ISD::STRICT_FADD",
471+
SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
472+
def strict_fsub : SDNode<"ISD::STRICT_FSUB",
473+
SDTFPBinOp, [SDNPHasChain]>;
474+
def strict_fmul : SDNode<"ISD::STRICT_FMUL",
475+
SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
476+
def strict_fdiv : SDNode<"ISD::STRICT_FDIV",
477+
SDTFPBinOp, [SDNPHasChain]>;
478+
def strict_frem : SDNode<"ISD::STRICT_FREM",
479+
SDTFPBinOp, [SDNPHasChain]>;
480+
def strict_fma : SDNode<"ISD::STRICT_FMA",
481+
SDTFPTernaryOp, [SDNPHasChain]>;
482+
def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT",
483+
SDTFPUnaryOp, [SDNPHasChain]>;
484+
def strict_fsin : SDNode<"ISD::STRICT_FSIN",
485+
SDTFPUnaryOp, [SDNPHasChain]>;
486+
def strict_fcos : SDNode<"ISD::STRICT_FCOS",
487+
SDTFPUnaryOp, [SDNPHasChain]>;
488+
def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2",
489+
SDTFPUnaryOp, [SDNPHasChain]>;
490+
def strict_fpow : SDNode<"ISD::STRICT_FPOW",
491+
SDTFPBinOp, [SDNPHasChain]>;
492+
def strict_flog2 : SDNode<"ISD::STRICT_FLOG2",
493+
SDTFPUnaryOp, [SDNPHasChain]>;
494+
def strict_frint : SDNode<"ISD::STRICT_FRINT",
495+
SDTFPUnaryOp, [SDNPHasChain]>;
496+
def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT",
497+
SDTFPUnaryOp, [SDNPHasChain]>;
498+
def strict_fceil : SDNode<"ISD::STRICT_FCEIL",
499+
SDTFPUnaryOp, [SDNPHasChain]>;
500+
def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR",
501+
SDTFPUnaryOp, [SDNPHasChain]>;
502+
def strict_fround : SDNode<"ISD::STRICT_FROUND",
503+
SDTFPUnaryOp, [SDNPHasChain]>;
504+
def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC",
505+
SDTFPUnaryOp, [SDNPHasChain]>;
506+
def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM",
507+
SDTFPBinOp, [SDNPHasChain,
508+
SDNPCommutative, SDNPAssociative]>;
509+
def strict_fmaxnum : SDNode<"ISD::STRICT_FMAXNUM",
510+
SDTFPBinOp, [SDNPHasChain,
511+
SDNPCommutative, SDNPAssociative]>;
512+
def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND",
513+
SDTFPRoundOp, [SDNPHasChain]>;
514+
def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND",
515+
SDTFPExtendOp, [SDNPHasChain]>;
516+
470517
def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
471518
def select : SDNode<"ISD::SELECT" , SDTSelect>;
472519
def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>;
@@ -1177,6 +1224,74 @@ def setle : PatFrag<(ops node:$lhs, node:$rhs),
11771224
def setne : PatFrag<(ops node:$lhs, node:$rhs),
11781225
(setcc node:$lhs, node:$rhs, SETNE)>;
11791226

1227+
// Convenience fragments to match both strict and non-strict fp operations
1228+
def any_fadd : PatFrags<(ops node:$lhs, node:$rhs),
1229+
[(strict_fadd node:$lhs, node:$rhs),
1230+
(fadd node:$lhs, node:$rhs)]>;
1231+
def any_fsub : PatFrags<(ops node:$lhs, node:$rhs),
1232+
[(strict_fsub node:$lhs, node:$rhs),
1233+
(fsub node:$lhs, node:$rhs)]>;
1234+
def any_fmul : PatFrags<(ops node:$lhs, node:$rhs),
1235+
[(strict_fmul node:$lhs, node:$rhs),
1236+
(fmul node:$lhs, node:$rhs)]>;
1237+
def any_fdiv : PatFrags<(ops node:$lhs, node:$rhs),
1238+
[(strict_fdiv node:$lhs, node:$rhs),
1239+
(fdiv node:$lhs, node:$rhs)]>;
1240+
def any_frem : PatFrags<(ops node:$lhs, node:$rhs),
1241+
[(strict_frem node:$lhs, node:$rhs),
1242+
(frem node:$lhs, node:$rhs)]>;
1243+
def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3),
1244+
[(strict_fma node:$src1, node:$src2, node:$src3),
1245+
(fma node:$src1, node:$src2, node:$src3)]>;
1246+
def any_fsqrt : PatFrags<(ops node:$src),
1247+
[(strict_fsqrt node:$src),
1248+
(fsqrt node:$src)]>;
1249+
def any_fsin : PatFrags<(ops node:$src),
1250+
[(strict_fsin node:$src),
1251+
(fsin node:$src)]>;
1252+
def any_fcos : PatFrags<(ops node:$src),
1253+
[(strict_fcos node:$src),
1254+
(fcos node:$src)]>;
1255+
def any_fexp2 : PatFrags<(ops node:$src),
1256+
[(strict_fexp2 node:$src),
1257+
(fexp2 node:$src)]>;
1258+
def any_fpow : PatFrags<(ops node:$lhs, node:$rhs),
1259+
[(strict_fpow node:$lhs, node:$rhs),
1260+
(fpow node:$lhs, node:$rhs)]>;
1261+
def any_flog2 : PatFrags<(ops node:$src),
1262+
[(strict_flog2 node:$src),
1263+
(flog2 node:$src)]>;
1264+
def any_frint : PatFrags<(ops node:$src),
1265+
[(strict_frint node:$src),
1266+
(frint node:$src)]>;
1267+
def any_fnearbyint : PatFrags<(ops node:$src),
1268+
[(strict_fnearbyint node:$src),
1269+
(fnearbyint node:$src)]>;
1270+
def any_fceil : PatFrags<(ops node:$src),
1271+
[(strict_fceil node:$src),
1272+
(fceil node:$src)]>;
1273+
def any_ffloor : PatFrags<(ops node:$src),
1274+
[(strict_ffloor node:$src),
1275+
(ffloor node:$src)]>;
1276+
def any_fround : PatFrags<(ops node:$src),
1277+
[(strict_fround node:$src),
1278+
(fround node:$src)]>;
1279+
def any_ftrunc : PatFrags<(ops node:$src),
1280+
[(strict_ftrunc node:$src),
1281+
(ftrunc node:$src)]>;
1282+
def any_fmaxnum : PatFrags<(ops node:$lhs, node:$rhs),
1283+
[(strict_fmaxnum node:$lhs, node:$rhs),
1284+
(fmaxnum node:$lhs, node:$rhs)]>;
1285+
def any_fminnum : PatFrags<(ops node:$lhs, node:$rhs),
1286+
[(strict_fminnum node:$lhs, node:$rhs),
1287+
(fminnum node:$lhs, node:$rhs)]>;
1288+
def any_fpround : PatFrags<(ops node:$src),
1289+
[(strict_fpround node:$src),
1290+
(fpround node:$src)]>;
1291+
def any_fpextend : PatFrags<(ops node:$src),
1292+
[(strict_fpextend node:$src),
1293+
(fpextend node:$src)]>;
1294+
11801295
multiclass binary_atomic_op_ord<SDNode atomic_op> {
11811296
def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
11821297
(!cast<SDPatternOperator>(#NAME) node:$ptr, node:$val)> {

llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,6 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
7878
std::next(MI.getIterator()) == IntoMI.getIterator())
7979
return true;
8080

81-
return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
82-
empty(MI.implicit_operands());
81+
return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
82+
!MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands());
8383
}

llvm/lib/CodeGen/ImplicitNullChecks.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,8 @@ class ImplicitNullChecks : public MachineFunctionPass {
229229
} // end anonymous namespace
230230

231231
bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
232-
if (MI->isCall() || MI->hasUnmodeledSideEffects())
232+
if (MI->isCall() || MI->mayRaiseFPException() ||
233+
MI->hasUnmodeledSideEffects())
233234
return false;
234235
auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
235236
(void)IsRegMask;

llvm/lib/CodeGen/MIRParser/MILexer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
204204
.Case("nuw" , MIToken::kw_nuw)
205205
.Case("nsw" , MIToken::kw_nsw)
206206
.Case("exact" , MIToken::kw_exact)
207+
.Case("fpexcept", MIToken::kw_fpexcept)
207208
.Case("debug-location", MIToken::kw_debug_location)
208209
.Case("same_value", MIToken::kw_cfi_same_value)
209210
.Case("offset", MIToken::kw_cfi_offset)

llvm/lib/CodeGen/MIRParser/MILexer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ struct MIToken {
7373
kw_nuw,
7474
kw_nsw,
7575
kw_exact,
76+
kw_fpexcept,
7677
kw_debug_location,
7778
kw_cfi_same_value,
7879
kw_cfi_offset,

llvm/lib/CodeGen/MIRParser/MIParser.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1136,7 +1136,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
11361136
Token.is(MIToken::kw_reassoc) ||
11371137
Token.is(MIToken::kw_nuw) ||
11381138
Token.is(MIToken::kw_nsw) ||
1139-
Token.is(MIToken::kw_exact)) {
1139+
Token.is(MIToken::kw_exact) ||
1140+
Token.is(MIToken::kw_fpexcept)) {
11401141
// Mine frame and fast math flags
11411142
if (Token.is(MIToken::kw_frame_setup))
11421143
Flags |= MachineInstr::FrameSetup;
@@ -1162,6 +1163,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
11621163
Flags |= MachineInstr::NoSWrap;
11631164
if (Token.is(MIToken::kw_exact))
11641165
Flags |= MachineInstr::IsExact;
1166+
if (Token.is(MIToken::kw_fpexcept))
1167+
Flags |= MachineInstr::FPExcept;
11651168

11661169
lex();
11671170
}

llvm/lib/CodeGen/MIRPrinter.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,8 @@ void MIPrinter::print(const MachineInstr &MI) {
713713
OS << "nsw ";
714714
if (MI.getFlag(MachineInstr::IsExact))
715715
OS << "exact ";
716+
if (MI.getFlag(MachineInstr::FPExcept))
717+
OS << "fpexcept ";
716718

717719
OS << TII->getName(MI.getOpcode());
718720
if (I < E)

llvm/lib/CodeGen/MachineCSE.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
382382

383383
// Ignore stuff that we obviously can't move.
384384
if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
385-
MI->hasUnmodeledSideEffects())
385+
MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
386386
return false;
387387

388388
if (MI->mayLoad()) {

llvm/lib/CodeGen/MachineInstr.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1178,7 +1178,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
11781178
}
11791179

11801180
if (isPosition() || isDebugInstr() || isTerminator() ||
1181-
hasUnmodeledSideEffects())
1181+
mayRaiseFPException() || hasUnmodeledSideEffects())
11821182
return false;
11831183

11841184
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1544,6 +1544,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
15441544
OS << "nsw ";
15451545
if (getFlag(MachineInstr::IsExact))
15461546
OS << "exact ";
1547+
if (getFlag(MachineInstr::FPExcept))
1548+
OS << "fpexcept ";
15471549

15481550
// Print the opcode name.
15491551
if (TII)

llvm/lib/CodeGen/MachinePipeliner.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,8 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
579579
/// Return true if the instruction causes a chain between memory
580580
/// references before and after it.
581581
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
582-
return MI.isCall() || MI.hasUnmodeledSideEffects() ||
582+
return MI.isCall() || MI.mayRaiseFPException() ||
583+
MI.hasUnmodeledSideEffects() ||
583584
(MI.hasOrderedMemoryRef() &&
584585
(!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
585586
}
@@ -3238,6 +3239,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
32383239

32393240
// Assume ordered loads and stores may have a loop carried dependence.
32403241
if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3242+
SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
32413243
SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
32423244
return true;
32433245

llvm/lib/CodeGen/PeepholeOptimizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1825,7 +1825,7 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
18251825
assert(Def->isBitcast() && "Invalid definition");
18261826

18271827
// Bail if there are effects that a plain copy will not expose.
1828-
if (Def->hasUnmodeledSideEffects())
1828+
if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
18291829
return ValueTrackerResult();
18301830

18311831
// Bitcasts with more than one def are not supported.

llvm/lib/CodeGen/ScheduleDAGInstrs.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
712712
AAForDep = UseAA ? AA : nullptr;
713713

714714
BarrierChain = nullptr;
715+
SUnit *FPBarrierChain = nullptr;
715716

716717
this->TrackLaneMasks = TrackLaneMasks;
717718
MISUnitMap.clear();
@@ -871,9 +872,21 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
871872
addBarrierChain(NonAliasStores);
872873
addBarrierChain(NonAliasLoads);
873874

875+
// Add dependency against previous FP barrier and reset FP barrier.
876+
if (FPBarrierChain)
877+
FPBarrierChain->addPredBarrier(BarrierChain);
878+
FPBarrierChain = BarrierChain;
879+
874880
continue;
875881
}
876882

883+
// Instructions that may raise FP exceptions depend on each other.
884+
if (MI.mayRaiseFPException()) {
885+
if (FPBarrierChain)
886+
FPBarrierChain->addPredBarrier(SU);
887+
FPBarrierChain = SU;
888+
}
889+
877890
// If it's not a store or a variant load, we're done.
878891
if (!MI.mayStore() &&
879892
!(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))

llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
883883

884884
if (Flags.hasExact())
885885
MI->setFlag(MachineInstr::MIFlag::IsExact);
886+
887+
if (Flags.hasFPExcept())
888+
MI->setFlag(MachineInstr::MIFlag::FPExcept);
886889
}
887890

888891
// Emit all of the actual operands of this instruction, adding them to the

0 commit comments

Comments
 (0)