swiftlang
diff --git a/‎llvm/lib/Target/X86/CMakeLists.txt
Lines changed: 1 addition & 0 deletions b/‎llvm/lib/Target/X86/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/lib/Target/X86/X86.h
Lines changed: 4 additions & 0 deletions b/‎llvm/lib/Target/X86/X86.h
Lines changed: 4 additions & 0 deletions
diff --git a/‎llvm/lib/Target/X86/X86FloatingPoint.cpp
Lines changed: 3 additions & 0 deletions b/‎llvm/lib/Target/X86/X86FloatingPoint.cpp
Lines changed: 3 additions & 0 deletions
diff --git a/‎llvm/lib/Target/X86/X86InsertWait.cpp
Lines changed: 151 additions & 0 deletions b/‎llvm/lib/Target/X86/X86InsertWait.cpp
Lines changed: 151 additions & 0 deletions
diff --git a/‎llvm/lib/Target/X86/X86InstrFPStack.td
Lines changed: 4 additions & 3 deletions b/‎llvm/lib/Target/X86/X86InstrFPStack.td
Lines changed: 4 additions & 3 deletions
diff --git a/‎llvm/lib/Target/X86/X86TargetMachine.cpp
Lines changed: 1 addition & 0 deletions b/‎llvm/lib/Target/X86/X86TargetMachine.cpp
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/test/CodeGen/X86/O0-pipeline.ll
Lines changed: 1 addition & 0 deletions b/‎llvm/test/CodeGen/X86/O0-pipeline.ll
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/test/CodeGen/X86/O3-pipeline.ll
Lines changed: 1 addition & 0 deletions b/‎llvm/test/CodeGen/X86/O3-pipeline.ll
Lines changed: 1 addition & 0 deletions
diff --git a/‎llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
Lines changed: 4 additions & 0 deletions b/‎llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
Lines changed: 4 additions & 0 deletions
@@ -69,6 +69,7 @@ set(sources
   X86VZeroUpper.cpp
   X86WinAllocaExpander.cpp
   X86WinEHState.cpp
+  X86InsertWait.cpp
   )
 
 add_llvm_target(X86CodeGen ${sources})
 
@@ -129,6 +129,10 @@ FunctionPass *createX86DiscriminateMemOpsPass();
 /// This pass applies profiling information to insert cache prefetches.
 FunctionPass *createX86InsertPrefetchPass();
 
+/// This pass inserts a wait instruction after X87 instructions which could
+/// raise FP exceptions when strict-fp is enabled.
+FunctionPass *createX86InsertX87waitPass();
+
 InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
                                                   X86Subtarget &,
                                                   X86RegisterBankInfo &);
 
@@ -1364,6 +1364,9 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
   MBB->remove(&*I++);
   I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
 
+  if (!MI.mayRaiseFPException())
+    I->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
   // If both operands are killed, pop one off of the stack in addition to
   // overwriting the other one.
   if (KillsOp0 && KillsOp1 && Op0 != Op1) {
 
@@ -0,0 +1,151 @@
+//-  X86InsertWait.cpp - Strict-FP: Insert wait after X87 instructions -------//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts x86 wait instructions after
+// X87 instructions when strict floating point is enabled.
+//
+// The logic to insert a wait instruction after an X87 instruction is as below:
+// 1. If the X87 instruction neither raises an FP exception nor is a load/store
+//    instruction, or is an x87 control instruction, don't insert wait.
+// 2. If the instruction following the X87 instruction is itself an X87
+//    instruction that synchronizes X87 exceptions, don't insert wait.
+// 3. For other situations, insert wait instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-insert-wait"
+
+namespace {
+
+class WaitInsert : public MachineFunctionPass {
+public:
+  static char ID; // Handed to the MachineFunctionPass constructor below.
+
+  WaitInsert() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "X86 insert wait instruction"; // Name checked by pipeline tests.
+  }
+
+private:
+  const TargetInstrInfo *TII; // Instruction info; set in runOnMachineFunction.
+};
+
+} // namespace
+
+char WaitInsert::ID = 0; // Out-of-class definition of the static pass ID.
+
+FunctionPass *llvm::createX86InsertX87waitPass() { return new WaitInsert(); }
+
+/// Return true if Reg is an X87 register: FPCW, FPSW, or within ST0..ST7.
+static bool isX87Reg(unsigned Reg) {
+  return (Reg == X86::FPCW || Reg == X86::FPSW ||
+          (Reg >= X86::ST0 && Reg <= X86::ST7));
+}
+
+/// Return true if any register operand of MI is an X87 register.
+static bool isX87Instruction(MachineInstr &MI) {
+  for (const MachineOperand &MO : MI.operands()) {
+    if (!MO.isReg())
+      continue;
+    if (isX87Reg(MO.getReg()))
+      return true;
+  }
+  return false;
+}
+
+static bool isX87ControlInstruction(MachineInstr &MI) {
+  switch (MI.getOpcode()) { // Decide from the opcode alone.
+  case X86::FNINIT:
+  case X86::FLDCW16m:
+  case X86::FNSTCW16m:
+  case X86::FNSTSW16r:
+  case X86::FNSTSWm:
+  case X86::FNCLEX:
+  case X86::FLDENVm:
+  case X86::FSTENVm:
+  case X86::FRSTORm:
+  case X86::FSAVEm:
+  case X86::FINCSTP:
+  case X86::FDECSTP:
+  case X86::FFREE:
+  case X86::FFREEP:
+  case X86::FNOP:
+  case X86::WAIT:
+    return true; // One of the x87 control opcodes listed above.
+  default:
+    return false; // Every other opcode is not a control instruction.
+  }
+}
+
+static bool isX87NonWaitingControlInstruction(MachineInstr &MI) {
+  // A few special control instructions don't perform a wait operation.
+  switch (MI.getOpcode()) {
+  case X86::FNINIT:
+  case X86::FNSTSW16r:
+  case X86::FNSTSWm:
+  case X86::FNSTCW16m:
+  case X86::FNCLEX:
+    return true; // One of the non-waiting opcodes listed above.
+  default:
+    return false; // Any other opcode is not in the non-waiting list.
+  }
+}
+
+bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
+  if (!MF.getFunction().hasFnAttribute(Attribute::StrictFP))
+    return false; // Nothing to do unless the function is strictfp.
+
+  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+  TII = ST.getInstrInfo();
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
+      // Skip non-X87 instructions.
+      if (!isX87Instruction(*MI))
+        continue;
+      // If the instruction neither may raise an FP exception nor is a
+      // load/store instruction, or the instruction is an x87 control
+      // instruction, do not insert wait.
+      if (!(MI->mayRaiseFPException() || MI->mayLoadOrStore()) ||
+          isX87ControlInstruction(*MI))
+        continue;
+      // If the following instruction is an X87 instruction and isn't an X87
+      // non-waiting control instruction, we can omit inserting the wait.
+      MachineBasicBlock::iterator AfterMI = std::next(MI);
+      if (AfterMI != MBB.end() && isX87Instruction(*AfterMI) &&
+          !isX87NonWaitingControlInstruction(*AfterMI))
+        continue;
+
+      BuildMI(MBB, AfterMI, MI->getDebugLoc(), TII->get(X86::WAIT));
+      LLVM_DEBUG(dbgs() << "\nInsert wait after:\t" << *MI);
+      // Step over the newly inserted wait so the loop resumes after it.
+      ++MI;
+      Changed = true;
+    }
+  }
+  return Changed; // True only if at least one WAIT was inserted.
+}
@@ -601,6 +601,7 @@ let SchedRW = [WriteMove], Uses = [FPCW] in {
 def LD_Frr   : FPI<0xD9, MRM0r, (outs), (ins RSTi:$op), "fld\t$op">;
 def ST_Frr   : FPI<0xDD, MRM2r, (outs), (ins RSTi:$op), "fst\t$op">;
 def ST_FPrr  : FPI<0xDD, MRM3r, (outs), (ins RSTi:$op), "fstp\t$op">;
+let mayRaiseFPException = 0 in
 def XCH_F    : FPI<0xD9, MRM1r, (outs), (ins RSTi:$op), "fxch\t$op">;
 }
 
@@ -620,13 +621,13 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
                 [(set RFP80:$dst, fpimm1)]>;
 }
 
-let SchedRW = [WriteFLD0], Uses = [FPCW] in
+let SchedRW = [WriteFLD0], Uses = [FPCW], mayRaiseFPException = 0 in
 def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
 
-let SchedRW = [WriteFLD1], Uses = [FPCW] in
+let SchedRW = [WriteFLD1], Uses = [FPCW], mayRaiseFPException = 0 in
 def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
 
-let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW] in {
+let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW], mayRaiseFPException = 0 in {
 def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
 def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
 def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
 
@@ -519,6 +519,7 @@ void X86PassConfig::addPreEmitPass() {
   }
   addPass(createX86DiscriminateMemOpsPass());
   addPass(createX86InsertPrefetchPass());
+  addPass(createX86InsertX87waitPass());
 }
 
 void X86PassConfig::addPreEmitPass2() {
 
@@ -65,6 +65,7 @@
 ; CHECK-NEXT:       X86 vzeroupper inserter
 ; CHECK-NEXT:       X86 Discriminate Memory Operands
 ; CHECK-NEXT:       X86 Insert Cache Prefetches
+; CHECK-NEXT:       X86 insert wait instruction
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 
@@ -174,6 +174,7 @@
 ; CHECK-NEXT:       Compressing EVEX instrs to VEX encoding when possible
 ; CHECK-NEXT:       X86 Discriminate Memory Operands
 ; CHECK-NEXT:       X86 Insert Cache Prefetches
+; CHECK-NEXT:       X86 insert wait instruction
 ; CHECK-NEXT:       Contiguously Lay Out Funclets
 ; CHECK-NEXT:       StackMap Liveness Analysis
 ; CHECK-NEXT:       Live DEBUG_VALUE analysis
 
@@ -6,6 +6,7 @@ define x86_fp80 @constrained_fpext_f32_as_fp80(float %mem) #0 {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    flds -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    retq
 entry:
   %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(
@@ -19,6 +20,7 @@ define float @constrained_fptrunc_f80_to_f32(x86_fp80 %reg) #0 {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    retq
   %trunc = call float @llvm.experimental.constrained.fptrunc.f32.f80(
@@ -33,6 +35,7 @@ define x86_fp80 @constrained_fpext_f64_to_f80(double %mem) #0 {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    retq
 entry:
   %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(
@@ -46,6 +49,7 @@ define double @constrained_fptrunc_f80_to_f64(x86_fp80 %reg) #0 {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    wait
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
   %trunc = call double @llvm.experimental.constrained.fptrunc.f64.f80(
Original file line number	Diff line number	Diff line change
`@@ -69,6 +69,7 @@ set(sources`
`69`	`69`	`X86VZeroUpper.cpp`
`70`	`70`	`X86WinAllocaExpander.cpp`
`71`	`71`	`X86WinEHState.cpp`
	`72`	`+ X86InsertWait.cpp`
`72`	`73`	`)`
`73`	`74`
`74`	`75`	`add_llvm_target(X86CodeGen ${sources})`
Original file line number	Diff line number	Diff line change
`@@ -519,6 +519,7 @@ void X86PassConfig::addPreEmitPass() {`
`519`	`519`	`}`
`520`	`520`	`addPass(createX86DiscriminateMemOpsPass());`
`521`	`521`	`addPass(createX86InsertPrefetchPass());`
	`522`	`+ addPass(createX86InsertX87waitPass());`
`522`	`523`	`}`
`523`	`524`
`524`	`525`	`void X86PassConfig::addPreEmitPass2() {`