Skip to content

Commit 8fdafb7

Browse files
committed
Insert wait instruction after X87 instructions which could raise
floating-point exceptions. This patch also modifies some mayRaiseFPException flags which were set in D68854. Differential Revision: https://reviews.llvm.org/D72750
1 parent 066e817 commit 8fdafb7

27 files changed

+811
-3
lines changed

llvm/lib/Target/X86/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ set(sources
6969
X86VZeroUpper.cpp
7070
X86WinAllocaExpander.cpp
7171
X86WinEHState.cpp
72+
X86InsertWait.cpp
7273
)
7374

7475
add_llvm_target(X86CodeGen ${sources})

llvm/lib/Target/X86/X86.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ FunctionPass *createX86DiscriminateMemOpsPass();
129129
/// This pass applies profiling information to insert cache prefetches.
130130
FunctionPass *createX86InsertPrefetchPass();
131131

132+
/// This pass inserts wait instructions after X87 instructions which could raise
133+
/// fp exceptions when strict-fp is enabled.
134+
FunctionPass *createX86InsertX87waitPass();
135+
132136
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
133137
X86Subtarget &,
134138
X86RegisterBankInfo &);

llvm/lib/Target/X86/X86FloatingPoint.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,6 +1364,9 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
13641364
MBB->remove(&*I++);
13651365
I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS));
13661366

1367+
if (!MI.mayRaiseFPException())
1368+
I->setFlag(MachineInstr::MIFlag::NoFPExcept);
1369+
13671370
// If both operands are killed, pop one off of the stack in addition to
13681371
// overwriting the other one.
13691372
if (KillsOp0 && KillsOp1 && Op0 != Op1) {

llvm/lib/Target/X86/X86InsertWait.cpp

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
//- X86InsertWait.cpp - Strict-Fp: Insert wait after X87 instructions --------//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines the pass which inserts x86 wait instructions after
10+
// X87 instructions when strict floating point is enabled.
11+
//
12+
// The logic to insert a wait instruction after an X87 instruction is as below:
13+
// 1. If the X87 instruction neither raises a float exception nor is a
14+
// load/store instruction, or is an x87 control instruction, don't insert wait.
15+
// 2. If the X87 instruction is immediately followed by another X87 instruction
16+
// that synchronizes X87 exceptions, don't insert wait.
17+
// 3. For other situations, insert wait instruction.
18+
//
19+
//===----------------------------------------------------------------------===//
20+
21+
#include "X86.h"
22+
#include "X86InstrInfo.h"
23+
#include "X86Subtarget.h"
24+
#include "llvm/CodeGen/MachineBasicBlock.h"
25+
#include "llvm/CodeGen/MachineFunction.h"
26+
#include "llvm/CodeGen/MachineFunctionPass.h"
27+
#include "llvm/CodeGen/MachineInstr.h"
28+
#include "llvm/CodeGen/MachineInstrBuilder.h"
29+
#include "llvm/CodeGen/MachineOperand.h"
30+
#include "llvm/CodeGen/TargetInstrInfo.h"
31+
#include "llvm/IR/DebugLoc.h"
32+
#include "llvm/Support/Debug.h"
33+
34+
using namespace llvm;
35+
36+
#define DEBUG_TYPE "x86-insert-wait"
37+
38+
namespace {
39+
40+
class WaitInsert : public MachineFunctionPass {
41+
public:
42+
static char ID;
43+
44+
WaitInsert() : MachineFunctionPass(ID) {}
45+
46+
bool runOnMachineFunction(MachineFunction &MF) override;
47+
48+
StringRef getPassName() const override {
49+
return "X86 insert wait instruction";
50+
}
51+
52+
private:
53+
const TargetInstrInfo *TII; // Machine instruction info.
54+
};
55+
56+
} // namespace
57+
58+
char WaitInsert::ID = 0;
59+
60+
FunctionPass *llvm::createX86InsertX87waitPass() { return new WaitInsert(); }
61+
62+
/// Return true if the Reg is X87 register.
63+
static bool isX87Reg(unsigned Reg) {
64+
return (Reg == X86::FPCW || Reg == X86::FPSW ||
65+
(Reg >= X86::ST0 && Reg <= X86::ST7));
66+
}
67+
68+
/// check if the instruction is X87 instruction
69+
static bool isX87Instruction(MachineInstr &MI) {
70+
for (const MachineOperand &MO : MI.operands()) {
71+
if (!MO.isReg())
72+
continue;
73+
if (isX87Reg(MO.getReg()))
74+
return true;
75+
}
76+
return false;
77+
}
78+
79+
/// Return true if the opcode of \p MI is one that this pass classifies as an
/// X87 control instruction. WAIT itself is part of the set, so no wait is
/// inserted after any of these.
static bool isX87ControlInstruction(MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return false;
  case X86::WAIT:
  case X86::FNOP:
  case X86::FNINIT:
  case X86::FNCLEX:
  case X86::FLDCW16m:
  case X86::FNSTCW16m:
  case X86::FNSTSW16r:
  case X86::FNSTSWm:
  case X86::FLDENVm:
  case X86::FSTENVm:
  case X86::FRSTORm:
  case X86::FSAVEm:
  case X86::FINCSTP:
  case X86::FDECSTP:
  case X86::FFREE:
  case X86::FFREEP:
    return true;
  }
}
102+
103+
/// Return true if the opcode of \p MI is one of the few special X87 control
/// instructions that do not perform a wait operation.
static bool isX87NonWaitingControlInstruction(MachineInstr &MI) {
  const unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return false;
  case X86::FNCLEX:
  case X86::FNINIT:
  case X86::FNSTCW16m:
  case X86::FNSTSW16r:
  case X86::FNSTSWm:
    return true;
  }
}
116+
117+
/// Walk every instruction of \p MF and insert a WAIT after each X87
/// instruction that needs one. Functions without the strictfp attribute are
/// left untouched.
/// \returns true if at least one WAIT was inserted.
bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
  // Only strictfp functions are processed.
  if (!MF.getFunction().hasFnAttribute(Attribute::StrictFP))
    return false;

  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
  bool InsertedAny = false;

  for (MachineBasicBlock &Block : MF) {
    for (MachineBasicBlock::iterator Cur = Block.begin(); Cur != Block.end();
         ++Cur) {
      MachineInstr &Inst = *Cur;

      // Instructions that do not touch an X87 register are not of interest.
      if (!isX87Instruction(Inst))
        continue;

      // No wait is needed when the instruction neither may raise an FP
      // exception nor accesses memory, or when it is an x87 control
      // instruction.
      const bool NeedsSync =
          Inst.mayRaiseFPException() || Inst.mayLoadOrStore();
      if (!NeedsSync || isX87ControlInstruction(Inst))
        continue;

      // The wait can be omitted when the next instruction in the block is an
      // X87 instruction other than a non-waiting control instruction.
      MachineBasicBlock::iterator Next = std::next(Cur);
      const bool NextIsWaitingX87 = Next != Block.end() &&
                                    isX87Instruction(*Next) &&
                                    !isX87NonWaitingControlInstruction(*Next);
      if (NextIsWaitingX87)
        continue;

      BuildMI(Block, Next, Inst.getDebugLoc(), TII->get(X86::WAIT));
      LLVM_DEBUG(dbgs() << "\nInsert wait after:\t" << Inst);
      // Step onto the freshly inserted WAIT so that the loop increment moves
      // past it.
      ++Cur;
      InsertedAny = true;
    }
  }

  return InsertedAny;
}

llvm/lib/Target/X86/X86InstrFPStack.td

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,7 @@ let SchedRW = [WriteMove], Uses = [FPCW] in {
601601
def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RSTi:$op), "fld\t$op">;
602602
def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RSTi:$op), "fst\t$op">;
603603
def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RSTi:$op), "fstp\t$op">;
604+
let mayRaiseFPException = 0 in
604605
def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RSTi:$op), "fxch\t$op">;
605606
}
606607

@@ -620,13 +621,13 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP,
620621
[(set RFP80:$dst, fpimm1)]>;
621622
}
622623

623-
let SchedRW = [WriteFLD0], Uses = [FPCW] in
624+
let SchedRW = [WriteFLD0], Uses = [FPCW], mayRaiseFPException = 0 in
624625
def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
625626

626-
let SchedRW = [WriteFLD1], Uses = [FPCW] in
627+
let SchedRW = [WriteFLD1], Uses = [FPCW], mayRaiseFPException = 0 in
627628
def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
628629

629-
let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW] in {
630+
let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW], mayRaiseFPException = 0 in {
630631
def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
631632
def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
632633
def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;

llvm/lib/Target/X86/X86TargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,7 @@ void X86PassConfig::addPreEmitPass() {
519519
}
520520
addPass(createX86DiscriminateMemOpsPass());
521521
addPass(createX86InsertPrefetchPass());
522+
addPass(createX86InsertX87waitPass());
522523
}
523524

524525
void X86PassConfig::addPreEmitPass2() {

llvm/test/CodeGen/X86/O0-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
; CHECK-NEXT: X86 vzeroupper inserter
6666
; CHECK-NEXT: X86 Discriminate Memory Operands
6767
; CHECK-NEXT: X86 Insert Cache Prefetches
68+
; CHECK-NEXT: X86 insert wait instruction
6869
; CHECK-NEXT: Contiguously Lay Out Funclets
6970
; CHECK-NEXT: StackMap Liveness Analysis
7071
; CHECK-NEXT: Live DEBUG_VALUE analysis

llvm/test/CodeGen/X86/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@
174174
; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possible
175175
; CHECK-NEXT: X86 Discriminate Memory Operands
176176
; CHECK-NEXT: X86 Insert Cache Prefetches
177+
; CHECK-NEXT: X86 insert wait instruction
177178
; CHECK-NEXT: Contiguously Lay Out Funclets
178179
; CHECK-NEXT: StackMap Liveness Analysis
179180
; CHECK-NEXT: Live DEBUG_VALUE analysis

llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ define x86_fp80 @constrained_fpext_f32_as_fp80(float %mem) #0 {
66
; CHECK: # %bb.0: # %entry
77
; CHECK-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
88
; CHECK-NEXT: flds -{{[0-9]+}}(%rsp)
9+
; CHECK-NEXT: wait
910
; CHECK-NEXT: retq
1011
entry:
1112
%ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(
@@ -19,6 +20,7 @@ define float @constrained_fptrunc_f80_to_f32(x86_fp80 %reg) #0 {
1920
; CHECK: # %bb.0:
2021
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
2122
; CHECK-NEXT: fstps -{{[0-9]+}}(%rsp)
23+
; CHECK-NEXT: wait
2224
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
2325
; CHECK-NEXT: retq
2426
%trunc = call float @llvm.experimental.constrained.fptrunc.f32.f80(
@@ -33,6 +35,7 @@ define x86_fp80 @constrained_fpext_f64_to_f80(double %mem) #0 {
3335
; CHECK: # %bb.0: # %entry
3436
; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
3537
; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp)
38+
; CHECK-NEXT: wait
3639
; CHECK-NEXT: retq
3740
entry:
3841
%ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(
@@ -46,6 +49,7 @@ define double @constrained_fptrunc_f80_to_f64(x86_fp80 %reg) #0 {
4649
; CHECK: # %bb.0:
4750
; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
4851
; CHECK-NEXT: fstpl -{{[0-9]+}}(%rsp)
52+
; CHECK-NEXT: wait
4953
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
5054
; CHECK-NEXT: retq
5155
%trunc = call double @llvm.experimental.constrained.fptrunc.f64.f80(

0 commit comments

Comments
 (0)