Skip to content

Commit 7d626e7

Browse files
committed
[X86] Move RDFLAGS/WRFLAGS expansion until after RA
Because the RDFLAGS and WRFLAGS pseudos are expanded before register allocation (RA), the register allocator may introduce reloads in the middle of the sequence that reads or writes the EFLAGS register. This can cause an issue where the stack pointer has been adjusted but the stack offset used by the reload does not account for that adjustment (see [1]). To avoid this, expand these pseudos after register allocation.

[1] #59102

Reviewed By: craig.topper, nickdesaulniers, pengfei

Differential Revision: https://reviews.llvm.org/D140045
1 parent 0534791 commit 7d626e7

File tree

4 files changed: 205 additions (+205) and 37 deletions (-37)

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -37306,41 +37306,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3730637306
case X86::CMOV_VK64:
3730737307
return EmitLoweredSelect(MI, BB);
3730837308

37309-
case X86::RDFLAGS32:
37310-
case X86::RDFLAGS64: {
37311-
unsigned PushF =
37312-
MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
37313-
unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
37314-
MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
37315-
// Permit reads of the EFLAGS and DF registers without them being defined.
37316-
// This intrinsic exists to read external processor state in flags, such as
37317-
// the trap flag, interrupt flag, and direction flag, none of which are
37318-
// modeled by the backend.
37319-
assert(Push->getOperand(2).getReg() == X86::EFLAGS &&
37320-
"Unexpected register in operand!");
37321-
Push->getOperand(2).setIsUndef();
37322-
assert(Push->getOperand(3).getReg() == X86::DF &&
37323-
"Unexpected register in operand!");
37324-
Push->getOperand(3).setIsUndef();
37325-
BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());
37326-
37327-
MI.eraseFromParent(); // The pseudo is gone now.
37328-
return BB;
37329-
}
37330-
37331-
case X86::WRFLAGS32:
37332-
case X86::WRFLAGS64: {
37333-
unsigned Push =
37334-
MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
37335-
unsigned PopF =
37336-
MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
37337-
BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg());
37338-
BuildMI(*BB, MI, DL, TII->get(PopF));
37339-
37340-
MI.eraseFromParent(); // The pseudo is gone now.
37341-
return BB;
37342-
}
37343-
3734437309
case X86::FP32_TO_INT16_IN_MEM:
3734537310
case X86::FP32_TO_INT32_IN_MEM:
3734637311
case X86::FP32_TO_INT64_IN_MEM:

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5060,6 +5060,45 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
50605060
return true;
50615061
}
50625062

5063+
case X86::RDFLAGS32:
5064+
case X86::RDFLAGS64: {
5065+
unsigned Is64Bit = MI.getOpcode() == X86::RDFLAGS64;
5066+
MachineBasicBlock &MBB = *MIB->getParent();
5067+
5068+
MachineInstr *NewMI =
5069+
BuildMI(MBB, MI, MIB->getDebugLoc(),
5070+
get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32))
5071+
.getInstr();
5072+
5073+
// Permit reads of the EFLAGS and DF registers without them being defined.
5074+
// This intrinsic exists to read external processor state in flags, such as
5075+
// the trap flag, interrupt flag, and direction flag, none of which are
5076+
// modeled by the backend.
5077+
assert(NewMI->getOperand(2).getReg() == X86::EFLAGS &&
5078+
"Unexpected register in operand! Should be EFLAGS.");
5079+
NewMI->getOperand(2).setIsUndef();
5080+
assert(NewMI->getOperand(3).getReg() == X86::DF &&
5081+
"Unexpected register in operand! Should be DF.");
5082+
NewMI->getOperand(3).setIsUndef();
5083+
5084+
MIB->setDesc(get(Is64Bit ? X86::POP64r : X86::POP32r));
5085+
return true;
5086+
}
5087+
5088+
case X86::WRFLAGS32:
5089+
case X86::WRFLAGS64: {
5090+
unsigned Is64Bit = MI.getOpcode() == X86::WRFLAGS64;
5091+
MachineBasicBlock &MBB = *MIB->getParent();
5092+
5093+
BuildMI(MBB, MI, MIB->getDebugLoc(),
5094+
get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
5095+
.addReg(MI.getOperand(0).getReg());
5096+
BuildMI(MBB, MI, MIB->getDebugLoc(),
5097+
get(Is64Bit ? X86::POPF64 : X86::POPF32));
5098+
MI.eraseFromParent();
5099+
return true;
5100+
}
5101+
50635102
// KNL does not recognize dependency-breaking idioms for mask registers,
50645103
// so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
50655104
// Using %k0 as the undef input register is a performance heuristic based

llvm/lib/Target/X86/X86InstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,7 +1381,7 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
13811381

13821382
}
13831383

1384-
let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
1384+
let isPseudo = 1, mayLoad = 1, mayStore = 1,
13851385
SchedRW = [WriteRMW], Defs = [ESP] in {
13861386
let Uses = [ESP] in
13871387
def RDFLAGS32 : PseudoI<(outs GR32:$dst), (ins),
@@ -1394,7 +1394,7 @@ let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
13941394
Requires<[In64BitMode]>;
13951395
}
13961396

1397-
let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
1397+
let isPseudo = 1, mayLoad = 1, mayStore = 1,
13981398
SchedRW = [WriteRMW] in {
13991399
let Defs = [ESP, EFLAGS, DF], Uses = [ESP] in
14001400
def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src),

llvm/test/CodeGen/X86/x86-64-flags-intrinsics.ll

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,167 @@ entry:
5252
call void @llvm.x86.flags.write.u64(i64 %arg)
5353
ret void
5454
}
55+
56+
define i64 @read_flags_reg_pressure() nounwind {
57+
; CHECK-LABEL: read_flags_reg_pressure:
58+
; CHECK: # %bb.0:
59+
; CHECK-NEXT: pushq %rbp
60+
; CHECK-NEXT: pushq %r15
61+
; CHECK-NEXT: pushq %r14
62+
; CHECK-NEXT: pushq %r13
63+
; CHECK-NEXT: pushq %r12
64+
; CHECK-NEXT: pushq %rbx
65+
; CHECK-NEXT: subq $16, %rsp
66+
; CHECK-NEXT: #APP
67+
; CHECK-NEXT: #NO_APP
68+
; CHECK-NEXT: movq %rdx, (%rsp) # 8-byte Spill
69+
; CHECK-NEXT: pushfq
70+
; CHECK-NEXT: popq %rdx
71+
; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
72+
; CHECK-NEXT: movq (%rsp), %rdx # 8-byte Reload
73+
; CHECK-NEXT: #APP
74+
; CHECK-NEXT: #NO_APP
75+
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
76+
; CHECK-NEXT: addq $16, %rsp
77+
; CHECK-NEXT: popq %rbx
78+
; CHECK-NEXT: popq %r12
79+
; CHECK-NEXT: popq %r13
80+
; CHECK-NEXT: popq %r14
81+
; CHECK-NEXT: popq %r15
82+
; CHECK-NEXT: popq %rbp
83+
; CHECK-NEXT: retq
84+
;
85+
; WIN64-LABEL: read_flags_reg_pressure:
86+
; WIN64: # %bb.0:
87+
; WIN64-NEXT: pushq %rbp
88+
; WIN64-NEXT: pushq %r15
89+
; WIN64-NEXT: pushq %r14
90+
; WIN64-NEXT: pushq %r13
91+
; WIN64-NEXT: pushq %r12
92+
; WIN64-NEXT: pushq %rsi
93+
; WIN64-NEXT: pushq %rdi
94+
; WIN64-NEXT: pushq %rbx
95+
; WIN64-NEXT: subq $16, %rsp
96+
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
97+
; WIN64-NEXT: #APP
98+
; WIN64-NEXT: #NO_APP
99+
; WIN64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
100+
; WIN64-NEXT: pushfq
101+
; WIN64-NEXT: popq %rdx
102+
; WIN64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
103+
; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
104+
; WIN64-NEXT: #APP
105+
; WIN64-NEXT: #NO_APP
106+
; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
107+
; WIN64-NEXT: addq $16, %rsp
108+
; WIN64-NEXT: popq %rbx
109+
; WIN64-NEXT: popq %rdi
110+
; WIN64-NEXT: popq %rsi
111+
; WIN64-NEXT: popq %r12
112+
; WIN64-NEXT: popq %r13
113+
; WIN64-NEXT: popq %r14
114+
; WIN64-NEXT: popq %r15
115+
; WIN64-NEXT: popq %rbp
116+
; WIN64-NEXT: retq
117+
%1 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "", "={ax},={bx},={cx},={dx},={si},={di},={bp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
118+
%2 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 0
119+
%3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 1
120+
%4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 2
121+
%5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 3
122+
%6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 4
123+
%7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 5
124+
%8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 6
125+
%9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 7
126+
%10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 8
127+
%11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 9
128+
%12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 10
129+
%13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 11
130+
%14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 12
131+
%15 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 13
132+
%16 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %1, 14
133+
%17 = tail call i64 @llvm.x86.flags.read.u64()
134+
tail call void asm sideeffect "", "{ax},{bx},{cx},{dx},{si},{di},{bp},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},~{dirflag},~{fpsr},~{flags}"(i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13, i64 %14, i64 %15, i64 %16)
135+
ret i64 %17
136+
}
137+
138+
define void @write_flags_reg_pressure(i64 noundef %0) nounwind {
139+
; CHECK-LABEL: write_flags_reg_pressure:
140+
; CHECK: # %bb.0:
141+
; CHECK-NEXT: pushq %rbp
142+
; CHECK-NEXT: pushq %r15
143+
; CHECK-NEXT: pushq %r14
144+
; CHECK-NEXT: pushq %r13
145+
; CHECK-NEXT: pushq %r12
146+
; CHECK-NEXT: pushq %rbx
147+
; CHECK-NEXT: subq $16, %rsp
148+
; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
149+
; CHECK-NEXT: #APP
150+
; CHECK-NEXT: #NO_APP
151+
; CHECK-NEXT: movq %rdx, (%rsp) # 8-byte Spill
152+
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
153+
; CHECK-NEXT: pushq %rdx
154+
; CHECK-NEXT: popfq
155+
; CHECK-NEXT: movq (%rsp), %rdx # 8-byte Reload
156+
; CHECK-NEXT: #APP
157+
; CHECK-NEXT: #NO_APP
158+
; CHECK-NEXT: addq $16, %rsp
159+
; CHECK-NEXT: popq %rbx
160+
; CHECK-NEXT: popq %r12
161+
; CHECK-NEXT: popq %r13
162+
; CHECK-NEXT: popq %r14
163+
; CHECK-NEXT: popq %r15
164+
; CHECK-NEXT: popq %rbp
165+
; CHECK-NEXT: retq
166+
;
167+
; WIN64-LABEL: write_flags_reg_pressure:
168+
; WIN64: # %bb.0:
169+
; WIN64-NEXT: pushq %rbp
170+
; WIN64-NEXT: pushq %r15
171+
; WIN64-NEXT: pushq %r14
172+
; WIN64-NEXT: pushq %r13
173+
; WIN64-NEXT: pushq %r12
174+
; WIN64-NEXT: pushq %rsi
175+
; WIN64-NEXT: pushq %rdi
176+
; WIN64-NEXT: pushq %rbx
177+
; WIN64-NEXT: subq $16, %rsp
178+
; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rbp
179+
; WIN64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
180+
; WIN64-NEXT: #APP
181+
; WIN64-NEXT: #NO_APP
182+
; WIN64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
183+
; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
184+
; WIN64-NEXT: pushq %rdx
185+
; WIN64-NEXT: popfq
186+
; WIN64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
187+
; WIN64-NEXT: #APP
188+
; WIN64-NEXT: #NO_APP
189+
; WIN64-NEXT: addq $16, %rsp
190+
; WIN64-NEXT: popq %rbx
191+
; WIN64-NEXT: popq %rdi
192+
; WIN64-NEXT: popq %rsi
193+
; WIN64-NEXT: popq %r12
194+
; WIN64-NEXT: popq %r13
195+
; WIN64-NEXT: popq %r14
196+
; WIN64-NEXT: popq %r15
197+
; WIN64-NEXT: popq %rbp
198+
; WIN64-NEXT: retq
199+
%2 = tail call { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } asm sideeffect "", "={ax},={bx},={cx},={dx},={si},={di},={bp},={r8},={r9},={r10},={r11},={r12},={r13},={r14},={r15},~{dirflag},~{fpsr},~{flags}"()
200+
%3 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 0
201+
%4 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 1
202+
%5 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 2
203+
%6 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 3
204+
%7 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 4
205+
%8 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 5
206+
%9 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 6
207+
%10 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 7
208+
%11 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 8
209+
%12 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 9
210+
%13 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 10
211+
%14 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 11
212+
%15 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 12
213+
%16 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 13
214+
%17 = extractvalue { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 } %2, 14
215+
tail call void @llvm.x86.flags.write.u64(i64 %0)
216+
tail call void asm sideeffect "", "{ax},{bx},{cx},{dx},{si},{di},{bp},{r8},{r9},{r10},{r11},{r12},{r13},{r14},{r15},~{dirflag},~{fpsr},~{flags}"(i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13, i64 %14, i64 %15, i64 %16, i64 %17)
217+
ret void
218+
}

0 commit comments

Comments
 (0)