17
17
// performed by the setcc. Instead, we can use:
18
18
// xor %eax, %eax; seta %al
19
19
// This both avoids the stall, and encodes shorter.
20
+ //
21
+ // Furthermore, we can use:
22
+ // setzua %al
23
+ // if feature zero-upper is available. It's faster than the xor+setcc sequence.
24
+ // When r16-r31 are used, it even encodes shorter.
20
25
// ===----------------------------------------------------------------------===//
21
26
22
27
#include " X86.h"
@@ -46,6 +51,7 @@ class X86FixupSetCCPass : public MachineFunctionPass {
46
51
47
52
private:
48
53
MachineRegisterInfo *MRI = nullptr ;
54
+ const X86Subtarget *ST = nullptr ;
49
55
const X86InstrInfo *TII = nullptr ;
50
56
51
57
enum { SearchBound = 16 };
@@ -61,7 +67,8 @@ FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
61
67
bool X86FixupSetCCPass::runOnMachineFunction (MachineFunction &MF) {
62
68
bool Changed = false ;
63
69
MRI = &MF.getRegInfo ();
64
- TII = MF.getSubtarget <X86Subtarget>().getInstrInfo ();
70
+ ST = &MF.getSubtarget <X86Subtarget>();
71
+ TII = ST->getInstrInfo ();
65
72
66
73
SmallVector<MachineInstr*, 4 > ToErase;
67
74
@@ -79,7 +86,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
79
86
continue ;
80
87
81
88
MachineInstr *ZExt = nullptr ;
82
- for (auto &Use : MRI->use_instructions (MI.getOperand (0 ).getReg ()))
89
+ Register Reg0 = MI.getOperand (0 ).getReg ();
90
+ for (auto &Use : MRI->use_instructions (Reg0))
83
91
if (Use.getOpcode () == X86::MOVZX32rr8)
84
92
ZExt = &Use;
85
93
@@ -98,9 +106,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
98
106
continue ;
99
107
100
108
// On 32-bit, we need to be careful to force an ABCD register.
101
- const TargetRegisterClass *RC = MF.getSubtarget <X86Subtarget>().is64Bit ()
102
- ? &X86::GR32RegClass
103
- : &X86::GR32_ABCDRegClass;
109
+ const TargetRegisterClass *RC =
110
+ ST->is64Bit () ? &X86::GR32RegClass : &X86::GR32_ABCDRegClass;
104
111
if (!MRI->constrainRegClass (ZExt->getOperand (0 ).getReg (), RC)) {
105
112
// If we cannot constrain the register, we would need an additional copy
106
113
// and are better off keeping the MOVZX32rr8 we have now.
@@ -110,17 +117,24 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
110
117
++NumSubstZexts;
111
118
Changed = true ;
112
119
113
- // Initialize a register with 0. This must go before the eflags def
120
+ // X86 setcc/setzucc only takes an output GR8, so fake a GR32 input by
121
+ // inserting the setcc/setzucc result into the low byte of the zeroed
122
+ // register.
114
123
Register ZeroReg = MRI->createVirtualRegister (RC);
115
- BuildMI (MBB, FlagsDefMI, MI.getDebugLoc (), TII->get (X86::MOV32r0),
116
- ZeroReg);
124
+ if (ST->hasZU ()) {
125
+ MI.setDesc (TII->get (X86::SETZUCCr));
126
+ BuildMI (*ZExt->getParent (), ZExt, ZExt->getDebugLoc (),
127
+ TII->get (TargetOpcode::IMPLICIT_DEF), ZeroReg);
128
+ } else {
129
+ // Initialize a register with 0. This must go before the eflags def
130
+ BuildMI (MBB, FlagsDefMI, MI.getDebugLoc (), TII->get (X86::MOV32r0),
131
+ ZeroReg);
132
+ }
117
133
118
- // X86 setcc only takes an output GR8, so fake a GR32 input by inserting
119
- // the setcc result into the low byte of the zeroed register.
120
134
BuildMI (*ZExt->getParent (), ZExt, ZExt->getDebugLoc (),
121
135
TII->get (X86::INSERT_SUBREG), ZExt->getOperand (0 ).getReg ())
122
136
.addReg (ZeroReg)
123
- .addReg (MI. getOperand ( 0 ). getReg () )
137
+ .addReg (Reg0 )
124
138
.addImm (X86::sub_8bit);
125
139
ToErase.push_back (ZExt);
126
140
}
0 commit comments