Skip to content

Commit 49e8d6c

Browse files
KanRobertAlexisPerry
authored and committed
[X86][FixupSetCC] Substitute setcc + zext pair with setzucc if possible (llvm#96594)
1 parent fd20462 commit 49e8d6c

File tree

2 files changed

+73
-11
lines changed

2 files changed

+73
-11
lines changed

llvm/lib/Target/X86/X86FixupSetCC.cpp

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
// performed by the setcc. Instead, we can use:
1818
// xor %eax, %eax; seta %al
1919
// This both avoids the stall, and encodes shorter.
20+
//
21+
// Furthermore, we can use:
22+
// setzua %al
23+
// if feature zero-upper is available. It's faster than the xor+setcc sequence.
24+
// When r16-r31 are used, it even encodes shorter.
2025
//===----------------------------------------------------------------------===//
2126

2227
#include "X86.h"
@@ -46,6 +51,7 @@ class X86FixupSetCCPass : public MachineFunctionPass {
4651

4752
private:
4853
MachineRegisterInfo *MRI = nullptr;
54+
const X86Subtarget *ST = nullptr;
4955
const X86InstrInfo *TII = nullptr;
5056

5157
enum { SearchBound = 16 };
@@ -61,7 +67,8 @@ FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); }
6167
bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
6268
bool Changed = false;
6369
MRI = &MF.getRegInfo();
64-
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
70+
ST = &MF.getSubtarget<X86Subtarget>();
71+
TII = ST->getInstrInfo();
6572

6673
SmallVector<MachineInstr*, 4> ToErase;
6774

@@ -79,7 +86,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
7986
continue;
8087

8188
MachineInstr *ZExt = nullptr;
82-
for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg()))
89+
Register Reg0 = MI.getOperand(0).getReg();
90+
for (auto &Use : MRI->use_instructions(Reg0))
8391
if (Use.getOpcode() == X86::MOVZX32rr8)
8492
ZExt = &Use;
8593

@@ -98,9 +106,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
98106
continue;
99107

100108
// On 32-bit, we need to be careful to force an ABCD register.
101-
const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit()
102-
? &X86::GR32RegClass
103-
: &X86::GR32_ABCDRegClass;
109+
const TargetRegisterClass *RC =
110+
ST->is64Bit() ? &X86::GR32RegClass : &X86::GR32_ABCDRegClass;
104111
if (!MRI->constrainRegClass(ZExt->getOperand(0).getReg(), RC)) {
105112
// If we cannot constrain the register, we would need an additional copy
106113
// and are better off keeping the MOVZX32rr8 we have now.
@@ -110,17 +117,24 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
110117
++NumSubstZexts;
111118
Changed = true;
112119

113-
// Initialize a register with 0. This must go before the eflags def
120+
// X86 setcc/setzucc only takes an output GR8, so fake a GR32 input by
121+
// inserting the setcc/setzucc result into the low byte of the zeroed
122+
// register.
114123
Register ZeroReg = MRI->createVirtualRegister(RC);
115-
BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
116-
ZeroReg);
124+
if (ST->hasZU()) {
125+
MI.setDesc(TII->get(X86::SETZUCCr));
126+
BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
127+
TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg);
128+
} else {
129+
// Initialize a register with 0. This must go before the eflags def
130+
BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
131+
ZeroReg);
132+
}
117133

118-
// X86 setcc only takes an output GR8, so fake a GR32 input by inserting
119-
// the setcc result into the low byte of the zeroed register.
120134
BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
121135
TII->get(X86::INSERT_SUBREG), ZExt->getOperand(0).getReg())
122136
.addReg(ZeroReg)
123-
.addReg(MI.getOperand(0).getReg())
137+
.addReg(Reg0)
124138
.addImm(X86::sub_8bit);
125139
ToErase.push_back(ZExt);
126140
}

llvm/test/CodeGen/X86/apx/setzucc.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64 -mattr=+zu | FileCheck %s
3+
4+
define i16 @i8(i8 %x) nounwind {
5+
; CHECK-LABEL: i8:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: cmpb $3, %dil
8+
; CHECK-NEXT: setzuae %al
9+
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
10+
; CHECK-NEXT: retq
11+
%t0 = icmp ugt i8 %x, 2
12+
%zext = zext i1 %t0 to i16
13+
ret i16 %zext
14+
}
15+
16+
define i16 @i16(i16 %x) nounwind {
17+
; CHECK-LABEL: i16:
18+
; CHECK: # %bb.0:
19+
; CHECK-NEXT: cmpw $2, %di
20+
; CHECK-NEXT: setzub %al
21+
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
22+
; CHECK-NEXT: retq
23+
%t0 = icmp ult i16 %x, 2
24+
%if = select i1 %t0, i16 1, i16 0
25+
ret i16 %if
26+
}
27+
28+
define i32 @i32(i32 %x) nounwind {
29+
; CHECK-LABEL: i32:
30+
; CHECK: # %bb.0:
31+
; CHECK-NEXT: cmpl $1, %edi
32+
; CHECK-NEXT: setzue %al
33+
; CHECK-NEXT: retq
34+
%t0 = icmp eq i32 %x, 1
35+
%if = select i1 %t0, i32 1, i32 0
36+
ret i32 %if
37+
}
38+
39+
define i64 @i64(i64 %x) nounwind {
40+
; CHECK-LABEL: i64:
41+
; CHECK: # %bb.0:
42+
; CHECK-NEXT: cmpq $1, %rdi
43+
; CHECK-NEXT: setzune %al
44+
; CHECK-NEXT: retq
45+
%t0 = icmp ne i64 %x, 1
46+
%if = select i1 %t0, i64 1, i64 0
47+
ret i64 %if
48+
}

0 commit comments

Comments
 (0)