Skip to content

Commit 30b8975

Browse files
committed
[X86][APX] Exclusively emit setzucc to avoid false dependency
1. Added a pattern to emit the SetZUCC instruction when APX NDD is enabled, to avoid the false dependency incurred by SetCC. SetCC is still emitted when APX NDD is disabled. 2. Reverted part of llvm#129506 (changing setzucc back to setcc + zext). The check for the SetZUCC instruction is kept, so that rewriteSetCC is still called for SetZUCC and the redundant test after SetZUCC is removed in the X86 Flags Copy Lowering pass. 3. Also added SetZUCC support to the FixupSetCC pass to eliminate the zext instruction after SetZUCC.
1 parent cb647ec commit 30b8975

17 files changed

+2631
-1179
lines changed

llvm/lib/Target/X86/X86FixupSetCC.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,11 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
7979
if (MI.definesRegister(X86::EFLAGS, /*TRI=*/nullptr))
8080
FlagsDefMI = &MI;
8181

82-
// Find a setcc that is used by a zext.
82+
// Find a setcc/setzucc (if ZU is enabled) that is used by a zext.
8383
// This doesn't have to be the only use, the transformation is safe
8484
// regardless.
85-
if (MI.getOpcode() != X86::SETCCr)
85+
if (MI.getOpcode() != X86::SETCCr &&
86+
(!ST->hasZU() || MI.getOpcode() != X86::SETZUCCr))
8687
continue;
8788

8889
MachineInstr *ZExt = nullptr;
@@ -122,7 +123,8 @@ bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
122123
// register.
123124
Register ZeroReg = MRI->createVirtualRegister(RC);
124125
if (ST->hasZU()) {
125-
MI.setDesc(TII->get(X86::SETZUCCr));
126+
if (MI.getOpcode() != X86::SETZUCCr)
127+
MI.setDesc(TII->get(X86::SETZUCCr));
126128
BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
127129
TII->get(TargetOpcode::IMPLICIT_DEF), ZeroReg);
128130
} else {

llvm/lib/Target/X86/X86FlagsCopyLowering.cpp

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -746,8 +746,10 @@ Register X86FlagsCopyLoweringPass::promoteCondToReg(
746746
MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
747747
const DebugLoc &TestLoc, X86::CondCode Cond) {
748748
Register Reg = MRI->createVirtualRegister(PromoteRC);
749-
auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg)
750-
.addImm(Cond);
749+
auto SetI =
750+
BuildMI(TestMBB, TestPos, TestLoc,
751+
TII->get(Subtarget->hasZU() ? X86::SETZUCCr : X86::SETCCr), Reg)
752+
.addImm(Cond);
751753
(void)SetI;
752754
LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump());
753755
++NumSetCCsInserted;
@@ -791,29 +793,6 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &MBB,
791793
if (!CondReg)
792794
CondReg = promoteCondToReg(MBB, Pos, Loc, Cond);
793795

794-
if (X86::isSETZUCC(MI.getOpcode())) {
795-
// SETZUCC is generated for register only for now.
796-
assert(!MI.mayStore() && "Cannot handle memory variants");
797-
assert(MI.getOperand(0).isReg() &&
798-
"Cannot have a non-register defined operand to SETZUcc!");
799-
Register OldReg = MI.getOperand(0).getReg();
800-
// Drop Kill flags on the old register before replacing. CondReg may have
801-
// a longer live range.
802-
MRI->clearKillFlags(OldReg);
803-
for (auto &Use : MRI->use_instructions(OldReg)) {
804-
assert(Use.getOpcode() == X86::INSERT_SUBREG &&
805-
"SETZUCC should be only used by INSERT_SUBREG");
806-
Use.getOperand(2).setReg(CondReg);
807-
// Recover MOV32r0 before INSERT_SUBREG, which removed by SETZUCC.
808-
Register ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
809-
BuildMI(*Use.getParent(), &Use, Use.getDebugLoc(), TII->get(X86::MOV32r0),
810-
ZeroReg);
811-
Use.getOperand(1).setReg(ZeroReg);
812-
}
813-
MI.eraseFromParent();
814-
return;
815-
}
816-
817796
// Rewriting a register def is trivial: we just replace the register and
818797
// remove the setcc.
819798
if (!MI.mayStore()) {

llvm/lib/Target/X86/X86InstrCMovSetCC.td

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,14 @@ let Predicates = [HasCMOV, HasCF] in {
137137
}
138138

139139
// SetCC instructions.
140-
let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in {
140+
let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1, Predicates = [NoNDD] in {
141141
def SETCCr : I<0x90, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond),
142142
"set${cond}\t$dst",
143143
[(set GR8:$dst, (X86setcc timm:$cond, EFLAGS))]>,
144144
TB, Sched<[WriteSETCC]>;
145+
}
146+
147+
let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in {
145148
def SETCCm : I<0x90, MRMXmCC, (outs), (ins i8mem:$dst, ccode:$cond),
146149
"set${cond}\t$dst",
147150
[(store (X86setcc timm:$cond, EFLAGS), addr:$dst)]>,
@@ -152,7 +155,8 @@ let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in {
152155
let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1,
153156
hasSideEffects = 0, Predicates = [In64BitMode], Predicates = [HasNDD] in {
154157
def SETZUCCr : I<0x40, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond),
155-
"setzu${cond}\t$dst", []>,
158+
"setzu${cond}\t$dst",
159+
[(set GR8:$dst, (X86setcc timm:$cond, EFLAGS))]>,
156160
XD, ZU, NoCD8, Sched<[WriteSETCC]>;
157161
def SETCCr_EVEX : I<0x40, MRMXrCC, (outs GR8:$dst), (ins ccode:$cond),
158162
"set${cond}\t$dst", []>,

llvm/test/CodeGen/X86/apx/add.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,7 @@ define i1 @add64ri_reloc(i16 %k) {
758758
; CHECK-NEXT: addq %rax, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc0]
759759
; CHECK-NEXT: addq $val, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,A,A,A,A]
760760
; CHECK-NEXT: # fixup A - offset: 2, value: val, kind: reloc_signed_4byte
761-
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
761+
; CHECK-NEXT: setzune %al # encoding: [0x62,0xf4,0x7f,0x18,0x45,0xc0]
762762
; CHECK-NEXT: retq # encoding: [0xc3]
763763
;
764764
; NF-LABEL: add64ri_reloc:
@@ -768,7 +768,7 @@ define i1 @add64ri_reloc(i16 %k) {
768768
; NF-NEXT: addq %rax, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc0]
769769
; NF-NEXT: addq $val, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x05,A,A,A,A]
770770
; NF-NEXT: # fixup A - offset: 2, value: val, kind: reloc_signed_4byte
771-
; NF-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
771+
; NF-NEXT: setzune %al # encoding: [0x62,0xf4,0x7f,0x18,0x45,0xc0]
772772
; NF-NEXT: retq # encoding: [0xc3]
773773
%g = getelementptr inbounds i16, ptr @val, i16 %k
774774
%cmp = icmp ne ptr %g, null

0 commit comments

Comments
 (0)