Skip to content

Commit 97a1570

Browse files
committed
X86InstrInfo: Optimize more combinations of SUB+CMP
`X86InstrInfo::optimizeCompareInstr` would only optimize a `SUB` followed by a `CMP` in `isRedundantFlagInstr`. This extends the code to also look for other combinations like `CMP`+`CMP`, `TEST`+`TEST`, `SUB x,0`+`TEST`. - Change `isRedundantFlagInstr` to run `analyzeCompareInstr` on the candidate instruction and compare the results. This normalizes things and gives consistent results for various comparisons (`CMP x, y`, `SUB x, y`) and immediate cases (`TEST x, x`, `SUB x, 0`, `CMP x, 0`...). - Turn `isRedundantFlagInstr` into a member function so it can call `analyzeCompare`. - We now also check `isRedundantFlagInstr` even if `IsCmpZero` is true, since we now have cases like `TEST`+`TEST`. Differential Revision: https://reviews.llvm.org/D110865
1 parent e50f02b commit 97a1570

File tree

5 files changed

+95
-58
lines changed

5 files changed

+95
-58
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 78 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4011,42 +4011,72 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
40114011
return false;
40124012
}
40134013

4014-
/// Check whether the first instruction, whose only
4015-
/// purpose is to update flags, can be made redundant.
4016-
/// CMPrr can be made redundant by SUBrr if the operands are the same.
4017-
/// This function can be extended later on.
4018-
/// SrcReg, SrcRegs: register operands for FlagI.
4019-
/// ImmValue: immediate for FlagI if it takes an immediate.
4020-
inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
4014+
bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
40214015
Register SrcReg, Register SrcReg2,
40224016
int64_t ImmMask, int64_t ImmValue,
4023-
const MachineInstr &OI) {
4024-
if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
4025-
(FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
4026-
(FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
4027-
(FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) &&
4028-
((OI.getOperand(1).getReg() == SrcReg &&
4029-
OI.getOperand(2).getReg() == SrcReg2) ||
4030-
(OI.getOperand(1).getReg() == SrcReg2 &&
4031-
OI.getOperand(2).getReg() == SrcReg)))
4032-
return true;
4033-
4034-
if (ImmMask != 0 &&
4035-
((FlagI.getOpcode() == X86::CMP64ri32 &&
4036-
OI.getOpcode() == X86::SUB64ri32) ||
4037-
(FlagI.getOpcode() == X86::CMP64ri8 &&
4038-
OI.getOpcode() == X86::SUB64ri8) ||
4039-
(FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) ||
4040-
(FlagI.getOpcode() == X86::CMP32ri8 &&
4041-
OI.getOpcode() == X86::SUB32ri8) ||
4042-
(FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) ||
4043-
(FlagI.getOpcode() == X86::CMP16ri8 &&
4044-
OI.getOpcode() == X86::SUB16ri8) ||
4045-
(FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) &&
4046-
OI.getOperand(1).getReg() == SrcReg &&
4047-
OI.getOperand(2).getImm() == ImmValue)
4048-
return true;
4049-
return false;
4017+
const MachineInstr &OI,
4018+
bool *IsSwapped) const {
4019+
switch (OI.getOpcode()) {
4020+
case X86::CMP64rr:
4021+
case X86::CMP32rr:
4022+
case X86::CMP16rr:
4023+
case X86::CMP8rr:
4024+
case X86::SUB64rr:
4025+
case X86::SUB32rr:
4026+
case X86::SUB16rr:
4027+
case X86::SUB8rr: {
4028+
Register OISrcReg;
4029+
Register OISrcReg2;
4030+
int64_t OIMask;
4031+
int64_t OIValue;
4032+
if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
4033+
OIMask != ImmMask || OIValue != ImmValue)
4034+
return false;
4035+
if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
4036+
*IsSwapped = false;
4037+
return true;
4038+
}
4039+
if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
4040+
*IsSwapped = true;
4041+
return true;
4042+
}
4043+
return false;
4044+
}
4045+
case X86::CMP64ri32:
4046+
case X86::CMP64ri8:
4047+
case X86::CMP32ri:
4048+
case X86::CMP32ri8:
4049+
case X86::CMP16ri:
4050+
case X86::CMP16ri8:
4051+
case X86::CMP8ri:
4052+
case X86::SUB64ri32:
4053+
case X86::SUB64ri8:
4054+
case X86::SUB32ri:
4055+
case X86::SUB32ri8:
4056+
case X86::SUB16ri:
4057+
case X86::SUB16ri8:
4058+
case X86::SUB8ri:
4059+
case X86::TEST64rr:
4060+
case X86::TEST32rr:
4061+
case X86::TEST16rr:
4062+
case X86::TEST8rr: {
4063+
if (ImmMask != 0) {
4064+
Register OISrcReg;
4065+
Register OISrcReg2;
4066+
int64_t OIMask;
4067+
int64_t OIValue;
4068+
if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
4069+
SrcReg == OISrcReg && ImmMask == OIMask && OIValue == ImmValue) {
4070+
assert(SrcReg2 == X86::NoRegister && OISrcReg2 == X86::NoRegister &&
4071+
"should not have 2nd register");
4072+
return true;
4073+
}
4074+
}
4075+
return FlagI.isIdenticalTo(OI);
4076+
}
4077+
default:
4078+
return false;
4079+
}
40504080
}
40514081

40524082
/// Check whether the definition can be converted
@@ -4275,21 +4305,26 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
42754305

42764306
bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
42774307

4308+
// Transformation currently requires SSA values.
4309+
if (SrcReg2.isPhysical())
4310+
return false;
4311+
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
4312+
assert(SrcRegDef && "Must have a definition (SSA)");
4313+
42784314
MachineInstr *MI = nullptr;
42794315
MachineInstr *Sub = nullptr;
42804316
MachineInstr *Movr0Inst = nullptr;
42814317
bool NoSignFlag = false;
42824318
bool ClearsOverflowFlag = false;
42834319
bool ShouldUpdateCC = false;
4320+
bool IsSwapped = false;
42844321
X86::CondCode NewCC = X86::COND_INVALID;
42854322

42864323
// Search backward from CmpInstr for the next instruction defining EFLAGS.
42874324
const TargetRegisterInfo *TRI = &getRegisterInfo();
42884325
MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
42894326
MachineBasicBlock::reverse_iterator From =
42904327
std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
4291-
MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
4292-
assert(SrcRegDef && "Must have a definition (SSA)");
42934328
for (MachineBasicBlock *MBB = &CmpMBB;;) {
42944329
for (MachineInstr &Inst : make_range(From, MBB->rend())) {
42954330
// Try to use EFLAGS from the instruction defining %SrcReg. Example:
@@ -4329,8 +4364,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
43294364
// sub x, y or cmp x, y
43304365
// ... // EFLAGS not changed
43314366
// cmp x, y // <-- can be removed
4332-
if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2,
4333-
CmpMask, CmpValue, Inst)) {
4367+
if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
4368+
Inst, &IsSwapped)) {
43344369
Sub = &Inst;
43354370
break;
43364371
}
@@ -4360,10 +4395,6 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
43604395
From = MBB->rbegin();
43614396
}
43624397

4363-
bool IsSwapped =
4364-
(SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
4365-
Sub->getOperand(2).getReg() == SrcReg);
4366-
43674398
// Scan forward from the instruction after CmpInstr for uses of EFLAGS.
43684399
// It is safe to remove CmpInstr if EFLAGS is redefined or killed.
43694400
// If we are done with the basic block, we need to check whether EFLAGS is
@@ -4386,7 +4417,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
43864417

43874418
// EFLAGS is used by this instruction.
43884419
X86::CondCode OldCC = X86::COND_INVALID;
4389-
if (IsCmpZero || IsSwapped) {
4420+
if (MI || IsSwapped) {
43904421
// We decode the condition code from opcode.
43914422
if (Instr.isBranch())
43924423
OldCC = X86::getCondFromBranch(Instr);
@@ -4398,7 +4429,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
43984429
if (OldCC == X86::COND_INVALID) return false;
43994430
}
44004431
X86::CondCode ReplacementCC = X86::COND_INVALID;
4401-
if (IsCmpZero) {
4432+
if (MI) {
44024433
switch (OldCC) {
44034434
default: break;
44044435
case X86::COND_A: case X86::COND_AE:
@@ -4456,14 +4487,15 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
44564487

44574488
// If EFLAGS is not killed nor re-defined, we should check whether it is
44584489
// live-out. If it is live-out, do not optimize.
4459-
if ((IsCmpZero || IsSwapped) && !IsSafe) {
4490+
if ((MI || IsSwapped) && !IsSafe) {
44604491
for (MachineBasicBlock *Successor : CmpMBB.successors())
44614492
if (Successor->isLiveIn(X86::EFLAGS))
44624493
return false;
44634494
}
44644495

44654496
// The instruction to be updated is either Sub or MI.
4466-
Sub = IsCmpZero ? MI : Sub;
4497+
assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set");
4498+
Sub = MI != nullptr ? MI : Sub;
44674499
MachineBasicBlock *SubBB = Sub->getParent();
44684500
// Move Movr0Inst to the appropriate place before Sub.
44694501
if (Movr0Inst) {

llvm/lib/Target/X86/X86InstrInfo.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,21 @@ class X86InstrInfo final : public X86GenInstrInfo {
626626
unsigned &SrcOpIdx1,
627627
unsigned &SrcOpIdx2,
628628
bool IsIntrinsic = false) const;
629+
630+
/// Returns true when instruction \p FlagI produces the same flags as \p OI.
631+
/// The caller should pass in the results of calling analyzeCompare on \p OI:
632+
/// \p SrcReg, \p SrcReg2, \p ImmMask, \p ImmValue.
633+
/// If the flags match \p OI as if it had the input operands swapped then the
634+
/// function succeeds and sets \p IsSwapped to true.
635+
///
636+
/// Examples of OI, FlagI pairs returning true:
637+
/// CMP %1, 42 and CMP %1, 42
638+
/// CMP %1, %2 and %3 = SUB %1, %2
639+
/// TEST %1, %1 and %2 = SUB %1, 0
640+
/// CMP %1, %2 and %3 = SUB %2, %1 ; IsSwapped=true
641+
bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg,
642+
Register SrcReg2, int64_t ImmMask, int64_t ImmValue,
643+
const MachineInstr &OI, bool *IsSwapped) const;
629644
};
630645

631646
} // namespace llvm

llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ define i16 @main_bb_2E_i9_2E_i_2E_i932_2E_ce(%struct.list* %l_addr.01.0.i2.i.i92
6767
; CHECK-NEXT: LBB0_6: ## %NodeBlock
6868
; CHECK-NEXT: js LBB0_9
6969
; CHECK-NEXT: ## %bb.7: ## %LeafBlock1
70-
; CHECK-NEXT: testl %eax, %eax
7170
; CHECK-NEXT: jne LBB0_3
7271
; CHECK-NEXT: ## %bb.8: ## %bb12.i.i935.exitStub
7372
; CHECK-NEXT: movl %edi, (%esi)

llvm/test/CodeGen/X86/optimize-compare.mir

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,7 @@ body: |
212212
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
213213
; CHECK-NEXT: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags
214214
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
215-
; CHECK-NEXT: CMP32rr [[COPY1]], [[COPY]], implicit-def $eflags
216-
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
215+
; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
217216
%0:gr32 = COPY $esi
218217
%1:gr32 = COPY $edi
219218
CMP32rr %0, %1, implicit-def $eflags
@@ -231,7 +230,6 @@ body: |
231230
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
232231
; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags
233232
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
234-
; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags
235233
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
236234
%0:gr64 = COPY $rsi
237235
%1:gr64 = COPY $rdi
@@ -249,7 +247,6 @@ body: |
249247
; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $ax
250248
; CHECK-NEXT: TEST16rr [[COPY]], [[COPY]], implicit-def $eflags
251249
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
252-
; CHECK-NEXT: TEST16rr [[COPY]], [[COPY]], implicit-def $eflags
253250
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
254251
%0:gr16 = COPY $ax
255252
TEST16rr %0, %0, implicit-def $eflags
@@ -267,8 +264,7 @@ body: |
267264
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
268265
; CHECK-NEXT: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags
269266
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
270-
; CHECK-NEXT: CMP32rr [[COPY1]], [[COPY]], implicit-def $eflags
271-
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
267+
; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags
272268
%0:gr32 = COPY $esi
273269
%1:gr32 = COPY $edi
274270
CMP32rr %0, %1, implicit-def $eflags
@@ -285,7 +281,6 @@ body: |
285281
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
286282
; CHECK-NEXT: CMP32ri [[COPY]], -12345, implicit-def $eflags
287283
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
288-
; CHECK-NEXT: CMP32ri [[COPY]], -12345, implicit-def $eflags
289284
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
290285
%0:gr32 = COPY $esi
291286
CMP32ri %0, -12345, implicit-def $eflags
@@ -323,7 +318,6 @@ body: |
323318
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
324319
; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags
325320
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
326-
; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags
327321
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
328322
%0:gr32 = COPY $esi
329323
CMP32ri8 %0, 0, implicit-def $eflags
@@ -340,7 +334,6 @@ body: |
340334
; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi
341335
; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags
342336
; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags
343-
; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags
344337
; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags
345338
%0:gr32 = COPY $esi
346339
TEST32rr %0, %0, implicit-def $eflags
@@ -359,7 +352,6 @@ body: |
359352
; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
360353
; CHECK-NEXT: CMP64ri32 [[COPY]], @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags
361354
; CHECK-NEXT: $cl = SETCCr 7, implicit $eflags
362-
; CHECK-NEXT: CMP64ri32 [[COPY]], @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags
363355
; CHECK-NEXT: $cl = SETCCr 3, implicit $eflags
364356
%0:gr64 = COPY $rsi
365357
CMP64ri32 %0, @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags

llvm/test/CodeGen/X86/postalloc-coalescing.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ define fastcc i32 @_Z18yy_get_next_bufferv() nounwind {
1515
; CHECK-NEXT: jne .LBB0_1
1616
; CHECK-NEXT: .LBB0_3: # %bb158
1717
; CHECK-NEXT: movb %al, 0
18-
; CHECK-NEXT: cmpl $-1, %eax
1918
; CHECK-NEXT: xorl %eax, %eax
2019
; CHECK-NEXT: retl
2120
entry:

0 commit comments

Comments
 (0)