-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86][CodeGen] Support using NF instructions for flag copy lowering #93508
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Shengchen Kan (KanRobert) ChangesPatch is 20.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93508.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index 80ff98b466173..6048f3251048f 100644
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -65,6 +65,7 @@ STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of adds instructions inserted");
+STATISTIC(NumNFsConvertedTo, "Number of NF instructions converted to");
namespace {
@@ -235,6 +236,43 @@ static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
return NewMBB;
}
+enum EFLAGSClobber {
+ NoClobber,
+ EvitableClobber,
+ InevitableClobber
+};
+
+// TODO: Generate the full mapping with tablgen
+unsigned getNFVariant(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return 0;
+ case X86::ADD32rr:
+ return X86::ADD32rr_NF;
+ case X86::ADD32rr_ND:
+ return X86::ADD32rr_NF_ND;
+ case X86::SUB32rr:
+ return X86::SUB32rr_NF;
+ case X86::SUB32rr_ND:
+ return X86::SUB32rr_NF_ND;
+ case X86::IMUL32rr:
+ return X86::IMUL32rr_NF;
+ case X86::IMUL32rr_ND:
+ return X86::IMUL32rr_NF_ND;
+ }
+}
+
+static EFLAGSClobber getClobberType(const MachineInstr &MI) {
+ const MachineOperand *FlagDef =
+ MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
+ if (!FlagDef)
+ return NoClobber;
+ if (FlagDef->isDead() && getNFVariant(MI.getOpcode()))
+ return EvitableClobber;
+
+ return InevitableClobber;
+}
+
bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
<< " **********\n");
@@ -254,14 +292,100 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
// turn copied again we visit the first one first. This ensures we can find
// viable locations for testing the original EFLAGS that dominate all the
// uses across complex CFGs.
- SmallVector<MachineInstr *, 4> Copies;
+ SmallSetVector<MachineInstr *, 4> Copies;
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
for (MachineBasicBlock *MBB : RPOT)
for (MachineInstr &MI : *MBB)
if (MI.getOpcode() == TargetOpcode::COPY &&
MI.getOperand(0).getReg() == X86::EFLAGS)
- Copies.push_back(&MI);
+ Copies.insert(&MI);
+
+ // Try to elminate the copys by transform the instructions between copy and
+ // copydef to the NF (no flags update) variants, e.g.
+ //
+ // %1:gr64 = COPY $eflags
+ // OP1 implicit-def dead $eflags
+ // $eflags = COPY %1
+ // OP2 cc, implicit $eflags
+ //
+ // ->
+ //
+ // OP1_NF
+ // OP2 implicit $eflags
+ if (Subtarget->hasNF()) {
+ SmallSetVector<MachineInstr *, 4> RemovedCopies;
+ // CopyIIt may be invalidated by removing copies.
+ auto CopyIIt = Copies.begin(), CopyIEnd = Copies.end();
+ while (CopyIIt != CopyIEnd) {
+ auto NCopyIIt = std::next(CopyIIt);
+ SmallSetVector<MachineInstr *, 4> EvitableClobbers;
+ MachineInstr *CopyI = *CopyIIt;
+ MachineOperand &VOp = CopyI->getOperand(1);
+ MachineInstr *CopyDefI = MRI->getVRegDef(VOp.getReg());
+ MachineBasicBlock *CopyIMBB = CopyI->getParent();
+ MachineBasicBlock *CopyDefIMBB = CopyDefI->getParent();
+ // Walk all basic blocks reachable in depth-first iteration on the inverse
+ // CFG from CopyIMBB to CopyDefIMBB. These blocks are all the blocks that
+ // may be executed between the execution of CopyDefIMBB and CopyIMBB. On
+ // all execution paths, instructions from CopyDefI to CopyI (exclusive)
+ // has to be NF-convertible if it clobbers flags.
+ for (auto BI = idf_begin(CopyIMBB), BE = idf_end(CopyDefIMBB); BI != BE;
+ ++BI) {
+ MachineBasicBlock *MBB = *BI;
+ for (auto I = (MBB != CopyDefIMBB)
+ ? MBB->begin()
+ : std::next(MachineBasicBlock::iterator(CopyDefI)),
+ E = (MBB != CopyIMBB) ? MBB->end()
+ : MachineBasicBlock::iterator(CopyI);
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ EFLAGSClobber ClobberType = getClobberType(MI);
+ if (ClobberType == NoClobber)
+ continue;
+
+ if (ClobberType == InevitableClobber)
+ goto ProcessNextCopyI;
+
+ assert(ClobberType == EvitableClobber && "unexpected workflow");
+ EvitableClobbers.insert(&MI);
+ }
+ }
+ // Covert evitable clobbers into NF variants and remove the copyies.
+ RemovedCopies.insert(CopyI);
+ RemovedCopies.insert(CopyDefI);
+ CopyI->eraseFromParent();
+ CopyDefI->eraseFromParent();
+ ++NumCopiesEliminated;
+ for (auto *Clobber : EvitableClobbers) {
+ unsigned NewOpc = getNFVariant(Clobber->getOpcode());
+ assert(NewOpc && "evitable clobber must have a NF variant");
+ Clobber->setDesc(TII->get(NewOpc));
+ Clobber->removeOperand(
+ Clobber->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)
+ ->getOperandNo());
+ ++NumNFsConvertedTo;
+ }
+ ProcessNextCopyI:
+ CopyIIt = NCopyIIt;
+ }
+ Copies.set_subtract(RemovedCopies);
+ }
+ // For the rest of copies that cannot be eliminated by NF transform, we use
+ // setcc to preserve the flags in GPR32 before OP1, and recheck its value
+ // before using the flags, e.g.
+ //
+ // %1:gr64 = COPY $eflags
+ // OP1 implicit-def dead $eflags
+ // $eflags = COPY %1
+ // OP2 cc, implicit $eflags
+ //
+ // ->
+ //
+ // %1:gr8 = SETCCr cc, implicit $eflags
+ // OP1 implicit-def dead $eflags
+ // TEST8rr %1, %1, implicit-def $eflags
+ // OP2 ne, implicit $eflags
for (MachineInstr *CopyI : Copies) {
MachineBasicBlock &MBB = *CopyI->getParent();
diff --git a/llvm/test/CodeGen/X86/apx/flags-copy-lowering.mir b/llvm/test/CodeGen/X86/apx/flags-copy-lowering.mir
index b7cadc7afe003..76992df8e1e23 100644
--- a/llvm/test/CodeGen/X86/apx/flags-copy-lowering.mir
+++ b/llvm/test/CodeGen/X86/apx/flags-copy-lowering.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -mtriple=x86_64 -run-pass x86-flags-copy-lowering -mattr=+ndd -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64 -run-pass x86-flags-copy-lowering -mattr=+ndd -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,NDD %s
+# RUN: llc -mtriple=x86_64 -run-pass x86-flags-copy-lowering -mattr=+ndd,+nf -verify-machineinstrs -o - %s | FileCheck --check-prefixes=CHECK,NDD-NF %s
# Lower various interesting copy patterns of EFLAGS without using LAHF/SAHF.
...
@@ -241,15 +242,23 @@ body: |
bb.0:
liveins: $edi
- ; CHECK-LABEL: name: test_ccmp
- ; CHECK: liveins: $edi
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
- ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 1, implicit $eflags
- ; CHECK-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr $edi, $edi, implicit-def dead $eflags
- ; CHECK-NEXT: TEST8rr [[SETCCr]], [[SETCCr]], implicit-def $eflags
- ; CHECK-NEXT: CCMP32rr [[ADD32rr]], [[ADD32rr]], 0, 5, implicit-def $eflags, implicit killed $eflags
- ; CHECK-NEXT: RET 0, $al
+ ; NDD-LABEL: name: test_ccmp
+ ; NDD: liveins: $edi
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; NDD-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 1, implicit $eflags
+ ; NDD-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr $edi, $edi, implicit-def dead $eflags
+ ; NDD-NEXT: TEST8rr [[SETCCr]], [[SETCCr]], implicit-def $eflags
+ ; NDD-NEXT: CCMP32rr [[ADD32rr]], [[ADD32rr]], 0, 5, implicit-def $eflags, implicit killed $eflags
+ ; NDD-NEXT: RET 0, $al
+ ;
+ ; NDD-NF-LABEL: name: test_ccmp
+ ; NDD-NF: liveins: $edi
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; NDD-NF-NEXT: [[ADD32rr_NF:%[0-9]+]]:gr32 = ADD32rr_NF $edi, $edi
+ ; NDD-NF-NEXT: CCMP32rr [[ADD32rr_NF]], [[ADD32rr_NF]], 0, 1, implicit-def $eflags, implicit $eflags
+ ; NDD-NF-NEXT: RET 0, $al
MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
%1:gr64 = COPY $eflags
%2:gr32 = ADD32rr $edi, $edi, implicit-def dead $eflags
@@ -264,20 +273,289 @@ body: |
bb.0:
liveins: $edi
- ; CHECK-LABEL: name: test_ctest
+ ; NDD-LABEL: name: test_ctest
+ ; NDD: liveins: $edi
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; NDD-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 1, implicit $eflags
+ ; NDD-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr $edi, $edi, implicit-def dead $eflags
+ ; NDD-NEXT: TEST8rr [[SETCCr]], [[SETCCr]], implicit-def $eflags
+ ; NDD-NEXT: CTEST32rr [[ADD32rr]], [[ADD32rr]], 0, 5, implicit-def $eflags, implicit killed $eflags
+ ; NDD-NEXT: RET 0, $al
+ ;
+ ; NDD-NF-LABEL: name: test_ctest
+ ; NDD-NF: liveins: $edi
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; NDD-NF-NEXT: [[ADD32rr_NF:%[0-9]+]]:gr32 = ADD32rr_NF $edi, $edi
+ ; NDD-NF-NEXT: CTEST32rr [[ADD32rr_NF]], [[ADD32rr_NF]], 0, 1, implicit-def $eflags, implicit $eflags
+ ; NDD-NF-NEXT: RET 0, $al
+ MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ %1:gr64 = COPY $eflags
+ %2:gr32 = ADD32rr $edi, $edi, implicit-def dead $eflags
+ $eflags = COPY %1
+ CTEST32rr %2, %2, 0, 1, implicit-def $eflags, implicit $eflags
+ RET 0, $al
+
+...
+---
+name: test_evitable_clobber
+body: |
+ bb.0:
+ liveins: $edi
+
+ ; NDD-LABEL: name: test_evitable_clobber
+ ; NDD: liveins: $edi
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; NDD-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 7, implicit $eflags
+ ; NDD-NEXT: [[ADD32rr_ND:%[0-9]+]]:gr32 = ADD32rr_ND $edi, $edi, implicit-def dead $eflags
+ ; NDD-NEXT: TEST8rr [[SETCCr]], [[SETCCr]], implicit-def $eflags
+ ; NDD-NEXT: $eax = CMOV32rr_ND $edi, [[ADD32rr_ND]], 5, implicit killed $eflags
+ ; NDD-NEXT: RET 0, $al
+ ;
+ ; NDD-NF-LABEL: name: test_evitable_clobber
+ ; NDD-NF: liveins: $edi
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; NDD-NF-NEXT: [[ADD32rr_NF_ND:%[0-9]+]]:gr32 = ADD32rr_NF_ND $edi, $edi
+ ; NDD-NF-NEXT: $eax = CMOV32rr_ND $edi, [[ADD32rr_NF_ND]], 7, implicit $eflags
+ ; NDD-NF-NEXT: RET 0, $al
+ MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ %1:gr64 = COPY $eflags
+ %2:gr32 = ADD32rr_ND $edi, $edi, implicit-def dead $eflags
+ $eflags = COPY %1
+ $eax = CMOV32rr_ND $edi, %2, 7, implicit $eflags
+ RET 0, $al
+
+...
+---
+name: test_inevitable_clobber
+body: |
+ bb.0:
+ liveins: $edi
+
+ ; CHECK-LABEL: name: test_inevitable_clobber
; CHECK: liveins: $edi
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
- ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 1, implicit $eflags
- ; CHECK-NEXT: [[ADD32rr:%[0-9]+]]:gr32 = ADD32rr $edi, $edi, implicit-def dead $eflags
+ ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 7, implicit $eflags
+ ; CHECK-NEXT: [[ADC32rr_ND:%[0-9]+]]:gr32 = ADC32rr_ND $edi, $edi, implicit-def dead $eflags, implicit $eflags
; CHECK-NEXT: TEST8rr [[SETCCr]], [[SETCCr]], implicit-def $eflags
- ; CHECK-NEXT: CTEST32rr [[ADD32rr]], [[ADD32rr]], 0, 5, implicit-def $eflags, implicit killed $eflags
+ ; CHECK-NEXT: $eax = CMOV32rr_ND $edi, [[ADC32rr_ND]], 5, implicit killed $eflags
; CHECK-NEXT: RET 0, $al
MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
%1:gr64 = COPY $eflags
- %2:gr32 = ADD32rr $edi, $edi, implicit-def dead $eflags
+ %2:gr32 = ADC32rr_ND $edi, $edi, implicit-def dead $eflags, implicit $eflags
$eflags = COPY %1
- CTEST32rr %2, %2, 0, 1, implicit-def $eflags, implicit $eflags
+ $eax = CMOV32rr_ND $edi, %2, 7, implicit $eflags
+ RET 0, $al
+...
+---
+name: test_evitable_clobber_crossbb
+body: |
+ ; NDD-LABEL: name: test_evitable_clobber_crossbb
+ ; NDD: bb.0:
+ ; NDD-NEXT: successors: %bb.1(0x80000000)
+ ; NDD-NEXT: liveins: $edi
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; NDD-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 7, implicit $eflags
+ ; NDD-NEXT: [[SETCCr1:%[0-9]+]]:gr8 = SETCCr 2, implicit $eflags
+ ; NDD-NEXT: [[ADD32rr_ND:%[0-9]+]]:gr32 = ADD32rr_ND $edi, $edi, implicit-def dead $eflags
+ ; NDD-NEXT: JCC_1 %bb.1, 4, implicit $eflags
+ ; NDD-NEXT: RET 0, $al
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: bb.1:
+ ; NDD-NEXT: TEST8rr [[SETCCr]], [[SETCCr]], implicit-def $eflags
+ ; NDD-NEXT: $eax = CMOV32rr_ND $edi, [[ADD32rr_ND]], 5, implicit killed $eflags
+ ; NDD-NEXT: dead [[ADD8ri_ND:%[0-9]+]]:gr8 = ADD8ri_ND [[SETCCr1]], 255, implicit-def $eflags
+ ; NDD-NEXT: $eax = ADC32rr_ND $eax, $edi, implicit-def dead $eflags, implicit killed $eflags
+ ; NDD-NEXT: RET 0, $al
+ ;
+ ; NDD-NF-LABEL: name: test_evitable_clobber_crossbb
+ ; NDD-NF: bb.0:
+ ; NDD-NF-NEXT: successors: %bb.1(0x80000000)
+ ; NDD-NF-NEXT: liveins: $edi
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; NDD-NF-NEXT: [[ADD32rr_NF_ND:%[0-9]+]]:gr32 = ADD32rr_NF_ND $edi, $edi
+ ; NDD-NF-NEXT: JCC_1 %bb.1, 4, implicit $eflags
+ ; NDD-NF-NEXT: RET 0, $al
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: bb.1:
+ ; NDD-NF-NEXT: $eax = CMOV32rr_ND $edi, [[ADD32rr_NF_ND]], 7, implicit $eflags
+ ; NDD-NF-NEXT: $eax = ADC32rr_ND $eax, $edi, implicit-def dead $eflags, implicit $eflags
+ ; NDD-NF-NEXT: RET 0, $al
+ bb.0:
+ liveins: $edi
+
+ MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ %1:gr64 = COPY $eflags
+ %2:gr32 = ADD32rr_ND $edi, $edi, implicit-def dead $eflags
+ JCC_1 %bb.1, 4, implicit $eflags
RET 0, $al
+ bb.1:
+ $eflags = COPY %1
+ $eax = CMOV32rr_ND $edi, %2, 7, implicit $eflags
+ $eax = ADC32rr_ND $eax, $edi, implicit-def dead $eflags, implicit $eflags
+ RET 0, $al
+...
+---
+name: test_inevitable_clobber_crossbb
+body: |
+ ; CHECK-LABEL: name: test_inevitable_clobber_crossbb
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $edi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 7, implicit $eflags
+ ; CHECK-NEXT: [[ADC32rr_ND:%[0-9]+]]:gr32 = ADC32rr_ND $edi, $edi, implicit-def dead $eflags, implicit $eflags
+ ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags
+ ; CHECK-NEXT: RET 0, $al
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: TEST8rr [[SETCCr]], [[SETCCr]], implicit-def $eflags
+ ; CHECK-NEXT: $eax = CMOV32rr_ND $edi, [[ADC32rr_ND]], 5, implicit killed $eflags
+ ; CHECK-NEXT: RET 0, $al
+ bb.0:
+ liveins: $edi
+
+ MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ %1:gr64 = COPY $eflags
+ %2:gr32 = ADC32rr_ND $edi, $edi, implicit-def dead $eflags, implicit $eflags
+ JCC_1 %bb.1, 4, implicit $eflags
+ RET 0, $al
+
+ bb.1:
+ $eflags = COPY %1
+ $eax = CMOV32rr_ND $edi, %2, 7, implicit $eflags
+ RET 0, $al
+...
+---
+name: test_evitable_clobber_crossbb_complex
+body: |
+ ; NDD-LABEL: name: test_evitable_clobber_crossbb_complex
+ ; NDD: bb.0:
+ ; NDD-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; NDD-NEXT: liveins: $edi
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; NDD-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 7, implicit $eflags
+ ; NDD-NEXT: [[SUB32rr_ND:%[0-9]+]]:gr32 = SUB32rr_ND $edi, $edi, implicit-def dead $eflags
+ ; NDD-NEXT: JCC_1 %bb.2, 4, implicit $eflags
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: bb.1:
+ ; NDD-NEXT: successors: %bb.3(0x80000000)
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: $eax = IMUL32rr_ND $eax, $edi, implicit-def dead $eflags
+ ; NDD-NEXT: JMP_1 %bb.3
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: bb.2:
+ ; NDD-NEXT: successors: %bb.3(0x80000000)
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: $eax = IMUL32rr $eax, $esi, implicit-def dead $eflags
+ ; NDD-NEXT: JMP_1 %bb.3
+ ; NDD-NEXT: {{ $}}
+ ; NDD-NEXT: bb.3:
+ ; NDD-NEXT: TEST8rr [[SETCCr]], [[SETCCr]], implicit-def $eflags
+ ; NDD-NEXT: $eax = CMOV32rr_ND $edi, [[SUB32rr_ND]], 5, implicit killed $eflags
+ ; NDD-NEXT: RET 0, $al
+ ;
+ ; NDD-NF-LABEL: name: test_evitable_clobber_crossbb_complex
+ ; NDD-NF: bb.0:
+ ; NDD-NF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; NDD-NF-NEXT: liveins: $edi
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; NDD-NF-NEXT: [[SUB32rr_NF_ND:%[0-9]+]]:gr32 = SUB32rr_NF_ND $edi, $edi
+ ; NDD-NF-NEXT: JCC_1 %bb.2, 4, implicit $eflags
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: bb.1:
+ ; NDD-NF-NEXT: successors: %bb.3(0x80000000)
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: $eax = IMUL32rr_NF_ND $eax, $edi
+ ; NDD-NF-NEXT: JMP_1 %bb.3
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: bb.2:
+ ; NDD-NF-NEXT: successors: %bb.3(0x80000000)
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: $eax = IMUL32rr_NF $eax, $esi
+ ; NDD-NF-NEXT: JMP_1 %bb.3
+ ; NDD-NF-NEXT: {{ $}}
+ ; NDD-NF-NEXT: bb.3:
+ ; NDD-NF-NEXT: $eax = CMOV32rr_ND $edi, [[SUB32rr_NF_ND]], 7, implicit $eflags
+ ; NDD-NF-NEXT: RET 0, $al
+ bb.0:
+ liveins: $edi
+
+ MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ %1:gr64 = COPY $eflags
+ %2:gr32 = SUB32rr_ND $edi, $edi, implicit-def dead $eflags
+ JCC_1 %bb.2, 4, implicit $eflags
+
+ bb.1:
+ $eax = IMUL32rr_ND $eax, $edi, implicit-def dead $eflags
+ JMP_1 %bb.3
+
+ bb.2:
+ $eax = IMUL32rr $eax, $esi, implicit-def dead $eflags
+ JMP_1 %bb.3
+
+ bb.3:
+ $eflags = COPY %1
+ $eax = CMOV32rr_ND $edi, %2, 7, implicit $eflags
+ RET 0, $al
+...
+---
+name: test_inevitable_clobber_crossbb_complex
+body: |
+ ; CHECK-LABEL: name: test_inevitable_clobber_crossbb_complex
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $edi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: MUL32r $edi, implicit-def $eax, implicit-def dead $edx, implicit-def $eflags, implicit $eax
+ ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 7, implicit $eflags
+ ; CHECK-NEXT: [[SUB32rr_ND:%[0-9]+]]:gr32 = SUB32rr_ND $edi, $edi, implicit-def dead $eflags
+ ; CHECK-NEXT: JCC_1 %bb.2, 4, implicit $eflags
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $eax = IMUL32rr_ND $eax, $edi, implicit-def dead $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $eax = SBB32rr $eax, $esi, implicit-def dead $eflags, implicit $eflags
+ ; CHECK-NEXT: JMP_1 %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: TEST8rr [[SETCCr]], [[SETCCr]], implicit-def $eflags
+ ; CHECK-NEXT: $eax = ...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
The generated table will be used in #93508
@phoebewang @e-kud @FreddyLeaf This PR is ready for review. |
The generated table will be used in llvm#93508
ret <2 x i128> %z | ||
} | ||
|
||
; TODO: Remove the 2nd cmpl by using NF imul. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I checked with this, firstly this was done by sinkCmpExpression
in CodeGenPrepare.
But even we don't do the sink, e.g., https://godbolt.org/z/fMbsnrYdo
We still cannot pass it through EFLAGS
.
That means, We can always assume the def and use of EFLAGS
occur within the same BB. So you don't need to traverse BBs to search the dead def EFLAGS. Searching inside the BB is enough. Or if you have concern, you can add an assert to make sure of def/use of EFLAGS
are in the same BB.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Offline talked with Phoebe. We need to traverse BBs, and at the same time, we need to update liveins of BBs.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done @phoebewang
if (ClobberType == InevitableClobber) | ||
goto ProcessNextCopyI; | ||
|
||
assert(ClobberType == EvitableClobber && "unexpected workflow"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can't this be failed? I think we cannot transform another $eflags = COPY %x
in MBB
at least, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then it's a InevitableClobber and handled at line 323.
Ping @phoebewang ? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
No description provided.