 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCELFObjectWriter.h"
@@ -103,6 +105,14 @@ cl::opt<bool> X86AlignBranchWithin32BBoundaries(
     "assumptions about labels corresponding to particular instructions, "
     "and should be used with caution."));

+cl::opt<bool> X86PadForAlign(
+    "x86-pad-for-align", cl::init(true), cl::Hidden,
+    cl::desc("Pad previous instructions to implement align directives"));
+
+cl::opt<bool> X86PadForBranchAlign(
+    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
+    cl::desc("Pad previous instructions to implement branch alignment"));
+
 class X86ELFObjectWriter : public MCELFObjectTargetWriter {
 public:
   X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
@@ -173,6 +183,10 @@ class X86AsmBackend : public MCAsmBackend {
   void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
                         MCInst &Res) const override;

+  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
+                              unsigned &RemainingSize) const;
+  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
+
   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
 };
 } // end anonymous namespace
@@ -639,6 +653,168 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst,
   Res.setOpcode(RelaxedOp);
 }

+static bool canBeRelaxedForPadding(const MCRelaxableFragment &RF) {
+  // TODO: There are lots of other tricks we could apply for increasing
+  // encoding size without impacting performance.
+  auto &Inst = RF.getInst();
+  auto &STI = *RF.getSubtargetInfo();
+  bool is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
+  return getRelaxedOpcode(Inst, is16BitMode) != Inst.getOpcode();
+}
+
+bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
+                                           MCCodeEmitter &Emitter,
+                                           unsigned &RemainingSize) const {
+  if (!canBeRelaxedForPadding(RF))
+    return false;
+
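+  // Re-encode the instruction in its relaxed (longer) form so we can measure
+  // exactly how many bytes the growth would add.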
+  MCInst Relaxed;
+  relaxInstruction(RF.getInst(), *RF.getSubtargetInfo(), Relaxed);
+
+  SmallVector<MCFixup, 4> Fixups;
+  SmallString<15> Code;
+  raw_svector_ostream VecOS(Code);
+  Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
+  const unsigned OldSize = RF.getContents().size();
+  const unsigned NewSize = Code.size();
+  assert(NewSize >= OldSize && "size decrease during relaxation?");
+  unsigned Delta = NewSize - OldSize;
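+  // Commit to the relaxed form only if its growth fits within the number of
+  // padding bytes we are still trying to absorb.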
+  if (Delta > RemainingSize)
+    return false;
+  RF.setInst(Relaxed);
+  RF.getContents() = Code;
+  RF.getFixups() = Fixups;
+  RemainingSize -= Delta;
+  return true;
+}
+
+void X86AsmBackend::finishLayout(MCAssembler const &Asm,
+                                 MCAsmLayout &Layout) const {
+  // See if we can further relax some instructions to cut down on the number of
+  // nop bytes required for code alignment.  The actual win is in reducing
+  // instruction count, not number of bytes.  Modern X86-64 can easily end up
+  // decode limited.  It is often better to reduce the number of instructions
+  // (i.e. eliminate nops) even at the cost of increasing the size and
+  // complexity of others.
+  if (!X86PadForAlign && !X86PadForBranchAlign)
+    return;
+
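+  // Collect every fragment that a symbol binds to.  The scan below treats
+  // these as barriers: instructions preceding a label are never grown, which
+  // keeps labels pointing at the instructions they originally marked.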
+  DenseSet<MCFragment *> LabeledFragments;
+  for (const MCSymbol &S : Asm.symbols())
+    LabeledFragments.insert(S.getFragment(false));
+
+  for (MCSection &Sec : Asm) {
+    if (!Sec.getKind().isText())
+      continue;
+
+    SmallVector<MCRelaxableFragment *, 4> Relaxable;
+    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
+      MCFragment &F = *I;
+
+      if (LabeledFragments.count(&F))
+        Relaxable.clear();
+
+      if (F.getKind() == MCFragment::FT_Data ||
+          F.getKind() == MCFragment::FT_CompactEncodedInst)
+        // Skip and ignore
+        continue;
+
+      if (F.getKind() == MCFragment::FT_Relaxable) {
+        auto &RF = cast<MCRelaxableFragment>(*I);
+        Relaxable.push_back(&RF);
+        continue;
+      }
+
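+      // Any other fragment kind ends the run of relaxable instructions
+      // accumulated above; check whether it is an alignment directive whose
+      // padding those instructions may absorb.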
+      auto canHandle = [](MCFragment &F) -> bool {
+        switch (F.getKind()) {
+        default:
+          return false;
+        case MCFragment::FT_Align:
+          return X86PadForAlign;
+        case MCFragment::FT_BoundaryAlign:
+          return X86PadForBranchAlign;
+        }
+      };
+      // For any unhandled kind, assume we can't change layout.
+      if (!canHandle(F)) {
+        Relaxable.clear();
+        continue;
+      }
+
+      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
+      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
+      if (OrigSize == 0 || Relaxable.empty()) {
+        Relaxable.clear();
+        continue;
+      }
+
+      // To keep the effects local, prefer to relax instructions closest to
+      // the align directive.  This is purely about human understandability
+      // of the resulting code.  If we later find a reason to expand
+      // particular instructions over others, we can adjust.
+      MCFragment *FirstChangedFragment = nullptr;
+      unsigned RemainingSize = OrigSize;
+      while (!Relaxable.empty() && RemainingSize != 0) {
+        auto &RF = *Relaxable.pop_back_val();
+        // Give the backend a chance to play any tricks it wishes to increase
+        // the encoding size of the given instruction.  Target-independent
+        // code will try further relaxation, but targets may play further
+        // tricks.
+        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
+          FirstChangedFragment = &RF;
+
+        // If we have an instruction which hasn't been fully relaxed, we
+        // can't skip past it and insert bytes before it.  Changing its
+        // starting offset might require a larger negative offset than it
+        // can encode.  We don't need to worry about larger positive offsets
+        // as none of the possible offsets between this and our align are
+        // visible, and the ones afterwards aren't changing.
+        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
+          break;
+      }
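+      // Instructions before this directive must not also be grown to pad a
+      // later one; that would shift this fragment and break the alignment
+      // just established.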
+      Relaxable.clear();
+
+      if (FirstChangedFragment) {
+        // Make sure the offsets for any fragments in the affected range get
+        // updated.  Note that this (conservatively) invalidates the offsets
+        // of those following, but this is not required.
+        Layout.invalidateFragmentsFrom(FirstChangedFragment);
+      }
+
+      // BoundaryAlign explicitly tracks its size (unlike align)
+      if (F.getKind() == MCFragment::FT_BoundaryAlign)
+        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
+
+      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
+      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
+      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
+             "can't move start of next fragment!");
+      assert(FinalSize == RemainingSize && "inconsistent size computation?");
+
+      // If we're looking at a boundary align, make sure we don't try to pad
+      // its target instructions for some following directive.  Doing so would
+      // break the alignment of the current boundary align.
+      if (F.getKind() == MCFragment::FT_BoundaryAlign) {
+        auto &BF = cast<MCBoundaryAlignFragment>(F);
+        const MCFragment *F = BF.getNextNode();
+        // If the branch is unfused, it is emitted into one fragment;
+        // otherwise it is emitted into at most two fragments.  The next
+        // MCBoundaryAlignFragment (if it exists) also marks the end of the
+        // branch.
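+        // Note that the loop below advances the enclosing section iterator I
+        // in lockstep, so the branch's fragments are skipped by the outer
+        // scan and never become padding candidates.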
+        for (int i = 0, N = BF.isFused() ? 2 : 1;
+             i != N && !isa<MCBoundaryAlignFragment>(F);
+             ++i, F = F->getNextNode(), I++) {
+        }
+      }
+    }
+  }
+
+  // The layout is done.  Mark every fragment as valid.
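+  // Querying the offset and size of each section's last fragment forces the
+  // layout of every fragment before it to be computed and cached.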
+  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
+    MCSection &Section = *Layout.getSectionOrder()[i];
+    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
+    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
+  }
+}
+
 /// Write a sequence of optimal nops to the output, covering \p Count
 /// bytes.
 /// \return - true on success, false on failure