Skip to content

Commit 64454a7

Browse files
rafaelauler authored and GerritBuildBot committed
Rebase: [Facebook] [MC] Introduce NeverAlign fragment type
Summary: Introduce NeverAlign fragment type. The intended usage of this fragment is to insert it before a pair of macro-op fusion eligible instructions. NeverAlign fragment ensures that the next fragment (first instruction in the pair) does not end at a given alignment boundary by emitting a minimal size nop if necessary. In effect, it ensures that a pair of macro-fusible instructions is not split by a given alignment boundary, which is a precondition for macro-op fusion in modern Intel Cores (64B = cache line size, see Intel Architecture Optimization Reference Manual, 2.3.2.1 Legacy Decode Pipeline: Macro-Fusion). This patch introduces functionality used by BOLT when emitting code with MacroFusion alignment already in place. The use case is different from BoundaryAlign and instruction bundling: - BoundaryAlign can be extended to perform the desired alignment for the first instruction in the macro-op fusion pair (D101817). However, this approach has higher overhead due to reliance on relaxation as BoundaryAlign requires in the general case - see https://reviews.llvm.org/D97982#2710638. - Instruction bundling: the intent of NeverAlign fragment is to prevent the first instruction in a pair ending at a given alignment boundary, by inserting at most one minimum size nop. It's OK if either instruction crosses the cache line. Padding both instructions using bundles to not cross the alignment boundary would result in excessive padding. There's no straightforward way to request instruction bundling to avoid a given end alignment for the first instruction in the bundle. LLVM: https://reviews.llvm.org/D97982 Manual rebase conflict history: https://phabricator.intern.facebook.com/D30142613 Test Plan: sandcastle Reviewers: #llvm-bolt Subscribers: phabricatorlinter Differential Revision: https://phabricator.intern.facebook.com/D31361547
1 parent b2fe35a commit 64454a7

File tree

10 files changed

+363
-37
lines changed

10 files changed

+363
-37
lines changed

bolt/lib/Core/BinaryEmitter.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
487487
// This assumes the second instruction in the macro-op pair will get
488488
// assigned to its own MCRelaxableFragment. Since all JCC instructions
489489
// are relaxable, we should be safe.
490+
Streamer.emitNeverAlignCodeAtEnd(/*Alignment to avoid=*/64, *BC.STI);
490491
}
491492

492493
if (!EmitCodeOnly) {

llvm/include/llvm/MC/MCFragment.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> {
3333
public:
3434
enum FragmentType : uint8_t {
3535
FT_Align,
36+
FT_NeverAlign,
3637
FT_Data,
3738
FT_CompactEncodedInst,
3839
FT_Fill,
@@ -344,6 +345,27 @@ class MCAlignFragment : public MCFragment {
344345
}
345346
};
346347

348+
class MCNeverAlignFragment : public MCFragment {
349+
/// The alignment the end of the next fragment should avoid.
350+
unsigned Alignment;
351+
352+
/// When emitting Nops some subtargets have specific nop encodings.
353+
const MCSubtargetInfo &STI;
354+
355+
public:
356+
MCNeverAlignFragment(unsigned Alignment, const MCSubtargetInfo &STI,
357+
MCSection *Sec = nullptr)
358+
: MCFragment(FT_NeverAlign, false, Sec), Alignment(Alignment), STI(STI) {}
359+
360+
unsigned getAlignment() const { return Alignment; }
361+
362+
const MCSubtargetInfo &getSubtargetInfo() const { return STI; }
363+
364+
static bool classof(const MCFragment *F) {
365+
return F->getKind() == MCFragment::FT_NeverAlign;
366+
}
367+
};
368+
347369
class MCFillFragment : public MCFragment {
348370
uint8_t ValueSize;
349371
/// Value to use for filling bytes.

llvm/include/llvm/MC/MCObjectStreamer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ class MCObjectStreamer : public MCStreamer {
157157
unsigned MaxBytesToEmit = 0) override;
158158
void emitCodeAlignment(Align ByteAlignment, const MCSubtargetInfo *STI,
159159
unsigned MaxBytesToEmit = 0) override;
160+
void emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
161+
const MCSubtargetInfo &STI) override;
160162
void emitValueToOffset(const MCExpr *Offset, unsigned char Value,
161163
SMLoc Loc) override;
162164
void emitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column,

llvm/include/llvm/MC/MCStreamer.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,12 @@ class MCStreamer {
887887
virtual void emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI,
888888
unsigned MaxBytesToEmit = 0);
889889

890+
/// If the end of the fragment following this NeverAlign fragment ever gets
891+
/// aligned to \p ByteAlignment, this fragment emits a single nop before the
892+
/// following fragment to break this end-alignment.
893+
virtual void emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
894+
const MCSubtargetInfo &STI);
895+
890896
/// Emit some number of copies of \p Value until the byte offset \p
891897
/// Offset is reached.
892898
///

llvm/lib/MC/MCAssembler.cpp

Lines changed: 81 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,43 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, const MCFixup &Fixup,
298298
return IsResolved;
299299
}
300300

301+
/// Check if the branch crosses the boundary.
302+
///
303+
/// \param StartAddr start address of the fused/unfused branch.
304+
/// \param Size size of the fused/unfused branch.
305+
/// \param BoundaryAlignment alignment requirement of the branch.
306+
/// \returns true if the branch cross the boundary.
307+
static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size,
308+
Align BoundaryAlignment) {
309+
uint64_t EndAddr = StartAddr + Size;
310+
return (StartAddr >> Log2(BoundaryAlignment)) !=
311+
((EndAddr - 1) >> Log2(BoundaryAlignment));
312+
}
313+
314+
/// Check if the branch is against the boundary.
315+
///
316+
/// \param StartAddr start address of the fused/unfused branch.
317+
/// \param Size size of the fused/unfused branch.
318+
/// \param BoundaryAlignment alignment requirement of the branch.
319+
/// \returns true if the branch is against the boundary.
320+
static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size,
321+
Align BoundaryAlignment) {
322+
uint64_t EndAddr = StartAddr + Size;
323+
return (EndAddr & (BoundaryAlignment.value() - 1)) == 0;
324+
}
325+
326+
/// Check if the branch needs padding.
327+
///
328+
/// \param StartAddr start address of the fused/unfused branch.
329+
/// \param Size size of the fused/unfused branch.
330+
/// \param BoundaryAlignment alignment requirement of the branch.
331+
/// \returns true if the branch needs padding.
332+
static bool needPadding(uint64_t StartAddr, uint64_t Size,
333+
Align BoundaryAlignment) {
334+
return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) ||
335+
isAgainstBoundary(StartAddr, Size, BoundaryAlignment);
336+
}
337+
301338
uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
302339
const MCFragment &F) const {
303340
assert(getBackendPtr() && "Requires assembler backend");
@@ -358,6 +395,41 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
358395
return Size;
359396
}
360397

398+
case MCFragment::FT_NeverAlign: {
399+
// Disclaimer: NeverAlign fragment size depends on the size of its immediate
400+
// successor, but NeverAlign need not be a MCRelaxableFragment.
401+
// NeverAlign fragment size is recomputed if the successor is relaxed:
402+
// - If RelaxableFragment is relaxed, it gets invalidated by marking its
403+
// predecessor as LastValidFragment.
404+
// - This forces the assembler to call MCAsmLayout::layoutFragment on that
405+
// relaxable fragment, which in turn will always ask the predecessor to
406+
// compute its size (see "computeFragmentSize(prev)" in layoutFragment).
407+
//
408+
// In short, the simplest way to ensure that computeFragmentSize() is sane
409+
// is to establish the following rule: it should never examine fragments
410+
// after the current fragment in the section. If we logically need to
411+
// examine any fragment after the current fragment, we need to do that using
412+
// relaxation, inside MCAssembler::layoutSectionOnce.
413+
const MCNeverAlignFragment &NAF = cast<MCNeverAlignFragment>(F);
414+
const MCFragment *NF = F.getNextNode();
415+
uint64_t Offset = Layout.getFragmentOffset(&NAF);
416+
size_t NextFragSize = 0;
417+
if (const auto *NextFrag = dyn_cast<MCRelaxableFragment>(NF)) {
418+
NextFragSize = NextFrag->getContents().size();
419+
} else if (const auto *NextFrag = dyn_cast<MCDataFragment>(NF)) {
420+
NextFragSize = NextFrag->getContents().size();
421+
} else {
422+
llvm_unreachable("Didn't find the expected fragment after NeverAlign");
423+
}
424+
// Check if the next fragment ends at the alignment we want to avoid.
425+
if (isAgainstBoundary(Offset, NextFragSize, Align(NAF.getAlignment()))) {
426+
// Avoid this alignment by introducing minimum nop.
427+
assert(getBackend().getMinimumNopSize() != NAF.getAlignment());
428+
return getBackend().getMinimumNopSize();
429+
}
430+
return 0;
431+
}
432+
361433
case MCFragment::FT_Org: {
362434
const MCOrgFragment &OF = cast<MCOrgFragment>(F);
363435
MCValue Value;
@@ -581,6 +653,15 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
581653
break;
582654
}
583655

656+
case MCFragment::FT_NeverAlign: {
657+
const MCNeverAlignFragment &NAF = cast<MCNeverAlignFragment>(F);
658+
if (!Asm.getBackend().writeNopData(OS, FragmentSize,
659+
&NAF.getSubtargetInfo()))
660+
report_fatal_error("unable to write nop sequence of " +
661+
Twine(FragmentSize) + " bytes");
662+
break;
663+
}
664+
584665
case MCFragment::FT_Data:
585666
++stats::EmittedDataFragments;
586667
OS << cast<MCDataFragment>(F).getContents();
@@ -1052,43 +1133,6 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
10521133
return OldSize != LF.getContents().size();
10531134
}
10541135

1055-
/// Check if the branch crosses the boundary.
1056-
///
1057-
/// \param StartAddr start address of the fused/unfused branch.
1058-
/// \param Size size of the fused/unfused branch.
1059-
/// \param BoundaryAlignment alignment requirement of the branch.
1060-
/// \returns true if the branch cross the boundary.
1061-
static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size,
1062-
Align BoundaryAlignment) {
1063-
uint64_t EndAddr = StartAddr + Size;
1064-
return (StartAddr >> Log2(BoundaryAlignment)) !=
1065-
((EndAddr - 1) >> Log2(BoundaryAlignment));
1066-
}
1067-
1068-
/// Check if the branch is against the boundary.
1069-
///
1070-
/// \param StartAddr start address of the fused/unfused branch.
1071-
/// \param Size size of the fused/unfused branch.
1072-
/// \param BoundaryAlignment alignment requirement of the branch.
1073-
/// \returns true if the branch is against the boundary.
1074-
static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size,
1075-
Align BoundaryAlignment) {
1076-
uint64_t EndAddr = StartAddr + Size;
1077-
return (EndAddr & (BoundaryAlignment.value() - 1)) == 0;
1078-
}
1079-
1080-
/// Check if the branch needs padding.
1081-
///
1082-
/// \param StartAddr start address of the fused/unfused branch.
1083-
/// \param Size size of the fused/unfused branch.
1084-
/// \param BoundaryAlignment alignment requirement of the branch.
1085-
/// \returns true if the branch needs padding.
1086-
static bool needPadding(uint64_t StartAddr, uint64_t Size,
1087-
Align BoundaryAlignment) {
1088-
return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) ||
1089-
isAgainstBoundary(StartAddr, Size, BoundaryAlignment);
1090-
}
1091-
10921136
bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout,
10931137
MCBoundaryAlignFragment &BF) {
10941138
// BoundaryAlignFragment that doesn't need to align any fragment should not be

llvm/lib/MC/MCFragment.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,9 @@ void MCFragment::destroy() {
274274
case FT_Align:
275275
delete cast<MCAlignFragment>(this);
276276
return;
277+
case FT_NeverAlign:
278+
delete cast<MCNeverAlignFragment>(this);
279+
return;
277280
case FT_Data:
278281
delete cast<MCDataFragment>(this);
279282
return;
@@ -342,6 +345,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
342345
OS << "<";
343346
switch (getKind()) {
344347
case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
348+
case MCFragment::FT_NeverAlign:
349+
OS << "MCNeverAlignFragment";
350+
break;
345351
case MCFragment::FT_Data: OS << "MCDataFragment"; break;
346352
case MCFragment::FT_CompactEncodedInst:
347353
OS << "MCCompactEncodedInstFragment"; break;
@@ -381,6 +387,12 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
381387
<< " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
382388
break;
383389
}
390+
case MCFragment::FT_NeverAlign: {
391+
const MCNeverAlignFragment *NAF = cast<MCNeverAlignFragment>(this);
392+
OS << "\n ";
393+
OS << " Alignment:" << NAF->getAlignment() << ">";
394+
break;
395+
}
384396
case MCFragment::FT_Data: {
385397
const auto *DF = cast<MCDataFragment>(this);
386398
OS << "\n ";

llvm/lib/MC/MCObjectStreamer.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,11 @@ void MCObjectStreamer::emitCodeAlignment(Align Alignment,
658658
cast<MCAlignFragment>(getCurrentFragment())->setEmitNops(true, STI);
659659
}
660660

661+
void MCObjectStreamer::emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
662+
const MCSubtargetInfo &STI) {
663+
insert(new MCNeverAlignFragment(ByteAlignment, STI));
664+
}
665+
661666
void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
662667
unsigned char Value,
663668
SMLoc Loc) {

llvm/lib/MC/MCStreamer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,6 +1235,8 @@ void MCStreamer::emitValueToAlignment(Align Alignment, int64_t Value,
12351235
unsigned MaxBytesToEmit) {}
12361236
void MCStreamer::emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI,
12371237
unsigned MaxBytesToEmit) {}
1238+
void MCStreamer::emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
1239+
const MCSubtargetInfo &STI) {}
12381240
void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value,
12391241
SMLoc Loc) {}
12401242
void MCStreamer::emitBundleAlignMode(Align Alignment) {}

llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,6 +1153,7 @@ class X86AsmParser : public MCTargetAsmParser {
11531153
bool parseDirectiveArch();
11541154
bool parseDirectiveNops(SMLoc L);
11551155
bool parseDirectiveEven(SMLoc L);
1156+
bool parseDirectiveAvoidEndAlign(SMLoc L);
11561157
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
11571158

11581159
/// CodeView FPO data directives.
@@ -4601,6 +4602,8 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
46014602
return false;
46024603
} else if (IDVal == ".nops")
46034604
return parseDirectiveNops(DirectiveID.getLoc());
4605+
else if (IDVal == ".avoid_end_align")
4606+
return parseDirectiveAvoidEndAlign(DirectiveID.getLoc());
46044607
else if (IDVal == ".even")
46054608
return parseDirectiveEven(DirectiveID.getLoc());
46064609
else if (IDVal == ".cv_fpo_proc")
@@ -4695,6 +4698,27 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) {
46954698
return false;
46964699
}
46974700

4701+
/// Directive for NeverAlign fragment testing, not for general usage!
4702+
/// parseDirectiveAvoidEndAlign
4703+
/// ::= .avoid_end_align alignment
4704+
bool X86AsmParser::parseDirectiveAvoidEndAlign(SMLoc L) {
4705+
int64_t Alignment = 0;
4706+
SMLoc AlignmentLoc;
4707+
AlignmentLoc = getTok().getLoc();
4708+
if (getParser().checkForValidSection() ||
4709+
getParser().parseAbsoluteExpression(Alignment))
4710+
return true;
4711+
4712+
if (getParser().parseEOL("unexpected token in directive"))
4713+
return true;
4714+
4715+
if (Alignment <= 0)
4716+
return Error(AlignmentLoc, "expected a positive alignment");
4717+
4718+
getParser().getStreamer().emitNeverAlignCodeAtEnd(Alignment, getSTI());
4719+
return false;
4720+
}
4721+
46984722
/// ParseDirectiveCode
46994723
/// ::= .code16 | .code32 | .code64
47004724
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {

0 commit comments

Comments
 (0)