-
Notifications
You must be signed in to change notification settings - Fork 14.3k
release/19.x: [MIPS] Optimize sortRelocs for o32 #106008
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mc Author: Alex Rønne Petersen (alexrp) ChangesManual backport of #104723. Full diff: https://github.com/llvm/llvm-project/pull/106008.diff 4 Files Affected:
diff --git a/llvm/include/llvm/MC/MCELFObjectWriter.h b/llvm/include/llvm/MC/MCELFObjectWriter.h
index 9b74cbc3d3a520..cbb010fa2fd802 100644
--- a/llvm/include/llvm/MC/MCELFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -37,19 +37,14 @@ struct ELFRelocationEntry {
const MCSymbolELF *Symbol; // The symbol to relocate with.
unsigned Type; // The type of the relocation.
uint64_t Addend; // The addend to use.
- const MCSymbolELF *OriginalSymbol; // The original value of Symbol if we changed it.
- uint64_t OriginalAddend; // The original value of addend.
ELFRelocationEntry(uint64_t Offset, const MCSymbolELF *Symbol, unsigned Type,
- uint64_t Addend, const MCSymbolELF *OriginalSymbol,
- uint64_t OriginalAddend)
- : Offset(Offset), Symbol(Symbol), Type(Type), Addend(Addend),
- OriginalSymbol(OriginalSymbol), OriginalAddend(OriginalAddend) {}
+ uint64_t Addend)
+ : Offset(Offset), Symbol(Symbol), Type(Type), Addend(Addend) {}
void print(raw_ostream &Out) const {
Out << "Off=" << Offset << ", Sym=" << Symbol << ", Type=" << Type
- << ", Addend=" << Addend << ", OriginalSymbol=" << OriginalSymbol
- << ", OriginalAddend=" << OriginalAddend;
+ << ", Addend=" << Addend;
}
LLVM_DUMP_METHOD void dump() const { print(errs()); }
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index f958905a26aa42..8e8705500126e7 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -1393,22 +1393,17 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
bool RelocateWithSymbol =
shouldRelocateWithSymbol(Asm, Target, SymA, C, Type) ||
(Parent->getType() == ELF::SHT_LLVM_CALL_GRAPH_PROFILE);
- uint64_t Addend = 0;
-
- FixedValue = !RelocateWithSymbol && SymA && !SymA->isUndefined()
- ? C + Asm.getSymbolOffset(*SymA)
- : C;
- if (usesRela(TO, FixupSection)) {
- Addend = FixedValue;
- FixedValue = 0;
- }
+ uint64_t Addend = !RelocateWithSymbol && SymA && !SymA->isUndefined()
+ ? C + Asm.getSymbolOffset(*SymA)
+ : C;
+ FixedValue = usesRela(TO, FixupSection) ? 0 : Addend;
if (!RelocateWithSymbol) {
const auto *SectionSymbol =
SecA ? cast<MCSymbolELF>(SecA->getBeginSymbol()) : nullptr;
if (SectionSymbol)
SectionSymbol->setUsedInReloc();
- ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend, SymA, C);
+ ELFRelocationEntry Rec(FixupOffset, SectionSymbol, Type, Addend);
Relocations[&FixupSection].push_back(Rec);
return;
}
@@ -1423,7 +1418,7 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
else
RenamedSymA->setUsedInReloc();
}
- ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend, SymA, C);
+ ELFRelocationEntry Rec(FixupOffset, RenamedSymA, Type, Addend);
Relocations[&FixupSection].push_back(Rec);
}
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 4d6a00c14a3575..faf9772ab75756 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -40,20 +40,8 @@ struct MipsRelocationEntry {
bool Matched = false; ///< Is this relocation part of a match.
MipsRelocationEntry(const ELFRelocationEntry &R) : R(R) {}
-
- void print(raw_ostream &Out) const {
- R.print(Out);
- Out << ", Matched=" << Matched;
- }
};
-#ifndef NDEBUG
-raw_ostream &operator<<(raw_ostream &OS, const MipsRelocationEntry &RHS) {
- RHS.print(OS);
- return OS;
-}
-#endif
-
class MipsELFObjectWriter : public MCELFObjectTargetWriter {
public:
MipsELFObjectWriter(uint8_t OSABI, bool HasRelocationAddend, bool Is64);
@@ -115,17 +103,11 @@ static InputIt find_best(InputIt First, InputIt Last, UnaryPredicate Predicate,
for (InputIt I = First; I != Last; ++I) {
unsigned Matched = Predicate(*I);
if (Matched != FindBest_NoMatch) {
- LLVM_DEBUG(dbgs() << std::distance(First, I) << " is a match (";
- I->print(dbgs()); dbgs() << ")\n");
- if (Best == Last || BetterThan(*I, *Best)) {
- LLVM_DEBUG(dbgs() << ".. and it beats the last one\n");
+ if (Best == Last || BetterThan(*I, *Best))
Best = I;
- }
}
- if (Matched == FindBest_PerfectMatch) {
- LLVM_DEBUG(dbgs() << ".. and it is unbeatable\n");
+ if (Matched == FindBest_PerfectMatch)
break;
- }
}
return Best;
@@ -147,8 +129,7 @@ static unsigned getMatchingLoType(const ELFRelocationEntry &Reloc) {
if (Type == ELF::R_MIPS16_HI16)
return ELF::R_MIPS16_LO16;
- if (Reloc.OriginalSymbol &&
- Reloc.OriginalSymbol->getBinding() != ELF::STB_LOCAL)
+ if (Reloc.Symbol && Reloc.Symbol->getBinding() != ELF::STB_LOCAL)
return ELF::R_MIPS_NONE;
if (Type == ELF::R_MIPS_GOT16)
@@ -161,54 +142,12 @@ static unsigned getMatchingLoType(const ELFRelocationEntry &Reloc) {
return ELF::R_MIPS_NONE;
}
-/// Determine whether a relocation (X) matches the one given in R.
-///
-/// A relocation matches if:
-/// - It's type matches that of a corresponding low part. This is provided in
-/// MatchingType for efficiency.
-/// - It's based on the same symbol.
-/// - It's offset of greater or equal to that of the one given in R.
-/// It should be noted that this rule assumes the programmer does not use
-/// offsets that exceed the alignment of the symbol. The carry-bit will be
-/// incorrect if this is not true.
-///
-/// A matching relocation is unbeatable if:
-/// - It is not already involved in a match.
-/// - It's offset is exactly that of the one given in R.
-static FindBestPredicateResult isMatchingReloc(const MipsRelocationEntry &X,
- const ELFRelocationEntry &R,
- unsigned MatchingType) {
- if (X.R.Type == MatchingType && X.R.OriginalSymbol == R.OriginalSymbol) {
- if (!X.Matched &&
- X.R.OriginalAddend == R.OriginalAddend)
- return FindBest_PerfectMatch;
- else if (X.R.OriginalAddend >= R.OriginalAddend)
- return FindBest_Match;
- }
- return FindBest_NoMatch;
-}
-
-/// Determine whether Candidate or PreviousBest is the better match.
-/// The return value is true if Candidate is the better match.
-///
-/// A matching relocation is a better match if:
-/// - It has a smaller addend.
-/// - It is not already involved in a match.
-static bool compareMatchingRelocs(const MipsRelocationEntry &Candidate,
- const MipsRelocationEntry &PreviousBest) {
- if (Candidate.R.OriginalAddend != PreviousBest.R.OriginalAddend)
- return Candidate.R.OriginalAddend < PreviousBest.R.OriginalAddend;
- return PreviousBest.Matched && !Candidate.Matched;
-}
-
-#ifndef NDEBUG
-/// Print all the relocations.
-template <class Container>
-static void dumpRelocs(const char *Prefix, const Container &Relocs) {
- for (const auto &R : Relocs)
- dbgs() << Prefix << R << "\n";
+// Determine whether a relocation X is a low-part and matches the high-part R
+// perfectly by symbol and addend.
+static bool isMatchingReloc(unsigned MatchingType, const ELFRelocationEntry &R,
+ const ELFRelocationEntry &X) {
+ return X.Type == MatchingType && X.Symbol == R.Symbol && X.Addend == R.Addend;
}
-#endif
MipsELFObjectWriter::MipsELFObjectWriter(uint8_t OSABI,
bool HasRelocationAddend, bool Is64)
@@ -436,65 +375,52 @@ void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
if (hasRelocationAddend())
return;
- if (Relocs.size() < 2)
- return;
-
// Sort relocations by the address they are applied to.
llvm::sort(Relocs,
[](const ELFRelocationEntry &A, const ELFRelocationEntry &B) {
return A.Offset < B.Offset;
});
+ // Place relocations in a list for reorder convenience. Hi16 contains the
+ // iterators of high-part relocations.
std::list<MipsRelocationEntry> Sorted;
- std::list<ELFRelocationEntry> Remainder;
-
- LLVM_DEBUG(dumpRelocs("R: ", Relocs));
-
- // Separate the movable relocations (AHL relocations using the high bits) from
- // the immobile relocations (everything else). This does not preserve high/low
- // matches that already existed in the input.
- copy_if_else(Relocs.begin(), Relocs.end(), std::back_inserter(Remainder),
- std::back_inserter(Sorted), [](const ELFRelocationEntry &Reloc) {
- return getMatchingLoType(Reloc) != ELF::R_MIPS_NONE;
- });
-
- for (auto &R : Remainder) {
- LLVM_DEBUG(dbgs() << "Matching: " << R << "\n");
+ SmallVector<std::list<MipsRelocationEntry>::iterator, 0> Hi16;
+ for (auto &R : Relocs) {
+ Sorted.push_back(R);
+ if (getMatchingLoType(R) != ELF::R_MIPS_NONE)
+ Hi16.push_back(std::prev(Sorted.end()));
+ }
+ for (auto I : Hi16) {
+ auto &R = I->R;
unsigned MatchingType = getMatchingLoType(R);
- assert(MatchingType != ELF::R_MIPS_NONE &&
- "Wrong list for reloc that doesn't need a match");
-
- // Find the best matching relocation for the current high part.
- // See isMatchingReloc for a description of a matching relocation and
- // compareMatchingRelocs for a description of what 'best' means.
- auto InsertionPoint =
- find_best(Sorted.begin(), Sorted.end(),
- [&R, &MatchingType](const MipsRelocationEntry &X) {
- return isMatchingReloc(X, R, MatchingType);
- },
- compareMatchingRelocs);
-
- // If we matched then insert the high part in front of the match and mark
- // both relocations as being involved in a match. We only mark the high
- // part for cosmetic reasons in the debug output.
+ // If the next relocation is a perfect match, continue;
+ if (std::next(I) != Sorted.end() &&
+ isMatchingReloc(MatchingType, R, std::next(I)->R))
+ continue;
+ // Otherwise, find the best matching low-part relocation with the following
+ // criteria. It must have the same symbol and its addend is no lower than
+ // that of the current high-part.
//
- // If we failed to find a match then the high part is orphaned. This is not
- // permitted since the relocation cannot be evaluated without knowing the
- // carry-in. We can sometimes handle this using a matching low part that is
- // already used in a match but we already cover that case in
- // isMatchingReloc and compareMatchingRelocs. For the remaining cases we
- // should insert the high part at the end of the list. This will cause the
- // linker to fail but the alternative is to cause the linker to bind the
- // high part to a semi-matching low part and silently calculate the wrong
- // value. Unfortunately we have no means to warn the user that we did this
- // so leave it up to the linker to complain about it.
- if (InsertionPoint != Sorted.end())
- InsertionPoint->Matched = true;
- Sorted.insert(InsertionPoint, R)->Matched = true;
- }
+ // (1) %lo with a smaller offset is preferred.
+ // (2) %lo with the same offset that is unmatched is preferred.
+ // (3) later %lo is preferred.
+ auto Best = Sorted.end();
+ for (auto J = Sorted.begin(); J != Sorted.end(); ++J) {
+ auto &R1 = J->R;
+ if (R1.Type == MatchingType && R.Symbol == R1.Symbol &&
+ R.Addend <= R1.Addend &&
+ (Best == Sorted.end() || R1.Addend < Best->R.Addend ||
+ (!Best->Matched && R1.Addend == Best->R.Addend)))
+ Best = J;
+ }
+ if (Best != Sorted.end() && R.Addend == Best->R.Addend)
+ Best->Matched = true;
- LLVM_DEBUG(dumpRelocs("S: ", Sorted));
+ // Move the high-part before the low-part, or if not found, the end of the
+ // list. The unmatched high-part will lead to a linker warning/error.
+ Sorted.splice(Best, Sorted, I);
+ }
assert(Relocs.size() == Sorted.size() && "Some relocs were not consumed");
diff --git a/llvm/test/MC/Mips/sort-relocation-table.s b/llvm/test/MC/Mips/sort-relocation-table.s
index cc951956fd24a0..7d126ba9f049d8 100644
--- a/llvm/test/MC/Mips/sort-relocation-table.s
+++ b/llvm/test/MC/Mips/sort-relocation-table.s
@@ -150,8 +150,8 @@
lui $2, %hi(sym1)
# CHECK-LABEL: Section ({{[0-9]+}}) .rel.mips_hilo_8b {
-# CHECK-NEXT: 0x8 R_MIPS_HI16 sym1
# CHECK-NEXT: 0x0 R_MIPS_LO16 sym1
+# CHECK-NEXT: 0x8 R_MIPS_HI16 sym1
# CHECK-NEXT: 0x4 R_MIPS_LO16 sym1
# CHECK-NEXT: }
@@ -331,8 +331,8 @@
lui $2, %got(local1)
# CHECK-LABEL: Section ({{[0-9]+}}) .rel.mips_gotlo_8b {
-# CHECK-NEXT: 0x8 R_MIPS_GOT16 .text
# CHECK-NEXT: 0x0 R_MIPS_LO16 .text
+# CHECK-NEXT: 0x8 R_MIPS_GOT16 .text
# CHECK-NEXT: 0x4 R_MIPS_LO16 .text
# CHECK-NEXT: }
@@ -372,9 +372,9 @@
# CHECK-LABEL: Section ({{[0-9]+}}) .rel.mips_gotlo_10 {
# CHECK-NEXT: 0x0 R_MIPS_GOT16 .text
# CHECK-NEXT: 0x4 R_MIPS_LO16 .text
+# CHECK-NEXT: 0x8 R_MIPS_GOT16 .text
# CHECK-NEXT: 0xC R_MIPS_GOT16 .text
# CHECK-NEXT: 0x10 R_MIPS_LO16 .text
-# CHECK-NEXT: 0x8 R_MIPS_GOT16 .text
# CHECK-NEXT: }
# Finally, do test 2 for R_MIPS_GOT16 on external symbols to prove they are
|
I am not convinced that we should take something that's a pure optimization between RC3 and final unless this is fixing a regression or is very important. I want to avoid risk now so that we don't end up making things worse for the final release. |
@tru please see the numbers in #104562 (comment). The Zig project has no choice but to keep all MIPS32 testing (both local and in CI) disabled until this fix is in effect. So I'd say it's reasonably important. |
Yeah, this has been an issue for a while AIUI. I don't think it affects C/C++ projects in general because of separate compilation. Zig, OTOH, uses a compilation model that's more like a "unity build", which results in tons of relocations in the single module that goes through |
Has a decision been reached on this? (Not familiar with how exactly the process works.) Also, even if this doesn't make it for 19.1.0-final, can it be considered for a subsequent bug fix release? |
I would still like a domain expert to weigh in. |
I don't think that it is a bugfix, thus not need to be backported. |
Yeah I tend to agree that this is a seemingly nice to have thing, but it's not really qualifying for a bugfix or a regression. |
Definitely agree it's not a regression, but I think I would quibble a bit with the idea that taking ~1h6min to compile something that normally takes ~3min is not a bug in some sense. 🙂 But ok, philosophical debates aside: Would it be reasonable to at least consider it a critical performance issue for the purposes of a bug fix release? I acknowledge that there'd still need to be an assessment as to whether the patch is safe, but it seems like it should at least fit one of the criteria on its face. In regards to safety, perhaps confidence in the patch would increase in the time between 19.1.0-final and the subsequent bug fix releases? |
If so, then I think that it is a real bug ;)
|
The input is usually ordered by offset, so inspecting the output is sufficient. The super expensive relocation dump is not conventional.
For MIPS's o32 ABI (REL), https://reviews.llvm.org/D19718 introduced `OriginalAddend` to find the matching R_MIPS_LO16 relocation for R_MIPS_GOT16 when STT_SECTION conversion is applicable. lw $2, %lo(local1) lui $2, %got(local1) However, we could just store the original `Addend` in `ELFRelocationEntry` and remove `OriginalAddend`. Note: The relocation ordering algorithm in https://reviews.llvm.org/D19718 is inefficient (llvm#104562), which will be addressed by another patch.
The o32 ABI specifies: > Each relocation type of R_MIPS_HI16 must have an associated R_MIPS_LO16 entry immediately following it in the list of relocations. [...] the addend AHL is computed as (AHI << 16) + (short)ALO In practice, the high-part and low-part relocations may not be adjacent in assembly files, requiring the assembler to reorder relocations. http://reviews.llvm.org/D19718 performed the reordering, but did not optimize for the common case where a %lo immediately follows its matching %hi. The quadratic time complexity could make sections with many relocations very slow to process. This patch implements the fast path, simplifies the code, and makes the behavior more similar to GNU assembler (for the .rel.mips_hilo_8b test). We also remove `OriginalSymbol`, removing overhead for other targets. Fix llvm#104562 Pull Request: llvm#104723
ff2fa6d
to
fcd8989
Compare
I don't think this should be backported in the current form, because it breaks ABI. This is not strictly impossible at this stage, but also very undesirable. |
This will have to wait for LLVM 20. I know it's not optimal for zig, but getting it in this late and it being abi breaking is tricky. |
Manual backport of #104723.