Skip to content

Commit e1a5a56

Browse files
committed
[TableGen] Rework EmitIntrinsicToBuiltinMap
Rework `IntrinsicEmitter::EmitIntrinsicToBuiltinMap` for improved performance and refactor the code. Performance: - Current generated code does a linear search on the TargetPrefix, followed by a binary search on the builtin name within that target's builtins. - Improve the performance of this code in 3 ways: (a) Build a table of lambdas, one per target, and use binary search on the target prefix to look up the lambda for that target. (b) Improve the (common) case where all builtins for a target share a common prefix. Within that target's lambda, check this common prefix first, and then do the binary search on the builtin name with the common prefix removed. This should help both code size, by creating a smaller static string table, and runtime, by reducing the cost of binary search on smaller strings. (c) Do not generate the builtin table when a target has a single builtin (in which case that builtin's name will also be the common prefix). Refactor: - Use range-based for loops for iterating over maps. - Use formatv() to simplify the emission code.
1 parent 69f76c7 commit e1a5a56

File tree

4 files changed

+175
-60
lines changed

4 files changed

+175
-60
lines changed

llvm/include/llvm/ADT/STLExtras.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1847,7 +1847,7 @@ OutputIt replace_copy(R &&Range, OutputIt Out, const T &OldValue,
18471847
/// begin/end explicitly.
18481848
template <typename R, typename T>
18491849
void replace(R &&Range, const T &OldValue, const T &NewValue) {
1850-
return std::replace(adl_begin(Range), adl_end(Range), OldValue, NewValue);
1850+
std::replace(adl_begin(Range), adl_end(Range), OldValue, NewValue);
18511851
}
18521852

18531853
/// Provide wrappers to std::move which take ranges instead of having to
@@ -1982,6 +1982,8 @@ auto upper_bound(R &&Range, T &&Value, Compare C) {
19821982
std::forward<T>(Value), C);
19831983
}
19841984

1985+
/// Provide wrappers to std::min_element which take ranges instead of having to
1986+
/// pass begin/end explicitly.
19851987
template <typename R> auto min_element(R &&Range) {
19861988
return std::min_element(adl_begin(Range), adl_end(Range));
19871989
}
@@ -1990,6 +1992,8 @@ template <typename R, typename Compare> auto min_element(R &&Range, Compare C) {
19901992
return std::min_element(adl_begin(Range), adl_end(Range), C);
19911993
}
19921994

1995+
/// Provide wrappers to std::max_element which take ranges instead of having to
1996+
/// pass begin/end explicitly.
19931997
template <typename R> auto max_element(R &&Range) {
19941998
return std::max_element(adl_begin(Range), adl_end(Range));
19951999
}
@@ -1998,6 +2002,13 @@ template <typename R, typename Compare> auto max_element(R &&Range, Compare C) {
19982002
return std::max_element(adl_begin(Range), adl_end(Range), C);
19992003
}
20002004

2005+
/// Provide wrappers to std::mismatch which take ranges instead of having to
2006+
/// pass begin/end explicitly.
2007+
template <typename R1, typename R2> auto mismatch(R1 &&Range1, R2 &&Range2) {
2008+
return std::mismatch(adl_begin(Range1), adl_end(Range1), adl_begin(Range2),
2009+
adl_end(Range2));
2010+
}
2011+
20012012
template <typename R>
20022013
void stable_sort(R &&Range) {
20032014
std::stable_sort(adl_begin(Range), adl_end(Range));

llvm/include/llvm/IR/Intrinsics.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ namespace Intrinsic {
100100
StringRef Name);
101101

102102
/// Map a Clang builtin name to an intrinsic ID.
103-
ID getIntrinsicForClangBuiltin(const char *Prefix, StringRef BuiltinName);
103+
ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
104104

105105
/// Map a MS builtin name to an intrinsic ID.
106-
ID getIntrinsicForMSBuiltin(const char *Prefix, StringRef BuiltinName);
106+
ID getIntrinsicForMSBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
107107

108108
/// Returns true if the intrinsic ID is for one of the "Constrained
109109
/// Floating-Point Intrinsics".

llvm/include/llvm/TableGen/StringToOffsetTable.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ class StringToOffsetTable {
2626
std::string AggregateString;
2727

2828
public:
29-
bool Empty() const { return StringOffset.empty(); }
29+
bool empty() const { return StringOffset.empty(); }
30+
size_t size() const { return AggregateString.size(); }
3031

3132
unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
3233
auto IterBool =

llvm/utils/TableGen/IntrinsicEmitter.cpp

Lines changed: 159 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/Twine.h"
2020
#include "llvm/Support/CommandLine.h"
2121
#include "llvm/Support/ErrorHandling.h"
22+
#include "llvm/Support/FormatVariadic.h"
2223
#include "llvm/Support/ModRef.h"
2324
#include "llvm/Support/raw_ostream.h"
2425
#include "llvm/TableGen/Error.h"
@@ -28,6 +29,7 @@
2829
#include <algorithm>
2930
#include <array>
3031
#include <cassert>
32+
#include <cctype>
3133
#include <map>
3234
#include <optional>
3335
#include <string>
@@ -99,7 +101,7 @@ void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
99101
// Emit the intrinsic parameter attributes.
100102
EmitAttributes(Ints, OS);
101103

102-
// Emit code to translate GCC builtins into LLVM intrinsics.
104+
// Emit code to translate Clang builtins into LLVM intrinsics.
103105
EmitIntrinsicToBuiltinMap(Ints, true, OS);
104106

105107
// Emit code to translate MS builtins into LLVM intrinsics.
@@ -595,15 +597,15 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
595597
void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
596598
const CodeGenIntrinsicTable &Ints, bool IsClang, raw_ostream &OS) {
597599
StringRef CompilerName = IsClang ? "Clang" : "MS";
598-
StringRef UpperCompilerName = IsClang ? "CLANG" : "MS";
599600
// map<TargetPrefix, map<BuiltinName, EnumName>>. Note that we iterate over
600601
// both maps in the code below. For the inner map, entries need to be emitted
601602
// in the sorted order of `BuiltinName` because we use std::lower_bound to
602603
// search these entries. For the outer map, it doesn't need to be sorted, but
603604
// we use a map to eliminate non-determinism in the emitted code.
604-
typedef std::map<StringRef, std::map<StringRef, StringRef>> BIMTy;
605+
using BIMTy = std::map<StringRef, std::map<StringRef, StringRef>>;
605606
BIMTy BuiltinMap;
606-
StringToOffsetTable Table;
607+
StringToOffsetTable OldTable;
608+
607609
for (const CodeGenIntrinsic &Int : Ints) {
608610
StringRef BuiltinName = IsClang ? Int.ClangBuiltinName : Int.MSBuiltinName;
609611
if (BuiltinName.empty())
@@ -615,70 +617,171 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
615617
PrintFatalError(Int.TheDef->getLoc(),
616618
"Intrinsic '" + Int.TheDef->getName() + "': duplicate " +
617619
CompilerName + " builtin name!");
618-
Table.GetOrAddStringOffset(BuiltinName);
619-
}
620620

621-
OS << "// Get the LLVM intrinsic that corresponds to a builtin.\n";
622-
OS << "// This is used by the C front-end. The builtin name is passed\n";
623-
OS << "// in as BuiltinName, and a target prefix (e.g. 'ppc') is passed\n";
624-
OS << "// in as TargetPrefix. The result is assigned to 'IntrinsicID'.\n";
625-
OS << "#ifdef GET_LLVM_INTRINSIC_FOR_" << UpperCompilerName << "_BUILTIN\n";
621+
OldTable.GetOrAddStringOffset(BuiltinName);
622+
}
626623

627-
OS << "Intrinsic::ID Intrinsic::getIntrinsicFor" << CompilerName
628-
<< "Builtin(const char "
629-
<< "*TargetPrefixStr, StringRef BuiltinNameStr) {\n";
624+
// For each target, determine the common prefix for all the builtins for that
625+
// target. Populate the string table with the names of all the builtins after
626+
// removing this common prefix.
627+
DenseMap<StringRef, StringRef> CommonPrefixMap;
628+
StringToOffsetTable Table;
629+
for (const auto &[TargetPrefix, Map] : BuiltinMap) {
630+
// The Map is guaranteed to be non-empty here.
631+
StringRef CommonPrefix = Map.begin()->first;
632+
for (auto &[BuiltinName, EnumName] : Map) {
633+
// Update the common prefix.
634+
const char *Mismatch = mismatch(CommonPrefix, BuiltinName).first;
635+
CommonPrefix = CommonPrefix.take_front(Mismatch - CommonPrefix.begin());
636+
if (CommonPrefix.empty())
637+
break;
638+
}
639+
CommonPrefixMap[TargetPrefix] = CommonPrefix;
640+
for (auto &[BuiltinName, EnumName] : Map) {
641+
StringRef Suffix = BuiltinName.substr(CommonPrefix.size());
642+
if (!Suffix.empty())
643+
Table.GetOrAddStringOffset(Suffix);
644+
}
645+
}
630646

631-
if (Table.Empty()) {
632-
OS << " return Intrinsic::not_intrinsic;\n";
633-
OS << "}\n";
634-
OS << "#endif\n\n";
647+
std::string PreprocessorGuard =
648+
"GET_LLVM_INTRINSIC_FOR_" + CompilerName.upper() + "_BUILTIN";
649+
650+
OS << formatv(R"(
651+
// Get the LLVM intrinsic that corresponds to a builtin. This is used by the
652+
// C front-end. The builtin name is passed in as BuiltinName, and a target
653+
// prefix (e.g. 'ppc') is passed in as TargetPrefix.
654+
#ifdef {0}
655+
656+
Intrinsic::ID
657+
Intrinsic::getIntrinsicFor{1}Builtin(StringRef TargetPrefix,
658+
StringRef BuiltinName) {{
659+
using namespace Intrinsic;
660+
)",
661+
PreprocessorGuard, CompilerName);
662+
663+
if (BuiltinMap.empty()) {
664+
OS << formatv(R"(
665+
return not_intrinsic;
666+
}
667+
#endif // {0}
668+
)",
669+
PreprocessorGuard);
635670
return;
636671
}
637672

638-
OS << " static constexpr char BuiltinNames[] = {\n";
639-
Table.EmitCharArray(OS);
640-
OS << " };\n\n";
673+
if (!Table.empty()) {
674+
OS << " static constexpr char BuiltinNames[] = {\n";
675+
Table.EmitCharArray(OS);
676+
OS << " };\n\n";
677+
678+
OS << R"(
679+
struct BuiltinEntry {
680+
ID IntrinsicID;
681+
unsigned StrTabOffset;
682+
const char *getName() const { return &BuiltinNames[StrTabOffset]; }
683+
bool operator<(StringRef RHS) const {
684+
return strncmp(getName(), RHS.data(), RHS.size()) < 0;
685+
}
686+
};
641687
642-
OS << " struct BuiltinEntry {\n";
643-
OS << " Intrinsic::ID IntrinID;\n";
644-
OS << " unsigned StrTabOffset;\n";
645-
OS << " const char *getName() const {\n";
646-
OS << " return &BuiltinNames[StrTabOffset];\n";
647-
OS << " }\n";
648-
OS << " bool operator<(StringRef RHS) const {\n";
649-
OS << " return strncmp(getName(), RHS.data(), RHS.size()) < 0;\n";
650-
OS << " }\n";
651-
OS << " };\n";
688+
)";
689+
}
652690

653-
OS << " StringRef TargetPrefix(TargetPrefixStr);\n\n";
691+
auto GetLambdaName = [](StringRef TargetPrefix) -> std::string {
692+
return "Get" + TargetPrefix.str() + "Intrinsic";
693+
};
654694

655-
// Note: this could emit significantly better code if we cared.
656-
for (auto &I : BuiltinMap) {
657-
OS << " ";
658-
if (!I.first.empty())
659-
OS << "if (TargetPrefix == \"" << I.first << "\") ";
695+
// Emit a per target lambda to find the builtin within that target's builtins.
696+
// It will return the intrinsic ID if it finds the builtin, else
697+
// returns not_intrinsic.
698+
bool HasTargetIndependentBuiltins = false;
699+
for (const auto &[TargetPrefix, Map] : BuiltinMap) {
700+
HasTargetIndependentBuiltins |= TargetPrefix.empty();
701+
if (!TargetPrefix.empty())
702+
OS << formatv(" // Lookup builtins for {0}.\n", TargetPrefix);
660703
else
661-
OS << "/* Target Independent Builtins */ ";
662-
OS << "{\n";
663-
664-
// Emit the comparisons for this target prefix.
665-
OS << " static constexpr BuiltinEntry " << I.first << "Names[] = {\n";
666-
for (const auto &P : I.second) {
667-
OS << " {Intrinsic::" << P.second << ", "
668-
<< Table.GetOrAddStringOffset(P.first) << "}, // " << P.first << "\n";
704+
OS << " // Lookup target independent builtins.\n";
705+
706+
std::string LambdaName = GetLambdaName(TargetPrefix);
707+
OS << formatv(" auto {0} = [](StringRef Name) -> ID {{", LambdaName);
708+
StringRef CommonPrefix = CommonPrefixMap[TargetPrefix];
709+
if (!CommonPrefix.empty())
710+
OS << formatv(R"(
711+
if (!Name.consume_front("{0}"))
712+
return not_intrinsic;
713+
)",
714+
CommonPrefix);
715+
716+
// We need the array only for > 1 entries. If there is just one entry, it
717+
// will be covered by the common prefix check.
718+
if (Map.size() > 1) {
719+
// Emit the comparisons for this target prefix.
720+
OS << " static constexpr BuiltinEntry Names[] = {\n";
721+
for (const auto &[BuiltinName, EnumName] : Map) {
722+
StringRef Suffix = BuiltinName.substr(CommonPrefix.size());
723+
OS << formatv(" {{{0}, {1}}, // {2}\n", EnumName,
724+
Table.GetOrAddStringOffset(Suffix), BuiltinName);
725+
}
726+
OS << R"( }; // Names.
727+
auto II = lower_bound(Names, Name);
728+
if (II != std::end(Names) && II->getName() == Name)
729+
return II->IntrinsicID;
730+
return not_intrinsic;
731+
)";
732+
} else {
733+
// Single entry case. If the prefix check passed, just return the single
734+
// intrinsic ID.
735+
const auto &[BuiltinName, EnumName] = *Map.begin();
736+
OS << formatv(" return {0}; // {1}\n", EnumName, BuiltinName);
669737
}
670-
OS << " };\n";
671-
OS << " auto I = std::lower_bound(std::begin(" << I.first << "Names),\n";
672-
OS << " std::end(" << I.first << "Names),\n";
673-
OS << " BuiltinNameStr);\n";
674-
OS << " if (I != std::end(" << I.first << "Names) &&\n";
675-
OS << " I->getName() == BuiltinNameStr)\n";
676-
OS << " return I->IntrinID;\n";
677-
OS << " }\n";
738+
OS << formatv(" }; // end {0}.\n\n", LambdaName);
678739
}
679-
OS << " return Intrinsic::not_intrinsic;\n";
680-
OS << "}\n";
681-
OS << "#endif\n\n";
740+
741+
// After emitting the lambdas, emit a lookup table for the lambdas (except the
742+
// target independent one). Use binary search, similar to the table for
743+
// builtin names.
744+
OS << R"(
745+
struct TargetEntry {
746+
StringRef TargetPrefix;
747+
function_ref<ID(StringRef)> GetIntrinsicFn;
748+
bool operator<(StringRef RHS) const {
749+
return TargetPrefix < RHS;
750+
};
751+
};
752+
static const TargetEntry TargetTable[] = {
753+
)";
754+
755+
for (const auto &[TargetPrefix, Map] : BuiltinMap) {
756+
if (TargetPrefix.empty())
757+
continue;
758+
OS << formatv(R"( {{"{0}", {1}},)", TargetPrefix,
759+
GetLambdaName(TargetPrefix))
760+
<< "\n";
761+
}
762+
OS << " };\n";
763+
764+
// Now for the actual lookup, first check the target independent lambda if
765+
// we emitted one.
766+
if (HasTargetIndependentBuiltins) {
767+
OS << formatv(R"(
768+
ID IntrinsicID = {0}(BuiltinName);
769+
if (IntrinsicID != not_intrinsic)
770+
return IntrinsicID;
771+
)",
772+
GetLambdaName(""));
773+
}
774+
775+
// If a target independent builtin was not found, lookup the target specific.
776+
OS << formatv(R"(
777+
auto II = lower_bound(TargetTable, TargetPrefix);
778+
if (II != std::end(TargetTable) && II->TargetPrefix == TargetPrefix)
779+
return II->GetIntrinsicFn(BuiltinName);
780+
return not_intrinsic;
781+
}
782+
#endif // {0}
783+
)",
784+
PreprocessorGuard);
682785
}
683786

684787
static void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) {

0 commit comments

Comments
 (0)