Skip to content

Commit f5ee88c

Browse files
committed
[TableGen] Rework EmitIntrinsicToBuiltinMap
Rework `IntrinsicEmitter::EmitIntrinsicToBuiltinMap` for improved performance and refactor the code. Performance: - Current generated code does a linear search on the TargetPrefix, followed by a binary search on the builtin name within that target's builtins. - Improve the performance of this code in 2 ways: (a) Use binary search on the target prefix to look up the builtin table for a target. (b) Improve the (common) case where all builtins for a target share a common prefix. Within that target's lambda, check this common prefix first, and then do the binary search on the builtin name with the common prefix removed. This should help both code size by creating a smaller static string table and runtime by reducing the cost of binary search on smaller strings. Refactor: - Use range-based for loops for iterating over maps. - Use formatv() and C++ raw string literals to simplify the emission code.
1 parent 52bfb26 commit f5ee88c

File tree

5 files changed

+221
-68
lines changed

5 files changed

+221
-68
lines changed

llvm/benchmarks/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
set(LLVM_LINK_COMPONENTS
2+
Core
23
Support)
34

45
add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
56
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
7+
add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#include "benchmark/benchmark.h"
2+
#include "llvm/IR/Intrinsics.h"
3+
4+
using namespace llvm;
5+
using namespace Intrinsic;
6+
7+
// Benchmark intrinsic lookup from a variety of targets.
8+
static void BM_GetIntrinsicForClangBuiltin(benchmark::State &state) {
9+
static const char *Builtins[] = {
10+
"__builtin_adjust_trampoline",
11+
"__builtin_trap",
12+
"__builtin_arm_ttest",
13+
"__builtin_amdgcn_cubetc",
14+
"__builtin_amdgcn_udot2",
15+
"__builtin_arm_stc",
16+
"__builtin_bpf_compare",
17+
"__builtin_HEXAGON_A2_max",
18+
"__builtin_lasx_xvabsd_b",
19+
"__builtin_mips_dlsa",
20+
"__nvvm_floor_f",
21+
"__builtin_altivec_vslb",
22+
"__builtin_r600_read_tgid_x",
23+
"__builtin_riscv_aes64im",
24+
"__builtin_s390_vcksm",
25+
"__builtin_ve_vl_pvfmksge_Mvl",
26+
"__builtin_ia32_axor64",
27+
"__builtin_bitrev",
28+
};
29+
static const char *Targets[] = {"", "aarch64", "amdgcn", "mips",
30+
"nvvm", "r600", "riscv"};
31+
32+
for (auto _ : state) {
33+
for (auto Builtin : Builtins)
34+
for (auto Target : Targets)
35+
getIntrinsicForClangBuiltin(Target, Builtin);
36+
}
37+
}
38+
39+
static void
40+
BM_GetIntrinsicForClangBuiltinHexagonFirst(benchmark::State &state) {
41+
// Exercise the worst case by looking for the first builtin for a target
42+
// that has a lot of builtins.
43+
for (auto _ : state)
44+
getIntrinsicForClangBuiltin("hexagon", "__builtin_HEXAGON_A2_abs");
45+
}
46+
47+
BENCHMARK(BM_GetIntrinsicForClangBuiltin);
48+
BENCHMARK(BM_GetIntrinsicForClangBuiltinHexagonFirst);
49+
50+
BENCHMARK_MAIN();

llvm/include/llvm/IR/Intrinsics.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ namespace Intrinsic {
100100
StringRef Name);
101101

102102
/// Map a Clang builtin name to an intrinsic ID.
103-
ID getIntrinsicForClangBuiltin(const char *Prefix, StringRef BuiltinName);
103+
ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
104104

105105
/// Map a MS builtin name to an intrinsic ID.
106-
ID getIntrinsicForMSBuiltin(const char *Prefix, StringRef BuiltinName);
106+
ID getIntrinsicForMSBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
107107

108108
/// Returns true if the intrinsic ID is for one of the "Constrained
109109
/// Floating-Point Intrinsics".

llvm/include/llvm/TableGen/StringToOffsetTable.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/StringMap.h"
1515
#include "llvm/Support/raw_ostream.h"
1616
#include <cctype>
17+
#include <optional>
1718

1819
namespace llvm {
1920

@@ -26,7 +27,8 @@ class StringToOffsetTable {
2627
std::string AggregateString;
2728

2829
public:
29-
bool Empty() const { return StringOffset.empty(); }
30+
bool empty() const { return StringOffset.empty(); }
31+
size_t size() const { return AggregateString.size(); }
3032

3133
unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
3234
auto IterBool =
@@ -41,6 +43,15 @@ class StringToOffsetTable {
4143
return IterBool.first->second;
4244
}
4345

46+
// Returns the offset of `Str` in the table if it is present, else return
47+
// std::nullopt.
48+
std::optional<unsigned> GetStringOffset(StringRef Str) const {
49+
auto II = StringOffset.find(Str);
50+
if (II == StringOffset.end())
51+
return std::nullopt;
52+
return II->second;
53+
}
54+
4455
void EmitString(raw_ostream &O) {
4556
// Escape the string.
4657
SmallString<256> Str;

llvm/utils/TableGen/IntrinsicEmitter.cpp

Lines changed: 155 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/Twine.h"
2020
#include "llvm/Support/CommandLine.h"
2121
#include "llvm/Support/ErrorHandling.h"
22+
#include "llvm/Support/FormatVariadic.h"
2223
#include "llvm/Support/ModRef.h"
2324
#include "llvm/Support/raw_ostream.h"
2425
#include "llvm/TableGen/Error.h"
@@ -28,6 +29,7 @@
2829
#include <algorithm>
2930
#include <array>
3031
#include <cassert>
32+
#include <cctype>
3133
#include <map>
3234
#include <optional>
3335
#include <string>
@@ -99,7 +101,7 @@ void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
99101
// Emit the intrinsic parameter attributes.
100102
EmitAttributes(Ints, OS);
101103

102-
// Emit code to translate GCC builtins into LLVM intrinsics.
104+
// Emit code to translate Clang builtins into LLVM intrinsics.
103105
EmitIntrinsicToBuiltinMap(Ints, true, OS);
104106

105107
// Emit code to translate MS builtins into LLVM intrinsics.
@@ -596,89 +598,177 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
596598
const CodeGenIntrinsicTable &Ints, bool IsClang, raw_ostream &OS) {
597599
StringRef CompilerName = IsClang ? "Clang" : "MS";
598600
StringRef UpperCompilerName = IsClang ? "CLANG" : "MS";
599-
// map<TargetPrefix, map<BuiltinName, EnumName>>. Note that we iterate over
600-
// both maps in the code below. For the inner map, entries need to be emitted
601-
// in the sorted order of `BuiltinName` because we use std::lower_bound to
602-
// search these entries. For the outer map, it doesn't need be be sorted, but
603-
// we use a map to eliminate non-determinism in the emitted code.
604-
typedef std::map<StringRef, std::map<StringRef, StringRef>> BIMTy;
605-
BIMTy BuiltinMap;
606-
StringToOffsetTable Table;
601+
602+
// map<TargetPrefix, pair<map<BuiltinName, EnumName>, CommonPrefix>>.
603+
// Note that we iterate over both the maps in the code below and both
604+
// iterations need to iterate in sorted key order. For the inner map, entries
605+
// need to be emitted in the sorted order of `BuiltinName` with `CommonPrefix`
606+
// removed, because we use std::lower_bound to search these entries. For the
607+
// outer map as well, entries need to be emitted in sorted order of
608+
// `TargetPrefix` as we use std::lower_bound to search these entries.
609+
using BIMEntryTy =
610+
std::pair<std::map<StringRef, StringRef>, std::optional<StringRef>>;
611+
std::map<StringRef, BIMEntryTy> BuiltinMap;
612+
607613
for (const CodeGenIntrinsic &Int : Ints) {
608614
StringRef BuiltinName = IsClang ? Int.ClangBuiltinName : Int.MSBuiltinName;
609615
if (BuiltinName.empty())
610616
continue;
611617
// Get the map for this target prefix.
612-
std::map<StringRef, StringRef> &BIM = BuiltinMap[Int.TargetPrefix];
618+
auto &[Map, CommonPrefix] = BuiltinMap[Int.TargetPrefix];
613619

614-
if (!BIM.insert(std::pair(BuiltinName, Int.EnumName)).second)
620+
if (!Map.insert({BuiltinName, Int.EnumName}).second)
615621
PrintFatalError(Int.TheDef->getLoc(),
616622
"Intrinsic '" + Int.TheDef->getName() + "': duplicate " +
617623
CompilerName + " builtin name!");
618-
Table.GetOrAddStringOffset(BuiltinName);
619-
}
620624

621-
OS << "// Get the LLVM intrinsic that corresponds to a builtin.\n";
622-
OS << "// This is used by the C front-end. The builtin name is passed\n";
623-
OS << "// in as BuiltinName, and a target prefix (e.g. 'ppc') is passed\n";
624-
OS << "// in as TargetPrefix. The result is assigned to 'IntrinsicID'.\n";
625-
OS << "#ifdef GET_LLVM_INTRINSIC_FOR_" << UpperCompilerName << "_BUILTIN\n";
625+
// Update common prefix.
626+
if (!CommonPrefix) {
627+
// For the first builtin for this target, initialize the common prefix.
628+
CommonPrefix = BuiltinName;
629+
continue;
630+
}
626631

627-
OS << "Intrinsic::ID Intrinsic::getIntrinsicFor" << CompilerName
628-
<< "Builtin(const char "
629-
<< "*TargetPrefixStr, StringRef BuiltinNameStr) {\n";
632+
// Update the common prefix. Note that this assumes that `take_front` will
633+
// never set the `Data` pointer in CommonPrefix to nullptr.
634+
const char *Mismatch = mismatch(*CommonPrefix, BuiltinName).first;
635+
*CommonPrefix = CommonPrefix->take_front(Mismatch - CommonPrefix->begin());
636+
}
630637

631-
if (Table.Empty()) {
632-
OS << " return Intrinsic::not_intrinsic;\n";
633-
OS << "}\n";
634-
OS << "#endif\n\n";
638+
// Populate the string table with the names of all the builtins after
639+
// removing this common prefix.
640+
StringToOffsetTable Table;
641+
for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
642+
auto &[Map, CommonPrefix] = Entry;
643+
for (auto &[BuiltinName, EnumName] : Map) {
644+
StringRef Suffix = BuiltinName.substr(CommonPrefix->size());
645+
Table.GetOrAddStringOffset(Suffix);
646+
}
647+
}
648+
649+
OS << formatv(R"(
650+
// Get the LLVM intrinsic that corresponds to a builtin. This is used by the
651+
// C front-end. The builtin name is passed in as BuiltinName, and a target
652+
// prefix (e.g. 'ppc') is passed in as TargetPrefix.
653+
#ifdef GET_LLVM_INTRINSIC_FOR_{0}_BUILTIN
654+
Intrinsic::ID
655+
Intrinsic::getIntrinsicFor{1}Builtin(StringRef TargetPrefix,
656+
StringRef BuiltinName) {{
657+
using namespace Intrinsic;
658+
)",
659+
UpperCompilerName, CompilerName);
660+
661+
if (BuiltinMap.empty()) {
662+
OS << formatv(R"(
663+
return not_intrinsic;
664+
}
665+
#endif // GET_LLVM_INTRINSIC_FOR_{0}_BUILTIN
666+
)",
667+
UpperCompilerName);
635668
return;
636669
}
637670

638-
OS << " static constexpr char BuiltinNames[] = {\n";
639-
Table.EmitCharArray(OS);
640-
OS << " };\n\n";
671+
if (!Table.empty()) {
672+
OS << " static constexpr char BuiltinNames[] = {\n";
673+
Table.EmitCharArray(OS);
674+
OS << " };\n\n";
675+
676+
OS << R"(
677+
struct BuiltinEntry {
678+
constexpr BuiltinEntry(ID IntrinsicID, unsigned Offset)
679+
: IntrinsicID(IntrinsicID), Suffix(&BuiltinNames[Offset]) {}
680+
ID IntrinsicID;
681+
StringRef Suffix;
682+
bool operator<(StringRef RHS) const { return Suffix < RHS; }
683+
};
641684
642-
OS << " struct BuiltinEntry {\n";
643-
OS << " Intrinsic::ID IntrinID;\n";
644-
OS << " unsigned StrTabOffset;\n";
645-
OS << " const char *getName() const {\n";
646-
OS << " return &BuiltinNames[StrTabOffset];\n";
647-
OS << " }\n";
648-
OS << " bool operator<(StringRef RHS) const {\n";
649-
OS << " return strncmp(getName(), RHS.data(), RHS.size()) < 0;\n";
650-
OS << " }\n";
651-
OS << " };\n";
685+
)";
686+
}
652687

653-
OS << " StringRef TargetPrefix(TargetPrefixStr);\n\n";
654-
655-
// Note: this could emit significantly better code if we cared.
656-
for (auto &I : BuiltinMap) {
657-
OS << " ";
658-
if (!I.first.empty())
659-
OS << "if (TargetPrefix == \"" << I.first << "\") ";
660-
else
661-
OS << "/* Target Independent Builtins */ ";
662-
OS << "{\n";
663-
664-
// Emit the comparisons for this target prefix.
665-
OS << " static constexpr BuiltinEntry " << I.first << "Names[] = {\n";
666-
for (const auto &P : I.second) {
667-
OS << " {Intrinsic::" << P.second << ", "
668-
<< Table.GetOrAddStringOffset(P.first) << "}, // " << P.first << "\n";
688+
// Emit a per target table of builtin names.
689+
bool HasTargetIndependentBuiltins = false;
690+
StringRef TargetIndepndentCommonPrefix;
691+
for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
692+
const auto &[Map, CommonPrefix] = Entry;
693+
if (!TargetPrefix.empty()) {
694+
OS << formatv(" // Builtins for {0}.\n", TargetPrefix);
695+
} else {
696+
OS << " // Target independent builtins.\n";
697+
HasTargetIndependentBuiltins = true;
698+
TargetIndepndentCommonPrefix = *CommonPrefix;
699+
}
700+
701+
// Emit the builtin table for this target prefix.
702+
OS << formatv(" static constexpr BuiltinEntry {0}Names[] = {{\n",
703+
TargetPrefix);
704+
for (const auto &[BuiltinName, EnumName] : Map) {
705+
StringRef Suffix = BuiltinName.substr(CommonPrefix->size());
706+
OS << formatv(" {{{0}, {1}}, // {2}\n", EnumName,
707+
*Table.GetStringOffset(Suffix), BuiltinName);
669708
}
670-
OS << " };\n";
671-
OS << " auto I = std::lower_bound(std::begin(" << I.first << "Names),\n";
672-
OS << " std::end(" << I.first << "Names),\n";
673-
OS << " BuiltinNameStr);\n";
674-
OS << " if (I != std::end(" << I.first << "Names) &&\n";
675-
OS << " I->getName() == BuiltinNameStr)\n";
676-
OS << " return I->IntrinID;\n";
677-
OS << " }\n";
709+
OS << formatv(" }; // {0}Names\n\n", TargetPrefix);
678710
}
679-
OS << " return Intrinsic::not_intrinsic;\n";
680-
OS << "}\n";
681-
OS << "#endif\n\n";
711+
712+
// After emitting the builtin tables for all targets, emit a lookup table for
713+
// all targets. We will use binary search, similar to the table for builtin
714+
// names to lookup into this table.
715+
OS << R"(
716+
struct TargetEntry {
717+
StringRef TargetPrefix;
718+
ArrayRef<BuiltinEntry> Names;
719+
StringRef CommonPrefix;
720+
bool operator<(StringRef RHS) const {
721+
return TargetPrefix < RHS;
722+
};
723+
};
724+
static constexpr TargetEntry TargetTable[] = {
725+
)";
726+
727+
for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
728+
const auto &[Map, CommonPrefix] = Entry;
729+
if (TargetPrefix.empty())
730+
continue;
731+
OS << formatv(R"( {{"{0}", {0}Names, "{2}"},)", TargetPrefix,
732+
TargetPrefix, CommonPrefix)
733+
<< "\n";
734+
}
735+
OS << " };\n";
736+
737+
// Now for the actual lookup, first check the target independent table if
738+
// we emitted one.
739+
if (HasTargetIndependentBuiltins) {
740+
OS << formatv(R"(
741+
// Check if it's a target independent builtin.
742+
// Copy the builtin name so we can use it in consume_front without clobbering
743+
// if for the lookup in the target specific table.
744+
StringRef Suffix = BuiltinName;
745+
if (Suffix.consume_front("{0}")) {{
746+
auto II = lower_bound(Names, Suffix);
747+
if (II != std::end(Names) && II->Suffix == Suffix)
748+
return II->IntrinsicID;
749+
}
750+
)",
751+
TargetIndepndentCommonPrefix);
752+
}
753+
754+
// If a target independent builtin was not found, lookup the target specific.
755+
OS << formatv(R"(
756+
auto TI = lower_bound(TargetTable, TargetPrefix);
757+
if (TI == std::end(TargetTable) || TI->TargetPrefix != TargetPrefix)
758+
return not_intrinsic;
759+
// This is the last use of BuiltinName, so no need to copy before using it in
760+
// consume_front.
761+
if (!BuiltinName.consume_front(TI->CommonPrefix))
762+
return not_intrinsic;
763+
auto II = lower_bound(TI->Names, BuiltinName);
764+
if (II == std::end(TI->Names) || II->Suffix != BuiltinName)
765+
return not_intrinsic;
766+
return II->IntrinsicID;
767+
}
768+
#endif // GET_LLVM_INTRINSIC_FOR_{0}_BUILTIN
769+
770+
)",
771+
UpperCompilerName);
682772
}
683773

684774
static void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) {

0 commit comments

Comments
 (0)