Skip to content

Commit dbc2f74

Browse files
committed
[TableGen] Rework EmitIntrinsicToBuiltinMap
Rework `IntrinsicEmitter::EmitIntrinsicToBuiltinMap` for improved performance and refactor the code. Performance: - The currently generated code does a linear search on the TargetPrefix, followed by a binary search on the builtin name within that target's builtins. - Improve the performance of this code in 2 ways: (a) Use binary search on the target prefix to look up the builtin table for a target. (b) Improve the (common) case where all builtins for a target share a common prefix. Within that target's lambda, check this common prefix first, and then do the binary search on the builtin name with the common prefix removed. This should help both code size, by creating a smaller static string table, and runtime, by reducing the cost of binary search on smaller strings. Refactor: - Use range based for loops for iterating over maps. - Use formatv() and C++ raw string literals to simplify the emission code.
1 parent 8a677c1 commit dbc2f74

File tree

5 files changed

+212
-68
lines changed

5 files changed

+212
-68
lines changed

llvm/benchmarks/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
set(LLVM_LINK_COMPONENTS
2+
Core
23
Support)
34

45
add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
56
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
7+
add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#include "benchmark/benchmark.h"
2+
#include "llvm/IR/Intrinsics.h"
3+
4+
using namespace llvm;
5+
using namespace Intrinsic;
6+
7+
// Benchmark intrinsic lookup from a variety of targets.
8+
static void BM_GetIntrinsicForClangBuiltin(benchmark::State &state) {
9+
static const char *Builtins[] = {
10+
"__builtin_adjust_trampoline",
11+
"__builtin_trap",
12+
"__builtin_arm_ttest",
13+
"__builtin_amdgcn_cubetc",
14+
"__builtin_amdgcn_udot2",
15+
"__builtin_arm_stc",
16+
"__builtin_bpf_compare",
17+
"__builtin_HEXAGON_A2_max",
18+
"__builtin_lasx_xvabsd_b",
19+
"__builtin_mips_dlsa",
20+
"__nvvm_floor_f",
21+
"__builtin_altivec_vslb",
22+
"__builtin_r600_read_tgid_x",
23+
"__builtin_riscv_aes64im",
24+
"__builtin_s390_vcksm",
25+
"__builtin_ve_vl_pvfmksge_Mvl",
26+
"__builtin_ia32_axor64",
27+
"__builtin_bitrev",
28+
};
29+
static const char *Targets[] = {"", "aarch64", "amdgcn", "mips",
30+
"nvvm", "r600", "riscv"};
31+
32+
for (auto _ : state) {
33+
for (auto Builtin : Builtins)
34+
for (auto Target : Targets)
35+
getIntrinsicForClangBuiltin(Target, Builtin);
36+
}
37+
}
38+
39+
static void
40+
BM_GetIntrinsicForClangBuiltinHexagonFirst(benchmark::State &state) {
41+
// Exercise the worst case by looking for the first builtin for a target
42+
// that has a lot of builtins.
43+
for (auto _ : state)
44+
getIntrinsicForClangBuiltin("hexagon", "__builtin_HEXAGON_A2_abs");
45+
}
46+
47+
BENCHMARK(BM_GetIntrinsicForClangBuiltin);
48+
BENCHMARK(BM_GetIntrinsicForClangBuiltinHexagonFirst);
49+
50+
BENCHMARK_MAIN();

llvm/include/llvm/IR/Intrinsics.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ namespace Intrinsic {
100100
StringRef Name);
101101

102102
/// Map a Clang builtin name to an intrinsic ID.
103-
ID getIntrinsicForClangBuiltin(const char *Prefix, StringRef BuiltinName);
103+
ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
104104

105105
/// Map a MS builtin name to an intrinsic ID.
106-
ID getIntrinsicForMSBuiltin(const char *Prefix, StringRef BuiltinName);
106+
ID getIntrinsicForMSBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
107107

108108
/// Returns true if the intrinsic ID is for one of the "Constrained
109109
/// Floating-Point Intrinsics".

llvm/include/llvm/TableGen/StringToOffsetTable.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ class StringToOffsetTable {
2626
std::string AggregateString;
2727

2828
public:
29-
bool Empty() const { return StringOffset.empty(); }
29+
bool empty() const { return StringOffset.empty(); }
30+
size_t size() const { return AggregateString.size(); }
3031

3132
unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
3233
auto IterBool =

llvm/utils/TableGen/IntrinsicEmitter.cpp

Lines changed: 156 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/Twine.h"
2020
#include "llvm/Support/CommandLine.h"
2121
#include "llvm/Support/ErrorHandling.h"
22+
#include "llvm/Support/FormatVariadic.h"
2223
#include "llvm/Support/ModRef.h"
2324
#include "llvm/Support/raw_ostream.h"
2425
#include "llvm/TableGen/Error.h"
@@ -28,6 +29,7 @@
2829
#include <algorithm>
2930
#include <array>
3031
#include <cassert>
32+
#include <cctype>
3133
#include <map>
3234
#include <optional>
3335
#include <string>
@@ -99,7 +101,7 @@ void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
99101
// Emit the intrinsic parameter attributes.
100102
EmitAttributes(Ints, OS);
101103

102-
// Emit code to translate GCC builtins into LLVM intrinsics.
104+
// Emit code to translate Clang builtins into LLVM intrinsics.
103105
EmitIntrinsicToBuiltinMap(Ints, true, OS);
104106

105107
// Emit code to translate MS builtins into LLVM intrinsics.
@@ -596,89 +598,178 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
596598
const CodeGenIntrinsicTable &Ints, bool IsClang, raw_ostream &OS) {
597599
StringRef CompilerName = IsClang ? "Clang" : "MS";
598600
StringRef UpperCompilerName = IsClang ? "CLANG" : "MS";
599-
// map<TargetPrefix, map<BuiltinName, EnumName>>. Note that we iterate over
600-
// both maps in the code below. For the inner map, entries need to be emitted
601-
// in the sorted order of `BuiltinName` because we use std::lower_bound to
602-
// search these entries. For the outer map, it doesn't need be be sorted, but
603-
// we use a map to eliminate non-determinism in the emitted code.
604-
typedef std::map<StringRef, std::map<StringRef, StringRef>> BIMTy;
605-
BIMTy BuiltinMap;
606-
StringToOffsetTable Table;
601+
602+
// map<TargetPrefix, pair<map<BuiltinName, EnumName>, CommonPrefix>>.
603+
// Note that we iterate over both the maps in the code below and both
604+
// iterations need to iterate in sorted key order. For the inner map, entries
605+
// need to be emitted in the sorted order of `BuiltinName - CommonPrefix`
606+
// because we use std::lower_bound to search these entries. For the outer map
607+
// as well, entries need to be emitted in sorted order of `TargetPrefix` as we
608+
// use std::lower_bound to search these entries.
609+
using BIMEntryTy = std::pair<std::map<StringRef, StringRef>, StringRef>;
610+
std::map<StringRef, BIMEntryTy> BuiltinMap;
611+
607612
for (const CodeGenIntrinsic &Int : Ints) {
608613
StringRef BuiltinName = IsClang ? Int.ClangBuiltinName : Int.MSBuiltinName;
609614
if (BuiltinName.empty())
610615
continue;
611616
// Get the map for this target prefix.
612-
std::map<StringRef, StringRef> &BIM = BuiltinMap[Int.TargetPrefix];
617+
auto &[Map, CommonPrefix] = BuiltinMap[Int.TargetPrefix];
613618

614-
if (!BIM.insert(std::pair(BuiltinName, Int.EnumName)).second)
619+
if (!Map.insert({BuiltinName, Int.EnumName}).second)
615620
PrintFatalError(Int.TheDef->getLoc(),
616621
"Intrinsic '" + Int.TheDef->getName() + "': duplicate " +
617622
CompilerName + " builtin name!");
618-
Table.GetOrAddStringOffset(BuiltinName);
619-
}
620623

621-
OS << "// Get the LLVM intrinsic that corresponds to a builtin.\n";
622-
OS << "// This is used by the C front-end. The builtin name is passed\n";
623-
OS << "// in as BuiltinName, and a target prefix (e.g. 'ppc') is passed\n";
624-
OS << "// in as TargetPrefix. The result is assigned to 'IntrinsicID'.\n";
625-
OS << "#ifdef GET_LLVM_INTRINSIC_FOR_" << UpperCompilerName << "_BUILTIN\n";
624+
// Update common prefix.
625+
if (!CommonPrefix.data()) {
626+
// For the first builtin for this target, initialize the common prefix.
627+
CommonPrefix = BuiltinName;
628+
continue;
629+
}
626630

627-
OS << "Intrinsic::ID Intrinsic::getIntrinsicFor" << CompilerName
628-
<< "Builtin(const char "
629-
<< "*TargetPrefixStr, StringRef BuiltinNameStr) {\n";
631+
// Update the common prefix. Note that this assumes that `take_front` will
632+
// never set the `Data` pointer in CommonPrefix to nullptr.
633+
const char *Mismatch = mismatch(CommonPrefix, BuiltinName).first;
634+
CommonPrefix = CommonPrefix.take_front(Mismatch - CommonPrefix.begin());
635+
}
630636

631-
if (Table.Empty()) {
632-
OS << " return Intrinsic::not_intrinsic;\n";
633-
OS << "}\n";
634-
OS << "#endif\n\n";
637+
// Populate the string table with the names of all the builtins after
638+
// removing this common prefix.
639+
StringToOffsetTable Table;
640+
for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
641+
auto &[Map, CommonPrefix] = Entry;
642+
for (auto &[BuiltinName, EnumName] : Map) {
643+
StringRef Suffix = BuiltinName.substr(CommonPrefix.size());
644+
if (!Suffix.empty())
645+
Table.GetOrAddStringOffset(Suffix);
646+
}
647+
}
648+
649+
OS << formatv(R"(
650+
// Get the LLVM intrinsic that corresponds to a builtin. This is used by the
651+
// C front-end. The builtin name is passed in as BuiltinName, and a target
652+
// prefix (e.g. 'ppc') is passed in as TargetPrefix.
653+
#ifdef GET_LLVM_INTRINSIC_FOR_{0}_BUILTIN
654+
Intrinsic::ID
655+
Intrinsic::getIntrinsicFor{1}Builtin(StringRef TargetPrefix,
656+
StringRef BuiltinName) {{
657+
using namespace Intrinsic;
658+
)",
659+
UpperCompilerName, CompilerName);
660+
661+
if (BuiltinMap.empty()) {
662+
OS << formatv(R"(
663+
return not_intrinsic;
664+
}
665+
#endif // GET_LLVM_INTRINSIC_FOR_{0}_BUILTIN
666+
)",
667+
UpperCompilerName);
635668
return;
636669
}
637670

638-
OS << " static constexpr char BuiltinNames[] = {\n";
639-
Table.EmitCharArray(OS);
640-
OS << " };\n\n";
671+
if (!Table.empty()) {
672+
OS << " static constexpr char BuiltinNames[] = {\n";
673+
Table.EmitCharArray(OS);
674+
OS << " };\n\n";
675+
676+
OS << R"(
677+
struct BuiltinEntry {
678+
ID IntrinsicID;
679+
unsigned StrTabOffset;
680+
const char *getName() const { return &BuiltinNames[StrTabOffset]; }
681+
bool operator<(StringRef RHS) const {
682+
return strncmp(getName(), RHS.data(), RHS.size()) < 0;
683+
}
684+
};
641685
642-
OS << " struct BuiltinEntry {\n";
643-
OS << " Intrinsic::ID IntrinID;\n";
644-
OS << " unsigned StrTabOffset;\n";
645-
OS << " const char *getName() const {\n";
646-
OS << " return &BuiltinNames[StrTabOffset];\n";
647-
OS << " }\n";
648-
OS << " bool operator<(StringRef RHS) const {\n";
649-
OS << " return strncmp(getName(), RHS.data(), RHS.size()) < 0;\n";
650-
OS << " }\n";
651-
OS << " };\n";
686+
)";
687+
}
652688

653-
OS << " StringRef TargetPrefix(TargetPrefixStr);\n\n";
654-
655-
// Note: this could emit significantly better code if we cared.
656-
for (auto &I : BuiltinMap) {
657-
OS << " ";
658-
if (!I.first.empty())
659-
OS << "if (TargetPrefix == \"" << I.first << "\") ";
660-
else
661-
OS << "/* Target Independent Builtins */ ";
662-
OS << "{\n";
663-
664-
// Emit the comparisons for this target prefix.
665-
OS << " static constexpr BuiltinEntry " << I.first << "Names[] = {\n";
666-
for (const auto &P : I.second) {
667-
OS << " {Intrinsic::" << P.second << ", "
668-
<< Table.GetOrAddStringOffset(P.first) << "}, // " << P.first << "\n";
689+
// Emit a per target table of builtin names.
690+
bool HasTargetIndependentBuiltins = false;
691+
StringRef TargetIndepndentCommonPrefix;
692+
for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
693+
const auto &[Map, CommonPrefix] = Entry;
694+
if (!TargetPrefix.empty()) {
695+
OS << formatv(" // Builtins for {0}.\n", TargetPrefix);
696+
} else {
697+
OS << " // Target independent builtins.\n";
698+
HasTargetIndependentBuiltins = true;
699+
TargetIndepndentCommonPrefix = CommonPrefix;
700+
}
701+
702+
// Emit the builtin table for this target prefix.
703+
OS << formatv(" static constexpr BuiltinEntry {0}Names[] = {{\n",
704+
TargetPrefix);
705+
for (const auto &[BuiltinName, EnumName] : Map) {
706+
StringRef Suffix = BuiltinName.substr(CommonPrefix.size());
707+
OS << formatv(" {{{0}, {1}}, // {2}\n", EnumName,
708+
Table.GetOrAddStringOffset(Suffix), BuiltinName);
669709
}
670-
OS << " };\n";
671-
OS << " auto I = std::lower_bound(std::begin(" << I.first << "Names),\n";
672-
OS << " std::end(" << I.first << "Names),\n";
673-
OS << " BuiltinNameStr);\n";
674-
OS << " if (I != std::end(" << I.first << "Names) &&\n";
675-
OS << " I->getName() == BuiltinNameStr)\n";
676-
OS << " return I->IntrinID;\n";
677-
OS << " }\n";
710+
OS << formatv(" }; // {0}Names\n\n", TargetPrefix);
678711
}
679-
OS << " return Intrinsic::not_intrinsic;\n";
680-
OS << "}\n";
681-
OS << "#endif\n\n";
712+
713+
// After emitting the builtin tables for all targets, emit a lookup table for
714+
// all targets. We will use binary search, similar to the table for builtin
715+
// names to lookup into this table.
716+
OS << R"(
717+
struct TargetEntry {
718+
StringRef TargetPrefix;
719+
ArrayRef<BuiltinEntry> Names;
720+
StringRef CommonPrefix;
721+
bool operator<(StringRef RHS) const {
722+
return TargetPrefix < RHS;
723+
};
724+
};
725+
static constexpr TargetEntry TargetTable[] = {
726+
)";
727+
728+
for (const auto &[TargetPrefix, Entry] : BuiltinMap) {
729+
const auto &[Map, CommonPrefix] = Entry;
730+
if (TargetPrefix.empty())
731+
continue;
732+
OS << formatv(R"( {{"{0}", {0}Names, "{2}"},)", TargetPrefix,
733+
TargetPrefix, CommonPrefix)
734+
<< "\n";
735+
}
736+
OS << " };\n";
737+
738+
// Now for the actual lookup, first check the target independent table if
739+
// we emitted one.
740+
if (HasTargetIndependentBuiltins) {
741+
OS << formatv(R"(
742+
// Check if it's a target independent builtin.
743+
// Copy the builtin name so we can use it in consume_front without clobbering
744+
// if for the lookup in the target specific table.
745+
StringRef Suffix = BuiltinName;
746+
if (Suffix.consume_front("{0}")) {{
747+
auto II = lower_bound(Names, Suffix);
748+
if (II != std::end(Names) && II->getName() == Suffix)
749+
return II->IntrinsicID;
750+
}
751+
)",
752+
TargetIndepndentCommonPrefix);
753+
}
754+
755+
// If a target independent builtin was not found, lookup the target specific.
756+
OS << formatv(R"(
757+
auto TI = lower_bound(TargetTable, TargetPrefix);
758+
if (TI == std::end(TargetTable) || TI->TargetPrefix != TargetPrefix)
759+
return not_intrinsic;
760+
// This is the last use of BuiltinName, so no need to copy before using it in
761+
// consume_front.
762+
if (!BuiltinName.consume_front(TI->CommonPrefix))
763+
return not_intrinsic;
764+
auto II = lower_bound(TI->Names, BuiltinName);
765+
if (II == std::end(TI->Names) || II->getName() != BuiltinName)
766+
return not_intrinsic;
767+
return II->IntrinsicID;
768+
}
769+
#endif // GET_LLVM_INTRINSIC_FOR_{0}_BUILTIN
770+
771+
)",
772+
UpperCompilerName);
682773
}
683774

684775
static void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) {

0 commit comments

Comments
 (0)