Skip to content

Switch the intrinsic names to a string table #118929

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions llvm/include/llvm/IR/Intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,6 @@ namespace Intrinsic {
Function *getDeclarationIfExists(Module *M, ID id, ArrayRef<Type *> Tys,
FunctionType *FT = nullptr);

/// Looks up Name in NameTable via binary search. NameTable must be sorted
/// and all entries must start with "llvm.". If NameTable contains an exact
/// match for Name or a prefix of Name followed by a dot, its index in
/// NameTable is returned. Otherwise, -1 is returned.
int lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
StringRef Name, StringRef Target = "");

/// Map a Clang builtin name to an intrinsic ID.
ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName);

Expand Down
64 changes: 41 additions & 23 deletions llvm/lib/IR/Intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,13 @@
using namespace llvm;

/// Table of string intrinsic names indexed by enum value.
static constexpr const char *const IntrinsicNameTable[] = {
"not_intrinsic",
#define GET_INTRINSIC_NAME_TABLE
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_INTRINSIC_NAME_TABLE
};

StringRef Intrinsic::getBaseName(ID id) {
assert(id < num_intrinsics && "Invalid intrinsic ID!");
return IntrinsicNameTable[id];
return IntrinsicNameTable + IntrinsicNameOffsetTable[id];
}

StringRef Intrinsic::getName(ID id) {
Expand Down Expand Up @@ -621,9 +618,12 @@ bool Intrinsic::isTargetIntrinsic(Intrinsic::ID IID) {
return IID > TargetInfos[0].Count;
}

int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
StringRef Name,
StringRef Target) {
/// Looks up Name in NameTable via binary search. NameTable must be sorted
/// and all entries must start with "llvm.". If NameTable contains an exact
/// match for Name or a prefix of Name followed by a dot, its index in
/// NameTable is returned. Otherwise, -1 is returned.
static int lookupLLVMIntrinsicByName(ArrayRef<unsigned> NameOffsetTable,
StringRef Name, StringRef Target = "") {
assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix");
assert(Name.drop_front(5).starts_with(Target) && "Unexpected target");

Expand All @@ -638,36 +638,53 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
if (!Target.empty())
CmpEnd += 1 + Target.size(); // skip the .target component.

const char *const *Low = NameTable.begin();
const char *const *High = NameTable.end();
const char *const *LastLow = Low;
const unsigned *Low = NameOffsetTable.begin();
const unsigned *High = NameOffsetTable.end();
const unsigned *LastLow = Low;
while (CmpEnd < Name.size() && High - Low > 0) {
size_t CmpStart = CmpEnd;
CmpEnd = Name.find('.', CmpStart + 1);
CmpEnd = CmpEnd == StringRef::npos ? Name.size() : CmpEnd;
auto Cmp = [CmpStart, CmpEnd](const char *LHS, const char *RHS) {
return strncmp(LHS + CmpStart, RHS + CmpStart, CmpEnd - CmpStart) < 0;
auto Cmp = [CmpStart, CmpEnd](auto LHS, auto RHS) {
// `equal_range` requires the comparison to work with either side being an
// offset or the value. Detect which kind each side is to set up the
// compared strings.
const char *LHSStr;
if constexpr (std::is_integral_v<decltype(LHS)>) {
LHSStr = &IntrinsicNameTable[LHS];
} else {
LHSStr = LHS;
}
const char *RHSStr;
if constexpr (std::is_integral_v<decltype(RHS)>) {
RHSStr = &IntrinsicNameTable[RHS];
} else {
RHSStr = RHS;
}
return strncmp(LHSStr + CmpStart, RHSStr + CmpStart, CmpEnd - CmpStart) <
0;
};
LastLow = Low;
std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp);
}
if (High - Low > 0)
LastLow = Low;

if (LastLow == NameTable.end())
if (LastLow == NameOffsetTable.end())
return -1;
StringRef NameFound = *LastLow;
StringRef NameFound = &IntrinsicNameTable[*LastLow];
if (Name == NameFound ||
(Name.starts_with(NameFound) && Name[NameFound.size()] == '.'))
return LastLow - NameTable.begin();
return LastLow - NameOffsetTable.begin();
return -1;
}

/// Find the segment of \c IntrinsicNameTable for intrinsics with the same
/// Find the segment of \c IntrinsicNameOffsetTable for intrinsics with the same
/// target as \c Name, or the generic table if \c Name is not target specific.
///
/// Returns the relevant slice of \c IntrinsicNameTable and the target name.
static std::pair<ArrayRef<const char *>, StringRef>
/// Returns the relevant slice of \c IntrinsicNameOffsetTable and the target
/// name.
static std::pair<ArrayRef<unsigned>, StringRef>
findTargetSubtable(StringRef Name) {
assert(Name.starts_with("llvm."));

Expand All @@ -680,25 +697,26 @@ findTargetSubtable(StringRef Name) {
// We've either found the target or just fall back to the generic set, which
// is always first.
const auto &TI = It != Targets.end() && It->Name == Target ? *It : Targets[0];
return {ArrayRef(&IntrinsicNameTable[1] + TI.Offset, TI.Count), TI.Name};
return {ArrayRef(&IntrinsicNameOffsetTable[1] + TI.Offset, TI.Count),
TI.Name};
}

/// This does the actual lookup of an intrinsic ID which matches the given
/// function name.
Intrinsic::ID Intrinsic::lookupIntrinsicID(StringRef Name) {
auto [NameTable, Target] = findTargetSubtable(Name);
int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name, Target);
auto [NameOffsetTable, Target] = findTargetSubtable(Name);
int Idx = lookupLLVMIntrinsicByName(NameOffsetTable, Name, Target);
if (Idx == -1)
return Intrinsic::not_intrinsic;

// Intrinsic IDs correspond to the location in IntrinsicNameTable, but we have
// an index into a sub-table.
int Adjust = NameTable.data() - IntrinsicNameTable;
int Adjust = NameOffsetTable.data() - IntrinsicNameOffsetTable;
Intrinsic::ID ID = static_cast<Intrinsic::ID>(Idx + Adjust);

// If the intrinsic is not overloaded, require an exact match. If it is
// overloaded, require either exact or prefix match.
const auto MatchSize = strlen(NameTable[Idx]);
const auto MatchSize = strlen(&IntrinsicNameTable[NameOffsetTable[Idx]]);
assert(Name.size() >= MatchSize && "Expected either exact or prefix match");
bool IsExactMatch = Name.size() == MatchSize;
return IsExactMatch || Intrinsic::isOverloaded(ID) ? ID
Expand Down
29 changes: 13 additions & 16 deletions llvm/unittests/IR/IntrinsicsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,24 +63,21 @@ class IntrinsicsTest : public ::testing::Test {
};

TEST(IntrinsicNameLookup, Basic) {
static constexpr const char *const NameTable[] = {
"llvm.foo", "llvm.foo.a", "llvm.foo.b", "llvm.foo.b.a", "llvm.foo.c",
};
using namespace Intrinsic;
EXPECT_EQ(Intrinsic::memcpy, lookupIntrinsicID("llvm.memcpy"));

static constexpr std::pair<const char *, int> Tests[] = {
{"llvm.foo", 0}, {"llvm.foo.f64", 0}, {"llvm.foo.b", 2},
{"llvm.foo.b.a", 3}, {"llvm.foo.c", 4}, {"llvm.foo.c.f64", 4},
{"llvm.bar", -1},
};
// Partial, either between dots or not the last dot are not intrinsics.
EXPECT_EQ(not_intrinsic, lookupIntrinsicID("llvm.mem"));
EXPECT_EQ(not_intrinsic, lookupIntrinsicID("llvm.gc"));

for (const auto &[Name, ExpectedIdx] : Tests) {
int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name);
EXPECT_EQ(ExpectedIdx, Idx);
if (!StringRef(Name).starts_with("llvm.foo"))
continue;
Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name, "foo");
EXPECT_EQ(ExpectedIdx, Idx);
}
// Look through intrinsic names with internal dots.
EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline"));

// Check that overloaded names are mapped to the underlying ID.
EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i8"));
EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i32"));
EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i64"));
EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i1024"));
}

// Tests to verify getIntrinsicForClangBuiltin.
Expand Down
28 changes: 26 additions & 2 deletions llvm/utils/TableGen/IntrinsicEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,37 @@ static constexpr IntrinsicTargetInfo TargetInfos[] = {

void IntrinsicEmitter::EmitIntrinsicToNameTable(
const CodeGenIntrinsicTable &Ints, raw_ostream &OS) {
// Built up a table of the intrinsic names.
constexpr StringLiteral NotIntrinsic = "not_intrinsic";
StringToOffsetTable Table;
Table.GetOrAddStringOffset(NotIntrinsic);
for (const auto &Int : Ints)
Table.GetOrAddStringOffset(Int.Name);

OS << R"(// Intrinsic ID to name table.
#ifdef GET_INTRINSIC_NAME_TABLE
// Note that entry #0 is the invalid intrinsic!

)";

Table.EmitStringLiteralDef(OS, "static constexpr char IntrinsicNameTable[]",
/*Indent=*/"");

OS << R"(
static constexpr unsigned IntrinsicNameOffsetTable[] = {
)";

OS << formatv(" {}, // {}\n", Table.GetStringOffset(NotIntrinsic),
NotIntrinsic);
for (const auto &Int : Ints)
OS << " \"" << Int.Name << "\",\n";
OS << "#endif\n\n";
OS << formatv(" {}, // {}\n", Table.GetStringOffset(Int.Name), Int.Name);

OS << R"(
}; // IntrinsicNameOffsetTable

#endif

)";
}

void IntrinsicEmitter::EmitIntrinsicToOverloadTable(
Expand Down
Loading