-
Notifications
You must be signed in to change notification settings - Fork 14.4k
Switch the intrinsic names to a string table #118929
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This avoids the need to dynamically relocate each pointer in the table. To make this work, this PR also moves the binary search of intrinsic names to an internal function with an adjusted signature, and switches the unittesting to test against actual intrinsics.
@llvm/pr-subscribers-tablegen @llvm/pr-subscribers-llvm-ir Author: Chandler Carruth (chandlerc) ChangesThis avoids the need to dynamically relocate each pointer in the table. To make this work, this PR also moves the binary search of intrinsic names to an internal function with an adjusted signature, and switches the unittesting to test against actual intrinsics. Depends on #118833 Full diff: https://github.com/llvm/llvm-project/pull/118929.diff 4 Files Affected:
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 89dfff256e0c43..82f72131b9d2f4 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -112,13 +112,6 @@ namespace Intrinsic {
Function *getDeclarationIfExists(Module *M, ID id, ArrayRef<Type *> Tys,
FunctionType *FT = nullptr);
- /// Looks up Name in NameTable via binary search. NameTable must be sorted
- /// and all entries must start with "llvm.". If NameTable contains an exact
- /// match for Name or a prefix of Name followed by a dot, its index in
- /// NameTable is returned. Otherwise, -1 is returned.
- int lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
- StringRef Name, StringRef Target = "");
-
/// Map a Clang builtin name to an intrinsic ID.
ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 3130a0bd2955a5..4de76ffb11718e 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -34,16 +34,13 @@
using namespace llvm;
/// Table of string intrinsic names indexed by enum value.
-static constexpr const char *const IntrinsicNameTable[] = {
- "not_intrinsic",
#define GET_INTRINSIC_NAME_TABLE
#include "llvm/IR/IntrinsicImpl.inc"
#undef GET_INTRINSIC_NAME_TABLE
-};
StringRef Intrinsic::getBaseName(ID id) {
assert(id < num_intrinsics && "Invalid intrinsic ID!");
- return IntrinsicNameTable[id];
+ return IntrinsicNameTable + IntrinsicNameOffsetTable[id];
}
StringRef Intrinsic::getName(ID id) {
@@ -621,9 +618,12 @@ bool Intrinsic::isTargetIntrinsic(Intrinsic::ID IID) {
return IID > TargetInfos[0].Count;
}
-int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
- StringRef Name,
- StringRef Target) {
+/// Looks up Name in NameTable via binary search. NameTable must be sorted
+/// and all entries must start with "llvm.". If NameTable contains an exact
+/// match for Name or a prefix of Name followed by a dot, its index in
+/// NameTable is returned. Otherwise, -1 is returned.
+static int lookupLLVMIntrinsicByName(ArrayRef<int> NameOffsetTable,
+ StringRef Name, StringRef Target = "") {
assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix");
assert(Name.drop_front(5).starts_with(Target) && "Unexpected target");
@@ -638,15 +638,31 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
if (!Target.empty())
CmpEnd += 1 + Target.size(); // skip the .target component.
- const char *const *Low = NameTable.begin();
- const char *const *High = NameTable.end();
- const char *const *LastLow = Low;
+ const int *Low = NameOffsetTable.begin();
+ const int *High = NameOffsetTable.end();
+ const int *LastLow = Low;
while (CmpEnd < Name.size() && High - Low > 0) {
size_t CmpStart = CmpEnd;
CmpEnd = Name.find('.', CmpStart + 1);
CmpEnd = CmpEnd == StringRef::npos ? Name.size() : CmpEnd;
- auto Cmp = [CmpStart, CmpEnd](const char *LHS, const char *RHS) {
- return strncmp(LHS + CmpStart, RHS + CmpStart, CmpEnd - CmpStart) < 0;
+ auto Cmp = [CmpStart, CmpEnd](auto LHS, auto RHS) {
+ // `equal_range` requires the comparison to work with either side being an
+ // offset or the value. Detect which kind each side is to set up the
+ // compared strings.
+ const char *LHSStr;
+ if constexpr (std::is_integral_v<decltype(LHS)>) {
+ LHSStr = &IntrinsicNameTable[LHS];
+ } else {
+ LHSStr = LHS;
+ }
+ const char *RHSStr;
+ if constexpr (std::is_integral_v<decltype(RHS)>) {
+ RHSStr = &IntrinsicNameTable[RHS];
+ } else {
+ RHSStr = RHS;
+ }
+ return strncmp(LHSStr + CmpStart, RHSStr + CmpStart, CmpEnd - CmpStart) <
+ 0;
};
LastLow = Low;
std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp);
@@ -654,21 +670,21 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
if (High - Low > 0)
LastLow = Low;
- if (LastLow == NameTable.end())
+ if (LastLow == NameOffsetTable.end())
return -1;
- StringRef NameFound = *LastLow;
+ StringRef NameFound = &IntrinsicNameTable[*LastLow];
if (Name == NameFound ||
(Name.starts_with(NameFound) && Name[NameFound.size()] == '.'))
- return LastLow - NameTable.begin();
+ return LastLow - NameOffsetTable.begin();
return -1;
}
-/// Find the segment of \c IntrinsicNameTable for intrinsics with the same
+/// Find the segment of \c IntrinsicNameOffsetTable for intrinsics with the same
/// target as \c Name, or the generic table if \c Name is not target specific.
///
-/// Returns the relevant slice of \c IntrinsicNameTable and the target name.
-static std::pair<ArrayRef<const char *>, StringRef>
-findTargetSubtable(StringRef Name) {
+/// Returns the relevant slice of \c IntrinsicNameOffsetTable and the target
+/// name.
+static std::pair<ArrayRef<int>, StringRef> findTargetSubtable(StringRef Name) {
assert(Name.starts_with("llvm."));
ArrayRef<IntrinsicTargetInfo> Targets(TargetInfos);
@@ -680,25 +696,26 @@ findTargetSubtable(StringRef Name) {
// We've either found the target or just fall back to the generic set, which
// is always first.
const auto &TI = It != Targets.end() && It->Name == Target ? *It : Targets[0];
- return {ArrayRef(&IntrinsicNameTable[1] + TI.Offset, TI.Count), TI.Name};
+ return {ArrayRef(&IntrinsicNameOffsetTable[1] + TI.Offset, TI.Count),
+ TI.Name};
}
/// This does the actual lookup of an intrinsic ID which matches the given
/// function name.
Intrinsic::ID Intrinsic::lookupIntrinsicID(StringRef Name) {
- auto [NameTable, Target] = findTargetSubtable(Name);
- int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name, Target);
+ auto [NameOffsetTable, Target] = findTargetSubtable(Name);
+ int Idx = lookupLLVMIntrinsicByName(NameOffsetTable, Name, Target);
if (Idx == -1)
return Intrinsic::not_intrinsic;
// Intrinsic IDs correspond to the location in IntrinsicNameTable, but we have
// an index into a sub-table.
- int Adjust = NameTable.data() - IntrinsicNameTable;
+ int Adjust = NameOffsetTable.data() - IntrinsicNameOffsetTable;
Intrinsic::ID ID = static_cast<Intrinsic::ID>(Idx + Adjust);
// If the intrinsic is not overloaded, require an exact match. If it is
// overloaded, require either exact or prefix match.
- const auto MatchSize = strlen(NameTable[Idx]);
+ const auto MatchSize = strlen(&IntrinsicNameTable[NameOffsetTable[Idx]]);
assert(Name.size() >= MatchSize && "Expected either exact or prefix match");
bool IsExactMatch = Name.size() == MatchSize;
return IsExactMatch || Intrinsic::isOverloaded(ID) ? ID
diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp
index b32a1991448595..ad9af2075e371d 100644
--- a/llvm/unittests/IR/IntrinsicsTest.cpp
+++ b/llvm/unittests/IR/IntrinsicsTest.cpp
@@ -63,24 +63,21 @@ class IntrinsicsTest : public ::testing::Test {
};
TEST(IntrinsicNameLookup, Basic) {
- static constexpr const char *const NameTable[] = {
- "llvm.foo", "llvm.foo.a", "llvm.foo.b", "llvm.foo.b.a", "llvm.foo.c",
- };
+ using namespace Intrinsic;
+ EXPECT_EQ(Intrinsic::memcpy, lookupIntrinsicID("llvm.memcpy"));
- static constexpr std::pair<const char *, int> Tests[] = {
- {"llvm.foo", 0}, {"llvm.foo.f64", 0}, {"llvm.foo.b", 2},
- {"llvm.foo.b.a", 3}, {"llvm.foo.c", 4}, {"llvm.foo.c.f64", 4},
- {"llvm.bar", -1},
- };
+ // Partial, either between dots or not the last dot are not intrinsics.
+ EXPECT_EQ(not_intrinsic, lookupIntrinsicID("llvm.mem"));
+ EXPECT_EQ(not_intrinsic, lookupIntrinsicID("llvm.gc"));
- for (const auto &[Name, ExpectedIdx] : Tests) {
- int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name);
- EXPECT_EQ(ExpectedIdx, Idx);
- if (!StringRef(Name).starts_with("llvm.foo"))
- continue;
- Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name, "foo");
- EXPECT_EQ(ExpectedIdx, Idx);
- }
+ // Look through intrinsic names with internal dots.
+ EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline"));
+
+ // Check that overloaded names are mapped to the underlying ID.
+ EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i8"));
+ EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i32"));
+ EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i64"));
+ EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i1024"));
}
// Tests to verify getIntrinsicForClangBuiltin.
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 070d7522a97be9..d986620c52018e 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -239,13 +239,37 @@ static constexpr IntrinsicTargetInfo TargetInfos[] = {
void IntrinsicEmitter::EmitIntrinsicToNameTable(
const CodeGenIntrinsicTable &Ints, raw_ostream &OS) {
+ // Built up a table of the intrinsic names.
+ constexpr StringLiteral NotIntrinsic = "not_intrinsic";
+ StringToOffsetTable Table;
+ Table.GetOrAddStringOffset(NotIntrinsic);
+ for (const auto &Int : Ints)
+ Table.GetOrAddStringOffset(Int.Name);
+
OS << R"(// Intrinsic ID to name table.
#ifdef GET_INTRINSIC_NAME_TABLE
// Note that entry #0 is the invalid intrinsic!
+
+)";
+
+ Table.EmitStringLiteralDef(OS, "static constexpr char IntrinsicNameTable[]",
+ /*Indent=*/"");
+
+ OS << R"(
+static constexpr int IntrinsicNameOffsetTable[] = {
)";
+
+ OS << formatv(" {}, // {}\n", Table.GetStringOffset(NotIntrinsic),
+ NotIntrinsic);
for (const auto &Int : Ints)
- OS << " \"" << Int.Name << "\",\n";
- OS << "#endif\n\n";
+ OS << formatv(" {}, // {}\n", Table.GetStringOffset(Int.Name), Int.Name);
+
+ OS << R"(
+}; // IntrinsicNameOffsetTable
+
+#endif
+
+)";
}
void IntrinsicEmitter::EmitIntrinsicToOverloadTable(
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just one comment above, LGTM otherwise! Thanks for finishing this, I had started on this but got sidetracked.
/*Indent=*/""); | ||
|
||
OS << R"( | ||
static constexpr int IntrinsicNameOffsetTable[] = { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
static constexpr int IntrinsicNameOffsetTable[] = { | |
static constexpr unsigned IntrinsicNameOffsetTable[] = { |
Or uint32_t
-- or can these offset be negative?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🤷🏻 I just default to int
when it doesn't matter. But I guess LLVM uses unsigned
more widely, so sure.
They cannot be negative.
…On Sat, Dec 7, 2024 at 1:28 PM Nikita Popov ***@***.***> wrote:
***@***.**** commented on this pull request.
------------------------------
In llvm/utils/TableGen/IntrinsicEmitter.cpp
<#118929 (comment)>:
> OS << R"(// Intrinsic ID to name table.
#ifdef GET_INTRINSIC_NAME_TABLE
// Note that entry #0 is the invalid intrinsic!
+
+)";
+
+ Table.EmitStringLiteralDef(OS, "static constexpr char IntrinsicNameTable[]",
+ /*Indent=*/"");
+
+ OS << R"(
+static constexpr int IntrinsicNameOffsetTable[] = {
⬇️ Suggested change
-static constexpr int IntrinsicNameOffsetTable[] = {
+static constexpr unsigned IntrinsicNameOffsetTable[] = {
Or uint32_t -- or can these offset be negative?
—
Reply to this email directly, view it on GitHub
<#118929 (review)>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/APRMUB4XCPWGTSX57EGGTBL2ENSATAVCNFSM6AAAAABTD72F3CVHI2DSMVQWIX3LMV43YUDVNRWFEZLROVSXG5CSMV3GSZLXHMZDIOBWGY4DOMJVHE>
.
You are receiving this because your review was requested.Message ID:
***@***.***>
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Switched to unsigned
, PTAL.
/*Indent=*/""); | ||
|
||
OS << R"( | ||
static constexpr int IntrinsicNameOffsetTable[] = { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🤷🏻 I just default to int
when it doesn't matter. But I guess LLVM uses unsigned
more widely, so sure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM to me. Approving.
Thanks for review, merging once pre-merge builds come back. |
This avoids the need to dynamically relocate each pointer in the table.
To make this work, this PR also moves the binary search of intrinsic names to an internal function with an adjusted signature, and switches the unittesting to test against actual intrinsics.