-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLVM][TableGen] Add overloaded intrinsic name conflict checks #109314
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir Author: Rahul Joshi (jurahul) ChangesFull diff: https://github.com/llvm/llvm-project/pull/109314.diff 4 Files Affected:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 0a74a217a5f010..ff0174cfc9ecc3 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1834,6 +1834,10 @@ def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty],
[IntrNoMem, IntrWillReturn, IntrConvergent],
"llvm.is.constant">;
+def int_is_constant0 : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty],
+ [IntrNoMem, IntrWillReturn, IntrConvergent],
+ "llvm.is.constant.my">;
+
// Introduce a use of the argument without generating any code.
def int_fake_use : Intrinsic<[], [llvm_vararg_ty]>;
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 7ed82c2ece464a..009b20cf8e635c 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -236,6 +236,8 @@ void DbgAssignIntrinsic::setValue(Value *V) {
MetadataAsValue::get(getContext(), ValueAsMetadata::get(V)));
}
+// Note: a modified version of this fuction exists in CodeGenIntrinsics.cpp.
+// Any changes here likely need to be reflected there as well.
int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
StringRef Name) {
assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix");
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index e566b7ceedf38e..4c3d5c3b1b2f13 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -68,6 +68,7 @@ CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) {
Targets.back().Count = Intrinsics.size() - Targets.back().Offset;
CheckDuplicateIntrinsics();
+ CheckOverloadConflicts();
}
// Check for duplicate intrinsic names.
@@ -94,6 +95,168 @@ void CodeGenIntrinsicTable::CheckDuplicateIntrinsics() const {
PrintFatalNote(First.TheDef, "Previous definition here");
}
+// Note: This is a modified version of `Intrinsic::lookupLLVMIntrinsicByName`
+// in IntrinsicInst.cpp file.
+template <typename T>
+int lookupLLVMIntrinsicByName(ArrayRef<const T *> NameTable, StringRef Name,
+ function_ref<const char *(const T *)> ToString) {
+ using ToStringTy = function_ref<const char *(const T *)>;
+ assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix");
+
+ // Do successive binary searches of the dotted name components. For
+ // "llvm.gc.experimental.statepoint.p1i8.p1i32", we will find the range of
+ // intrinsics starting with "llvm.gc", then "llvm.gc.experimental", then
+ // "llvm.gc.experimental.statepoint", and then we will stop as the range is
+ // size 1. During the search, we can skip the prefix that we already know is
+ // identical. By using strncmp we consider names with differing suffixes to
+ // be part of the equal range.
+ size_t CmpEnd = 4; // Skip the "llvm" component.
+
+ struct Compare {
+ Compare(size_t CmpStart, size_t CmpEnd, ToStringTy ToString)
+ : CmpStart(CmpStart), CmpEnd(CmpEnd), ToString(ToString) {}
+ bool operator()(const T *LHS, const char *RHS) {
+ return strncmp(ToString(LHS) + CmpStart, RHS + CmpStart,
+ CmpEnd - CmpStart) < 0;
+ }
+ bool operator()(const char *LHS, const CodeGenIntrinsic *RHS) {
+ return strncmp(LHS + CmpStart, ToString(RHS) + CmpStart,
+ CmpEnd - CmpStart) < 0;
+ }
+ const size_t CmpStart, CmpEnd;
+ ToStringTy ToString;
+ };
+
+ auto Low = NameTable.begin();
+ auto High = NameTable.end();
+ auto LastLow = Low;
+ while (CmpEnd < Name.size() && High - Low > 0) {
+ size_t CmpStart = CmpEnd;
+ CmpEnd = Name.find('.', CmpStart + 1);
+ CmpEnd = CmpEnd == StringRef::npos ? Name.size() : CmpEnd;
+ LastLow = Low;
+ Compare Cmp(CmpStart, CmpEnd, ToString);
+ std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp);
+ }
+ if (High - Low > 0)
+ LastLow = Low;
+
+ if (LastLow == NameTable.end())
+ return -1;
+ StringRef NameFound = (*LastLow)->Name;
+ if (Name == NameFound ||
+ (Name.starts_with(NameFound) && Name[NameFound.size()] == '.'))
+ return LastLow - NameTable.begin();
+ return -1;
+}
+
+// Check for conflicts with overloaded intrinsics. If an intrinsic's name has
+// the name of an overloaded intrinsic as a prefix, then it may be ambiguious
+// and confusing as to which of the two a specific call refers to. Hence
+// disallow such cases.
+//
+// As an example, if `llvm.foo` is overloaded, it will appear as
+// `llvm.foo.<mangling_suffix>` in the IR. Now, if another intrinsic is called
+// `llvm.foo.bar`, it may be ambiguious as to whether `.bar` is a mangling
+// suffix for the overloaded `llvm.foo` or if its the `llvm.foo.bar` intrinsic.
+// Note though that `llvm.foobar` is OK. So the prefix check will check if
+// there name of the overloaded intrinsic is a prefix of another one and the
+// next letter after the prefix is a `.`.
+//
+// Also note that the `.bar` suffix in the example above may not be a valid
+// mangling suffix for `llvm.foo`, so we could check that and allow
+// `llvm.foo.bar` as there is no ambiguity. But LLVM's intrinsic name matching
+// does not support this (in llvm::Intrinsic::lookupLLVMIntrinsicByName).
+// However the ambiguity is still there, so we do not allow this case (i.e.,
+// the check is overly strict).
+void CodeGenIntrinsicTable::CheckOverloadConflicts() const {
+ // Collect all overloaded intrinsic names in a vector. `Intrinsics` are
+ // already mostly sorted by name (all target independent ones first, sorted by
+ // their name, followed by target specific ones, sorted by their name).
+ // However we would like to detect cases where an overloaded target
+ // independent one shares a prefix with a target depndent one. So we need to
+ // collect and resort all of them by name.
+ std::vector<const CodeGenIntrinsic *> OverloadedIntrinsics;
+ for (const CodeGenIntrinsic &Int : Intrinsics)
+ if (Int.isOverloaded)
+ OverloadedIntrinsics.push_back(&Int);
+
+ sort(OverloadedIntrinsics,
+ [](const CodeGenIntrinsic *Int1, const CodeGenIntrinsic *Int2) {
+ return Int1->Name < Int2->Name;
+ });
+
+ ArrayRef<const CodeGenIntrinsic *> AR = OverloadedIntrinsics;
+ auto ToString = [](const CodeGenIntrinsic *Int) -> const char * {
+ return Int->Name.c_str();
+ };
+
+ for (const CodeGenIntrinsic &Int : Intrinsics) {
+ int index =
+ lookupLLVMIntrinsicByName<CodeGenIntrinsic>(AR, Int.Name, ToString);
+ if (index == -1 || AR[index] == &Int)
+ continue;
+ const CodeGenIntrinsic &Overloaded = *AR[index];
+
+ // Allow only a single ".xxx" suffix after the matched name, if we know that
+ // the it likely won't match a mangling suffix.
+ StringRef Suffix =
+ StringRef(Int.Name).drop_front(Overloaded.Name.size() + 1);
+ bool IsSuffixOk = [&]() {
+ // Allow only a single "." separated token.
+ if (Suffix.find('.') != StringRef::npos)
+ return false;
+ // Do not allow if suffix can potentially match a mangling suffix.
+ // See getMangledTypeStr() for the mangling suffixes possible. It includes
+ // pointer : p[0-9]+
+ // array : a[0-9]+[.+]
+ // struct: : s_/sl_[.+]
+ // function : f_[.+]
+ // vector : v/nxv[0-9]+[.+]
+ // target type : t[.*]
+ // integer : i[0-9]+
+ // named types : See `NamedTypes` below.
+
+ // 1. Do not allow anything with characters other that [0-9A-Za-z]. That
+ // means really no _, so that eliminates functions and structs.
+ if (Suffix.find('_') != StringRef::npos)
+ return false;
+
+ // [a|v][0-9|$][.*] // $ is end of string.
+ if (is_contained("av", Suffix[0]) &&
+ (Suffix.size() == 1 || isDigit(Suffix[1])))
+ return false;
+ // nxv[0-9|$][.*]
+ if (Suffix.starts_with("nxv") &&
+ (Suffix.size() == 3 || isDigit(Suffix[3])))
+ return false;
+ // t[.*]
+ if (Suffix.starts_with('t'))
+ return false;
+ // [p|i][0-9]+
+ if ((Suffix[0] == 'i' || Suffix[0] == 'p') &&
+ all_of(Suffix.drop_front(), isDigit))
+ return false;
+ // Match one of the named types.
+ StringLiteral NamedTypes[] = {"isVoid", "Metadata", "f16", "f32",
+ "f64", "f80", "f128", "bf16",
+ "ppcf128", "x86amx"};
+ if (is_contained(NamedTypes, Suffix))
+ return false;
+ return true;
+ }();
+ if (IsSuffixOk)
+ continue;
+
+ PrintError(Int.TheDef->getLoc(),
+ Twine("Intrinsic `") + Int.Name + "` cannot share prefix `" +
+ Overloaded.Name + "` with an overloaded intrinsic");
+ PrintFatalError(Overloaded.TheDef->getLoc(), "Overloaded intrinsic `" +
+ Overloaded.Name +
+ "` defined here");
+ }
+}
+
CodeGenIntrinsic &CodeGenIntrinsicMap::operator[](const Record *Record) {
if (!Record->isSubClassOf("Intrinsic"))
PrintFatalError("Intrinsic defs should be subclass of 'Intrinsic' class");
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
index 2df598da3f2507..10d43633366825 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.h
@@ -192,6 +192,7 @@ class CodeGenIntrinsicTable {
private:
void CheckDuplicateIntrinsics() const;
+ void CheckOverloadConflicts() const;
};
// This class builds `CodeGenIntrinsic` on demand for a given Def.
|
@@ -94,6 +95,168 @@ void CodeGenIntrinsicTable::CheckDuplicateIntrinsics() const { | |||
PrintFatalNote(First.TheDef, "Previous definition here"); | |||
} | |||
|
|||
// Note: This is a modified version of `Intrinsic::lookupLLVMIntrinsicByName` | |||
// in IntrinsicInst.cpp file. | |||
template <typename T> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
1st point of feedback: I made this templated so that we can share the same code here and in Intrinsic::lookupLLVMIntrinsicByName
. I am proposing we move this to a common header in Support that both pieces of code use.
// Allow only a single "." separated token. | ||
if (Suffix.find('.') != StringRef::npos) | ||
return false; | ||
// Do not allow if suffix can potentially match a mangling suffix. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
2nd point of feedback: is this ok? I don't think we can have the getMangledTypeStr() code and this code adjacent to each other (as one operates on LLVM Types and others on strings and we cannot/do not want do link LLVM Core into TableGen). So we will keep these separate with a comment in both places mentioning each other and asking to keep the 2 pieces in sync.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, this check is not super accurate but will catch most obvious cases I think. For example, the llvm.foo.i32
example in my discourse thread.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe the mangling suffix is just a convention, and the parsing allows it to be anything as long as the name is unique for that type signature in the module. Is this adding special meaning to the convention?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't believe that is true. For intrinsics that LLVM knows about (i.e., functions that begin with llvm. and have a name in the intrinsic table) the mangling suffix is enforced to be the one that conforms to what getMangledTypeStr() (and Intrinsic::getName in turn) requires. Apparently, that is how remangling upgrade works. See @nikic's comments on #108315 where I tried to make it strict (I am planning to revisit that PR at some point in future with the idea of an option controlled strict mangling check).
With that assumption, what this is trying to do is essentially catch several patterns that can look like mangling suffixes and disallowing them as suffix of another intrinsic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As an example, the following assembly:
define void @main() {
%t = call i1 @llvm.is.constant.foo(float 0.0)
ret void
}
when run through: ./build/bin/llvm-as < test.ll | ./build/bin/llvm-dis
generates:
; ModuleID = '<stdin>'
source_filename = "<stdin>"
define void @main() {
%t = call i1 @llvm.is.constant.f32(float 0.000000e+00)
ret void
}
; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none)
declare i1 @llvm.is.constant.f32(float) #0
attributes #0 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
Note that llvm.is.constant.foo
(with a bogus .foo mangling) was renamed to llvm.is.constant.f32
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unrelated, but why is llvm.is.constant convergent?
def int_is_constant0 : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], | ||
[IntrNoMem, IntrWillReturn, IntrConvergent], | ||
"llvm.is.constant.my">; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should be able to define an intrinsic in a .td test file to test this without cluttering the real intrinsic namespace
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah agreed. This was just a WIP prototype.
StringRef(Int.Name).drop_front(Overloaded.Name.size() + 1); | ||
bool IsSuffixOk = [&]() { | ||
// Allow only a single "." separated token. | ||
if (Suffix.find('.') != StringRef::npos) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
.contains
|
||
// 1. Do not allow anything with characters other that [0-9A-Za-z]. That | ||
// means really no _, so that eliminates functions and structs. | ||
if (Suffix.find('_') != StringRef::npos) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
.contains
if (is_contained(NamedTypes, Suffix)) | ||
return false; | ||
return true; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return !is_contained
int lookupLLVMIntrinsicByName(ArrayRef<const T *> NameTable, StringRef Name, | ||
function_ref<const char *(const T *)> ToString) { | ||
using ToStringTy = function_ref<const char *(const T *)>; | ||
assert(Name.starts_with("llvm.") && "Unexpected intrinsic prefix"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This probably shouldn't be an assert
// Allow only a single "." separated token. | ||
if (Suffix.find('.') != StringRef::npos) | ||
return false; | ||
// Do not allow if suffix can potentially match a mangling suffix. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe the mangling suffix is just a convention, and the parsing allows it to be anything as long as the name is unique for that type signature in the module. Is this adding special meaning to the convention?
This is now superseded by #110324. |
No description provided.