Skip to content

Commit 5bde2aa

Browse files
labathMichael137
andauthored
[lldb] Improve type name parsing (#91586)
Parsing of '::' scopes in TypeQuery was very naive and failed for names with '::''s in template arguments. Interestingly, one of the functions it was calling (Type::GetTypeScopeAndBasename) was already doing the same thing, and getting it (mostly (*)) right. This refactors the function so that it can return the scope results, fixing the parsing of names like std::vector<int, std::allocator<int>>::iterator. Two callers of GetTypeScopeAndBasename are deleted as the functions are not used (I presume they stopped being used once we started pruning type search results more eagerly). (*) This implementation is still not correct when one takes c++ operators into account -- e.g., something like `X<&A::operator<>::T` is a legitimate type name. We do have an implementation that is able to handle names like these (CPlusPlusLanguage::MethodName), but using it is not trivial, because it is hidden in a language plugin and specific to method name parsing. --------- Co-authored-by: Michael Buch <[email protected]>
1 parent 0ebe48f commit 5bde2aa

File tree

10 files changed

+180
-302
lines changed

10 files changed

+180
-302
lines changed

lldb/include/lldb/Symbol/Type.h

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
#include "llvm/ADT/APSInt.h"
2323
#include "llvm/ADT/DenseSet.h"
24+
#include "llvm/ADT/STLForwardCompat.h"
25+
#include "llvm/Support/raw_ostream.h"
2426

2527
#include <optional>
2628
#include <set>
@@ -492,12 +494,37 @@ class Type : public std::enable_shared_from_this<Type>, public UserID {
492494

493495
static int Compare(const Type &a, const Type &b);
494496

497+
// Represents a parsed type name coming out of GetTypeScopeAndBasename. The
498+
// structure holds StringRefs pointing to portions of the original name, and
499+
// so must not be used after the name is destroyed.
500+
struct ParsedName {
501+
lldb::TypeClass type_class = lldb::eTypeClassAny;
502+
503+
// Scopes of the type, starting with the outermost. Absolute type references
504+
// have a "::" as the first scope.
505+
llvm::SmallVector<llvm::StringRef> scope;
506+
507+
llvm::StringRef basename;
508+
509+
friend bool operator==(const ParsedName &lhs, const ParsedName &rhs) {
510+
return lhs.type_class == rhs.type_class && lhs.scope == rhs.scope &&
511+
lhs.basename == rhs.basename;
512+
}
513+
514+
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
515+
const ParsedName &name) {
516+
return os << llvm::formatv(
517+
"Type::ParsedName({0:x}, [{1}], {2})",
518+
llvm::to_underlying(name.type_class),
519+
llvm::make_range(name.scope.begin(), name.scope.end()),
520+
name.basename);
521+
}
522+
};
495523
// From a fully qualified typename, split the type into the type basename and
496524
// the remaining type scope (namespaces/classes).
497-
static bool GetTypeScopeAndBasename(llvm::StringRef name,
498-
llvm::StringRef &scope,
499-
llvm::StringRef &basename,
500-
lldb::TypeClass &type_class);
525+
static std::optional<ParsedName>
526+
GetTypeScopeAndBasename(llvm::StringRef name);
527+
501528
void SetEncodingType(Type *encoding_type) { m_encoding_type = encoding_type; }
502529

503530
uint32_t GetEncodingMask();

lldb/include/lldb/Symbol/TypeList.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,6 @@ class TypeList {
4949

5050
void ForEach(std::function<bool(lldb::TypeSP &type_sp)> const &callback);
5151

52-
void RemoveMismatchedTypes(llvm::StringRef qualified_typename,
53-
bool exact_match);
54-
55-
void RemoveMismatchedTypes(llvm::StringRef type_scope,
56-
llvm::StringRef type_basename,
57-
lldb::TypeClass type_class, bool exact_match);
58-
59-
void RemoveMismatchedTypes(lldb::TypeClass type_class);
60-
6152
private:
6253
typedef collection::iterator iterator;
6354
typedef collection::const_iterator const_iterator;

lldb/include/lldb/Symbol/TypeMap.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,6 @@ class TypeMap {
5555

5656
bool Remove(const lldb::TypeSP &type_sp);
5757

58-
void RemoveMismatchedTypes(llvm::StringRef type_scope,
59-
llvm::StringRef type_basename,
60-
lldb::TypeClass type_class, bool exact_match);
61-
6258
private:
6359
typedef collection::iterator iterator;
6460
typedef collection::const_iterator const_iterator;

lldb/source/Symbol/Type.cpp

Lines changed: 59 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "lldb/Target/ExecutionContext.h"
3030
#include "lldb/Target/Process.h"
3131
#include "lldb/Target/Target.h"
32+
#include "lldb/lldb-enumerations.h"
3233

3334
#include "llvm/ADT/StringRef.h"
3435

@@ -85,27 +86,23 @@ static CompilerContextKind ConvertTypeClass(lldb::TypeClass type_class) {
8586

8687
TypeQuery::TypeQuery(llvm::StringRef name, TypeQueryOptions options)
8788
: m_options(options) {
88-
llvm::StringRef scope, basename;
89-
lldb::TypeClass type_class = lldb::eTypeClassAny;
90-
if (Type::GetTypeScopeAndBasename(name, scope, basename, type_class)) {
91-
if (scope.consume_front("::"))
92-
m_options |= e_exact_match;
89+
if (std::optional<Type::ParsedName> parsed_name =
90+
Type::GetTypeScopeAndBasename(name)) {
91+
llvm::ArrayRef scope = parsed_name->scope;
9392
if (!scope.empty()) {
94-
std::pair<llvm::StringRef, llvm::StringRef> scope_pair =
95-
scope.split("::");
96-
while (!scope_pair.second.empty()) {
97-
m_context.push_back({CompilerContextKind::AnyDeclContext,
98-
ConstString(scope_pair.first.str())});
99-
scope_pair = scope_pair.second.split("::");
93+
if (scope[0] == "::") {
94+
m_options |= e_exact_match;
95+
scope = scope.drop_front();
96+
}
97+
for (llvm::StringRef s : scope) {
98+
m_context.push_back(
99+
{CompilerContextKind::AnyDeclContext, ConstString(s)});
100100
}
101-
m_context.push_back({CompilerContextKind::AnyDeclContext,
102-
ConstString(scope_pair.first.str())});
103101
}
104-
m_context.push_back(
105-
{ConvertTypeClass(type_class), ConstString(basename.str())});
102+
m_context.push_back({ConvertTypeClass(parsed_name->type_class),
103+
ConstString(parsed_name->basename)});
106104
} else {
107-
m_context.push_back(
108-
{CompilerContextKind::AnyType, ConstString(name.str())});
105+
m_context.push_back({CompilerContextKind::AnyType, ConstString(name)});
109106
}
110107
}
111108

@@ -773,65 +770,56 @@ ConstString Type::GetQualifiedName() {
773770
return GetForwardCompilerType().GetTypeName();
774771
}
775772

776-
bool Type::GetTypeScopeAndBasename(llvm::StringRef name,
777-
llvm::StringRef &scope,
778-
llvm::StringRef &basename,
779-
TypeClass &type_class) {
780-
type_class = eTypeClassAny;
773+
std::optional<Type::ParsedName>
774+
Type::GetTypeScopeAndBasename(llvm::StringRef name) {
775+
ParsedName result;
781776

782777
if (name.empty())
783-
return false;
784-
785-
// Clear the scope in case we have just a type class and a basename.
786-
scope = llvm::StringRef();
787-
basename = name;
788-
if (basename.consume_front("struct "))
789-
type_class = eTypeClassStruct;
790-
else if (basename.consume_front("class "))
791-
type_class = eTypeClassClass;
792-
else if (basename.consume_front("union "))
793-
type_class = eTypeClassUnion;
794-
else if (basename.consume_front("enum "))
795-
type_class = eTypeClassEnumeration;
796-
else if (basename.consume_front("typedef "))
797-
type_class = eTypeClassTypedef;
798-
799-
size_t namespace_separator = basename.find("::");
800-
if (namespace_separator == llvm::StringRef::npos) {
801-
// If "name" started a type class we need to return true with no scope.
802-
return type_class != eTypeClassAny;
803-
}
804-
805-
size_t template_begin = basename.find('<');
806-
while (namespace_separator != llvm::StringRef::npos) {
807-
if (template_begin != llvm::StringRef::npos &&
808-
namespace_separator > template_begin) {
809-
size_t template_depth = 1;
810-
llvm::StringRef template_arg =
811-
basename.drop_front(template_begin + 1);
812-
while (template_depth > 0 && !template_arg.empty()) {
813-
if (template_arg.front() == '<')
814-
template_depth++;
815-
else if (template_arg.front() == '>')
816-
template_depth--;
817-
template_arg = template_arg.drop_front(1);
778+
return std::nullopt;
779+
780+
if (name.consume_front("struct "))
781+
result.type_class = eTypeClassStruct;
782+
else if (name.consume_front("class "))
783+
result.type_class = eTypeClassClass;
784+
else if (name.consume_front("union "))
785+
result.type_class = eTypeClassUnion;
786+
else if (name.consume_front("enum "))
787+
result.type_class = eTypeClassEnumeration;
788+
else if (name.consume_front("typedef "))
789+
result.type_class = eTypeClassTypedef;
790+
791+
if (name.consume_front("::"))
792+
result.scope.push_back("::");
793+
794+
bool prev_is_colon = false;
795+
size_t template_depth = 0;
796+
size_t name_begin = 0;
797+
for (const auto &pos : llvm::enumerate(name)) {
798+
switch (pos.value()) {
799+
case ':':
800+
if (prev_is_colon && template_depth == 0) {
801+
result.scope.push_back(name.slice(name_begin, pos.index() - 1));
802+
name_begin = pos.index() + 1;
818803
}
819-
if (template_depth != 0)
820-
return false; // We have an invalid type name. Bail out.
821-
if (template_arg.empty())
822-
break; // The template ends at the end of the full name.
823-
basename = template_arg;
824-
} else {
825-
basename = basename.drop_front(namespace_separator + 2);
804+
break;
805+
case '<':
806+
++template_depth;
807+
break;
808+
case '>':
809+
if (template_depth == 0)
810+
return std::nullopt; // Invalid name.
811+
--template_depth;
812+
break;
826813
}
827-
template_begin = basename.find('<');
828-
namespace_separator = basename.find("::");
829-
}
830-
if (basename.size() < name.size()) {
831-
scope = name.take_front(name.size() - basename.size());
832-
return true;
814+
prev_is_colon = pos.value() == ':';
833815
}
834-
return false;
816+
817+
if (name_begin < name.size() && template_depth == 0)
818+
result.basename = name.substr(name_begin);
819+
else
820+
return std::nullopt;
821+
822+
return result;
835823
}
836824

837825
ModuleSP Type::GetModule() {

lldb/source/Symbol/TypeList.cpp

Lines changed: 0 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -96,112 +96,3 @@ void TypeList::Dump(Stream *s, bool show_context) {
9696
if (Type *t = pos->get())
9797
t->Dump(s, show_context);
9898
}
99-
100-
void TypeList::RemoveMismatchedTypes(llvm::StringRef qualified_typename,
101-
bool exact_match) {
102-
llvm::StringRef type_scope;
103-
llvm::StringRef type_basename;
104-
TypeClass type_class = eTypeClassAny;
105-
if (!Type::GetTypeScopeAndBasename(qualified_typename, type_scope,
106-
type_basename, type_class)) {
107-
type_basename = qualified_typename;
108-
type_scope = "";
109-
}
110-
return RemoveMismatchedTypes(type_scope, type_basename, type_class,
111-
exact_match);
112-
}
113-
114-
void TypeList::RemoveMismatchedTypes(llvm::StringRef type_scope,
115-
llvm::StringRef type_basename,
116-
TypeClass type_class, bool exact_match) {
117-
// Our "collection" type currently is a std::map which doesn't have any good
118-
// way to iterate and remove items from the map so we currently just make a
119-
// new list and add all of the matching types to it, and then swap it into
120-
// m_types at the end
121-
collection matching_types;
122-
123-
iterator pos, end = m_types.end();
124-
125-
for (pos = m_types.begin(); pos != end; ++pos) {
126-
Type *the_type = pos->get();
127-
bool keep_match = false;
128-
TypeClass match_type_class = eTypeClassAny;
129-
130-
if (type_class != eTypeClassAny) {
131-
match_type_class = the_type->GetForwardCompilerType().GetTypeClass();
132-
if ((match_type_class & type_class) == 0)
133-
continue;
134-
}
135-
136-
ConstString match_type_name_const_str(the_type->GetQualifiedName());
137-
if (match_type_name_const_str) {
138-
const char *match_type_name = match_type_name_const_str.GetCString();
139-
llvm::StringRef match_type_scope;
140-
llvm::StringRef match_type_basename;
141-
if (Type::GetTypeScopeAndBasename(match_type_name, match_type_scope,
142-
match_type_basename,
143-
match_type_class)) {
144-
if (match_type_basename == type_basename) {
145-
const size_t type_scope_size = type_scope.size();
146-
const size_t match_type_scope_size = match_type_scope.size();
147-
if (exact_match || (type_scope_size == match_type_scope_size)) {
148-
keep_match = match_type_scope == type_scope;
149-
} else {
150-
if (match_type_scope_size > type_scope_size) {
151-
const size_t type_scope_pos = match_type_scope.rfind(type_scope);
152-
if (type_scope_pos == match_type_scope_size - type_scope_size) {
153-
if (type_scope_pos >= 2) {
154-
// Our match scope ends with the type scope we were looking
155-
// for, but we need to make sure what comes before the
156-
// matching type scope is a namespace boundary in case we are
157-
// trying to match: type_basename = "d" type_scope = "b::c::"
158-
// We want to match:
159-
// match_type_scope "a::b::c::"
160-
// But not:
161-
// match_type_scope "a::bb::c::"
162-
// So below we make sure what comes before "b::c::" in
163-
// match_type_scope is "::", or the namespace boundary
164-
if (match_type_scope[type_scope_pos - 1] == ':' &&
165-
match_type_scope[type_scope_pos - 2] == ':') {
166-
keep_match = true;
167-
}
168-
}
169-
}
170-
}
171-
}
172-
}
173-
} else {
174-
// The type we are currently looking at doesn't exists in a namespace
175-
// or class, so it only matches if there is no type scope...
176-
keep_match = type_scope.empty() && type_basename == match_type_name;
177-
}
178-
}
179-
180-
if (keep_match) {
181-
matching_types.push_back(*pos);
182-
}
183-
}
184-
m_types.swap(matching_types);
185-
}
186-
187-
void TypeList::RemoveMismatchedTypes(TypeClass type_class) {
188-
if (type_class == eTypeClassAny)
189-
return;
190-
191-
// Our "collection" type currently is a std::map which doesn't have any good
192-
// way to iterate and remove items from the map so we currently just make a
193-
// new list and add all of the matching types to it, and then swap it into
194-
// m_types at the end
195-
collection matching_types;
196-
197-
iterator pos, end = m_types.end();
198-
199-
for (pos = m_types.begin(); pos != end; ++pos) {
200-
Type *the_type = pos->get();
201-
TypeClass match_type_class =
202-
the_type->GetForwardCompilerType().GetTypeClass();
203-
if (match_type_class & type_class)
204-
matching_types.push_back(*pos);
205-
}
206-
m_types.swap(matching_types);
207-
}

0 commit comments

Comments
 (0)