Skip to content

[clangd] Improve filtering logic for undesired proto symbols #110091

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 57 additions & 12 deletions clang-tools-extra/clangd/index/SymbolCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
Expand Down Expand Up @@ -75,18 +76,62 @@ bool isPrivateProtoDecl(const NamedDecl &ND) {
if (ND.getIdentifier() == nullptr)
return false;
auto Name = ND.getIdentifier()->getName();
if (!Name.contains('_'))
return false;
// Nested proto entities (e.g. Message::Nested) have top-level decls
// that shouldn't be used (Message_Nested). Ignore them completely.
// The nested entities are dangling type aliases, we may want to reconsider
// including them in the future.
// For enum constants, SOME_ENUM_CONSTANT is not private and should be
// indexed. Outer_INNER is private. This heuristic relies on naming style, it
// will include OUTER_INNER and exclude some_enum_constant.
// FIXME: the heuristic relies on naming style (i.e. no underscore in
// user-defined names) and can be improved.
return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
// There are some internal helpers like _internal_set_foo();
if (Name.contains("_internal_"))
return true;

// https://protobuf.dev/reference/cpp/cpp-generated/#nested-types
// Nested entities (messages/enums) has two names, one at the top-level scope,
// with a mangled name created by prepending all the outer types. These names
// are almost never preferred by the developers, so exclude them from index.
// e.g.
// message Foo {
// message Bar {}
// enum E { A }
// }
//
// yields:
// class Foo_Bar {};
// enum Foo_E { Foo_E_A };
// class Foo {
// using Bar = Foo_Bar;
// static constexpr Foo_E A = Foo_E_A;
// };

// We get rid of Foo_Bar and Foo_E by discarding any top-level entries with
// `_` in the name. This relies on original message/enum not having `_` in the
// name. Hence might go wrong in certain cases.
if (ND.getDeclContext()->isNamespace()) {
// Strip off some known public suffix helpers for enums, rest of the helpers
// are generated inside record decls so we don't care.
// https://protobuf.dev/reference/cpp/cpp-generated/#enum
Name.consume_back("_descriptor");
Name.consume_back("_IsValid");
Name.consume_back("_Name");
Name.consume_back("_Parse");
Name.consume_back("_MIN");
Name.consume_back("_MAX");
Name.consume_back("_ARRAYSIZE");
return Name.contains('_');
}

// EnumConstantDecls need some special attention, despite being nested in a
// TagDecl, they might still have mangled names. We filter those by checking
// if it has parent's name as a prefix.
// This might go wrong if a nested entity has a name that starts with parent's
// name, e.g: enum Foo { Foo_X }.
if (llvm::isa<EnumConstantDecl>(&ND)) {
auto *DC = llvm::cast<EnumDecl>(ND.getDeclContext());
if (!DC || !DC->getIdentifier())
return false;
auto CtxName = DC->getIdentifier()->getName();
return !CtxName.empty() && Name.consume_front(CtxName) &&
Name.consume_front("_");
}

// Now we're only left with fields/methods without an `_internal_` in the
// name, they're intended for public use.
return false;
}

// We only collect #include paths for symbols that are suitable for global code
Expand Down
64 changes: 54 additions & 10 deletions clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,19 +201,63 @@ TEST_F(ShouldCollectSymbolTest, NoPrivateProtoSymbol) {
build(
R"(// Generated by the protocol buffer compiler. DO NOT EDIT!
namespace nx {
class Top_Level {};
class TopLevel {};
enum Kind {
KIND_OK,
Kind_Not_Ok,
enum Outer_Enum : int {
Outer_Enum_KIND1,
Outer_Enum_Kind_2,
};
bool Outer_Enum_IsValid(int);

class Outer_Inner {};
class Outer {
using Inner = Outer_Inner;
using Enum = Outer_Enum;
static constexpr Enum KIND1 = Outer_Enum_KIND1;
static constexpr Enum Kind_2 = Outer_Enum_Kind_2;
static bool Enum_IsValid(int);
int &x();
void set_x();
void _internal_set_x();

int &Outer_y();
};
enum Foo {
FOO_VAL1,
Foo_VAL2,
};
bool Foo_IsValid(int);
})");
EXPECT_TRUE(shouldCollect("nx::TopLevel"));
EXPECT_TRUE(shouldCollect("nx::Kind::KIND_OK"));
EXPECT_TRUE(shouldCollect("nx::Kind"));

EXPECT_FALSE(shouldCollect("nx::Top_Level"));
EXPECT_FALSE(shouldCollect("nx::Kind::Kind_Not_Ok"));
// Make sure all the mangled names for Outer::Enum is discarded.
EXPECT_FALSE(shouldCollect("nx::Outer_Enum"));
EXPECT_FALSE(shouldCollect("nx::Outer_Enum_KIND1"));
EXPECT_FALSE(shouldCollect("nx::Outer_Enum_Kind_2"));
EXPECT_FALSE(shouldCollect("nx::Outer_Enum_IsValid"));
// But nested aliases are preserved.
EXPECT_TRUE(shouldCollect("nx::Outer::Enum"));
EXPECT_TRUE(shouldCollect("nx::Outer::KIND1"));
EXPECT_TRUE(shouldCollect("nx::Outer::Kind_2"));
EXPECT_TRUE(shouldCollect("nx::Outer::Enum_IsValid"));

// Check for Outer::Inner.
EXPECT_FALSE(shouldCollect("nx::Outer_Inner"));
EXPECT_TRUE(shouldCollect("nx::Outer"));
EXPECT_TRUE(shouldCollect("nx::Outer::Inner"));

// Make sure field related information is preserved, unless it's explicitly
// marked with `_internal_`.
EXPECT_TRUE(shouldCollect("nx::Outer::x"));
EXPECT_TRUE(shouldCollect("nx::Outer::set_x"));
EXPECT_FALSE(shouldCollect("nx::Outer::_internal_set_x"));
EXPECT_TRUE(shouldCollect("nx::Outer::Outer_y"));

// Handling of a top-level enum
EXPECT_TRUE(shouldCollect("nx::Foo::FOO_VAL1"));
EXPECT_TRUE(shouldCollect("nx::FOO_VAL1"));
EXPECT_TRUE(shouldCollect("nx::Foo_IsValid"));
// Our heuristic goes wrong here, if the user has a nested name that starts
// with parent's name.
EXPECT_FALSE(shouldCollect("nx::Foo::Foo_VAL2"));
EXPECT_FALSE(shouldCollect("nx::Foo_VAL2"));
}

TEST_F(ShouldCollectSymbolTest, DoubleCheckProtoHeaderComment) {
Expand Down
Loading