Skip to content

Commit 3d9cf8b

Browse files
authored
[clangd] Improve filtering logic for undesired proto symbols (#110091)
This used to filter any names with `_` in them, apart from enum-constants. Resulting in discrepancies in behavior when we had fields that have `_` in the name, or for accessors like `set_`, `has_`. The logic seems to be trying to filter mangled names for nested entries, so adjusted logic to only do so for top-level decls, while still preserving some public top-level helpers. Heuristics are still leaning towards false-negatives, e.g. if a top-level entity has `_` in its name (`message Foo_Bar {}`), it'll be filtered, or an enum that prefixes its type name to constants (`enum Foo { Foo_OK }`).
1 parent 18df9d2 commit 3d9cf8b

File tree

2 files changed

+111
-22
lines changed

2 files changed

+111
-22
lines changed

clang-tools-extra/clangd/index/SymbolCollector.cpp

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "llvm/ADT/DenseMap.h"
4242
#include "llvm/ADT/SmallVector.h"
4343
#include "llvm/ADT/StringRef.h"
44+
#include "llvm/Support/Casting.h"
4445
#include "llvm/Support/ErrorHandling.h"
4546
#include "llvm/Support/FileSystem.h"
4647
#include "llvm/Support/Path.h"
@@ -75,18 +76,62 @@ bool isPrivateProtoDecl(const NamedDecl &ND) {
7576
if (ND.getIdentifier() == nullptr)
7677
return false;
7778
auto Name = ND.getIdentifier()->getName();
78-
if (!Name.contains('_'))
79-
return false;
80-
// Nested proto entities (e.g. Message::Nested) have top-level decls
81-
// that shouldn't be used (Message_Nested). Ignore them completely.
82-
// The nested entities are dangling type aliases, we may want to reconsider
83-
// including them in the future.
84-
// For enum constants, SOME_ENUM_CONSTANT is not private and should be
85-
// indexed. Outer_INNER is private. This heuristic relies on naming style, it
86-
// will include OUTER_INNER and exclude some_enum_constant.
87-
// FIXME: the heuristic relies on naming style (i.e. no underscore in
88-
// user-defined names) and can be improved.
89-
return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
79+
// There are some internal helpers like _internal_set_foo();
80+
if (Name.contains("_internal_"))
81+
return true;
82+
83+
// https://protobuf.dev/reference/cpp/cpp-generated/#nested-types
84+
// Nested entities (messages/enums) has two names, one at the top-level scope,
85+
// with a mangled name created by prepending all the outer types. These names
86+
// are almost never preferred by the developers, so exclude them from index.
87+
// e.g.
88+
// message Foo {
89+
// message Bar {}
90+
// enum E { A }
91+
// }
92+
//
93+
// yields:
94+
// class Foo_Bar {};
95+
// enum Foo_E { Foo_E_A };
96+
// class Foo {
97+
// using Bar = Foo_Bar;
98+
// static constexpr Foo_E A = Foo_E_A;
99+
// };
100+
101+
// We get rid of Foo_Bar and Foo_E by discarding any top-level entries with
102+
// `_` in the name. This relies on original message/enum not having `_` in the
103+
// name. Hence might go wrong in certain cases.
104+
if (ND.getDeclContext()->isNamespace()) {
105+
// Strip off some known public suffix helpers for enums, rest of the helpers
106+
// are generated inside record decls so we don't care.
107+
// https://protobuf.dev/reference/cpp/cpp-generated/#enum
108+
Name.consume_back("_descriptor");
109+
Name.consume_back("_IsValid");
110+
Name.consume_back("_Name");
111+
Name.consume_back("_Parse");
112+
Name.consume_back("_MIN");
113+
Name.consume_back("_MAX");
114+
Name.consume_back("_ARRAYSIZE");
115+
return Name.contains('_');
116+
}
117+
118+
// EnumConstantDecls need some special attention, despite being nested in a
119+
// TagDecl, they might still have mangled names. We filter those by checking
120+
// if it has parent's name as a prefix.
121+
// This might go wrong if a nested entity has a name that starts with parent's
122+
// name, e.g: enum Foo { Foo_X }.
123+
if (llvm::isa<EnumConstantDecl>(&ND)) {
124+
auto *DC = llvm::cast<EnumDecl>(ND.getDeclContext());
125+
if (!DC || !DC->getIdentifier())
126+
return false;
127+
auto CtxName = DC->getIdentifier()->getName();
128+
return !CtxName.empty() && Name.consume_front(CtxName) &&
129+
Name.consume_front("_");
130+
}
131+
132+
// Now we're only left with fields/methods without an `_internal_` in the
133+
// name, they're intended for public use.
134+
return false;
90135
}
91136

92137
// We only collect #include paths for symbols that are suitable for global code

clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -201,19 +201,63 @@ TEST_F(ShouldCollectSymbolTest, NoPrivateProtoSymbol) {
201201
build(
202202
R"(// Generated by the protocol buffer compiler. DO NOT EDIT!
203203
namespace nx {
204-
class Top_Level {};
205-
class TopLevel {};
206-
enum Kind {
207-
KIND_OK,
208-
Kind_Not_Ok,
204+
enum Outer_Enum : int {
205+
Outer_Enum_KIND1,
206+
Outer_Enum_Kind_2,
209207
};
208+
bool Outer_Enum_IsValid(int);
209+
210+
class Outer_Inner {};
211+
class Outer {
212+
using Inner = Outer_Inner;
213+
using Enum = Outer_Enum;
214+
static constexpr Enum KIND1 = Outer_Enum_KIND1;
215+
static constexpr Enum Kind_2 = Outer_Enum_Kind_2;
216+
static bool Enum_IsValid(int);
217+
int &x();
218+
void set_x();
219+
void _internal_set_x();
220+
221+
int &Outer_y();
222+
};
223+
enum Foo {
224+
FOO_VAL1,
225+
Foo_VAL2,
226+
};
227+
bool Foo_IsValid(int);
210228
})");
211-
EXPECT_TRUE(shouldCollect("nx::TopLevel"));
212-
EXPECT_TRUE(shouldCollect("nx::Kind::KIND_OK"));
213-
EXPECT_TRUE(shouldCollect("nx::Kind"));
214229

215-
EXPECT_FALSE(shouldCollect("nx::Top_Level"));
216-
EXPECT_FALSE(shouldCollect("nx::Kind::Kind_Not_Ok"));
230+
// Make sure all the mangled names for Outer::Enum is discarded.
231+
EXPECT_FALSE(shouldCollect("nx::Outer_Enum"));
232+
EXPECT_FALSE(shouldCollect("nx::Outer_Enum_KIND1"));
233+
EXPECT_FALSE(shouldCollect("nx::Outer_Enum_Kind_2"));
234+
EXPECT_FALSE(shouldCollect("nx::Outer_Enum_IsValid"));
235+
// But nested aliases are preserved.
236+
EXPECT_TRUE(shouldCollect("nx::Outer::Enum"));
237+
EXPECT_TRUE(shouldCollect("nx::Outer::KIND1"));
238+
EXPECT_TRUE(shouldCollect("nx::Outer::Kind_2"));
239+
EXPECT_TRUE(shouldCollect("nx::Outer::Enum_IsValid"));
240+
241+
// Check for Outer::Inner.
242+
EXPECT_FALSE(shouldCollect("nx::Outer_Inner"));
243+
EXPECT_TRUE(shouldCollect("nx::Outer"));
244+
EXPECT_TRUE(shouldCollect("nx::Outer::Inner"));
245+
246+
// Make sure field related information is preserved, unless it's explicitly
247+
// marked with `_internal_`.
248+
EXPECT_TRUE(shouldCollect("nx::Outer::x"));
249+
EXPECT_TRUE(shouldCollect("nx::Outer::set_x"));
250+
EXPECT_FALSE(shouldCollect("nx::Outer::_internal_set_x"));
251+
EXPECT_TRUE(shouldCollect("nx::Outer::Outer_y"));
252+
253+
// Handling of a top-level enum
254+
EXPECT_TRUE(shouldCollect("nx::Foo::FOO_VAL1"));
255+
EXPECT_TRUE(shouldCollect("nx::FOO_VAL1"));
256+
EXPECT_TRUE(shouldCollect("nx::Foo_IsValid"));
257+
// Our heuristic goes wrong here, if the user has a nested name that starts
258+
// with parent's name.
259+
EXPECT_FALSE(shouldCollect("nx::Foo::Foo_VAL2"));
260+
EXPECT_FALSE(shouldCollect("nx::Foo_VAL2"));
217261
}
218262

219263
TEST_F(ShouldCollectSymbolTest, DoubleCheckProtoHeaderComment) {

0 commit comments

Comments
 (0)