Skip to content

[clangd] Avoid libFormat's objective-c guessing heuristic where possible #84133

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions clang-tools-extra/clangd/ClangdServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ void ClangdServer::formatFile(PathRef File, std::optional<Range> Rng,
auto Action = [File = File.str(), Code = std::move(*Code),
Ranges = std::vector<tooling::Range>{RequestedRange},
CB = std::move(CB), this]() mutable {
format::FormatStyle Style = getFormatStyleForFile(File, Code, TFS);
format::FormatStyle Style = getFormatStyleForFile(File, Code, TFS, true);
tooling::Replacements IncludeReplaces =
format::sortIncludes(Style, Code, Ranges, File);
auto Changed = tooling::applyAllReplacements(Code, IncludeReplaces);
Expand Down Expand Up @@ -551,7 +551,7 @@ void ClangdServer::formatOnType(PathRef File, Position Pos,
auto Action = [File = File.str(), Code = std::move(*Code),
TriggerText = TriggerText.str(), CursorPos = *CursorPos,
CB = std::move(CB), this]() mutable {
auto Style = getFormatStyleForFile(File, Code, TFS);
auto Style = getFormatStyleForFile(File, Code, TFS, false);
std::vector<TextEdit> Result;
for (const tooling::Replacement &R :
formatIncremental(Code, CursorPos, TriggerText, Style))
Expand Down Expand Up @@ -605,7 +605,7 @@ void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName,

if (Opts.WantFormat) {
auto Style = getFormatStyleForFile(File, InpAST->Inputs.Contents,
*InpAST->Inputs.TFS);
*InpAST->Inputs.TFS, false);
llvm::Error Err = llvm::Error::success();
for (auto &E : R->GlobalChanges)
Err =
Expand Down Expand Up @@ -762,7 +762,7 @@ void ClangdServer::applyTweak(PathRef File, Range Sel, StringRef TweakID,
for (auto &It : (*Effect)->ApplyEdits) {
Edit &E = It.second;
format::FormatStyle Style =
getFormatStyleForFile(File, E.InitialCode, TFS);
getFormatStyleForFile(File, E.InitialCode, TFS, false);
if (llvm::Error Err = reformatEdit(E, Style))
elog("Failed to format {0}: {1}", It.first(), std::move(Err));
}
Expand Down Expand Up @@ -825,7 +825,7 @@ void ClangdServer::findHover(PathRef File, Position Pos,
if (!InpAST)
return CB(InpAST.takeError());
format::FormatStyle Style = getFormatStyleForFile(
File, InpAST->Inputs.Contents, *InpAST->Inputs.TFS);
File, InpAST->Inputs.Contents, *InpAST->Inputs.TFS, false);
CB(clangd::getHover(InpAST->AST, Pos, std::move(Style), Index));
};

Expand Down
4 changes: 2 additions & 2 deletions clang-tools-extra/clangd/CodeComplete.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1628,7 +1628,7 @@ class CodeCompleteFlow {
IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration();
auto Style = getFormatStyleForFile(SemaCCInput.FileName,
SemaCCInput.ParseInput.Contents,
*SemaCCInput.ParseInput.TFS);
*SemaCCInput.ParseInput.TFS, false);
const auto NextToken = findTokenAfterCompletionPoint(
Recorder->CCSema->getPreprocessor().getCodeCompletionLoc(),
Recorder->CCSema->getSourceManager(), Recorder->CCSema->LangOpts);
Expand Down Expand Up @@ -1719,7 +1719,7 @@ class CodeCompleteFlow {
ProxSources[FileName].Cost = 0;
FileProximity.emplace(ProxSources);

auto Style = getFormatStyleForFile(FileName, Content, TFS);
auto Style = getFormatStyleForFile(FileName, Content, TFS, false);
// This will only insert verbatim headers.
Inserter.emplace(FileName, Content, Style,
/*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr);
Expand Down
2 changes: 1 addition & 1 deletion clang-tools-extra/clangd/IncludeCleaner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ std::vector<Diag> generateMissingIncludeDiagnostics(
const SourceManager &SM = AST.getSourceManager();
const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());

auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS);
auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS, false);

tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
FileStyle.IncludeStyle);
Expand Down
2 changes: 1 addition & 1 deletion clang-tools-extra/clangd/ParsedAST.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,7 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs,
// (e.g. incomplete type) and attach include insertion fixes to diagnostics.
if (Inputs.Index && !BuildDir.getError()) {
auto Style =
getFormatStyleForFile(Filename, Inputs.Contents, *Inputs.TFS);
getFormatStyleForFile(Filename, Inputs.Contents, *Inputs.TFS, false);
auto Inserter = std::make_shared<IncludeInserter>(
Filename, Inputs.Contents, Style, BuildDir.get(),
&Clang->getPreprocessor().getHeaderSearchInfo());
Expand Down
16 changes: 15 additions & 1 deletion clang-tools-extra/clangd/SourceCode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,21 @@ std::optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {

format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
llvm::StringRef Content,
const ThreadsafeFS &TFS) {
const ThreadsafeFS &TFS,
bool FormatFile) {
// Unless we're formatting a substantial amount of code (the entire file
// or an arbitrarily large range), skip libFormat's heuristic check for
// .h files that tries to determine whether the file contains objective-c
// code. (This is accomplished by passing empty code contents to getStyle().
// The heuristic is the only thing that looks at the contents.)
// This is a workaround for PR60151, a known issue in libFormat where this
// heuristic can OOM on large files. If we *are* formatting the entire file,
// there's no point in doing this because the actual format::reformat() call
// will run into the same OOM; we'd just be risking inconsistencies between
// clangd and clang-format on smaller .h files where they disagree on what
// language is detected.
if (!FormatFile)
Content = {};
auto Style = format::getStyle(format::DefaultFormatStyle, File,
format::DefaultFallbackStyle, Content,
TFS.view(/*CWD=*/std::nullopt).get());
Expand Down
6 changes: 5 additions & 1 deletion clang-tools-extra/clangd/SourceCode.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,13 @@ std::optional<std::string> getCanonicalPath(const FileEntryRef F,
/// FIXME: should we be caching the .clang-format file search?
/// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle,
/// though the latter may have been overridden in main()!
/// \p FormatFile indicates whether the returned FormatStyle is used
/// to format the entire main file (or a range selected by the user
/// which can be arbitrarily long).
format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add some unit tests for this function in SourceCodeTest.cpp?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I added a unit test which checks the detected language in various cases, including checking that the new FormatFile flag successfully bypasses the language guessing heuristic

llvm::StringRef Content,
const ThreadsafeFS &TFS);
const ThreadsafeFS &TFS,
bool FormatFile);

/// Cleanup and format the given replacements.
llvm::Expected<tooling::Replacements>
Expand Down
2 changes: 1 addition & 1 deletion clang-tools-extra/clangd/tool/Check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ class Checker {

// FIXME: Check that resource-dir/built-in-headers exist?

Style = getFormatStyleForFile(File, Inputs.Contents, TFS);
Style = getFormatStyleForFile(File, Inputs.Contents, TFS, false);

return true;
}
Expand Down
38 changes: 38 additions & 0 deletions clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,44 @@ TEST(ApplyEditsTest, EndLineOutOfRange) {
FailedWithMessage("Line value is out of range (100)"));
}

TEST(FormatStyleForFile, LanguageGuessingHeuristic) {
StringRef ObjCContent = "@interface Foo\n@end\n";
StringRef CppContent = "class Foo {};\n";
using LK = format::FormatStyle::LanguageKind;
struct TestCase {
llvm::StringRef Filename;
llvm::StringRef Contents;
bool FormatFile;
LK ExpectedLanguage;
} TestCases[] = {
// If the file extension identifies the file as ObjC, the guessed
// language should be ObjC regardless of content or FormatFile flag.
{"foo.mm", ObjCContent, true, LK::LK_ObjC},
{"foo.mm", ObjCContent, false, LK::LK_ObjC},
{"foo.mm", CppContent, true, LK::LK_ObjC},
{"foo.mm", CppContent, false, LK::LK_ObjC},

// If the file extension is ambiguous like .h, FormatFile=true should
// result in using libFormat's heuristic to guess the language based
// on the file contents.
{"foo.h", ObjCContent, true, LK::LK_ObjC},
{"foo.h", CppContent, true, LK::LK_Cpp},

// With FomatFile=false, the language guessing heuristic should be
// bypassed
{"foo.h", ObjCContent, false, LK::LK_Cpp},
{"foo.h", CppContent, false, LK::LK_Cpp},
};

MockFS FS;
for (const auto &[Filename, Contents, FormatFile, ExpectedLanguage] :
TestCases) {
EXPECT_EQ(
getFormatStyleForFile(Filename, Contents, FS, FormatFile).Language,
ExpectedLanguage);
}
}

} // namespace
} // namespace clangd
} // namespace clang