Skip to content

Commit 3093d73

Browse files
[clangd] Avoid libFormat's objective-c guessing heuristic where possible (#84133)
This avoids a known libFormat bug where the heuristic can OOM on certain large files (particularly single-header libraries such as miniaudio.h). The OOM will still happen on affected files if you actually try to format them (this is harder to avoid since the underlyting issue affects the actual formatting logic, not just the language-guessing heuristic), but at least it's avoided during non-modifying operations like hover, and modifying operations that do local formatting like code completion. Fixes clangd/clangd#719 Fixes clangd/clangd#1384 Fixes #70945
1 parent 561ddb1 commit 3093d73

File tree

8 files changed

+68
-12
lines changed

8 files changed

+68
-12
lines changed

clang-tools-extra/clangd/ClangdServer.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ void ClangdServer::formatFile(PathRef File, std::optional<Range> Rng,
523523
auto Action = [File = File.str(), Code = std::move(*Code),
524524
Ranges = std::vector<tooling::Range>{RequestedRange},
525525
CB = std::move(CB), this]() mutable {
526-
format::FormatStyle Style = getFormatStyleForFile(File, Code, TFS);
526+
format::FormatStyle Style = getFormatStyleForFile(File, Code, TFS, true);
527527
tooling::Replacements IncludeReplaces =
528528
format::sortIncludes(Style, Code, Ranges, File);
529529
auto Changed = tooling::applyAllReplacements(Code, IncludeReplaces);
@@ -551,7 +551,7 @@ void ClangdServer::formatOnType(PathRef File, Position Pos,
551551
auto Action = [File = File.str(), Code = std::move(*Code),
552552
TriggerText = TriggerText.str(), CursorPos = *CursorPos,
553553
CB = std::move(CB), this]() mutable {
554-
auto Style = getFormatStyleForFile(File, Code, TFS);
554+
auto Style = getFormatStyleForFile(File, Code, TFS, false);
555555
std::vector<TextEdit> Result;
556556
for (const tooling::Replacement &R :
557557
formatIncremental(Code, CursorPos, TriggerText, Style))
@@ -605,7 +605,7 @@ void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName,
605605

606606
if (Opts.WantFormat) {
607607
auto Style = getFormatStyleForFile(File, InpAST->Inputs.Contents,
608-
*InpAST->Inputs.TFS);
608+
*InpAST->Inputs.TFS, false);
609609
llvm::Error Err = llvm::Error::success();
610610
for (auto &E : R->GlobalChanges)
611611
Err =
@@ -762,7 +762,7 @@ void ClangdServer::applyTweak(PathRef File, Range Sel, StringRef TweakID,
762762
for (auto &It : (*Effect)->ApplyEdits) {
763763
Edit &E = It.second;
764764
format::FormatStyle Style =
765-
getFormatStyleForFile(File, E.InitialCode, TFS);
765+
getFormatStyleForFile(File, E.InitialCode, TFS, false);
766766
if (llvm::Error Err = reformatEdit(E, Style))
767767
elog("Failed to format {0}: {1}", It.first(), std::move(Err));
768768
}
@@ -825,7 +825,7 @@ void ClangdServer::findHover(PathRef File, Position Pos,
825825
if (!InpAST)
826826
return CB(InpAST.takeError());
827827
format::FormatStyle Style = getFormatStyleForFile(
828-
File, InpAST->Inputs.Contents, *InpAST->Inputs.TFS);
828+
File, InpAST->Inputs.Contents, *InpAST->Inputs.TFS, false);
829829
CB(clangd::getHover(InpAST->AST, Pos, std::move(Style), Index));
830830
};
831831

clang-tools-extra/clangd/CodeComplete.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1628,7 +1628,7 @@ class CodeCompleteFlow {
16281628
IsUsingDeclaration = Recorder->CCContext.isUsingDeclaration();
16291629
auto Style = getFormatStyleForFile(SemaCCInput.FileName,
16301630
SemaCCInput.ParseInput.Contents,
1631-
*SemaCCInput.ParseInput.TFS);
1631+
*SemaCCInput.ParseInput.TFS, false);
16321632
const auto NextToken = findTokenAfterCompletionPoint(
16331633
Recorder->CCSema->getPreprocessor().getCodeCompletionLoc(),
16341634
Recorder->CCSema->getSourceManager(), Recorder->CCSema->LangOpts);
@@ -1719,7 +1719,7 @@ class CodeCompleteFlow {
17191719
ProxSources[FileName].Cost = 0;
17201720
FileProximity.emplace(ProxSources);
17211721

1722-
auto Style = getFormatStyleForFile(FileName, Content, TFS);
1722+
auto Style = getFormatStyleForFile(FileName, Content, TFS, false);
17231723
// This will only insert verbatim headers.
17241724
Inserter.emplace(FileName, Content, Style,
17251725
/*BuildDir=*/"", /*HeaderSearchInfo=*/nullptr);

clang-tools-extra/clangd/IncludeCleaner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ std::vector<Diag> generateMissingIncludeDiagnostics(
116116
const SourceManager &SM = AST.getSourceManager();
117117
const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
118118

119-
auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS);
119+
auto FileStyle = getFormatStyleForFile(AST.tuPath(), Code, TFS, false);
120120

121121
tooling::HeaderIncludes HeaderIncludes(AST.tuPath(), Code,
122122
FileStyle.IncludeStyle);

clang-tools-extra/clangd/ParsedAST.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs,
626626
// (e.g. incomplete type) and attach include insertion fixes to diagnostics.
627627
if (Inputs.Index && !BuildDir.getError()) {
628628
auto Style =
629-
getFormatStyleForFile(Filename, Inputs.Contents, *Inputs.TFS);
629+
getFormatStyleForFile(Filename, Inputs.Contents, *Inputs.TFS, false);
630630
auto Inserter = std::make_shared<IncludeInserter>(
631631
Filename, Inputs.Contents, Style, BuildDir.get(),
632632
&Clang->getPreprocessor().getHeaderSearchInfo());

clang-tools-extra/clangd/SourceCode.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,7 +582,21 @@ std::optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
582582

583583
format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
584584
llvm::StringRef Content,
585-
const ThreadsafeFS &TFS) {
585+
const ThreadsafeFS &TFS,
586+
bool FormatFile) {
587+
// Unless we're formatting a substantial amount of code (the entire file
588+
// or an arbitrarily large range), skip libFormat's heuristic check for
589+
// .h files that tries to determine whether the file contains objective-c
590+
// code. (This is accomplished by passing empty code contents to getStyle().
591+
// The heuristic is the only thing that looks at the contents.)
592+
// This is a workaround for PR60151, a known issue in libFormat where this
593+
// heuristic can OOM on large files. If we *are* formatting the entire file,
594+
// there's no point in doing this because the actual format::reformat() call
595+
// will run into the same OOM; we'd just be risking inconsistencies between
596+
// clangd and clang-format on smaller .h files where they disagree on what
597+
// language is detected.
598+
if (!FormatFile)
599+
Content = {};
586600
auto Style = format::getStyle(format::DefaultFormatStyle, File,
587601
format::DefaultFallbackStyle, Content,
588602
TFS.view(/*CWD=*/std::nullopt).get());

clang-tools-extra/clangd/SourceCode.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,9 +171,13 @@ std::optional<std::string> getCanonicalPath(const FileEntryRef F,
171171
/// FIXME: should we be caching the .clang-format file search?
172172
/// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle,
173173
/// though the latter may have been overridden in main()!
174+
/// \p FormatFile indicates whether the returned FormatStyle is used
175+
/// to format the entire main file (or a range selected by the user
176+
/// which can be arbitrarily long).
174177
format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
175178
llvm::StringRef Content,
176-
const ThreadsafeFS &TFS);
179+
const ThreadsafeFS &TFS,
180+
bool FormatFile);
177181

178182
/// Cleanup and format the given replacements.
179183
llvm::Expected<tooling::Replacements>

clang-tools-extra/clangd/tool/Check.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ class Checker {
226226

227227
// FIXME: Check that resource-dir/built-in-headers exist?
228228

229-
Style = getFormatStyleForFile(File, Inputs.Contents, TFS);
229+
Style = getFormatStyleForFile(File, Inputs.Contents, TFS, false);
230230

231231
return true;
232232
}

clang-tools-extra/clangd/unittests/SourceCodeTests.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,44 @@ TEST(ApplyEditsTest, EndLineOutOfRange) {
10901090
FailedWithMessage("Line value is out of range (100)"));
10911091
}
10921092

1093+
TEST(FormatStyleForFile, LanguageGuessingHeuristic) {
1094+
StringRef ObjCContent = "@interface Foo\n@end\n";
1095+
StringRef CppContent = "class Foo {};\n";
1096+
using LK = format::FormatStyle::LanguageKind;
1097+
struct TestCase {
1098+
llvm::StringRef Filename;
1099+
llvm::StringRef Contents;
1100+
bool FormatFile;
1101+
LK ExpectedLanguage;
1102+
} TestCases[] = {
1103+
// If the file extension identifies the file as ObjC, the guessed
1104+
// language should be ObjC regardless of content or FormatFile flag.
1105+
{"foo.mm", ObjCContent, true, LK::LK_ObjC},
1106+
{"foo.mm", ObjCContent, false, LK::LK_ObjC},
1107+
{"foo.mm", CppContent, true, LK::LK_ObjC},
1108+
{"foo.mm", CppContent, false, LK::LK_ObjC},
1109+
1110+
// If the file extension is ambiguous like .h, FormatFile=true should
1111+
// result in using libFormat's heuristic to guess the language based
1112+
// on the file contents.
1113+
{"foo.h", ObjCContent, true, LK::LK_ObjC},
1114+
{"foo.h", CppContent, true, LK::LK_Cpp},
1115+
1116+
// With FomatFile=false, the language guessing heuristic should be
1117+
// bypassed
1118+
{"foo.h", ObjCContent, false, LK::LK_Cpp},
1119+
{"foo.h", CppContent, false, LK::LK_Cpp},
1120+
};
1121+
1122+
MockFS FS;
1123+
for (const auto &[Filename, Contents, FormatFile, ExpectedLanguage] :
1124+
TestCases) {
1125+
EXPECT_EQ(
1126+
getFormatStyleForFile(Filename, Contents, FS, FormatFile).Language,
1127+
ExpectedLanguage);
1128+
}
1129+
}
1130+
10931131
} // namespace
10941132
} // namespace clangd
10951133
} // namespace clang

0 commit comments

Comments
 (0)