Skip to content

[clang][deps] Lazy dependency directives rdar://125519128 #8484

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,6 @@ class DependencyScanningCASFilesystem : public llvm::cas::ThreadSafeFileSystem {
getDirectiveTokens(const Twine &Path);

private:
/// Check whether the file should be scanned for preprocessor directives.
bool shouldScanForDirectives(StringRef Filename);

IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS;

struct FileEntry {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ class EntryRef {
/// The underlying cached entry.
const CachedFileSystemEntry &Entry;

friend class DependencyScanningWorkerFilesystem;

public:
EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
: Filename(Name), Entry(Entry) {}
Expand Down Expand Up @@ -316,14 +318,15 @@ class DependencyScanningWorkerFilesystem
///
/// Attempts to use the local and shared caches first, then falls back to
/// using the underlying filesystem.
llvm::ErrorOr<EntryRef>
getOrCreateFileSystemEntry(StringRef Filename,
bool DisableDirectivesScanning = false);
llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);

private:
/// Check whether the file should be scanned for preprocessor directives.
bool shouldScanForDirectives(StringRef Filename);
/// Ensure the directive tokens are populated for this file entry.
///
/// Returns true if the directive tokens are populated for this file entry,
/// false if not (i.e. this entry is not a file or its scan fails).
bool ensureDirectiveTokensArePopulated(EntryRef Entry);

private:
/// For a filename that's not yet associated with any entry in the caches,
/// uses the underlying filesystem to either look up the entry in the shared
/// cache indexed by unique ID, or create a new entry from scratch.
Expand All @@ -333,11 +336,6 @@ class DependencyScanningWorkerFilesystem
computeAndStoreResult(StringRef OriginalFilename,
StringRef FilenameForLookup);

/// Scan for preprocessor directives for the given entry if necessary and
/// returns a wrapper object with reference semantics.
EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry,
StringRef Filename, bool Disable);

/// Represents a filesystem entry that has been stat-ed (and potentially read)
/// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
struct TentativeEntry {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,35 +200,11 @@ DependencyScanningCASFilesystem::getOriginal(cas::CASID InputDataID) {
return Blob.takeError();
}

/// Decide, from the file extension alone, whether \p Filename should be
/// scanned for dependency directives (minimized). A name without any extension
/// is treated as a source file and is scanned.
///
/// This is a heuristic: it would be better to know what kind of file Clang
/// expects instead of guessing from the name.
static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
  const StringRef Extension = llvm::sys::path::extension(Filename);
  // Case-insensitive match against the known C-family source, header,
  // preprocessed-output and textual-include extensions.
  const bool IsKnownSourceExtension =
      llvm::StringSwitch<bool>(Extension)
          .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
          .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
          .CasesLower(".m", ".mm", true)
          .CasesLower(".i", ".ii", ".mi", ".mmi", true)
          .CasesLower(".def", ".inc", true)
          .Default(false);
  // Extension-less names are accepted as well (C++ standard library headers).
  return Extension.empty() || IsKnownSourceExtension;
}

/// Decide whether a failed stat of \p Filename may be cached.
///
/// Paths without an extension are never cached; otherwise the answer is the
/// same as for the extension-based directive-scanning check.
static bool shouldCacheStatFailures(StringRef Filename) {
  // An extension-less path may be the module cache directory.
  const bool HasExtension = !llvm::sys::path::extension(Filename).empty();
  if (!HasExtension)
    return false;
  // Only cache stat failures on source files.
  return shouldScanForDirectivesBasedOnExtension(Filename);
}

/// Reports whether the file named \p RawFilename should be scanned for
/// preprocessor directives.
///
/// NOTE(review): the body contains two consecutive `return` statements, so the
/// trailing `return true;` is unreachable as written. This looks like residue
/// of a change that replaced the extension-based check -- confirm which of the
/// two returns is the intended one.
bool DependencyScanningCASFilesystem::shouldScanForDirectives(
StringRef RawFilename) {
// As written, the decision is delegated to the extension-based helper.
return shouldScanForDirectivesBasedOnExtension(RawFilename);
return true;
}

llvm::cas::CachingOnDiskFileSystem &
Expand Down Expand Up @@ -274,10 +250,6 @@ DependencyScanningCASFilesystem::lookupPath(const Twine &Path) {
return LookupPathResult{&Entry, std::error_code()};
}

if (shouldScanForDirectives(PathRef))
scanForDirectives(*CAS.getReference(*FileID), PathRef, Entry.DepTokens,
Entry.DepDirectives);

Entry.Buffer = std::move(*Buffer);
Entry.Status = llvm::vfs::Status(
PathRef, MaybeStatus->getUniqueID(),
Expand Down Expand Up @@ -362,7 +334,18 @@ DependencyScanningCASFilesystem::openFileForRead(const Twine &Path) {
/// Returns the dependency directives recorded for the file at \p Path, calling
/// the directive scan on demand when the looked-up entry has none yet.
///
/// \returns a view of the entry's `DepDirectives` when that container is
/// non-empty; `std::nullopt` when `lookupPath` yields no entry or when the
/// directives are still empty after the scan call.
std::optional<ArrayRef<dependency_directives_scan::Directive>>
DependencyScanningCASFilesystem::getDirectiveTokens(const Twine &Path) {
LookupPathResult Result = lookupPath(Path);
// Fast path: the entry already carries directives.
// NOTE(review): this check is repeated by the block below (its inner scan is
// skipped when the directives are non-empty), so it appears redundant --
// confirm whether it is a leftover.
if (Result.Entry && !Result.Entry->DepDirectives.empty())
return ArrayRef(Result.Entry->DepDirectives);

if (Result.Entry) {
// An empty `DepDirectives` is treated here as "not scanned yet".
// NOTE(review): if a scan can legitimately produce zero directives, such an
// entry would be rescanned on every call -- confirm against scanForDirectives.
if (Result.Entry->DepDirectives.empty()) {
SmallString<256> PathStorage;
StringRef PathRef = Path.toStringRef(PathStorage);
// Constness is cast away so the scan can write into the looked-up entry.
// NOTE(review): no lock is taken in this function around that mutation --
// confirm that callers serialize access to the entry.
FileEntry &Entry = const_cast<FileEntry &>(*Result.Entry);
scanForDirectives(*Entry.CASContents, PathRef, Entry.DepTokens,
Entry.DepDirectives);
}

// Hand out the directives only if the entry now has some.
if (!Result.Entry->DepDirectives.empty())
return ArrayRef(Result.Entry->DepDirectives);
}
return std::nullopt;
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,24 +46,25 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
return TentativeEntry(Stat, std::move(Buffer), std::move(CASContents));
}

EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
if (Entry.isError() || Entry.isDirectory() || Disable ||
!shouldScanForDirectives(Filename))
return EntryRef(Filename, Entry);
bool DependencyScanningWorkerFilesystem::ensureDirectiveTokensArePopulated(
EntryRef Ref) {
auto &Entry = Ref.Entry;

if (Entry.isError() || Entry.isDirectory())
return false;

CachedFileContents *Contents = Entry.getCachedContents();
assert(Contents && "contents not initialized");

// Double-checked locking.
if (Contents->DepDirectives.load())
return EntryRef(Filename, Entry);
return true;

std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);

// Double-checked locking.
if (Contents->DepDirectives.load())
return EntryRef(Filename, Entry);
return true;

SmallVector<dependency_directives_scan::Directive, 64> Directives;
// Scan the file for preprocessor directives that might affect the
Expand All @@ -74,16 +75,16 @@ EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary(
Contents->DepDirectiveTokens.clear();
// FIXME: Propagate the diagnostic if desired by the client.
Contents->DepDirectives.store(new std::optional<DependencyDirectivesTy>());
return EntryRef(Filename, Entry);
return false;
}

// This function performed double-checked locking using `DepDirectives`.
// Assigning it must be the last thing this function does, otherwise other
// threads may skip the
// critical section (`DepDirectives != nullptr`), leading to a data race.
// threads may skip the critical section (`DepDirectives != nullptr`), leading
// to a data race.
Contents->DepDirectives.store(
new std::optional<DependencyDirectivesTy>(std::move(Directives)));
return EntryRef(Filename, Entry);
return true;
}

DependencyScanningFilesystemSharedCache::
Expand Down Expand Up @@ -167,34 +168,11 @@ DependencyScanningFilesystemSharedCache::CacheShard::
return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
}

/// Extension-based allow-list of files that should be minimized, i.e. scanned
/// for dependency directives. Files without an extension count as source files
/// and are minimized too.
///
/// This is admittedly hacky; knowing which kind of file Clang expects would be
/// a better basis for the decision.
static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) {
  StringRef Suffix = llvm::sys::path::extension(Filename);
  if (!Suffix.empty()) {
    // Compare case-insensitively against the recognized extensions.
    return llvm::StringSwitch<bool>(Suffix)
        .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
        .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
        .CasesLower(".m", ".mm", true)
        .CasesLower(".i", ".ii", ".mi", ".mmi", true)
        .CasesLower(".def", ".inc", true)
        .Default(false);
  }
  // No extension at all: C++ standard library.
  return true;
}

/// Decide whether a failed stat of \p Filename may be cached.
///
/// As written: paths without an extension are not cached, module map files
/// (`module.modulemap` / `module.map`) are cached, and every other path
/// follows the extension-based directive-scanning check.
///
/// NOTE(review): the final `return true;` directly follows another `return`
/// and is therefore unreachable as written. This looks like residue of a change
/// that replaced the body -- confirm which behaviour is intended.
static bool shouldCacheStatFailures(StringRef Filename) {
StringRef Ext = llvm::sys::path::extension(Filename);
if (Ext.empty())
return false; // This may be the module cache directory.
// Only cache stat failures on files that are not expected to change during
// the build.
StringRef FName = llvm::sys::path::filename(Filename);
if (FName == "module.modulemap" || FName == "module.map")
return true;
return shouldScanForDirectivesBasedOnExtension(Filename);
return true;
}

DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
Expand All @@ -207,11 +185,6 @@ DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
updateWorkingDirForCacheLookup();
}

/// Check whether the file named \p Filename should be scanned for preprocessor
/// directives. The decision is made purely from the file extension.
bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
    StringRef Filename) {
  const bool ScanWanted = shouldScanForDirectivesBasedOnExtension(Filename);
  return ScanWanted;
}

const CachedFileSystemEntry &
DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
TentativeEntry TEntry) {
Expand Down Expand Up @@ -265,7 +238,7 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(

llvm::ErrorOr<EntryRef>
DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
StringRef OriginalFilename, bool DisableDirectivesScanning) {
StringRef OriginalFilename) {
StringRef FilenameForLookup;
SmallString<256> PathBuf;
if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
Expand All @@ -282,15 +255,11 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
if (const auto *Entry =
findEntryByFilenameWithWriteThrough(FilenameForLookup))
return scanForDirectivesIfNecessary(*Entry, OriginalFilename,
DisableDirectivesScanning)
.unwrapError();
return EntryRef(OriginalFilename, *Entry).unwrapError();
auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup);
if (!MaybeEntry)
return MaybeEntry.getError();
return scanForDirectivesIfNecessary(*MaybeEntry, OriginalFilename,
DisableDirectivesScanning)
.unwrapError();
return EntryRef(OriginalFilename, *MaybeEntry).unwrapError();
}

llvm::ErrorOr<llvm::vfs::Status>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,8 @@ class DependencyScanningAction : public tooling::ToolAction {
-> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
if (llvm::ErrorOr<EntryRef> Entry =
LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
return Entry->getDirectiveTokens();
if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry))
return Entry->getDirectiveTokens();
return std::nullopt;
};
}
Expand Down
33 changes: 33 additions & 0 deletions clang/test/ClangScanDeps/modules-extension.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// RUN: rm -rf %t
// RUN: split-file %s %t

// This test checks that source files with uncommon extensions still undergo
// the dependency directives scan. If header.pch were not scanned but b.h were,
// the scan would fail when parsing `void function(B)` because the symbol B
// would be unknown.

//--- module.modulemap
module __PCH { header "header.pch" }
module B { header "b.h" }

//--- header.pch
#include "b.h"
void function(B);

//--- b.h
typedef int B;

//--- tu.c
int main() {
function(0);
return 0;
}

//--- cdb.json.in
[{
"directory": "DIR",
"file": "DIR/tu.c",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fimplicit-module-maps -include DIR/header.pch"
}]

// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/deps.json