Skip to content

[Safe Buffers] Serialize unsafe_buffer_usage pragmas #92031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/SourceManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -1672,6 +1672,11 @@ class SourceManager : public RefCountedBase<SourceManager> {
isInTheSameTranslationUnit(std::pair<FileID, unsigned> &LOffs,
std::pair<FileID, unsigned> &ROffs) const;

/// \param Loc a source location in a loaded AST (of a PCH/Module file).
/// \returns a FileID uniquely identifies the AST of a loaded
/// module/PCH where `Loc` is at.
FileID getUniqueLoadedASTFileID(SourceLocation Loc) const;

/// Determines whether the two decomposed source location is in the same TU.
bool isInTheSameTranslationUnitImpl(
const std::pair<FileID, unsigned> &LOffs,
Expand Down
52 changes: 47 additions & 5 deletions clang/include/clang/Lex/Preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -2883,11 +2883,41 @@ class Preprocessor {
/// otherwise.
SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region.

// An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
// translation unit. Each region is represented by a pair of start and end
// locations. A region is "open" if its' start and end locations are
// identical.
SmallVector<std::pair<SourceLocation, SourceLocation>, 8> SafeBufferOptOutMap;
using SafeBufferOptOutRegionsTy =
SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
// An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
// translation unit. Each region is represented by a pair of start and
// end locations.
SafeBufferOptOutRegionsTy SafeBufferOptOutMap;

// The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the
// following structure to manage them by their ASTs.
struct {
// A map from unique IDs to region maps of loaded ASTs. The ID identifies a
// loaded AST. See `SourceManager::getUniqueLoadedASTID`.
llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;

// Returns a reference to the safe buffer opt-out regions of the loaded
// AST where `Loc` belongs to. (Construct if absent)
SafeBufferOptOutRegionsTy &
findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)];
}

// Returns a reference to the safe buffer opt-out regions of the loaded
// AST where `Loc` belongs to. (This const function returns nullptr if
// absent.)
const SafeBufferOptOutRegionsTy *
lookupLoadedOptOutMap(SourceLocation Loc,
const SourceManager &SrcMgr) const {
FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc);
auto Iter = LoadedRegions.find(FID);

if (Iter == LoadedRegions.end())
return nullptr;
return &Iter->getSecond();
}
} LoadedSafeBufferOptOutMap;

public:
/// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
Expand Down Expand Up @@ -2918,6 +2948,18 @@ class Preprocessor {
/// opt-out region
bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);

/// \return a sequence of SourceLocations representing ordered opt-out regions
/// specified by
/// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;

/// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
/// record of code `PP_UNSAFE_BUFFER_USAGE`.
/// \return true iff the `Preprocessor` has been updated; false `Preprocessor`
/// is same as itself before the call.
bool setDeserializedSafeBufferOptOutMap(
const SmallVectorImpl<SourceLocation> &SrcLocSeqs);

private:
/// Helper functions to forward lexing to the actual lexer. They all share the
/// same signature.
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,9 @@ enum ASTRecordTypes {
/// Record code for lexical and visible block for delayed namespace in
/// reduced BMI.
DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,

/// Record code for \#pragma clang unsafe_buffer_usage begin/end
PP_UNSAFE_BUFFER_USAGE = 69,
};

/// Record types used within a source manager block.
Expand Down
18 changes: 18 additions & 0 deletions clang/lib/Basic/SourceManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1911,6 +1911,24 @@ SourceManager::getDecomposedIncludedLoc(FileID FID) const {
return DecompLoc;
}

FileID SourceManager::getUniqueLoadedASTFileID(SourceLocation Loc) const {
assert(isLoadedSourceLocation(Loc) &&
"Must be a source location in a loaded PCH/Module file");

auto [FID, Ignore] = getDecomposedLoc(Loc);
// `LoadedSLocEntryAllocBegin` stores the sorted lowest FID of each loaded
// allocation. Later allocations have lower FileIDs. The call below is to find
// the lowest FID of a loaded allocation from any FID in the same allocation.
// The lowest FID is used to identify a loaded allocation.
const FileID *FirstFID =
llvm::lower_bound(LoadedSLocEntryAllocBegin, FID, std::greater<FileID>{});
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is quite magical to me because I don't understand how FileIDs are assigned and in what order they show up in the LoadedSLocEntryAllocBegin list. But that's probably ok, this looks like completely normal SourceManager code to me.


assert(FirstFID &&
"The failure to find the first FileID of a "
"loaded AST from a loaded source location was unexpected.");
return *FirstFID;
}

bool SourceManager::isInTheSameTranslationUnitImpl(
const std::pair<FileID, unsigned> &LOffs,
const std::pair<FileID, unsigned> &ROffs) const {
Expand Down
110 changes: 91 additions & 19 deletions clang/lib/Lex/Preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
Expand Down Expand Up @@ -1483,26 +1484,56 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
}

bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
const SourceLocation &Loc) const {
// Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
auto FirstRegionEndingAfterLoc = llvm::partition_point(
SafeBufferOptOutMap,
[&SourceMgr,
&Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
});
const SourceLocation &Loc) const {
// The lambda that tests if a `Loc` is in an opt-out region given one opt-out
// region map:
auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
const SourceLocation &Loc) -> bool {
// Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
auto FirstRegionEndingAfterLoc = llvm::partition_point(
Map, [&SourceMgr,
&Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
});

if (FirstRegionEndingAfterLoc != Map.end()) {
// To test if the start location of the found region precedes `Loc`:
return SourceMgr.isBeforeInTranslationUnit(
FirstRegionEndingAfterLoc->first, Loc);
}
// If we do not find a region whose end location passes `Loc`, we want to
// check if the current region is still open:
if (!Map.empty() && Map.back().first == Map.back().second)
return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc);
return false;
};

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok @ziqingluo-90 has just walked me through this offline and I think I understand what's happening now.

What was unobvious to me: shouldn't we TestInMap over the entire stack of includes? And the answer is:

  • Regular textual #includes will just show up in the current map normally; there's no need to go up the stack.
  • PCHes are going to be included with the -include-pch flag, not directly. There's not really much of a stack going on at this point and there's no way to "put pragmas around the compiler flag".
  • In case of modules, we don't care what this function returns deep inside a module, because we probably shouldn't be analyzing that code in the first place. Instead, the warning should be naturally enabled in the clang invocation that builds the module itself, and that's where the warning would be emitted, taking only the current module into account.
    • As a matter of fact, as of today we're analyzing that nested code. But we shouldn't be, that's not how modules are intended to work. We consider it a bug and have a separate plan to fix this. Right now this means that the warning will be emitted multiple times for the same line of code (both by module compilation and by .cpp file compilation, and possibly by everything in between). And the worst that could happen if this function misbehaves, is that some of these duplicates will disappear. Which is annoying but harmless.

Let's add a comment explaining this!

if (FirstRegionEndingAfterLoc != SafeBufferOptOutMap.end()) {
// To test if the start location of the found region precedes `Loc`:
return SourceMgr.isBeforeInTranslationUnit(FirstRegionEndingAfterLoc->first,
Loc);
}
// If we do not find a region whose end location passes `Loc`, we want to
// check if the current region is still open:
if (!SafeBufferOptOutMap.empty() &&
SafeBufferOptOutMap.back().first == SafeBufferOptOutMap.back().second)
return SourceMgr.isBeforeInTranslationUnit(SafeBufferOptOutMap.back().first,
Loc);
// What the following does:
//
// If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
// Otherwise, `Loc` is from a loaded AST. We look up the
// `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
// loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
// region w.r.t. the region map. If the region map is absent, it means there
// is no opt-out pragma in that loaded AST.
//
// Opt-out pragmas in the local TU or a loaded AST is not visible to another
// one of them. That means if you put the pragmas around a `#include
// "module.h"`, where module.h is a module, it is not actually suppressing
// warnings in module.h. This is fine because warnings in module.h will be
// reported when module.h is compiled in isolation and nothing in module.h
// will be analyzed ever again. So you will not see warnings from the file
// that imports module.h anyway. And you can't even do the same thing for PCHs
// because they can only be included from the command line.

if (SourceMgr.isLocalSourceLocation(Loc))
return TestInMap(SafeBufferOptOutMap, Loc);

const SafeBufferOptOutRegionsTy *LoadedRegions =
LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SourceMgr);

if (LoadedRegions)
return TestInMap(*LoadedRegions, Loc);
return false;
}

Expand Down Expand Up @@ -1551,6 +1582,47 @@ bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
return InSafeBufferOptOutRegion;
}

SmallVector<SourceLocation, 64>
Preprocessor::serializeSafeBufferOptOutMap() const {
assert(!InSafeBufferOptOutRegion &&
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We require the pragma to be closed before the end of TU right? And that's a hard error, not a warning?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a warning.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah in this case we need to support the situation when the warning was ignored.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I'm sorry, they are not warnings, they are errors:

PP.Diag(Loc, diag::err_pp_double_begin_pragma_unsafe_buffer_usage);

"Attempt to serialize safe buffer opt-out regions before file being "
"completely preprocessed");

SmallVector<SourceLocation, 64> SrcSeq;

for (const auto &[begin, end] : SafeBufferOptOutMap) {
SrcSeq.push_back(begin);
SrcSeq.push_back(end);
}
// Only `SafeBufferOptOutMap` gets serialized. No need to serialize
// `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
// pch/module in the pch-chain/module-DAG will be loaded one by one in order.
// It means that for each loading pch/module m, it just needs to load m's own
// `SafeBufferOptOutMap`.
return SrcSeq;
}

bool Preprocessor::setDeserializedSafeBufferOptOutMap(
const SmallVectorImpl<SourceLocation> &SourceLocations) {
if (SourceLocations.size() == 0)
return false;

assert(SourceLocations.size() % 2 == 0 &&
"ill-formed SourceLocation sequence");

auto It = SourceLocations.begin();
SafeBufferOptOutRegionsTy &Regions =
LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(*It, SourceMgr);

do {
SourceLocation Begin = *It++;
SourceLocation End = *It++;

Regions.emplace_back(Begin, End);
} while (It != SourceLocations.end());
return true;
}

ModuleLoader::~ModuleLoader() = default;

CommentHandler::~CommentHandler() = default;
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3586,6 +3586,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
break;
}

case PP_UNSAFE_BUFFER_USAGE: {
if (!Record.empty()) {
SmallVector<SourceLocation, 64> SrcLocs;
unsigned Idx = 0;
while (Idx < Record.size())
SrcLocs.push_back(ReadSourceLocation(F, Record, Idx));
PP.setDeserializedSafeBufferOptOutMap(SrcLocs);
}
break;
}

case PP_CONDITIONAL_STACK:
if (!Record.empty()) {
unsigned Idx = 0, End = Record.size() - 1;
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -911,6 +911,7 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(PP_CONDITIONAL_STACK);
RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
RECORD(PP_ASSUME_NONNULL_LOC);
RECORD(PP_UNSAFE_BUFFER_USAGE);

// SourceManager Block.
BLOCK(SOURCE_MANAGER_BLOCK);
Expand Down Expand Up @@ -2439,6 +2440,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
Record.clear();
}

// Write the safe buffer opt-out region map in PP
for (SourceLocation &S : PP.serializeSafeBufferOptOutMap())
AddSourceLocation(S, Record);
Stream.EmitRecord(PP_UNSAFE_BUFFER_USAGE, Record);
Record.clear();

// Enter the preprocessor block.
Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3);

Expand Down
Loading
Loading