Skip to content

Commit 32db4c2

Browse files
DougGregor authored and etcwilde committed
[Serialized diagnostics] Record generated buffer contents in diagnostics files
Extend the format for serialized diagnostic files (`.dia`) to allow for the contents of source files to be included inside the diagnostic files themselves. This enables producers of diagnostics files that generate source code as part of their build (e.g., a compiler that performs macro expansion) to provide that generated source code to consumers of diagnostics, treating it like a normal file (e.g., with diagnostics pointing into it) that isn't otherwise recorded anywhere on disk. Source files with contents can, optionally, provide a source range that describes the "original" source range from which those contents are derived. This information is optional, but if present it can refer back to the source locations that triggered the creation of the generated source code, i.e., the use of a macro that caused macro expansion. This change extends the libclang C APIs with access to the source file contents (via `clang_getDiagnosticFileContents`) and the original source range (via `clang_getDiagnosticFileOriginalSourceRange`). The diagnostics file format, which has been stable for roughly a decade, is extended in a backward-compatible way by adding a new record, which existing clients (i.e., older libclang versions) will safely ignore. As such, we do not bump the diagnostics file format version at all. At present, the Swift compiler is the only producer of source file content records, and it uses them to provide macro-expansion buffers. The Swift repository contains tests that produce these records and verify their correctness via `c-index-test`. We could consider introducing command-line flags to Clang to make it start producing these records, e.g., to capture macro-expansion buffers (as we're doing in Swift) or even to package up the contents of all source files that are referenced by serialized diagnostics, making the serialized-diagnostics file self-contained. (cherry picked from commit bd7ca13)
1 parent fced1a1 commit 32db4c2

File tree

10 files changed

+195
-5
lines changed

10 files changed

+195
-5
lines changed

clang/include/clang-c/CXDiagnostic.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,30 @@ CINDEX_LINKAGE void clang_disposeDiagnosticSet(CXDiagnosticSet Diags);
148148
*/
149149
CINDEX_LINKAGE CXDiagnosticSet clang_getChildDiagnostics(CXDiagnostic D);
150150

151+
/**
152+
* Get the contents of the given file that was provided via diagnostics.
153+
*
154+
* \param diags the diagnostics set to query for the contents of the file.
155+
* \param file the file to get the contents of.
156+
* \param outFileSize if non-null, set to the file size on success.
157+
* \returns on success, a pointer to the file contents. Otherwise, NULL.
158+
*/
159+
CINDEX_LINKAGE const char *clang_getDiagnosticFileContents(
160+
CXDiagnosticSet diags, CXFile file, size_t *outFileSize);
161+
162+
/**
163+
* Retrieve the original source range if the given file was provided via
164+
* diagnostics and is conceptually a replacement for the original source range.
165+
*
166+
* \param diags the diagnostics set to query for the original source range.
167+
* \param file the file whose original source range should be retrieved.
168+
* \returns on success, the source range (into another file) that is
169+
* conceptually replaced by the contents of the given file (available via
170+
* \c clang_getDiagnosticFileContents). Otherwise, a null range.
171+
*/
172+
CINDEX_LINKAGE CXSourceRange clang_getDiagnosticFileOriginalSourceRange(
173+
CXDiagnosticSet diags, CXFile file);
174+
151175
/**
152176
* Destroy a diagnostic.
153177
*/

clang/include/clang/Frontend/SerializedDiagnosticReader.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,16 @@ class SerializedDiagnosticReader {
112112
return {};
113113
}
114114

115+
/// Visit a source file contents record. This associates the file's \c ID
116+
/// with the contents of that file.
///
/// \param ID the ID of the file whose contents are provided.
/// \param OriginalStartLoc start of the "original" source range recorded
/// alongside the contents.
/// \param OriginalEndLoc end of the "original" source range recorded
/// alongside the contents.
/// \param Contents the file contents carried by the record.
/// \returns a default-constructed error code. This default implementation
/// ignores the record; subclasses override it to consume the contents.
117+
virtual std::error_code visitSourceFileContentsRecord(
118+
unsigned ID,
119+
const Location &OriginalStartLoc,
120+
const Location &OriginalEndLoc,
121+
StringRef Contents) {
122+
return {};
123+
}
124+
115125
/// Visit a fixit hint.
116126
virtual std::error_code
117127
visitFixitRecord(const Location &Start, const Location &End, StringRef Text) {

clang/include/clang/Frontend/SerializedDiagnostics.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ enum RecordIDs {
3232
RECORD_CATEGORY,
3333
RECORD_FILENAME,
3434
RECORD_FIXIT,
35+
RECORD_SOURCE_FILE_CONTENTS,
3536
RECORD_FIRST = RECORD_VERSION,
36-
RECORD_LAST = RECORD_FIXIT
37+
RECORD_LAST = RECORD_SOURCE_FILE_CONTENTS
3738
};
3839

3940
/// A stable version of DiagnosticIDs::Level.

clang/lib/Frontend/SerializedDiagnosticPrinter.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ class SDiagsMerger : SerializedDiagnosticReader {
9191
AbbrevLookup FileLookup;
9292
AbbrevLookup CategoryLookup;
9393
AbbrevLookup DiagFlagLookup;
94+
llvm::DenseSet<unsigned> ContentsWritten;
9495

9596
public:
9697
SDiagsMerger(SDiagsWriter &Writer) : Writer(Writer) {}
@@ -110,6 +111,12 @@ class SDiagsMerger : SerializedDiagnosticReader {
110111
std::error_code visitFilenameRecord(unsigned ID, unsigned Size,
111112
unsigned Timestamp,
112113
StringRef Name) override;
114+
std::error_code visitSourceFileContentsRecord(
115+
unsigned ID,
116+
const Location &OriginalStartLoc,
117+
const Location &OriginalEndLoc,
118+
StringRef Contents) override;
119+
113120
std::error_code visitFixitRecord(const serialized_diags::Location &Start,
114121
const serialized_diags::Location &End,
115122
StringRef CodeToInsert) override;
@@ -460,6 +467,8 @@ void SDiagsWriter::EmitBlockInfoBlock() {
460467
EmitRecordID(RECORD_DIAG_FLAG, "DiagFlag", Stream, Record);
461468
EmitRecordID(RECORD_FILENAME, "FileName", Stream, Record);
462469
EmitRecordID(RECORD_FIXIT, "FixIt", Stream, Record);
470+
EmitRecordID(
471+
RECORD_SOURCE_FILE_CONTENTS, "SourceFileContents", Stream, Record);
463472

464473
// Emit abbreviation for RECORD_DIAG.
465474
Abbrev = std::make_shared<BitCodeAbbrev>();
@@ -516,6 +525,16 @@ void SDiagsWriter::EmitBlockInfoBlock() {
516525
Abbrevs.set(RECORD_FIXIT, Stream.EmitBlockInfoAbbrev(BLOCK_DIAG,
517526
Abbrev));
518527

528+
// Emit the abbreviation for RECORD_SOURCE_FILE_CONTENTS.
529+
Abbrev = std::make_shared<BitCodeAbbrev>();
530+
Abbrev->Add(BitCodeAbbrevOp(RECORD_SOURCE_FILE_CONTENTS));
531+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); // File ID.
532+
AddRangeLocationAbbrev(*Abbrev);
533+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // File size.
534+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File contents.
535+
Abbrevs.set(RECORD_SOURCE_FILE_CONTENTS,
536+
Stream.EmitBlockInfoAbbrev(BLOCK_DIAG, Abbrev));
537+
519538
Stream.ExitBlock();
520539
}
521540

@@ -891,6 +910,28 @@ std::error_code SDiagsMerger::visitFilenameRecord(unsigned ID, unsigned Size,
891910
return std::error_code();
892911
}
893912

913+
/// Re-emit a source file contents record read from an input diagnostics file
/// into the merged output stream.
///
/// File IDs (for the file itself and for both ends of the original range)
/// are translated through \c FileLookup before being written. The contents of
/// a given mapped file are emitted at most once per merger.
///
/// \returns always a default-constructed error code.
std::error_code SDiagsMerger::visitSourceFileContentsRecord(
914+
unsigned ID,
915+
const Location &OriginalStartLoc,
916+
const Location &OriginalEndLoc,
917+
StringRef Contents) {
918+
// Translate the incoming file ID. NOTE(review): presumably FileLookup is
// populated by visitFilenameRecord with the writer-side ID - confirm.
unsigned MappedID = FileLookup[ID];
919+
// insert().second is false when MappedID was already present, i.e. the
// contents for this file have been written before; skip the duplicate.
if (!ContentsWritten.insert(MappedID).second)
920+
return std::error_code();
921+
922+
// Field order: record code, file ID, start location (file, line, column,
// offset), end location (file, line, column, offset), blob size.
RecordData::value_type Record[] = {
923+
RECORD_SOURCE_FILE_CONTENTS, MappedID,
924+
FileLookup[OriginalStartLoc.FileID],
925+
OriginalStartLoc.Line, OriginalStartLoc.Col, OriginalStartLoc.Offset,
926+
FileLookup[OriginalEndLoc.FileID], OriginalEndLoc.Line,
927+
OriginalEndLoc.Col, OriginalEndLoc.Offset,
928+
Contents.size()};
929+
930+
// The contents themselves travel as the record's blob.
Writer.State->Stream.EmitRecordWithBlob(
931+
Writer.State->Abbrevs.get(RECORD_SOURCE_FILE_CONTENTS), Record, Contents);
932+
return std::error_code();
933+
}
934+
894935
std::error_code SDiagsMerger::visitCategoryRecord(unsigned ID, StringRef Name) {
895936
CategoryLookup[ID] = Writer.getEmitCategory(ID);
896937
return std::error_code();

clang/lib/Frontend/SerializedDiagnosticReader.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,16 @@ SerializedDiagnosticReader::readDiagnosticBlock(llvm::BitstreamCursor &Stream) {
307307
Location(Record[4], Record[5], Record[6], Record[7]), Blob)))
308308
return EC;
309309
continue;
310+
case RECORD_SOURCE_FILE_CONTENTS:
311+
if (Record.size() != 10 || Record[9] != Blob.size())
312+
return SDError::MalformedDiagnosticRecord;
313+
if ((EC = visitSourceFileContentsRecord(
314+
Record[0],
315+
Location(Record[1], Record[2], Record[3], Record[4]),
316+
Location(Record[5], Record[6], Record[7], Record[8]),
317+
Blob)))
318+
return EC;
319+
continue;
310320
case RECORD_SOURCE_RANGE:
311321
// A source range is two locations (4 each).
312322
if (Record.size() != 8)

clang/tools/c-index-test/c-index-test.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4760,7 +4760,8 @@ static void printDiagnosticSet(CXDiagnosticSet Diags, unsigned indent) {
47604760
CXString FileName, DiagSpelling, DiagOption, DiagCat;
47614761
unsigned line, column, offset;
47624762
const char *FileNameStr = 0, *DiagOptionStr = 0, *DiagCatStr = 0;
4763-
4763+
const char *FileContents = 0;
4764+
47644765
D = clang_getDiagnosticInSet(Diags, i);
47654766
DiagLoc = clang_getDiagnosticLocation(D);
47664767
clang_getExpansionLocation(DiagLoc, &File, &line, &column, &offset);
@@ -4793,15 +4794,37 @@ static void printDiagnosticSet(CXDiagnosticSet Diags, unsigned indent) {
47934794

47944795
printRanges(D, indent);
47954796
printFixIts(D, indent);
4796-
4797+
4798+
// If we have the source file contents for this file, print them now.
4799+
FileContents = clang_getDiagnosticFileContents(Diags, File, 0);
4800+
if (FileContents) {
4801+
CXSourceRange OriginalSourceRange;
4802+
4803+
fprintf(stderr, "CONTENTS OF FILE %s:\n",
4804+
FileNameStr ? FileNameStr : "(null)");
4805+
4806+
OriginalSourceRange = clang_getDiagnosticFileOriginalSourceRange(
4807+
Diags, File);
4808+
if (!clang_equalRanges(clang_getNullRange(), OriginalSourceRange)) {
4809+
printIndent(indent);
4810+
fprintf(stderr, "Original source range: ");
4811+
printLocation(clang_getRangeStart(OriginalSourceRange));
4812+
fprintf(stderr, " - ");
4813+
printLocation(clang_getRangeEnd(OriginalSourceRange));
4814+
fprintf(stderr, "\n");
4815+
}
4816+
4817+
fprintf(stderr, "%s\nEND CONTENTS OF FILE\n", FileContents);
4818+
}
4819+
47974820
/* Print subdiagnostics. */
47984821
printDiagnosticSet(clang_getChildDiagnostics(D), indent+2);
47994822

48004823
clang_disposeString(FileName);
48014824
clang_disposeString(DiagSpelling);
48024825
clang_disposeString(DiagOption);
48034826
clang_disposeString(DiagCat);
4804-
}
4827+
}
48054828
}
48064829

48074830
static int read_diagnostics(const char *filename) {

clang/tools/libclang/CIndexDiagnostic.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ CXDiagnosticSetImpl::appendDiagnostic(std::unique_ptr<CXDiagnosticImpl> D) {
3434
Diagnostics.push_back(std::move(D));
3535
}
3636

37+
/// Record the contents and original source range for \p file in this
/// diagnostic set, replacing any entry already stored for that file.
///
/// Only the \c StringRef is stored, not a copy of the characters.
/// NOTE(review): the caller therefore has to keep the underlying buffer alive
/// for as long as the entry may be queried - confirm at call sites.
void CXDiagnosticSetImpl::recordSourceFileContents(
38+
CXFile file, StringRef contents, CXSourceRange originalSourceRange) {
39+
FileContents[file] = CXSourceFileContents{contents, originalSourceRange};
40+
}
41+
3742
CXDiagnosticImpl::~CXDiagnosticImpl() {}
3843

3944
namespace {
@@ -479,6 +484,30 @@ CXDiagnosticSet clang_getChildDiagnostics(CXDiagnostic Diag) {
479484
return nullptr;
480485
}
481486

487+
// Look up the contents recorded for `file` in the diagnostic set `diags`.
//
// Returns a pointer to the recorded contents, or nullptr when `diags` is null
// or no contents were recorded for `file`. When `outFileSize` is non-null it
// receives the size of the contents on success; it is left untouched on
// failure.
const char *clang_getDiagnosticFileContents(
488+
CXDiagnosticSet diags, CXFile file, size_t *outFileSize) {
489+
if (CXDiagnosticSetImpl *D = static_cast<CXDiagnosticSetImpl *>(diags)) {
490+
// Required by getSourceFileContents as an out-parameter; the range itself
// is not used by this entry point.
CXSourceRange originalSourceRange;
491+
if (auto contents = D->getSourceFileContents(file, originalSourceRange)) {
492+
if (outFileSize)
493+
*outFileSize = contents->size();
494+
return contents->data();
495+
}
496+
}
497+
return nullptr;
498+
}
499+
500+
// Look up the original source range recorded for `file` in the diagnostic set
// `diags`.
//
// Returns the recorded range when contents were recorded for `file`;
// otherwise (including when `diags` is null) returns clang_getNullRange().
CXSourceRange clang_getDiagnosticFileOriginalSourceRange(
501+
CXDiagnosticSet diags, CXFile file) {
502+
if (CXDiagnosticSetImpl *D = static_cast<CXDiagnosticSetImpl *>(diags)) {
503+
CXSourceRange originalSourceRange;
504+
// `contents` only signals whether an entry exists; the range is delivered
// through the out-parameter.
if (auto contents = D->getSourceFileContents(file, originalSourceRange)) {
505+
return originalSourceRange;
506+
}
507+
}
508+
return clang_getNullRange();
509+
}
510+
482511
unsigned clang_getNumDiagnosticsInSet(CXDiagnosticSet Diags) {
483512
if (CXDiagnosticSetImpl *D = static_cast<CXDiagnosticSetImpl*>(Diags))
484513
return D->getNumDiagnostics();

clang/tools/libclang/CIndexDiagnostic.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515

1616
#include "clang-c/Index.h"
1717
#include "clang/Basic/LLVM.h"
18+
#include "llvm/ADT/DenseMap.h"
19+
#include "llvm/ADT/Optional.h"
20+
#include "llvm/ADT/StringRef.h"
1821
#include <memory>
1922
#include <vector>
2023
#include <assert.h>
@@ -24,9 +27,15 @@ namespace clang {
2427
class LangOptions;
2528
class StoredDiagnostic;
2629
class CXDiagnosticImpl;
27-
30+
2831
class CXDiagnosticSetImpl {
32+
/// The contents recorded for one file, together with the source range
/// recorded with them.
struct CXSourceFileContents {
33+
/// The file contents, held as a \c StringRef rather than as a copy of the
/// characters.
StringRef Contents;
34+
/// The "original" source range associated with the contents.
CXSourceRange OriginalSourceRange;
35+
};
36+
2937
std::vector<std::unique_ptr<CXDiagnosticImpl>> Diagnostics;
38+
llvm::DenseMap<CXFile, CXSourceFileContents> FileContents;
3039
const bool IsExternallyManaged;
3140
public:
3241
CXDiagnosticSetImpl(bool isManaged = false)
@@ -45,6 +54,19 @@ class CXDiagnosticSetImpl {
4554

4655
void appendDiagnostic(std::unique_ptr<CXDiagnosticImpl> D);
4756

57+
void recordSourceFileContents(
58+
CXFile file, StringRef contents, CXSourceRange originalSourceRange);
59+
60+
/// Look up the contents recorded for \p file.
///
/// \param file the file to look up in \c FileContents.
/// \param originalSourceRange set to the recorded original source range when
/// an entry is found; left unmodified otherwise.
/// \returns the recorded contents, or \c None if nothing was recorded for
/// \p file.
Optional<StringRef> getSourceFileContents(
61+
CXFile file, CXSourceRange &originalSourceRange) {
62+
auto found = FileContents.find(file);
63+
if (found == FileContents.end())
64+
return None;
65+
66+
originalSourceRange = found->second.OriginalSourceRange;
67+
return found->second.Contents;
68+
}
69+
4870
bool empty() const {
4971
return Diagnostics.empty();
5072
}

clang/tools/libclang/CXLoadedDiagnostic.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,12 @@ class DiagLoader : serialized_diags::SerializedDiagnosticReader {
226226
unsigned Timestamp,
227227
StringRef Name) override;
228228

229+
std::error_code visitSourceFileContentsRecord(
230+
unsigned ID,
231+
const serialized_diags::Location &OriginalStartLoc,
232+
const serialized_diags::Location &OriginalEndLoc,
233+
StringRef Contents) override;
234+
229235
std::error_code visitFixitRecord(const serialized_diags::Location &Start,
230236
const serialized_diags::Location &End,
231237
StringRef CodeToInsert) override;
@@ -366,6 +372,28 @@ std::error_code DiagLoader::visitFilenameRecord(unsigned ID, unsigned Size,
366372
return std::error_code();
367373
}
368374

375+
/// Handle a source file contents record while loading a diagnostics file:
/// convert the original range, resolve the file from its ID, and record the
/// contents on the top-level diagnostic set.
///
/// \returns the error from \c readRange if the range cannot be read, the
/// result of \c reportInvalidFile if \p ID does not resolve to a file, and a
/// default-constructed error code otherwise.
std::error_code DiagLoader::visitSourceFileContentsRecord(
376+
unsigned ID,
377+
const serialized_diags::Location &OriginalStartLoc,
378+
const serialized_diags::Location &OriginalEndLoc,
379+
StringRef Contents
380+
) {
381+
CXSourceRange OriginalSourceRange;
382+
if (std::error_code EC = readRange(
383+
OriginalStartLoc, OriginalEndLoc, OriginalSourceRange))
384+
return EC;
385+
386+
// NOTE(review): presumably Files[ID] yields null for an ID that had no
// filename record - confirm against the container's lookup semantics.
auto file = const_cast<FileEntry *>(TopDiags->Files[ID]);
387+
if (!file)
388+
return reportInvalidFile("Source file contents for unknown file ID");
389+
390+
// Store a copy made by TopDiags rather than a reference to the incoming
// blob. NOTE(review): presumably copyString allocates storage owned by
// TopDiags, so the copy outlives the reader's buffer - confirm.
StringRef CopiedContents(TopDiags->copyString(Contents),
391+
Contents.size());
392+
393+
TopDiags->recordSourceFileContents(file, CopiedContents, OriginalSourceRange);
394+
return std::error_code();
395+
}
396+
369397
std::error_code
370398
DiagLoader::visitSourceRangeRecord(const serialized_diags::Location &Start,
371399
const serialized_diags::Location &End) {

clang/tools/libclang/libclang.map

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,8 @@ LLVM_16 {
519519
clang_CXXMethod_isCopyAssignmentOperator;
520520
clang_CXXMethod_isMoveAssignmentOperator;
521521
clang_createAPISet;
522+
clang_getDiagnosticFileContents;
523+
clang_getDiagnosticFileOriginalSourceRange;
522524
clang_disposeAPISet;
523525
clang_getSymbolGraphForCursor;
524526
clang_getSymbolGraphForUSR;

0 commit comments

Comments
 (0)