Skip to content

Commit bd7ca13

Browse files
committed
[Serialized diagnostics] Record generated buffer contents in diagnostics files
Extend the format for serialized diagnostic files (`.dia`) to allow for the contents of source files to be included inside the diagnostic files themselves. This enables producers of diagnostics files that generate source code as part of their build (e.g., a compiler that performs macro expansion) to provide that generated source code to consumers of diagnostics, treating it like a normal file (e.g., with diagnostics pointing into it) that isn't otherwise recorded anywhere on disk. Source files with contents can, optionally, provide a source range that describes the "original" source range from which those contents are derived. This information is optional, but if present it can refer back to the source locations that triggered the creation of the generated source code, i.e., the use of a macro that caused macro expansion. This change extends the libclang C APIs with access to the source file contents (via `clang_getDiagnosticFileContents`) and the original source range (via `clang_getDiagnosticFileOriginalSourceRange`). The diagnostics file format, which has been stable for roughly a decade, is extended in a backward-compatible way by adding a new record, which existing clients (i.e., older libclang versions) will safely ignore. As such, we do not bump the diagnostics file format version at all. At present, the Swift compiler is the only producer of source file content records, which uses them to provide macro-expansion buffers. The Swift repository contains tests that produce these records and verify their correctness via `c-index-test`. We could consider introducing command-line flags to Clang to make it start producing these records, e.g., to capture macro-expansion buffers (as we're doing in Swift) or even to package up the contents of all source files that are referenced by serialized diagnostics, making the serialized-diagnostics file self-contained.
1 parent 7aca4f5 commit bd7ca13

File tree

10 files changed

+195
-5
lines changed

10 files changed

+195
-5
lines changed

clang/include/clang-c/CXDiagnostic.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,30 @@ CINDEX_LINKAGE void clang_disposeDiagnosticSet(CXDiagnosticSet Diags);
148148
*/
149149
CINDEX_LINKAGE CXDiagnosticSet clang_getChildDiagnostics(CXDiagnostic D);
150150

151+
/**
152+
* Get the contents of the given file that was provided via diagnostics.
153+
*
154+
* \param diags the diagnostics set to query for the contents of the file.
155+
* \param file the file to get the contents of.
156+
* \param outFileSize if non-null, set to the file size on success.
157+
* \returns on success, a pointer to the file contents. Otherwise, NULL.
158+
*/
159+
CINDEX_LINKAGE const char *clang_getDiagnosticFileContents(
160+
CXDiagnosticSet diags, CXFile file, size_t *outFileSize);
161+
162+
/**
163+
* Retrieve the original source range if the given file was provided via
164+
* diagnostics and is conceptually a replacement for the original source range.
165+
*
166+
* \param diags the diagnostics set to query for the original source range.
167+
* \param file the file whose original source range is requested.
168+
* \returns on success, the source range (into another file) that is
169+
* conceptually replaced by the contents of the given file (available via
170+
* \c clang_getDiagnosticFileContents). Otherwise, a null range.
171+
*/
172+
CINDEX_LINKAGE CXSourceRange clang_getDiagnosticFileOriginalSourceRange(
173+
CXDiagnosticSet diags, CXFile file);
174+
151175
/**
152176
* Destroy a diagnostic.
153177
*/

clang/include/clang/Frontend/SerializedDiagnosticReader.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,16 @@ class SerializedDiagnosticReader {
109109
return {};
110110
}
111111

112+
/// Visit file contents. This associates the file's \c ID with the
113+
/// contents of that file and the original source range those contents replace.
114+
virtual std::error_code visitSourceFileContentsRecord(
115+
unsigned ID,
116+
const Location &OriginalStartLoc,
117+
const Location &OriginalEndLoc,
118+
StringRef Contents) {
119+
return {};
120+
}
121+
112122
/// Visit a fixit hint.
113123
virtual std::error_code
114124
visitFixitRecord(const Location &Start, const Location &End, StringRef Text) {

clang/include/clang/Frontend/SerializedDiagnostics.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ enum RecordIDs {
3232
RECORD_CATEGORY,
3333
RECORD_FILENAME,
3434
RECORD_FIXIT,
35+
RECORD_SOURCE_FILE_CONTENTS,
3536
RECORD_FIRST = RECORD_VERSION,
36-
RECORD_LAST = RECORD_FIXIT
37+
RECORD_LAST = RECORD_SOURCE_FILE_CONTENTS
3738
};
3839

3940
/// A stable version of DiagnosticIDs::Level.

clang/lib/Frontend/SerializedDiagnosticPrinter.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ class SDiagsMerger : SerializedDiagnosticReader {
9393
AbbrevLookup FileLookup;
9494
AbbrevLookup CategoryLookup;
9595
AbbrevLookup DiagFlagLookup;
96+
llvm::DenseSet<unsigned> ContentsWritten;
9697

9798
public:
9899
SDiagsMerger(SDiagsWriter &Writer) : Writer(Writer) {}
@@ -112,6 +113,12 @@ class SDiagsMerger : SerializedDiagnosticReader {
112113
std::error_code visitFilenameRecord(unsigned ID, unsigned Size,
113114
unsigned Timestamp,
114115
StringRef Name) override;
116+
std::error_code visitSourceFileContentsRecord(
117+
unsigned ID,
118+
const Location &OriginalStartLoc,
119+
const Location &OriginalEndLoc,
120+
StringRef Contents) override;
121+
115122
std::error_code visitFixitRecord(const serialized_diags::Location &Start,
116123
const serialized_diags::Location &End,
117124
StringRef CodeToInsert) override;
@@ -462,6 +469,8 @@ void SDiagsWriter::EmitBlockInfoBlock() {
462469
EmitRecordID(RECORD_DIAG_FLAG, "DiagFlag", Stream, Record);
463470
EmitRecordID(RECORD_FILENAME, "FileName", Stream, Record);
464471
EmitRecordID(RECORD_FIXIT, "FixIt", Stream, Record);
472+
EmitRecordID(
473+
RECORD_SOURCE_FILE_CONTENTS, "SourceFileContents", Stream, Record);
465474

466475
// Emit abbreviation for RECORD_DIAG.
467476
Abbrev = std::make_shared<BitCodeAbbrev>();
@@ -518,6 +527,16 @@ void SDiagsWriter::EmitBlockInfoBlock() {
518527
Abbrevs.set(RECORD_FIXIT, Stream.EmitBlockInfoAbbrev(BLOCK_DIAG,
519528
Abbrev));
520529

530+
// Emit the abbreviation for RECORD_SOURCE_FILE_CONTENTS.
531+
Abbrev = std::make_shared<BitCodeAbbrev>();
532+
Abbrev->Add(BitCodeAbbrevOp(RECORD_SOURCE_FILE_CONTENTS));
533+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); // File ID.
534+
AddRangeLocationAbbrev(*Abbrev);
535+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 16)); // File size.
536+
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File contents.
537+
Abbrevs.set(RECORD_SOURCE_FILE_CONTENTS,
538+
Stream.EmitBlockInfoAbbrev(BLOCK_DIAG, Abbrev));
539+
521540
Stream.ExitBlock();
522541
}
523542

@@ -890,6 +909,28 @@ std::error_code SDiagsMerger::visitFilenameRecord(unsigned ID, unsigned Size,
890909
return std::error_code();
891910
}
892911

912+
std::error_code SDiagsMerger::visitSourceFileContentsRecord(
913+
unsigned ID,
914+
const Location &OriginalStartLoc,
915+
const Location &OriginalEndLoc,
916+
StringRef Contents) {
917+
unsigned MappedID = FileLookup[ID];
918+
if (!ContentsWritten.insert(MappedID).second)
919+
return std::error_code();
920+
921+
RecordData::value_type Record[] = {
922+
RECORD_SOURCE_FILE_CONTENTS, MappedID,
923+
FileLookup[OriginalStartLoc.FileID],
924+
OriginalStartLoc.Line, OriginalStartLoc.Col, OriginalStartLoc.Offset,
925+
FileLookup[OriginalEndLoc.FileID], OriginalEndLoc.Line,
926+
OriginalEndLoc.Col, OriginalEndLoc.Offset,
927+
Contents.size()};
928+
929+
Writer.State->Stream.EmitRecordWithBlob(
930+
Writer.State->Abbrevs.get(RECORD_SOURCE_FILE_CONTENTS), Record, Contents);
931+
return std::error_code();
932+
}
933+
893934
std::error_code SDiagsMerger::visitCategoryRecord(unsigned ID, StringRef Name) {
894935
CategoryLookup[ID] = Writer.getEmitCategory(ID);
895936
return std::error_code();

clang/lib/Frontend/SerializedDiagnosticReader.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,16 @@ SerializedDiagnosticReader::readDiagnosticBlock(llvm::BitstreamCursor &Stream) {
302302
Location(Record[4], Record[5], Record[6], Record[7]), Blob)))
303303
return EC;
304304
continue;
305+
case RECORD_SOURCE_FILE_CONTENTS:
306+
if (Record.size() != 10 || Record[9] != Blob.size())
307+
return SDError::MalformedDiagnosticRecord;
308+
if ((EC = visitSourceFileContentsRecord(
309+
Record[0],
310+
Location(Record[1], Record[2], Record[3], Record[4]),
311+
Location(Record[5], Record[6], Record[7], Record[8]),
312+
Blob)))
313+
return EC;
314+
continue;
305315
case RECORD_SOURCE_RANGE:
306316
// A source range is two locations (4 each).
307317
if (Record.size() != 8)

clang/tools/c-index-test/c-index-test.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4730,7 +4730,8 @@ static void printDiagnosticSet(CXDiagnosticSet Diags, unsigned indent) {
47304730
CXString FileName, DiagSpelling, DiagOption, DiagCat;
47314731
unsigned line, column, offset;
47324732
const char *FileNameStr = 0, *DiagOptionStr = 0, *DiagCatStr = 0;
4733-
4733+
const char *FileContents = 0;
4734+
47344735
D = clang_getDiagnosticInSet(Diags, i);
47354736
DiagLoc = clang_getDiagnosticLocation(D);
47364737
clang_getExpansionLocation(DiagLoc, &File, &line, &column, &offset);
@@ -4763,15 +4764,37 @@ static void printDiagnosticSet(CXDiagnosticSet Diags, unsigned indent) {
47634764

47644765
printRanges(D, indent);
47654766
printFixIts(D, indent);
4766-
4767+
4768+
// If we have the source file contents for this file, print them now.
4769+
FileContents = clang_getDiagnosticFileContents(Diags, File, 0);
4770+
if (FileContents) {
4771+
CXSourceRange OriginalSourceRange;
4772+
4773+
fprintf(stderr, "CONTENTS OF FILE %s:\n",
4774+
FileNameStr ? FileNameStr : "(null)");
4775+
4776+
OriginalSourceRange = clang_getDiagnosticFileOriginalSourceRange(
4777+
Diags, File);
4778+
if (!clang_equalRanges(clang_getNullRange(), OriginalSourceRange)) {
4779+
printIndent(indent);
4780+
fprintf(stderr, "Original source range: ");
4781+
printLocation(clang_getRangeStart(OriginalSourceRange));
4782+
fprintf(stderr, " - ");
4783+
printLocation(clang_getRangeEnd(OriginalSourceRange));
4784+
fprintf(stderr, "\n");
4785+
}
4786+
4787+
fprintf(stderr, "%s\nEND CONTENTS OF FILE\n", FileContents);
4788+
}
4789+
47674790
/* Print subdiagnostics. */
47684791
printDiagnosticSet(clang_getChildDiagnostics(D), indent+2);
47694792

47704793
clang_disposeString(FileName);
47714794
clang_disposeString(DiagSpelling);
47724795
clang_disposeString(DiagOption);
47734796
clang_disposeString(DiagCat);
4774-
}
4797+
}
47754798
}
47764799

47774800
static int read_diagnostics(const char *filename) {

clang/tools/libclang/CIndexDiagnostic.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ CXDiagnosticSetImpl::appendDiagnostic(std::unique_ptr<CXDiagnosticImpl> D) {
3434
Diagnostics.push_back(std::move(D));
3535
}
3636

37+
void CXDiagnosticSetImpl::recordSourceFileContents(
38+
CXFile file, StringRef contents, CXSourceRange originalSourceRange) {
39+
FileContents[file] = CXSourceFileContents{contents, originalSourceRange};
40+
}
41+
3742
CXDiagnosticImpl::~CXDiagnosticImpl() {}
3843

3944
namespace {
@@ -479,6 +484,30 @@ CXDiagnosticSet clang_getChildDiagnostics(CXDiagnostic Diag) {
479484
return nullptr;
480485
}
481486

487+
const char *clang_getDiagnosticFileContents(
488+
CXDiagnosticSet diags, CXFile file, size_t *outFileSize) {
489+
if (CXDiagnosticSetImpl *D = static_cast<CXDiagnosticSetImpl *>(diags)) {
490+
CXSourceRange originalSourceRange;
491+
if (auto contents = D->getSourceFileContents(file, originalSourceRange)) {
492+
if (outFileSize)
493+
*outFileSize = contents->size();
494+
return contents->data();
495+
}
496+
}
497+
return nullptr;
498+
}
499+
500+
CXSourceRange clang_getDiagnosticFileOriginalSourceRange(
501+
CXDiagnosticSet diags, CXFile file) {
502+
if (CXDiagnosticSetImpl *D = static_cast<CXDiagnosticSetImpl *>(diags)) {
503+
CXSourceRange originalSourceRange;
504+
if (auto contents = D->getSourceFileContents(file, originalSourceRange)) {
505+
return originalSourceRange;
506+
}
507+
}
508+
return clang_getNullRange();
509+
}
510+
482511
unsigned clang_getNumDiagnosticsInSet(CXDiagnosticSet Diags) {
483512
if (CXDiagnosticSetImpl *D = static_cast<CXDiagnosticSetImpl*>(Diags))
484513
return D->getNumDiagnostics();

clang/tools/libclang/CIndexDiagnostic.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515

1616
#include "clang-c/Index.h"
1717
#include "clang/Basic/LLVM.h"
18+
#include "llvm/ADT/DenseMap.h"
19+
#include "llvm/ADT/Optional.h"
20+
#include "llvm/ADT/StringRef.h"
1821
#include <memory>
1922
#include <vector>
2023
#include <assert.h>
@@ -24,9 +27,15 @@ namespace clang {
2427
class LangOptions;
2528
class StoredDiagnostic;
2629
class CXDiagnosticImpl;
27-
30+
2831
class CXDiagnosticSetImpl {
32+
struct CXSourceFileContents {
33+
StringRef Contents;
34+
CXSourceRange OriginalSourceRange;
35+
};
36+
2937
std::vector<std::unique_ptr<CXDiagnosticImpl>> Diagnostics;
38+
llvm::DenseMap<CXFile, CXSourceFileContents> FileContents;
3039
const bool IsExternallyManaged;
3140
public:
3241
CXDiagnosticSetImpl(bool isManaged = false)
@@ -45,6 +54,19 @@ class CXDiagnosticSetImpl {
4554

4655
void appendDiagnostic(std::unique_ptr<CXDiagnosticImpl> D);
4756

57+
void recordSourceFileContents(
58+
CXFile file, StringRef contents, CXSourceRange originalSourceRange);
59+
60+
Optional<StringRef> getSourceFileContents(
61+
CXFile file, CXSourceRange &originalSourceRange) {
62+
auto found = FileContents.find(file);
63+
if (found == FileContents.end())
64+
return None;
65+
66+
originalSourceRange = found->second.OriginalSourceRange;
67+
return found->second.Contents;
68+
}
69+
4870
bool empty() const {
4971
return Diagnostics.empty();
5072
}

clang/tools/libclang/CXLoadedDiagnostic.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,12 @@ class DiagLoader : serialized_diags::SerializedDiagnosticReader {
225225
unsigned Timestamp,
226226
StringRef Name) override;
227227

228+
std::error_code visitSourceFileContentsRecord(
229+
unsigned ID,
230+
const serialized_diags::Location &OriginalStartLoc,
231+
const serialized_diags::Location &OriginalEndLoc,
232+
StringRef Contents) override;
233+
228234
std::error_code visitFixitRecord(const serialized_diags::Location &Start,
229235
const serialized_diags::Location &End,
230236
StringRef CodeToInsert) override;
@@ -347,6 +353,28 @@ std::error_code DiagLoader::visitFilenameRecord(unsigned ID, unsigned Size,
347353
return std::error_code();
348354
}
349355

356+
std::error_code DiagLoader::visitSourceFileContentsRecord(
357+
unsigned ID,
358+
const serialized_diags::Location &OriginalStartLoc,
359+
const serialized_diags::Location &OriginalEndLoc,
360+
StringRef Contents
361+
) {
362+
CXSourceRange OriginalSourceRange;
363+
if (std::error_code EC = readRange(
364+
OriginalStartLoc, OriginalEndLoc, OriginalSourceRange))
365+
return EC;
366+
367+
auto file = const_cast<FileEntry *>(TopDiags->Files[ID]);
368+
if (!file)
369+
return reportInvalidFile("Source file contents for unknown file ID");
370+
371+
StringRef CopiedContents(TopDiags->copyString(Contents),
372+
Contents.size());
373+
374+
TopDiags->recordSourceFileContents(file, CopiedContents, OriginalSourceRange);
375+
return std::error_code();
376+
}
377+
350378
std::error_code
351379
DiagLoader::visitSourceRangeRecord(const serialized_diags::Location &Start,
352380
const serialized_diags::Location &End) {

clang/tools/libclang/libclang.map

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,8 @@ LLVM_16 {
484484
clang_experimental_DependencyScannerServiceOptions_setObjectStore;
485485
clang_experimental_DependencyScannerWorker_getFileDependencies_v5;
486486
clang_createAPISet;
487+
clang_getDiagnosticFileContents;
488+
clang_getDiagnosticFileOriginalSourceRange;
487489
clang_disposeAPISet;
488490
clang_getSymbolGraphForCursor;
489491
clang_getSymbolGraphForUSR;

0 commit comments

Comments
 (0)