Skip to content

Commit da058f5

Browse files
committed
Switch diagnostic group names to use llvm::StringTable
Previously, they used a hand-rolled Pascal-string encoding different from all the other string tables produced from TableGen. This moves them to use the newly introduced runtime abstraction, and enhances that abstraction to support iterating over the string table as used in this case. From what I can tell the Pascal-string encoding isn't critical here to avoid expensive `strlen` calls, so I think this is a simpler and more consistent model. But if folks would prefer a Pascal-string style encoding, I can instead work to switch the `StringTable` abstraction towards that. It would require some tricky tradeoffs though to make it reasonably general: either using 4 bytes instead of 1 byte to encode the size, or having a fallback to `strlen` for long strings.
1 parent 7253c6f commit da058f5

File tree

4 files changed

+69
-30
lines changed

4 files changed

+69
-30
lines changed

clang/lib/Basic/DiagnosticIDs.cpp

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "clang/Basic/SourceManager.h"
1717
#include "llvm/ADT/STLExtras.h"
1818
#include "llvm/ADT/SmallVector.h"
19+
#include "llvm/ADT/StringTable.h"
1920
#include "llvm/Support/ErrorHandling.h"
2021
#include "llvm/Support/Path.h"
2122
#include <map>
@@ -618,11 +619,7 @@ namespace {
618619
uint16_t SubGroups;
619620
StringRef Documentation;
620621

621-
// String is stored with a pascal-style length byte.
622-
StringRef getName() const {
623-
return StringRef(DiagGroupNames + NameOffset + 1,
624-
DiagGroupNames[NameOffset]);
625-
}
622+
StringRef getName() const { return DiagGroupNames[NameOffset]; }
626623
};
627624
}
628625

@@ -669,11 +666,12 @@ StringRef DiagnosticIDs::getWarningOptionForDiag(unsigned DiagID) {
669666

670667
std::vector<std::string> DiagnosticIDs::getDiagnosticFlags() {
671668
std::vector<std::string> Res{"-W", "-Wno-"};
672-
for (size_t I = 1; DiagGroupNames[I] != '\0';) {
673-
std::string Diag(DiagGroupNames + I + 1, DiagGroupNames[I]);
674-
I += DiagGroupNames[I] + 1;
675-
Res.push_back("-W" + Diag);
676-
Res.push_back("-Wno-" + Diag);
669+
for (StringRef Name : DiagGroupNames) {
670+
if (Name.empty())
671+
continue;
672+
673+
Res.push_back((Twine("-W") + Name).str());
674+
Res.push_back((Twine("-Wno-") + Name).str());
677675
}
678676

679677
return Res;

clang/tools/diagtool/DiagnosticNames.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "DiagnosticNames.h"
1010
#include "clang/Basic/AllDiagnostics.h"
1111
#include "llvm/ADT/STLExtras.h"
12+
#include "llvm/ADT/StringTable.h"
1213

1314
using namespace clang;
1415
using namespace diagtool;
@@ -74,7 +75,7 @@ static const GroupRecord OptionTable[] = {
7475
};
7576

7677
llvm::StringRef GroupRecord::getName() const {
77-
return StringRef(DiagGroupNames + NameOffset + 1, DiagGroupNames[NameOffset]);
78+
return DiagGroupNames[NameOffset];
7879
}
7980

8081
GroupRecord::subgroup_iterator GroupRecord::subgroup_begin() const {

clang/utils/TableGen/ClangDiagnosticsEmitter.cpp

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,19 +1782,12 @@ static void emitDiagArrays(DiagsInGroupTy &DiagsInGroup,
17821782

17831783
/// Emit a list of group names.
17841784
///
1785-
/// This creates a long string which by itself contains a list of pascal style
1786-
/// strings, which consist of a length byte directly followed by the string.
1787-
///
1788-
/// \code
1789-
/// static const char DiagGroupNames[] = {
1790-
/// \000\020#pragma-messages\t#warnings\020CFString-literal"
1791-
/// };
1792-
/// \endcode
1785+
/// This creates an `llvm::StringTable` of all the diagnostic group names.
17931786
static void emitDiagGroupNames(const StringToOffsetTable &GroupNames,
17941787
raw_ostream &OS) {
1795-
OS << "static const char DiagGroupNames[] = {\n";
1796-
GroupNames.EmitString(OS);
1797-
OS << "};\n\n";
1788+
GroupNames.EmitStringLiteralDef(
1789+
OS, "static constexpr llvm::StringTable DiagGroupNames");
1790+
OS << "\n";
17981791
}
17991792

18001793
/// Emit diagnostic arrays and related data structures.
@@ -1806,7 +1799,7 @@ static void emitDiagGroupNames(const StringToOffsetTable &GroupNames,
18061799
/// #ifdef GET_DIAG_ARRAYS
18071800
/// static const int16_t DiagArrays[];
18081801
/// static const int16_t DiagSubGroups[];
1809-
/// static const char DiagGroupNames[];
1802+
/// static constexpr llvm::StringTable DiagGroupNames;
18101803
/// #endif
18111804
/// \endcode
18121805
static void emitAllDiagArrays(DiagsInGroupTy &DiagsInGroup,
@@ -1858,9 +1851,7 @@ static void emitDiagTable(DiagsInGroupTy &DiagsInGroup,
18581851
"0123456789!@#$%^*-+=:?") != std::string::npos)
18591852
PrintFatalError("Invalid character in diagnostic group '" + Name + "'");
18601853
OS << Name << " */, ";
1861-
// Store a pascal-style length byte at the beginning of the string.
1862-
std::string PascalName = char(Name.size()) + Name.str();
1863-
OS << *GroupNames.GetStringOffset(PascalName) << ", ";
1854+
OS << *GroupNames.GetStringOffset(Name) << ", ";
18641855

18651856
// Special handling for 'pedantic'.
18661857
const bool IsPedantic = Name == "pedantic";
@@ -1948,10 +1939,11 @@ void clang::EmitClangDiagGroups(const RecordKeeper &Records, raw_ostream &OS) {
19481939
inferPedantic.compute(&DiagsInPedantic, &GroupsInPedantic);
19491940

19501941
StringToOffsetTable GroupNames;
1942+
// Add an empty string to the table first so we can use `llvm::StringTable`.
1943+
// TODO: Factor this into `StringToOffsetTable`.
1944+
GroupNames.GetOrAddStringOffset("");
19511945
for (const auto &[Name, Group] : DiagsInGroup) {
1952-
// Store a pascal-style length byte at the beginning of the string.
1953-
std::string PascalName = char(Name.size()) + Name.str();
1954-
GroupNames.GetOrAddStringOffset(PascalName, false);
1946+
GroupNames.GetOrAddStringOffset(Name);
19551947
}
19561948

19571949
emitAllDiagArrays(DiagsInGroup, DiagsInPedantic, GroupsInPedantic, GroupNames,

llvm/include/llvm/ADT/StringTable.h

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
#define LLVM_ADT_STRING_TABLE_H
1111

1212
#include "llvm/ADT/StringRef.h"
13+
#include "llvm/ADT/iterator.h"
14+
#include <iterator>
1315
#include <limits>
1416

1517
namespace llvm {
@@ -51,6 +53,14 @@ class StringTable {
5153
constexpr Offset() = default;
5254
constexpr Offset(unsigned Value) : Value(Value) {}
5355

56+
constexpr bool operator==(const Offset &RHS) const {
57+
return Value == RHS.Value;
58+
}
59+
60+
constexpr bool operator!=(const Offset &RHS) const {
61+
return Value != RHS.Value;
62+
}
63+
5464
constexpr unsigned value() const { return Value; }
5565
};
5666

@@ -69,9 +79,13 @@ class StringTable {
6979
assert(!Table.empty() && "Requires at least a valid empty string.");
7080
assert(Table.data()[0] == '\0' && "Offset zero must be the empty string.");
7181
// Ensure that `strlen` from any offset cannot overflow the end of the table
72-
// by insisting on a null byte at the end.
82+
// by insisting on a null byte at the end. We also insist on the last string
83+
// within the table being *separately* null terminated. This structure is
84+
// used to enable predictable iteration over all the strings when needed.
7385
assert(Table.data()[Table.size() - 1] == '\0' &&
7486
"Last byte must be a null byte.");
87+
assert(Table.data()[Table.size() - 2] == '\0' &&
88+
"Next-to-last byte must be a null byte.");
7589
}
7690

7791
// Get a string from the table starting with the provided offset. The returned
@@ -84,6 +98,40 @@ class StringTable {
8498

8599
/// Returns the byte size of the table.
86100
constexpr size_t size() const { return Table.size(); }
101+
102+
class Iterator
103+
: public iterator_facade_base<Iterator, std::forward_iterator_tag,
104+
const StringRef> {
105+
friend StringTable;
106+
107+
const StringTable *Table;
108+
Offset O;
109+
110+
// A cache of one value to allow `*` to return a reference.
111+
mutable StringRef S;
112+
113+
explicit constexpr Iterator(const StringTable &Table, Offset O)
114+
: Table(&Table), O(O) {}
115+
116+
public:
117+
constexpr Iterator(const Iterator &RHS) = default;
118+
constexpr Iterator(Iterator &&RHS) = default;
119+
120+
bool operator==(const Iterator &RHS) const {
121+
assert(Table == RHS.Table && "Compared iterators for unrelated tables!");
122+
return O == RHS.O;
123+
}
124+
125+
const StringRef &operator*() const { return (S = (*Table)[O]); }
126+
127+
Iterator &operator++() {
128+
O = O.value() + (*Table)[O].size() + 1;
129+
return *this;
130+
}
131+
};
132+
133+
constexpr Iterator begin() const { return Iterator(*this, 0); }
134+
constexpr Iterator end() const { return Iterator(*this, size() - 1); }
87135
};
88136

89137
} // namespace llvm

0 commit comments

Comments
 (0)