Skip to content

[StrTable] Switch diag group names to llvm::StringTable #123302

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions clang/lib/Basic/DiagnosticIDs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringTable.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Path.h"
#include <map>
Expand Down Expand Up @@ -576,11 +577,7 @@ namespace {
uint16_t SubGroups;
StringRef Documentation;

// String is stored with a pascal-style length byte.
StringRef getName() const {
return StringRef(DiagGroupNames + NameOffset + 1,
DiagGroupNames[NameOffset]);
}
StringRef getName() const { return DiagGroupNames[NameOffset]; }
};
}

Expand Down Expand Up @@ -627,11 +624,12 @@ StringRef DiagnosticIDs::getWarningOptionForDiag(unsigned DiagID) {

std::vector<std::string> DiagnosticIDs::getDiagnosticFlags() {
std::vector<std::string> Res{"-W", "-Wno-"};
for (size_t I = 1; DiagGroupNames[I] != '\0';) {
std::string Diag(DiagGroupNames + I + 1, DiagGroupNames[I]);
I += DiagGroupNames[I] + 1;
Res.push_back("-W" + Diag);
Res.push_back("-Wno-" + Diag);
for (StringRef Name : DiagGroupNames) {
if (Name.empty())
continue;

Res.push_back((Twine("-W") + Name).str());
Res.push_back((Twine("-Wno-") + Name).str());
}

return Res;
Expand Down
3 changes: 2 additions & 1 deletion clang/tools/diagtool/DiagnosticNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "DiagnosticNames.h"
#include "clang/Basic/AllDiagnostics.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringTable.h"

using namespace clang;
using namespace diagtool;
Expand Down Expand Up @@ -68,7 +69,7 @@ static const GroupRecord OptionTable[] = {
};

llvm::StringRef GroupRecord::getName() const {
return StringRef(DiagGroupNames + NameOffset + 1, DiagGroupNames[NameOffset]);
return DiagGroupNames[NameOffset];
}

GroupRecord::subgroup_iterator GroupRecord::subgroup_begin() const {
Expand Down
28 changes: 10 additions & 18 deletions clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1782,19 +1782,12 @@ static void emitDiagArrays(DiagsInGroupTy &DiagsInGroup,

/// Emit a list of group names.
///
/// This creates a long string which by itself contains a list of pascal style
/// strings, which consist of a length byte directly followed by the string.
///
/// \code
/// static const char DiagGroupNames[] = {
/// \000\020#pragma-messages\t#warnings\020CFString-literal"
/// };
/// \endcode
/// This creates an `llvm::StringTable` of all the diagnostic group names.
static void emitDiagGroupNames(const StringToOffsetTable &GroupNames,
raw_ostream &OS) {
OS << "static const char DiagGroupNames[] = {\n";
GroupNames.EmitString(OS);
OS << "};\n\n";
GroupNames.EmitStringLiteralDef(
OS, "static constexpr llvm::StringTable DiagGroupNames");
OS << "\n";
}

/// Emit diagnostic arrays and related data structures.
Expand All @@ -1806,7 +1799,7 @@ static void emitDiagGroupNames(const StringToOffsetTable &GroupNames,
/// #ifdef GET_DIAG_ARRAYS
/// static const int16_t DiagArrays[];
/// static const int16_t DiagSubGroups[];
/// static const char DiagGroupNames[];
/// static constexpr llvm::StringTable DiagGroupNames;
/// #endif
/// \endcode
static void emitAllDiagArrays(DiagsInGroupTy &DiagsInGroup,
Expand Down Expand Up @@ -1858,9 +1851,7 @@ static void emitDiagTable(DiagsInGroupTy &DiagsInGroup,
"0123456789!@#$%^*-+=:?") != std::string::npos)
PrintFatalError("Invalid character in diagnostic group '" + Name + "'");
OS << Name << " */, ";
// Store a pascal-style length byte at the beginning of the string.
std::string PascalName = char(Name.size()) + Name.str();
OS << *GroupNames.GetStringOffset(PascalName) << ", ";
OS << *GroupNames.GetStringOffset(Name) << ", ";

// Special handling for 'pedantic'.
const bool IsPedantic = Name == "pedantic";
Expand Down Expand Up @@ -1948,10 +1939,11 @@ void clang::EmitClangDiagGroups(const RecordKeeper &Records, raw_ostream &OS) {
inferPedantic.compute(&DiagsInPedantic, &GroupsInPedantic);

StringToOffsetTable GroupNames;
// Add an empty string to the table first so we can use `llvm::StringTable`.
// TODO: Factor this into `StringToOffsetTable`.
GroupNames.GetOrAddStringOffset("");
for (const auto &[Name, Group] : DiagsInGroup) {
// Store a pascal-style length byte at the beginning of the string.
std::string PascalName = char(Name.size()) + Name.str();
GroupNames.GetOrAddStringOffset(PascalName, false);
GroupNames.GetOrAddStringOffset(Name);
}

emitAllDiagArrays(DiagsInGroup, DiagsInPedantic, GroupsInPedantic, GroupNames,
Expand Down
53 changes: 52 additions & 1 deletion llvm/include/llvm/ADT/StringTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#define LLVM_ADT_STRING_TABLE_H

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include <iterator>
#include <limits>

namespace llvm {
Expand Down Expand Up @@ -51,6 +53,14 @@ class StringTable {
constexpr Offset() = default;
constexpr Offset(unsigned Value) : Value(Value) {}

friend constexpr bool operator==(const Offset &LHS, const Offset &RHS) {
return LHS.Value == RHS.Value;
}

friend constexpr bool operator!=(const Offset &LHS, const Offset &RHS) {
return LHS.Value != RHS.Value;
}

constexpr unsigned value() const { return Value; }
};

Expand All @@ -69,9 +79,13 @@ class StringTable {
assert(!Table.empty() && "Requires at least a valid empty string.");
assert(Table.data()[0] == '\0' && "Offset zero must be the empty string.");
// Ensure that `strlen` from any offset cannot overflow the end of the table
// by insisting on a null byte at the end.
// by insisting on a null byte at the end. We also insist on the last string
// within the table being *separately* null terminated. This structure is
// used to enable predictable iteration over all the strings when needed.
assert(Table.data()[Table.size() - 1] == '\0' &&
"Last byte must be a null byte.");
assert(Table.data()[Table.size() - 2] == '\0' &&
"Next-to-last byte must be a null byte.");
}

// Get a string from the table starting with the provided offset. The returned
Expand All @@ -84,6 +98,43 @@ class StringTable {

/// Returns the byte size of the table.
constexpr size_t size() const { return Table.size(); }

class Iterator
: public iterator_facade_base<Iterator, std::forward_iterator_tag,
const StringRef> {
friend StringTable;

const StringTable *Table;
Offset O;

// A cache of one value to allow `*` to return a reference.
mutable StringRef S;

explicit constexpr Iterator(const StringTable &Table, Offset O)
: Table(&Table), O(O) {}

public:
constexpr Iterator(const Iterator &RHS) = default;
constexpr Iterator(Iterator &&RHS) = default;

bool operator==(const Iterator &RHS) const {
assert(Table == RHS.Table && "Compared iterators for unrelated tables!");
return O == RHS.O;
}

const StringRef &operator*() const {
S = (*Table)[O];
return S;
}

Iterator &operator++() {
O = O.value() + (*Table)[O].size() + 1;
return *this;
}
};

constexpr Iterator begin() const { return Iterator(*this, 0); }
constexpr Iterator end() const { return Iterator(*this, size() - 1); }
};

} // namespace llvm
Expand Down
Loading