Skip to content

Commit ef28e96

Browse files
authored
Add a super simple wrapper for a merged string table. (#119488)
Suggestions welcome on what to better name this -- `StringTable` as I currently have it seems too general, but wasn't sure what other name would be better. It currently has a *very* minimal API. I'm happy to expand it if folks have ideas for what API would be useful, but this actually seemed like it might be all we really need.
1 parent bff6fee commit ef28e96

File tree

3 files changed

+133
-0
lines changed

3 files changed

+133
-0
lines changed

llvm/include/llvm/ADT/StringTable.h

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
//===- StringTable.h - Table of strings tracked by offset -------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_ADT_STRING_TABLE_H
10+
#define LLVM_ADT_STRING_TABLE_H
11+
12+
#include "llvm/ADT/StringRef.h"
13+
#include <limits>
14+
15+
namespace llvm {
16+
17+
/// A table of densely packed, null-terminated strings indexed by offset.
18+
///
19+
/// This table abstracts a densely concatenated list of null-terminated strings,
20+
/// each of which can be referenced using an offset into the table.
21+
///
22+
/// This requires and ensures that the string at offset 0 is also the empty
23+
/// string. This lets zero-initialized offsets form empty strings and
24+
/// avoids non-zero initialization when using a string literal pointer would
25+
/// allow a null pointer.
26+
///
27+
/// The primary use case is having a single global string literal for the table
28+
/// contents, and offsets into it in other global data structures to avoid
29+
/// dynamic relocations of individual string literal pointers in those global
30+
/// data structures.
31+
class StringTable {
32+
StringRef Table;
33+
34+
public:
35+
// An offset into one of these packed string tables, used to select a string
36+
// within the table.
37+
//
38+
// Typically these are created by TableGen or other code generator from
39+
// computed offsets, and it just wraps that integer into a type until it is
40+
// used with the relevant table.
41+
//
42+
// We also ensure that the empty string is at offset zero and default
43+
// constructing this class gives you an offset of zero. This makes default
44+
// constructing this type work similarly (after indexing the table) to default
45+
// constructing a `StringRef`.
46+
class Offset {
47+
// Note that we ensure the empty string is at offset zero.
48+
unsigned Value = 0;
49+
50+
public:
51+
constexpr Offset() = default;
52+
constexpr Offset(unsigned Value) : Value(Value) {}
53+
54+
constexpr unsigned value() const { return Value; }
55+
};
56+
57+
// We directly handle string literals with a templated converting constructor
58+
// because we *don't* want to do `strlen` on them -- we fully expect null
59+
// bytes in this input. This is somewhat the opposite of how `StringLiteral`
60+
// works.
61+
template <size_t N>
62+
constexpr StringTable(const char (&RawTable)[N]) : Table(RawTable, N) {
63+
static_assert(N <= std::numeric_limits<unsigned>::max(),
64+
"We only support table sizes that can be indexed by an "
65+
"`unsigned` offset.");
66+
67+
// Note that we can only use `empty`, `data`, and `size` in these asserts to
68+
// support `constexpr`.
69+
assert(!Table.empty() && "Requires at least a valid empty string.");
70+
assert(Table.data()[0] == '\0' && "Offset zero must be the empty string.");
71+
// Ensure that `strlen` from any offset cannot overflow the end of the table
72+
// by insisting on a null byte at the end.
73+
assert(Table.data()[Table.size() - 1] == '\0' &&
74+
"Last byte must be a null byte.");
75+
}
76+
77+
// Get a string from the table starting with the provided offset. The returned
78+
// `StringRef` is in fact null terminated, and so can be converted safely to a
79+
// C-string if necessary for a system API.
80+
constexpr StringRef operator[](Offset O) const {
81+
assert(O.value() < Table.size() && "Out of bounds offset!");
82+
return Table.data() + O.value();
83+
}
84+
85+
/// Returns the byte size of the table.
86+
constexpr size_t size() const { return Table.size(); }
87+
};
88+
89+
} // namespace llvm
90+
91+
#endif // LLVM_ADT_STRING_TABLE_H

llvm/unittests/ADT/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ add_llvm_unittest(ADTTests
8686
StringRefTest.cpp
8787
StringSetTest.cpp
8888
StringSwitchTest.cpp
89+
StringTableTest.cpp
8990
TinyPtrVectorTest.cpp
9091
TrieRawHashMapTest.cpp
9192
TwineTest.cpp
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//===- llvm/unittest/ADT/StringTableTest.cpp - StringTable tests ----------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/ADT/StringTable.h"
10+
#include "gmock/gmock.h"
11+
#include "gtest/gtest.h"
12+
#include <cstdlib>
13+
14+
using namespace llvm;
15+
16+
namespace {
17+
18+
using ::testing::Eq;
19+
using ::testing::StrEq;
20+
21+
TEST(StringTableTest, Basic) {
  static constexpr char RawBytes[] = "\0test\0";
  constexpr StringTable Table = RawBytes;

  // Start with the limited set of operations that work in constant
  // expressions.
  static_assert(Table.size() == sizeof(RawBytes));
  static_assert(Table[0].empty());
  static_assert(Table[StringTable::Offset()].empty());
  static_assert(Table[1].size() == 4);

  // Repeat the checks at runtime with Google Test matchers, which verify the
  // actual contents and produce more complete error messages.
  EXPECT_THAT(Table[0], Eq(""));
  EXPECT_THAT(Table[StringTable::Offset()], Eq(""));
  EXPECT_THAT(Table[1], Eq("test"));

  // The underlying data must also be usable as a C-string.
  EXPECT_THAT(Table[1].data(), StrEq("test"));
}
40+
41+
} // anonymous namespace

0 commit comments

Comments
 (0)