Skip to content

Commit 4ae5157

Browse files
authored
Introduce paged vector (#66430)
The goal of the class is to be an (almost) drop-in replacement for SmallVector and std::vector when those are presized and filled later, as happens in SourceManager and ASTReader. By doing so, a sparsely accessed PagedVector can profit from a reduced memory footprint.
1 parent abcaebf commit 4ae5157

File tree

8 files changed

+627
-10
lines changed

8 files changed

+627
-10
lines changed

clang/include/clang/Basic/SourceManager.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "llvm/ADT/DenseMap.h"
4444
#include "llvm/ADT/DenseSet.h"
4545
#include "llvm/ADT/IntrusiveRefCntPtr.h"
46+
#include "llvm/ADT/PagedVector.h"
4647
#include "llvm/ADT/PointerIntPair.h"
4748
#include "llvm/ADT/SmallVector.h"
4849
#include "llvm/ADT/StringRef.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
699700
///
700701
/// Negative FileIDs are indexes into this table. To get from ID to an index,
701702
/// use (-ID - 2).
702-
SmallVector<SrcMgr::SLocEntry, 0> LoadedSLocEntryTable;
703+
llvm::PagedVector<SrcMgr::SLocEntry> LoadedSLocEntryTable;
703704

704705
/// The starting offset of the next local SLocEntry.
705706
///

clang/include/clang/Serialization/ASTReader.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "llvm/ADT/DenseSet.h"
3939
#include "llvm/ADT/IntrusiveRefCntPtr.h"
4040
#include "llvm/ADT/MapVector.h"
41+
#include "llvm/ADT/PagedVector.h"
4142
#include "llvm/ADT/STLExtras.h"
4243
#include "llvm/ADT/SetVector.h"
4344
#include "llvm/ADT/SmallPtrSet.h"
@@ -487,7 +488,7 @@ class ASTReader
487488
///
488489
/// When the pointer at index I is non-NULL, the type with
489490
/// ID = (I + 1) << FastQual::Width has already been loaded
490-
std::vector<QualType> TypesLoaded;
491+
llvm::PagedVector<QualType> TypesLoaded;
491492

492493
using GlobalTypeMapType =
493494
ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>;
@@ -501,7 +502,7 @@ class ASTReader
501502
///
502503
/// When the pointer at index I is non-NULL, the declaration with ID
503504
/// = I + 1 has already been loaded.
504-
std::vector<Decl *> DeclsLoaded;
505+
llvm::PagedVector<Decl *> DeclsLoaded;
505506

506507
using GlobalDeclMapType =
507508
ContinuousRangeMap<serialization::DeclID, ModuleFile *, 4>;

clang/lib/Basic/SourceManager.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2343,11 +2343,11 @@ SourceManager::MemoryBufferSizes SourceManager::getMemoryBufferSizes() const {
23432343
}
23442344

23452345
size_t SourceManager::getDataStructureSizes() const {
2346-
size_t size = llvm::capacity_in_bytes(MemBufferInfos)
2347-
+ llvm::capacity_in_bytes(LocalSLocEntryTable)
2348-
+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
2349-
+ llvm::capacity_in_bytes(SLocEntryLoaded)
2350-
+ llvm::capacity_in_bytes(FileInfos);
2346+
size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
2347+
llvm::capacity_in_bytes(LocalSLocEntryTable) +
2348+
llvm::capacity_in_bytes(LoadedSLocEntryTable) +
2349+
llvm::capacity_in_bytes(SLocEntryLoaded) +
2350+
llvm::capacity_in_bytes(FileInfos);
23512351

23522352
if (OverriddenFilesInfo)
23532353
size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);

clang/lib/Serialization/ASTReader.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7946,9 +7946,10 @@ void ASTReader::PrintStats() {
79467946
std::fprintf(stderr, "*** AST File Statistics:\n");
79477947

79487948
unsigned NumTypesLoaded =
7949-
TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
7949+
TypesLoaded.size() - llvm::count(TypesLoaded.materialized(), QualType());
79507950
unsigned NumDeclsLoaded =
7951-
DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
7951+
DeclsLoaded.size() -
7952+
llvm::count(DeclsLoaded.materialized(), (Decl *)nullptr);
79527953
unsigned NumIdentifiersLoaded =
79537954
IdentifiersLoaded.size() -
79547955
llvm::count(IdentifiersLoaded, (IdentifierInfo *)nullptr);

llvm/docs/ProgrammersManual.rst

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1625,6 +1625,40 @@ SmallVector has grown a few other minor advantages over std::vector, causing
16251625
and is no longer "private to the implementation". A name like
16261626
``SmallVectorHeader`` might be more appropriate.
16271627

1628+
.. _dss_pagedvector:
1629+
1630+
llvm/ADT/PagedVector.h
1631+
^^^^^^^^^^^^^^^^^^^^^^
1632+
1633+
``PagedVector<Type, PageSize>`` is a random access container that allocates
1634+
``PageSize`` elements of type ``Type`` the first time any element of a page is
1635+
accessed via the ``operator[]``. This is useful for cases where the number of
1636+
elements is known in advance; their actual initialization is expensive; and
1637+
they are sparsely used. This utility uses page-granular lazy initialization
1638+
when the element is accessed. When the number of used pages is small,
1639+
significant memory savings can be achieved.
1640+
1641+
The main advantage is that a ``PagedVector`` allows delaying the actual
1642+
allocation of the page until it's needed, at the extra cost of one pointer per
1643+
page and one extra indirection when accessing elements with their positional
1644+
index.
1645+
1646+
In order to minimise the memory footprint of this container, it's important to
1647+
balance the PageSize so that it's not too small (otherwise the overhead of the
1648+
pointer per page might become too high) and not too big (otherwise the memory
1649+
is wasted if the page is not fully used).
1650+
1651+
Moreover, while retaining the order of the elements based on their insertion
1652+
index, like a vector, iterating over the elements via ``begin()`` and ``end()``
1653+
is not provided in the API, due to the fact that accessing the elements in order
1654+
would allocate all the iterated pages, defeating memory savings and the purpose
1655+
of the ``PagedVector``.
1656+
1657+
Finally, ``materialized_begin()`` and ``materialized_end()`` iterators are
1658+
provided to access the elements associated with the accessed pages, which could
1659+
speed up operations that need to iterate over initialized elements in a
1660+
non-ordered manner.
1661+
16281662
.. _dss_vector:
16291663

16301664
<vector>

llvm/include/llvm/ADT/PagedVector.h

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
//===- llvm/ADT/PagedVector.h - 'Lazily allocated' vectors --*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines the PagedVector class.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
#ifndef LLVM_ADT_PAGEDVECTOR_H
13+
#define LLVM_ADT_PAGEDVECTOR_H
14+
15+
#include "llvm/ADT/PointerIntPair.h"
16+
#include "llvm/ADT/SmallVector.h"
17+
#include "llvm/ADT/iterator_range.h"
18+
#include "llvm/Support/Allocator.h"
19+
#include <cassert>
20+
#include <vector>
21+
22+
namespace llvm {
23+
/// A vector that allocates memory in pages.
24+
///
25+
/// Order is kept, but memory is allocated only when one element of the page is
26+
/// accessed. This introduces a level of indirection, but it is useful when you
27+
/// have a sparsely initialised vector where the full size is allocated upfront.
28+
///
29+
/// As a side effect the elements are initialised later than in a normal vector.
30+
/// On the first access to one of the elements of a given page, all the elements
31+
/// of the page are initialised. This also means that the elements of the page
32+
/// are initialised beyond the size of the vector.
33+
///
34+
/// Similarly on destruction the elements are destroyed only when the page is
35+
/// not needed anymore, delaying invoking the destructor of the elements.
36+
///
37+
/// Notice that this has iterators only on materialized elements. This
38+
/// is deliberately done under the assumption you would dereference the elements
39+
/// while iterating, therefore materialising them and losing the gains in terms
40+
/// of memory usage this container provides. If you have such a use case, you
41+
/// probably want to use a normal std::vector or a llvm::SmallVector.
42+
template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
43+
static_assert(PageSize > 1, "PageSize must be greater than 0. Most likely "
44+
"you want it to be greater than 16.");
45+
/// The actual number of elements in the vector which can be accessed.
46+
size_t Size = 0;
47+
48+
/// One entry per page: a pointer to the first element of the page, or
49+
/// nullptr if the page has not been allocated yet.
50+
mutable SmallVector<T *, 0> PageToDataPtrs;
51+
/// Allocator backing the page data, plus a flag recording whether this
52+
/// vector owns it. All the elements of a page are allocated on the first
53+
/// access to any of them; they are value-initialised and stored contiguously.
54+
PointerIntPair<BumpPtrAllocator *, 1, bool> Allocator;
55+
56+
public:
57+
using value_type = T;
58+
59+
/// Default constructor. We build our own allocator and mark it as such with
60+
/// `true` in the second pair element.
61+
PagedVector() : Allocator(new BumpPtrAllocator, true) {}
62+
explicit PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
63+
assert(A && "Allocator cannot be nullptr");
64+
}
65+
66+
~PagedVector() {
67+
clear();
68+
// If we own the allocator, delete it.
69+
if (Allocator.getInt())
70+
delete Allocator.getPointer();
71+
}
72+
73+
// Forbid copy and move as we do not need them for the current use case.
74+
PagedVector(const PagedVector &) = delete;
75+
PagedVector(PagedVector &&) = delete;
76+
PagedVector &operator=(const PagedVector &) = delete;
77+
PagedVector &operator=(PagedVector &&) = delete;
78+
79+
/// Look up an element at position `Index`.
80+
/// If the associated page is not filled, it will be filled with default
81+
/// constructed elements.
82+
T &operator[](size_t Index) const {
83+
assert(Index < Size);
84+
assert(Index / PageSize < PageToDataPtrs.size());
85+
T *&PagePtr = PageToDataPtrs[Index / PageSize];
86+
// If the page was not yet allocated, allocate it.
87+
if (!PagePtr) {
88+
PagePtr = Allocator.getPointer()->template Allocate<T>(PageSize);
89+
// We need to invoke the default constructor on all the elements of the
90+
// page.
91+
std::uninitialized_value_construct_n(PagePtr, PageSize);
92+
}
93+
// Dereference the element in the page.
94+
return PagePtr[Index % PageSize];
95+
}
96+
97+
/// Return the capacity of the vector. I.e. the maximum size it can be
98+
/// expanded to with the resize method without allocating more pages.
99+
[[nodiscard]] size_t capacity() const {
100+
return PageToDataPtrs.size() * PageSize;
101+
}
102+
103+
/// Return the size of the vector.
104+
[[nodiscard]] size_t size() const { return Size; }
105+
106+
/// Resize the vector. Notice that the constructor of the elements will not
107+
/// be invoked until an element of a given page is accessed, at which point
108+
/// all the elements of the page will be constructed.
109+
///
110+
/// If the new size is smaller than the current size, the elements of the
111+
/// pages that are not needed anymore will be destroyed, however, elements of
112+
/// the last page will not be destroyed.
113+
///
114+
/// For this reason the usage of this vector is discouraged if you rely
115+
/// on the construction / destructor of the elements to be invoked.
116+
void resize(size_t NewSize) {
117+
if (NewSize == 0) {
118+
clear();
119+
return;
120+
}
121+
// Handle shrink case: destroy the elements in the pages that are not
122+
// needed any more and deallocate the pages.
123+
//
124+
// On the other hand, we do not destroy the extra elements in the last page,
125+
// because we might need them later and the logic is simpler if we do not
126+
// destroy them. This means that elements are only destroyed when the
127+
// page they belong to is destroyed. This is similar to what happens on
128+
// access of the elements of a page, where all the elements of the page are
129+
// constructed not only the one effectively needed.
130+
size_t NewLastPage = (NewSize - 1) / PageSize;
131+
if (NewSize < Size) {
132+
for (size_t I = NewLastPage + 1, N = PageToDataPtrs.size(); I < N; ++I) {
133+
T *Page = PageToDataPtrs[I];
134+
if (!Page)
135+
continue;
136+
// We need to invoke the destructor on all the elements of the page.
137+
std::destroy_n(Page, PageSize);
138+
Allocator.getPointer()->Deallocate(Page);
139+
}
140+
}
141+
142+
Size = NewSize;
143+
PageToDataPtrs.resize(NewLastPage + 1);
144+
}
145+
146+
[[nodiscard]] bool empty() const { return Size == 0; }
147+
148+
/// Clear the vector, i.e. clear the allocated pages, the whole page
149+
/// lookup index and reset the size.
150+
void clear() {
151+
Size = 0;
152+
for (T *Page : PageToDataPtrs) {
153+
if (Page == nullptr)
154+
continue;
155+
std::destroy_n(Page, PageSize);
156+
// If we do not own the allocator, deallocate the pages one by one.
157+
if (!Allocator.getInt())
158+
Allocator.getPointer()->Deallocate(Page);
159+
}
160+
// If we own the allocator, simply reset it.
161+
if (Allocator.getInt())
162+
Allocator.getPointer()->Reset();
163+
PageToDataPtrs.clear();
164+
}
165+
166+
/// Iterator on all the elements of the vector
167+
/// which have actually been constructed.
168+
class MaterializedIterator {
169+
const PagedVector *PV;
170+
size_t ElementIdx;
171+
172+
public:
173+
using iterator_category = std::forward_iterator_tag;
174+
using value_type = T;
175+
using difference_type = std::ptrdiff_t;
176+
using pointer = T *;
177+
using reference = T &;
178+
179+
MaterializedIterator(PagedVector const *PV, size_t ElementIdx)
180+
: PV(PV), ElementIdx(ElementIdx) {}
181+
182+
/// Pre-increment operator.
183+
///
184+
/// When incrementing the iterator, we skip the elements which have not
185+
/// been materialized yet.
186+
MaterializedIterator &operator++() {
187+
++ElementIdx;
188+
if (ElementIdx % PageSize == 0) {
189+
while (ElementIdx < PV->Size &&
190+
!PV->PageToDataPtrs[ElementIdx / PageSize])
191+
ElementIdx += PageSize;
192+
if (ElementIdx > PV->Size)
193+
ElementIdx = PV->Size;
194+
}
195+
196+
return *this;
197+
}
198+
199+
MaterializedIterator operator++(int) {
200+
MaterializedIterator Copy = *this;
201+
++*this;
202+
return Copy;
203+
}
204+
205+
T const &operator*() const {
206+
assert(ElementIdx < PV->Size);
207+
assert(PV->PageToDataPtrs[ElementIdx / PageSize]);
208+
T *PagePtr = PV->PageToDataPtrs[ElementIdx / PageSize];
209+
return PagePtr[ElementIdx % PageSize];
210+
}
211+
212+
friend bool operator==(MaterializedIterator const &LHS,
213+
MaterializedIterator const &RHS);
214+
friend bool operator!=(MaterializedIterator const &LHS,
215+
MaterializedIterator const &RHS);
216+
217+
[[nodiscard]] size_t getIndex() const { return ElementIdx; }
218+
};
219+
220+
/// Equality operator.
221+
friend bool operator==(MaterializedIterator const &LHS,
222+
MaterializedIterator const &RHS) {
223+
assert(LHS.PV == RHS.PV);
224+
// Make sure we are comparing either end iterators or iterators pointing
225+
// to materialized elements.
226+
// It should not be possible to build two iterators pointing to non
227+
// materialized elements.
228+
assert(LHS.ElementIdx == LHS.PV->Size ||
229+
(LHS.ElementIdx < LHS.PV->Size &&
230+
LHS.PV->PageToDataPtrs[LHS.ElementIdx / PageSize]));
231+
assert(RHS.ElementIdx == RHS.PV->Size ||
232+
(RHS.ElementIdx < RHS.PV->Size &&
233+
RHS.PV->PageToDataPtrs[RHS.ElementIdx / PageSize]));
234+
return LHS.ElementIdx == RHS.ElementIdx;
235+
}
236+
237+
friend bool operator!=(MaterializedIterator const &LHS,
238+
MaterializedIterator const &RHS) {
239+
return !(LHS == RHS);
240+
}
241+
242+
/// Iterators over the materialized elements of the vector.
243+
///
244+
/// This includes all the elements belonging to allocated pages,
245+
/// even if they have not been accessed yet. It's enough to access
246+
/// one element of a page to materialize all the elements of the page.
247+
MaterializedIterator materialized_begin() const {
248+
// Look for the first valid page.
249+
for (size_t ElementIdx = 0; ElementIdx < Size; ElementIdx += PageSize)
250+
if (PageToDataPtrs[ElementIdx / PageSize])
251+
return MaterializedIterator(this, ElementIdx);
252+
253+
return MaterializedIterator(this, Size);
254+
}
255+
256+
MaterializedIterator materialized_end() const {
257+
return MaterializedIterator(this, Size);
258+
}
259+
260+
[[nodiscard]] llvm::iterator_range<MaterializedIterator>
261+
materialized() const {
262+
return {materialized_begin(), materialized_end()};
263+
}
264+
};
265+
} // namespace llvm
266+
#endif // LLVM_ADT_PAGEDVECTOR_H

llvm/unittests/ADT/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ add_llvm_unittest(ADTTests
5151
MapVectorTest.cpp
5252
MoveOnly.cpp
5353
PackedVectorTest.cpp
54+
PagedVectorTest.cpp
5455
PointerEmbeddedIntTest.cpp
5556
PointerIntPairTest.cpp
5657
PointerSumTypeTest.cpp

0 commit comments

Comments
 (0)