Skip to content

Commit 7d1ba2f

Browse files
nikicAlexisPerry
authored andcommitted
[SmallPtrSet] Don't leave tombstones in small mode (llvm#96762)
When erasing elements in small mode, we currently leave behind tombstones. This means that insertion into the SmallPtrSet also has to check for these, making the operation more expensive than it really should be. We don't really need the tombstones in small mode, because we can just replace with the last element in the set instead. This changes the order, but SmallPtrSet order is fundamentally unstable anyway. However, not leaving tombstones means that the erase() operation now invalidates iterators. This means that consumers that want to remove elements while iterating over the set have to use remove_if() instead. If they fail to do so, there will be an assertion failure thanks to debug epochs, so any such cases are easy to detect (and I have already fixed all cases inside llvm at least).
1 parent e8fbf55 commit 7d1ba2f

File tree

3 files changed

+57
-62
lines changed

3 files changed

+57
-62
lines changed

llvm/docs/ProgrammersManual.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2066,8 +2066,10 @@ insertion/deleting/queries with low constant factors) and is very stingy with
20662066
malloc traffic.
20672067

20682068
Note that, unlike :ref:`std::set <dss_set>`, the iterators of ``SmallPtrSet``
2069-
are invalidated whenever an insertion occurs. Also, the values visited by the
2070-
iterators are not visited in sorted order.
2069+
are invalidated whenever an insertion or erasure occurs. The ``remove_if``
2070+
method can be used to remove elements while iterating over the set.
2071+
2072+
Also, the values visited by the iterators are not visited in sorted order.
20712073

20722074
.. _dss_stringset:
20732075

llvm/include/llvm/ADT/SmallPtrSet.h

Lines changed: 53 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -127,22 +127,11 @@ class SmallPtrSetImplBase : public DebugEpochBase {
127127
std::pair<const void *const *, bool> insert_imp(const void *Ptr) {
128128
if (isSmall()) {
129129
// Check to see if it is already in the set.
130-
const void **LastTombstone = nullptr;
131130
for (const void **APtr = SmallArray, **E = SmallArray + NumNonEmpty;
132131
APtr != E; ++APtr) {
133132
const void *Value = *APtr;
134133
if (Value == Ptr)
135134
return std::make_pair(APtr, false);
136-
if (Value == getTombstoneMarker())
137-
LastTombstone = APtr;
138-
}
139-
140-
// Did we find any tombstone marker?
141-
if (LastTombstone != nullptr) {
142-
*LastTombstone = Ptr;
143-
--NumTombstones;
144-
incrementEpoch();
145-
return std::make_pair(LastTombstone, true);
146135
}
147136

148137
// Nope, there isn't. If we stay small, just 'pushback' now.
@@ -161,14 +150,27 @@ class SmallPtrSetImplBase : public DebugEpochBase {
161150
/// that the derived class can check that the right type of pointer is passed
162151
/// in.
163152
bool erase_imp(const void * Ptr) {
164-
const void *const *P = find_imp(Ptr);
165-
if (P == EndPointer())
153+
if (isSmall()) {
154+
for (const void **APtr = SmallArray, **E = SmallArray + NumNonEmpty;
155+
APtr != E; ++APtr) {
156+
if (*APtr == Ptr) {
157+
*APtr = SmallArray[--NumNonEmpty];
158+
incrementEpoch();
159+
return true;
160+
}
161+
}
166162
return false;
163+
}
167164

168-
const void **Loc = const_cast<const void **>(P);
169-
assert(*Loc == Ptr && "broken find!");
170-
*Loc = getTombstoneMarker();
165+
auto *Bucket = FindBucketFor(Ptr);
166+
if (*Bucket != Ptr)
167+
return false;
168+
169+
*const_cast<const void **>(Bucket) = getTombstoneMarker();
171170
NumTombstones++;
171+
// Treat this consistently from an API perspective, even if we don't
172+
// actually invalidate iterators here.
173+
incrementEpoch();
172174
return true;
173175
}
174176

@@ -192,9 +194,9 @@ class SmallPtrSetImplBase : public DebugEpochBase {
192194
return EndPointer();
193195
}
194196

195-
private:
196197
bool isSmall() const { return CurArray == SmallArray; }
197198

199+
private:
198200
std::pair<const void *const *, bool> insert_imp_big(const void *Ptr);
199201

200202
const void * const *FindBucketFor(const void *Ptr) const;
@@ -351,17 +353,46 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase {
351353
return insert(Ptr).first;
352354
}
353355

354-
/// erase - If the set contains the specified pointer, remove it and return
355-
/// true, otherwise return false.
356+
/// Remove pointer from the set.
357+
///
358+
/// Returns whether the pointer was in the set. Invalidates iterators if
359+
/// true is returned. To remove elements while iterating over the set, use
360+
/// remove_if() instead.
356361
bool erase(PtrType Ptr) {
357362
return erase_imp(PtrTraits::getAsVoidPointer(Ptr));
358363
}
359364

360-
/// Remove elements that match the given predicate. Returns whether anything
361-
/// was removed.
365+
/// Remove elements that match the given predicate.
366+
///
367+
/// This method is a safe replacement for the following pattern, which is not
368+
/// valid, because the erase() calls would invalidate the iterator:
369+
///
370+
/// for (PtrType *Ptr : Set)
371+
/// if (Pred(P))
372+
/// Set.erase(P);
373+
///
374+
/// Returns whether anything was removed. It is safe to read the set inside
375+
/// the predicate function. However, the predicate must not modify the set
376+
/// itself, only indicate a removal by returning true.
362377
template <typename UnaryPredicate>
363378
bool remove_if(UnaryPredicate P) {
364379
bool Removed = false;
380+
if (isSmall()) {
381+
const void **APtr = SmallArray, **E = SmallArray + NumNonEmpty;
382+
while (APtr != E) {
383+
PtrType Ptr = PtrTraits::getFromVoidPointer(const_cast<void *>(*APtr));
384+
if (P(Ptr)) {
385+
*APtr = *--E;
386+
--NumNonEmpty;
387+
incrementEpoch();
388+
Removed = true;
389+
} else {
390+
++APtr;
391+
}
392+
}
393+
return Removed;
394+
}
395+
365396
for (const void **APtr = CurArray, **E = EndPointer(); APtr != E; ++APtr) {
366397
const void *Value = *APtr;
367398
if (Value == getTombstoneMarker() || Value == getEmptyMarker())
@@ -370,6 +401,7 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase {
370401
if (P(Ptr)) {
371402
*APtr = getTombstoneMarker();
372403
++NumTombstones;
404+
incrementEpoch();
373405
Removed = true;
374406
}
375407
}

llvm/unittests/ADT/SmallPtrSetTest.cpp

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -243,45 +243,6 @@ TEST(SmallPtrSetTest, SwapTest) {
243243
EXPECT_TRUE(a.count(&buf[3]));
244244
}
245245

246-
void checkEraseAndIterators(SmallPtrSetImpl<int*> &S) {
247-
int buf[3];
248-
249-
S.insert(&buf[0]);
250-
S.insert(&buf[1]);
251-
S.insert(&buf[2]);
252-
253-
// Iterators must still be valid after erase() calls;
254-
auto B = S.begin();
255-
auto M = std::next(B);
256-
auto E = S.end();
257-
EXPECT_TRUE(*B == &buf[0] || *B == &buf[1] || *B == &buf[2]);
258-
EXPECT_TRUE(*M == &buf[0] || *M == &buf[1] || *M == &buf[2]);
259-
EXPECT_TRUE(*B != *M);
260-
int *Removable = *std::next(M);
261-
// No iterator points to Removable now.
262-
EXPECT_TRUE(Removable == &buf[0] || Removable == &buf[1] ||
263-
Removable == &buf[2]);
264-
EXPECT_TRUE(Removable != *B && Removable != *M);
265-
266-
S.erase(Removable);
267-
268-
// B,M,E iterators should still be valid
269-
EXPECT_EQ(B, S.begin());
270-
EXPECT_EQ(M, std::next(B));
271-
EXPECT_EQ(E, S.end());
272-
EXPECT_EQ(std::next(M), E);
273-
}
274-
275-
TEST(SmallPtrSetTest, EraseTest) {
276-
// Test when set stays small.
277-
SmallPtrSet<int *, 8> B;
278-
checkEraseAndIterators(B);
279-
280-
// Test when set grows big.
281-
SmallPtrSet<int *, 2> A;
282-
checkEraseAndIterators(A);
283-
}
284-
285246
// Verify that dereferencing and iteration work.
286247
TEST(SmallPtrSetTest, dereferenceAndIterate) {
287248
int Ints[] = {0, 1, 2, 3, 4, 5, 6, 7};

0 commit comments

Comments
 (0)