-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ADT][NFC] Simplify SmallSet #109412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ADT][NFC] Simplify SmallSet #109412
Conversation
@llvm/pr-subscribers-clang Author: Victor Campos (vhscampos) Changes
Full diff: https://github.com/llvm/llvm-project/pull/109412.diff 2 Files Affected:
diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp
index 3c06d9bad1dc0d..fa1bfec2aacb9c 100644
--- a/clang/lib/Basic/TargetID.cpp
+++ b/clang/lib/Basic/TargetID.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Basic/TargetID.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/TargetParser.h"
diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h
index 630c98504261aa..8d7511bf0bc8d9 100644
--- a/llvm/include/llvm/ADT/SmallSet.h
+++ b/llvm/include/llvm/ADT/SmallSet.h
@@ -16,14 +16,10 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/iterator.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/type_traits.h"
#include <cstddef>
#include <functional>
#include <set>
-#include <type_traits>
#include <utility>
namespace llvm {
@@ -139,10 +135,6 @@ class SmallSet {
SmallVector<T, N> Vector;
std::set<T, C> Set;
- using VIterator = typename SmallVector<T, N>::const_iterator;
- using SIterator = typename std::set<T, C>::const_iterator;
- using mutable_iterator = typename SmallVector<T, N>::iterator;
-
// In small mode SmallPtrSet uses linear search for the elements, so it is
// not a good idea to choose this value too high. You may consider using a
// DenseSet<> instead if you expect many elements in the set.
@@ -163,13 +155,7 @@ class SmallSet {
}
/// count - Return 1 if the element is in the set, 0 otherwise.
- size_type count(const T &V) const {
- if (isSmall()) {
- // Since the collection is small, just do a linear search.
- return vfind(V) == Vector.end() ? 0 : 1;
- }
- return Set.count(V);
- }
+ size_type count(const T &V) const { return contains(V) ? 1 : 0; }
/// insert - Insert an element into the set if it isn't already there.
/// Returns a pair. The first value of it is an iterator to the inserted
@@ -181,7 +167,7 @@ class SmallSet {
return std::make_pair(const_iterator(I), Inserted);
}
- VIterator I = vfind(V);
+ auto I = std::find(Vector.begin(), Vector.end(), V);
if (I != Vector.end()) // Don't reinsert if it already exists.
return std::make_pair(const_iterator(I), false);
if (Vector.size() < N) {
@@ -206,11 +192,11 @@ class SmallSet {
bool erase(const T &V) {
if (!isSmall())
return Set.erase(V);
- for (mutable_iterator I = Vector.begin(), E = Vector.end(); I != E; ++I)
- if (*I == V) {
- Vector.erase(I);
- return true;
- }
+ auto I = std::find(Vector.begin(), Vector.end(), V);
+ if (I != Vector.end()) {
+ Vector.erase(I);
+ return true;
+ }
return false;
}
@@ -234,19 +220,12 @@ class SmallSet {
/// Check if the SmallSet contains the given element.
bool contains(const T &V) const {
if (isSmall())
- return vfind(V) != Vector.end();
+ return std::find(Vector.begin(), Vector.end(), V) != Vector.end();
return Set.find(V) != Set.end();
}
private:
bool isSmall() const { return Set.empty(); }
-
- VIterator vfind(const T &V) const {
- for (VIterator I = Vector.begin(), E = Vector.end(); I != E; ++I)
- if (*I == V)
- return I;
- return Vector.end();
- }
};
/// If this set is of pointer values, transparently switch over to using
|
@llvm/pr-subscribers-llvm-adt Author: Victor Campos (vhscampos) Changes
Full diff: https://github.com/llvm/llvm-project/pull/109412.diff 2 Files Affected:
diff --git a/clang/lib/Basic/TargetID.cpp b/clang/lib/Basic/TargetID.cpp
index 3c06d9bad1dc0d..fa1bfec2aacb9c 100644
--- a/clang/lib/Basic/TargetID.cpp
+++ b/clang/lib/Basic/TargetID.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Basic/TargetID.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/TargetParser.h"
diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h
index 630c98504261aa..8d7511bf0bc8d9 100644
--- a/llvm/include/llvm/ADT/SmallSet.h
+++ b/llvm/include/llvm/ADT/SmallSet.h
@@ -16,14 +16,10 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/iterator.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/type_traits.h"
#include <cstddef>
#include <functional>
#include <set>
-#include <type_traits>
#include <utility>
namespace llvm {
@@ -139,10 +135,6 @@ class SmallSet {
SmallVector<T, N> Vector;
std::set<T, C> Set;
- using VIterator = typename SmallVector<T, N>::const_iterator;
- using SIterator = typename std::set<T, C>::const_iterator;
- using mutable_iterator = typename SmallVector<T, N>::iterator;
-
// In small mode SmallPtrSet uses linear search for the elements, so it is
// not a good idea to choose this value too high. You may consider using a
// DenseSet<> instead if you expect many elements in the set.
@@ -163,13 +155,7 @@ class SmallSet {
}
/// count - Return 1 if the element is in the set, 0 otherwise.
- size_type count(const T &V) const {
- if (isSmall()) {
- // Since the collection is small, just do a linear search.
- return vfind(V) == Vector.end() ? 0 : 1;
- }
- return Set.count(V);
- }
+ size_type count(const T &V) const { return contains(V) ? 1 : 0; }
/// insert - Insert an element into the set if it isn't already there.
/// Returns a pair. The first value of it is an iterator to the inserted
@@ -181,7 +167,7 @@ class SmallSet {
return std::make_pair(const_iterator(I), Inserted);
}
- VIterator I = vfind(V);
+ auto I = std::find(Vector.begin(), Vector.end(), V);
if (I != Vector.end()) // Don't reinsert if it already exists.
return std::make_pair(const_iterator(I), false);
if (Vector.size() < N) {
@@ -206,11 +192,11 @@ class SmallSet {
bool erase(const T &V) {
if (!isSmall())
return Set.erase(V);
- for (mutable_iterator I = Vector.begin(), E = Vector.end(); I != E; ++I)
- if (*I == V) {
- Vector.erase(I);
- return true;
- }
+ auto I = std::find(Vector.begin(), Vector.end(), V);
+ if (I != Vector.end()) {
+ Vector.erase(I);
+ return true;
+ }
return false;
}
@@ -234,19 +220,12 @@ class SmallSet {
/// Check if the SmallSet contains the given element.
bool contains(const T &V) const {
if (isSmall())
- return vfind(V) != Vector.end();
+ return std::find(Vector.begin(), Vector.end(), V) != Vector.end();
return Set.find(V) != Set.end();
}
private:
bool isSmall() const { return Set.empty(); }
-
- VIterator vfind(const T &V) const {
- for (VIterator I = Vector.begin(), E = Vector.end(); I != E; ++I)
- if (*I == V)
- return I;
- return Vector.end();
- }
};
/// If this set is of pointer values, transparently switch over to using
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Could you also add [NFC] to the PR title?
- Remove dependence on `STLExtras.h`. - Remove unused header inclusions. - Make `count` use `contains` for deduplication. - Replace hand-written linear scans on Vector by `std::find`.
0b48b3b
to
3877c99
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/6362 Here is the relevant piece of the build log for the reference
|
Looks like this change has a small negative compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=0c31ea5a09d854d5891eac40629f6a17a66fdcf7&to=ce6c236c965dc1bb5fa2257e17ea253a015705cc&stat=instructions:u Not obvious to me why that should be. |
Okay, I tried a few things, and can reproduce the regression just by using std::find inside vfind: 21f9869 My best guess at the cause would be the forced unroll in: https://github.com/gcc-mirror/gcc/blob/819098dc71f2079aedc15a904ab5f17f0788d991/libstdc%2B%2B-v3/include/bits/stl_algobase.h#L2107 These finds works on small numbers of iterations (often just up to 4), so probably that forced unroll is not profitable. It may make sense to restore vfind with a comment on why it's there. |
Thanks for the investigation @nikic . I'll have a patch restoring |
This patch restores handwritten linear searches instead of the use of std::find. After PR llvm#109412, a performance regression was observed that's caused by the use of std::find for linear searches. The exact cause wasn't pinpointed, but, at the time of writing, the most likely culprit is the forced loop unrolling in the definition of libstdc++'s std::find. Presumably this is done to optimise for larger containers. However for the case of small containers such as SmallVector, this actually hurts performance.
This patch restores handwritten linear searches instead of the use of std::find. After PR #109412, a performance regression was observed that's caused by the use of std::find for linear searches. The exact cause wasn't pinpointed, but, at the time of writing, the most likely culprit is the forced loop unrolling in the definition of libstdc++'s std::find. Presumably this is done to optimise for larger containers. However for the case of small containers such as SmallVector, this actually hurts performance.
STLExtras.h
.count
usecontains
for deduplication.std::find
.