Skip to content

Commit affd201

Browse files
author
Krzysztof Parzyszek
committed
Recommit r313647 now that GCC seems to accept the offering
Add some member types to MachineValueTypeSet::const_iterator so that iterator_traits can work with it.

Improve TableGen performance of -gen-dag-isel (motivated by X86 backend)

The introduction of parameterized register classes in r313271 caused the matcher generation code in TableGen to run much slower, particularly so in the unoptimized (debug) build. This patch recovers some of the lost performance.

Summary of changes:
- Cache the set of legal types in TypeInfer::getLegalTypes. The contents of this set do not change.
- Add LLVM_ATTRIBUTE_ALWAYS_INLINE to several small functions. Normally this would not be necessary, but in the debug build TableGen is not optimized, so this helps a little bit.
- Add an early exit from TypeSetByHwMode::operator== for the case when one or both arguments are "simple", i.e. only have one mode. This saves some time in GenerateVariants.
- Finally, replace the underlying storage type in TypeSetByHwMode::SetType with MachineValueTypeSet based on std::array instead of std::set. This significantly reduces the number of memory allocation calls.

I've done a number of experiments with the underlying type of InfoByHwMode. The type is a map, and for targets that do not use the parameterization, this map has only one entry. The best (unoptimized) performance, somewhat surprisingly, came from std::map, followed closely by std::unordered_map. DenseMap was the slowest by a large margin. Various hand-crafted solutions (emulating enough of the map interface not to make sweeping changes to the users) did not yield any observable improvements.

llvm-svn: 313660
1 parent e79dda3 commit affd201

File tree

3 files changed

+230
-45
lines changed

3 files changed

+230
-45
lines changed

llvm/utils/TableGen/CodeGenDAGPatterns.cpp

Lines changed: 52 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,16 @@ static inline bool isScalar(MVT VT) {
4343
return !VT.isVector();
4444
}
4545

46-
template <typename T, typename Predicate>
47-
static bool berase_if(std::set<T> &S, Predicate P) {
46+
template <typename Predicate>
47+
static bool berase_if(MachineValueTypeSet &S, Predicate P) {
4848
bool Erased = false;
49-
for (auto I = S.begin(); I != S.end(); ) {
50-
if (P(*I)) {
51-
Erased = true;
52-
I = S.erase(I);
53-
} else
54-
++I;
49+
// It is ok to iterate over MachineValueTypeSet and remove elements from it
50+
// at the same time.
51+
for (MVT T : S) {
52+
if (!P(T))
53+
continue;
54+
Erased = true;
55+
S.erase(T);
5556
}
5657
return Erased;
5758
}
@@ -125,7 +126,7 @@ bool TypeSetByHwMode::constrain(const TypeSetByHwMode &VTS) {
125126
unsigned M = I.first;
126127
if (M == DefaultMode || hasMode(M))
127128
continue;
128-
Map[M] = Map[DefaultMode];
129+
Map.insert({M, Map.at(DefaultMode)});
129130
Changed = true;
130131
}
131132
}
@@ -183,7 +184,9 @@ std::string TypeSetByHwMode::getAsString() const {
183184
}
184185

185186
std::string TypeSetByHwMode::getAsString(const SetType &S) {
186-
std::vector<MVT> Types(S.begin(), S.end());
187+
std::vector<MVT> Types;
188+
for (MVT T : S)
189+
Types.push_back(T);
187190
array_pod_sort(Types.begin(), Types.end());
188191

189192
std::stringstream str;
@@ -202,6 +205,12 @@ bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const {
202205
if (HaveDefault != VTS.hasDefault())
203206
return false;
204207

208+
if (isSimple()) {
209+
if (VTS.isSimple())
210+
return *begin() == *VTS.begin();
211+
return false;
212+
}
213+
205214
std::set<unsigned> Modes;
206215
for (auto &I : *this)
207216
Modes.insert(I.first);
@@ -253,18 +262,31 @@ bool TypeSetByHwMode::intersect(SetType &Out, const SetType &In) {
253262
// For example
254263
// { iPTR } * { i32 } -> { i32 }
255264
// { iPTR } * { i32 i64 } -> { iPTR }
256-
265+
// and
266+
// { iPTR i32 } * { i32 } -> { i32 }
267+
// { iPTR i32 } * { i32 i64 } -> { i32 i64 }
268+
// { iPTR i32 } * { i32 i64 i128 } -> { iPTR i32 }
269+
270+
// Compute the difference between the two sets in such a way that the
271+
// iPTR is in the set that is being subtracted. This is to see if there
272+
// are any extra scalars in the set without iPTR that are not in the
273+
// set containing iPTR. Then the iPTR could be considered a "wildcard"
274+
// matching these scalars. If there is only one such scalar, it would
275+
// replace the iPTR, if there are more, the iPTR would be retained.
257276
SetType Diff;
258277
if (InP) {
259-
std::copy_if(Out.begin(), Out.end(), std::inserter(Diff, Diff.end()),
260-
[&In](MVT T) { return !In.count(T); });
278+
Diff = Out;
279+
berase_if(Diff, [&In](MVT T) { return In.count(T); });
280+
// Pre-remove these elements and rely only on InP/OutP to determine
281+
// whether a change has been made.
261282
berase_if(Out, [&Diff](MVT T) { return Diff.count(T); });
262283
} else {
263-
std::copy_if(In.begin(), In.end(), std::inserter(Diff, Diff.end()),
264-
[&Out](MVT T) { return !Out.count(T); });
284+
Diff = In;
285+
berase_if(Diff, [&Out](MVT T) { return Out.count(T); });
265286
Out.erase(MVT::iPTR);
266287
}
267288

289+
// The actual intersection.
268290
bool Changed = berase_if(Out, Int);
269291
unsigned NumD = Diff.size();
270292
if (NumD == 0)
@@ -276,8 +298,9 @@ bool TypeSetByHwMode::intersect(SetType &Out, const SetType &In) {
276298
// being replaced).
277299
Changed |= OutP;
278300
} else {
301+
// Multiple elements from Out are now replaced with iPTR.
279302
Out.insert(MVT::iPTR);
280-
Changed |= InP;
303+
Changed |= !OutP;
281304
}
282305
return Changed;
283306
}
@@ -758,13 +781,12 @@ void TypeInfer::expandOverloads(TypeSetByHwMode &VTS) {
758781
void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out,
759782
const TypeSetByHwMode::SetType &Legal) {
760783
std::set<MVT> Ovs;
761-
for (auto I = Out.begin(); I != Out.end(); ) {
762-
if (I->isOverloaded()) {
763-
Ovs.insert(*I);
764-
I = Out.erase(I);
784+
for (MVT T : Out) {
785+
if (!T.isOverloaded())
765786
continue;
766-
}
767-
++I;
787+
Ovs.insert(T);
788+
// MachineValueTypeSet allows iteration and erasing.
789+
Out.erase(T);
768790
}
769791

770792
for (MVT Ov : Ovs) {
@@ -805,13 +827,15 @@ void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out,
805827
}
806828

807829
TypeSetByHwMode TypeInfer::getLegalTypes() {
830+
if (!LegalTypesCached) {
831+
// Stuff all types from all modes into the default mode.
832+
const TypeSetByHwMode &LTS = TP.getDAGPatterns().getLegalTypes();
833+
for (const auto &I : LTS)
834+
LegalCache.insert(I.second);
835+
LegalTypesCached = true;
836+
}
808837
TypeSetByHwMode VTS;
809-
TypeSetByHwMode::SetType &DS = VTS.getOrCreate(DefaultMode);
810-
const TypeSetByHwMode &LTS = TP.getDAGPatterns().getLegalTypes();
811-
812-
// Stuff all types from all modes into the default mode.
813-
for (const auto &I : LTS)
814-
DS.insert(I.second.begin(), I.second.end());
838+
VTS.getOrCreate(DefaultMode) = LegalCache;
815839
return VTS;
816840
}
817841

llvm/utils/TableGen/CodeGenDAGPatterns.h

Lines changed: 166 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,168 @@
2121
#include "llvm/ADT/SmallVector.h"
2222
#include "llvm/ADT/StringMap.h"
2323
#include "llvm/Support/ErrorHandling.h"
24+
#include "llvm/Support/MathExtras.h"
2425
#include <algorithm>
26+
#include <array>
2527
#include <map>
2628
#include <set>
2729
#include <vector>
2830

2931
namespace llvm {
30-
class Record;
31-
class Init;
32-
class ListInit;
33-
class DagInit;
34-
class SDNodeInfo;
35-
class TreePattern;
36-
class TreePatternNode;
37-
class CodeGenDAGPatterns;
38-
class ComplexPattern;
39-
40-
struct TypeSetByHwMode : public InfoByHwMode<std::set<MVT>> {
41-
typedef std::set<MVT> SetType;
32+
33+
class Record;
34+
class Init;
35+
class ListInit;
36+
class DagInit;
37+
class SDNodeInfo;
38+
class TreePattern;
39+
class TreePatternNode;
40+
class CodeGenDAGPatterns;
41+
class ComplexPattern;
42+
43+
/// This represents a set of MVTs. Since the underlying type for the MVT
44+
/// is uint8_t, there are at most 256 values. To reduce the number of memory
45+
/// allocations and deallocations, represent the set as a sequence of bits.
46+
/// To reduce the allocations even further, make MachineValueTypeSet own
47+
/// the storage and use std::array as the bit container.
48+
struct MachineValueTypeSet {
49+
static_assert(std::is_same<std::underlying_type<MVT::SimpleValueType>::type,
50+
uint8_t>::value,
51+
"Change uint8_t here to the SimpleValueType's type");
52+
static unsigned constexpr Capacity = std::numeric_limits<uint8_t>::max()+1;
53+
using WordType = uint64_t;
54+
static unsigned constexpr WordWidth = 8*sizeof(WordType);
55+
static unsigned constexpr NumWords = Capacity/WordWidth;
56+
static_assert(NumWords*WordWidth == Capacity,
57+
"Capacity should be a multiple of WordWidth");
58+
59+
LLVM_ATTRIBUTE_ALWAYS_INLINE
60+
MachineValueTypeSet() {
61+
clear();
62+
}
63+
64+
LLVM_ATTRIBUTE_ALWAYS_INLINE
65+
unsigned size() const {
66+
unsigned Count = 0;
67+
for (WordType W : Words)
68+
Count += countPopulation(W);
69+
return Count;
70+
}
71+
LLVM_ATTRIBUTE_ALWAYS_INLINE
72+
void clear() {
73+
std::memset(Words.data(), 0, NumWords*sizeof(WordType));
74+
}
75+
LLVM_ATTRIBUTE_ALWAYS_INLINE
76+
bool empty() const {
77+
for (WordType W : Words)
78+
if (W != 0)
79+
return false;
80+
return true;
81+
}
82+
LLVM_ATTRIBUTE_ALWAYS_INLINE
83+
unsigned count(MVT T) const {
84+
return (Words[T.SimpleTy / WordWidth] >> (T.SimpleTy % WordWidth)) & 1;
85+
}
86+
std::pair<MachineValueTypeSet&,bool> insert(MVT T) {
87+
bool V = count(T.SimpleTy);
88+
Words[T.SimpleTy / WordWidth] |= WordType(1) << (T.SimpleTy % WordWidth);
89+
return {*this, V};
90+
}
91+
MachineValueTypeSet &insert(const MachineValueTypeSet &S) {
92+
for (unsigned i = 0; i != NumWords; ++i)
93+
Words[i] |= S.Words[i];
94+
return *this;
95+
}
96+
LLVM_ATTRIBUTE_ALWAYS_INLINE
97+
void erase(MVT T) {
98+
Words[T.SimpleTy / WordWidth] &= ~(WordType(1) << (T.SimpleTy % WordWidth));
99+
}
100+
101+
struct const_iterator {
102+
// Some implementations of the C++ library require these traits to be
103+
// defined.
104+
using iterator_category = std::forward_iterator_tag;
105+
using value_type = MVT;
106+
using difference_type = ptrdiff_t;
107+
using pointer = const MVT*;
108+
using reference = const MVT&;
109+
110+
LLVM_ATTRIBUTE_ALWAYS_INLINE
111+
MVT operator*() const {
112+
assert(Pos != Capacity);
113+
return MVT::SimpleValueType(Pos);
114+
}
115+
LLVM_ATTRIBUTE_ALWAYS_INLINE
116+
const_iterator(const MachineValueTypeSet *S, bool End) : Set(S) {
117+
Pos = End ? Capacity : find_from_pos(0);
118+
}
119+
LLVM_ATTRIBUTE_ALWAYS_INLINE
120+
const_iterator &operator++() {
121+
assert(Pos != Capacity);
122+
Pos = find_from_pos(Pos+1);
123+
return *this;
124+
}
125+
126+
LLVM_ATTRIBUTE_ALWAYS_INLINE
127+
bool operator==(const const_iterator &It) const {
128+
return Set == It.Set && Pos == It.Pos;
129+
}
130+
LLVM_ATTRIBUTE_ALWAYS_INLINE
131+
bool operator!=(const const_iterator &It) const {
132+
return !operator==(It);
133+
}
134+
135+
private:
136+
unsigned find_from_pos(unsigned P) const {
137+
unsigned SkipWords = P / WordWidth;
138+
unsigned SkipBits = P % WordWidth;
139+
unsigned Count = SkipWords * WordWidth;
140+
141+
// If P is in the middle of a word, process it manually here, because
142+
// the trailing bits need to be masked off to use findFirstSet.
143+
if (SkipBits != 0) {
144+
WordType W = Set->Words[SkipWords];
145+
W &= maskLeadingOnes<WordType>(WordWidth-SkipBits);
146+
if (W != 0)
147+
return Count + findFirstSet(W);
148+
Count += WordWidth;
149+
SkipWords++;
150+
}
151+
152+
for (unsigned i = SkipWords; i != NumWords; ++i) {
153+
WordType W = Set->Words[i];
154+
if (W != 0)
155+
return Count + findFirstSet(W);
156+
Count += WordWidth;
157+
}
158+
return Capacity;
159+
}
160+
161+
const MachineValueTypeSet *Set;
162+
unsigned Pos;
163+
};
164+
165+
LLVM_ATTRIBUTE_ALWAYS_INLINE
166+
const_iterator begin() const { return const_iterator(this, false); }
167+
LLVM_ATTRIBUTE_ALWAYS_INLINE
168+
const_iterator end() const { return const_iterator(this, true); }
169+
170+
LLVM_ATTRIBUTE_ALWAYS_INLINE
171+
bool operator==(const MachineValueTypeSet &S) const {
172+
return Words == S.Words;
173+
}
174+
LLVM_ATTRIBUTE_ALWAYS_INLINE
175+
bool operator!=(const MachineValueTypeSet &S) const {
176+
return !operator==(S);
177+
}
178+
179+
private:
180+
friend struct const_iterator;
181+
std::array<WordType,NumWords> Words;
182+
};
183+
184+
struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
185+
using SetType = MachineValueTypeSet;
42186

43187
TypeSetByHwMode() = default;
44188
TypeSetByHwMode(const TypeSetByHwMode &VTS) = default;
@@ -56,19 +200,23 @@ struct TypeSetByHwMode : public InfoByHwMode<std::set<MVT>> {
56200

57201
bool isValueTypeByHwMode(bool AllowEmpty) const;
58202
ValueTypeByHwMode getValueTypeByHwMode() const;
203+
204+
LLVM_ATTRIBUTE_ALWAYS_INLINE
59205
bool isMachineValueType() const {
60206
return isDefaultOnly() && Map.begin()->second.size() == 1;
61207
}
62208

209+
LLVM_ATTRIBUTE_ALWAYS_INLINE
63210
MVT getMachineValueType() const {
64211
assert(isMachineValueType());
65212
return *Map.begin()->second.begin();
66213
}
67214

68215
bool isPossible() const;
216+
217+
LLVM_ATTRIBUTE_ALWAYS_INLINE
69218
bool isDefaultOnly() const {
70-
return Map.size() == 1 &&
71-
Map.begin()->first == DefaultMode;
219+
return Map.size() == 1 && Map.begin()->first == DefaultMode;
72220
}
73221

74222
bool insert(const ValueTypeByHwMode &VVT);
@@ -178,6 +326,10 @@ struct TypeInfer {
178326

179327
private:
180328
TypeSetByHwMode getLegalTypes();
329+
330+
/// Cached legal types.
331+
bool LegalTypesCached = false;
332+
TypeSetByHwMode::SetType LegalCache = {};
181333
};
182334

183335
/// Set type used to track multiply used variables in patterns

0 commit comments

Comments
 (0)