@@ -40,7 +40,7 @@ def _HasClass(tag, *classes):
40
40
return False
41
41
42
42
43
- def _ParseSymbolPage (symbol_page_html , symbol_name , qual_name ):
43
+ def _ParseSymbolPage (symbol_page_html , symbols ):
44
44
"""Parse symbol page and retrieve the include header defined in this page.
45
45
The symbol page provides header for the symbol, specifically in
46
46
"Defined in header <header>" section. An example:
@@ -51,8 +51,12 @@ def _ParseSymbolPage(symbol_page_html, symbol_name, qual_name):
51
51
52
52
Returns a list of (symbol_name, headers) pairs, one for each entry in symbols.
53
53
"""
54
- headers = set ( )
54
+ headers = collections . defaultdict ( set )
55
55
all_headers = set ()
56
+ symbol_names = {}
57
+ for symbol_name , qualified_symbol_name in symbols :
58
+ symbol_names [symbol_name ] = symbol_name
59
+ symbol_names [qualified_symbol_name ] = symbol_name
56
60
57
61
soup = BeautifulSoup (symbol_page_html , "html.parser" )
58
62
# Rows in table are like:
@@ -69,9 +73,10 @@ def _ParseSymbolPage(symbol_page_html, symbol_name, qual_name):
69
73
was_decl = True
70
74
# Symbols are in the first cell.
71
75
found_symbols = row .find ("td" ).stripped_strings
72
- if not (symbol_name in found_symbols or qual_name in found_symbols ):
73
- continue
74
- headers .update (current_headers )
76
+ for found_symbol in found_symbols :
77
+ symbol_name = symbol_names .get (found_symbol )
78
+ if symbol_name :
79
+ headers [symbol_name ].update (current_headers )
75
80
elif _HasClass (row , "t-dsc-header" ):
76
81
# If we saw a decl since the last header, this is a new block of headers
77
82
# for a new block of decls.
@@ -86,26 +91,28 @@ def _ParseSymbolPage(symbol_page_html, symbol_name, qual_name):
86
91
current_headers .append (header_code .text )
87
92
all_headers .add (header_code .text )
88
93
# If the symbol was never named, consider all named headers.
89
- return headers or all_headers
94
+ return [
95
+ (symbol_name , headers .get (symbol_name ) or all_headers )
96
+ for symbol_name , _ in symbols
97
+ ]
90
98
91
99
92
100
def _ParseSymbolVariant(caption):
    """Return the variant name spelled out in a parenthesized caption.

    Two caption layouts are recognized:
      * a single text node holding the whole variant, e.g. "(locale)";
      * an opening "(" text node, followed by a <code> tag, followed by a
        text node that starts with ")", e.g. (<code>std::complex</code>).

    Args:
      caption: the node to inspect; anything that is not a NavigableString
        containing "(" is rejected.

    Returns:
      The variant name as a string, or None when the caption matches neither
      layout.
    """
    looks_like_variant = isinstance(caption, NavigableString) and "(" in caption
    if not looks_like_variant:
        return None

    if ")" in caption.text:
        # Whole variant lives in this one text node: (locale), (algorithm), etc.
        return caption.text.strip(" ()")

    # Otherwise the name must sit in a <code> sibling that is closed by ")".
    code_node = caption.next_sibling
    if not (isinstance(code_node, Tag) and code_node.name == "code"):
        return None

    # (<code>std::complex</code>), etc.
    closing_node = code_node.next_sibling
    closes_paren = isinstance(
        closing_node, NavigableString
    ) and closing_node.text.startswith(")")
    return code_node.text if closes_paren else None
106
114
107
115
108
-
109
116
def _ParseIndexPage (index_page_html ):
110
117
"""Parse index page.
111
118
The index page lists all std symbols and hrefs to their detailed pages
@@ -137,9 +144,9 @@ def _ParseIndexPage(index_page_html):
137
144
return symbols
138
145
139
146
140
def _ReadSymbolPage(path, symbols):
    """Read one symbol page from disk and parse it for the given symbols.

    Args:
      path: filesystem path of the symbol page to read.
      symbols: passed through unchanged to _ParseSymbolPage.

    Returns:
      Whatever _ParseSymbolPage returns for the page contents and symbols.
    """
    with open(path) as page_file:
        page_html = page_file.read()
        return _ParseSymbolPage(page_html, symbols)
143
150
144
151
145
152
def _GetSymbols (pool , root_dir , index_page_name , namespace , variants_to_accept ):
@@ -158,33 +165,33 @@ def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept):
158
165
with open (index_page_path , "r" ) as f :
159
166
# Read each symbol page in parallel.
160
167
results = []  # promises of [(symbol_name, headers), ...], one per symbol page
168
+ symbols_by_page = collections .defaultdict (list )
161
169
for symbol_name , symbol_page_path , variant in _ParseIndexPage (f .read ()):
162
170
# Variant symbols (e.g. the std::locale version of isalpha) add ambiguity.
163
171
# FIXME: use these as a fallback rather than ignoring entirely.
164
172
qualified_symbol_name = (namespace or "" ) + symbol_name
165
- variants_for_symbol = variants_to_accept .get (
166
- qualified_symbol_name , ()
167
- )
173
+ variants_for_symbol = variants_to_accept .get (qualified_symbol_name , ())
168
174
if variant and variant not in variants_for_symbol :
169
175
continue
170
176
path = os .path .join (root_dir , symbol_page_path )
171
- if os .path .isfile (path ):
172
- results .append (
173
- (
174
- symbol_name ,
175
- pool .apply_async (_ReadSymbolPage , (path , symbol_name , qualified_symbol_name )),
176
- )
177
- )
177
+ if path in symbols_by_page or os .path .isfile (path ):
178
+ symbols_by_page [path ].append ((symbol_name , qualified_symbol_name ))
178
179
else :
179
180
sys .stderr .write (
180
181
"Discarding information for symbol: %s. Page %s does not exist.\n "
181
182
% (symbol_name , path )
182
183
)
183
184
185
+ for path , symbols in symbols_by_page .items ():
186
+ results .append (
187
+ pool .apply_async (_ReadSymbolPage , (path , symbols )),
188
+ )
189
+
184
190
# Build map from symbol name to a set of headers.
185
191
symbol_headers = collections .defaultdict (set )
186
- for symbol_name , lazy_headers in results :
187
- symbol_headers [symbol_name ].update (lazy_headers .get ())
192
+ for lazy_mapping in results :
193
+ for symbol_name , headers in lazy_mapping .get ():
194
+ symbol_headers [symbol_name ].update (headers )
188
195
189
196
symbols = []
190
197
for name , headers in sorted (symbol_headers .items (), key = lambda t : t [0 ]):
0 commit comments