Skip to content

Commit 8538819

Browse files
committed
Fix variant parsing
1 parent 95e5a99 commit 8538819

File tree

1 file changed

+29
-4
lines changed

1 file changed

+29
-4
lines changed

clang/tools/include-mapping/cppreference_parser.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#
88
# ===------------------------------------------------------------------------===#
99

10-
from bs4 import BeautifulSoup, NavigableString
10+
from bs4 import BeautifulSoup, NavigableString, Tag
1111

1212
import collections
1313
import multiprocessing
@@ -89,6 +89,23 @@ def _ParseSymbolPage(symbol_page_html, symbol_name):
8989
return headers or all_headers
9090

9191

92+
def _ParseSymbolVariant(caption):
    """Extract the variant name from the caption that follows a symbol link.

    Two caption layouts are recognised:
      * a single text node holding the whole parenthesised caption, e.g.
        " (locale)" or " (algorithm)" -> "locale" / "algorithm";
      * a text node containing only the opening "(", followed by a <code>
        element and a text node starting with ")", e.g.
        "(<code>std::complex</code>)" -> the text of the <code> element.

    Args:
      caption: the node that directly follows the symbol's link in the index
        page (may be None or any BeautifulSoup node).

    Returns:
      The variant string, or None when the node is not a text node containing
      "(" or when the caption matches neither layout.
    """
    looks_like_caption = isinstance(caption, NavigableString) and "(" in caption
    if not looks_like_caption:
        return None

    caption_text = caption.text
    if ")" in caption_text:
        # The caption is self-contained: (locale), (algorithm), etc.
        return caption_text.strip(" ()")

    # Otherwise the caption is split across sibling nodes:
    # "(" + <code>std::complex</code> + ")", etc.
    code_node = caption.next_sibling
    if not isinstance(code_node, Tag) or code_node.name != "code":
        return None

    closing_node = code_node.next_sibling
    if not isinstance(closing_node, NavigableString):
        return None
    if not closing_node.text.startswith(")"):
        return None

    return code_node.text
106+
107+
108+
92109
def _ParseIndexPage(index_page_html):
93110
"""Parse index page.
94111
The index page lists all std symbols and hrefs to their detailed pages
@@ -107,9 +124,7 @@ def _ParseIndexPage(index_page_html):
107124
# This accidentally accepts begin/end despite the (iterator) caption: the
108125
# (since C++11) note is first. They are good symbols, so the bug is unfixed.
109126
caption = symbol_href.next_sibling
110-
variant = None
111-
if isinstance(caption, NavigableString) and "(" in caption:
112-
variant = caption.text.strip(" ()")
127+
variant = _ParseSymbolVariant(caption)
113128
symbol_tt = symbol_href.find("tt")
114129
if symbol_tt:
115130
symbols.append(
@@ -192,6 +207,16 @@ def GetSymbols(parse_pages):
192207
variants_to_accept = {
193208
# std::remove<> has variant algorithm.
194209
"std::remove": ("algorithm"),
210+
# These functions don't have a generic version, and all variants are defined in <chrono>
211+
"std::chrono::abs": ("std::chrono::duration"),
212+
"std::chrono::ceil": ("std::chrono::duration"),
213+
"std::chrono::floor": ("std::chrono::duration"),
214+
"std::chrono::from_stream": ("std::chrono::day"),
215+
"std::chrono::round": ("std::chrono::duration"),
216+
# Same, but in <filesystem> (std::ranges::get below is in <ranges>)
217+
"std::filesystem::begin": ("std::filesystem::directory_iterator"),
218+
"std::filesystem::end": ("std::filesystem::directory_iterator"),
219+
"std::ranges::get": ("std::ranges::subrange"),
195220
}
196221
symbols = []
197222
# Run many workers to process individual symbol pages under the symbol index.

0 commit comments

Comments
 (0)