Skip to content

Commit 1096e52

Browse files
authored
👌 Enhance [](#id) references (#707)
This PR moves myst-parser in the direction of jupyter-book/myst-enhancement-proposals#10, in a relatively backward-compatible manner, and in a way that supports both docutils and sphinx.

It expands the capability of `[](#id)` to more than just linking to heading slugs, with the order of specificity being:

1. If it matches a local (to that document) "explicit" `std:ref` target, then link to that and stop.
   - Note: currently only the `std:ref` domain/type is supported, e.g. not `math` etc. That is more difficult and can come later.
2. If it matches a local (to that document) "implicit" heading slug, then link to that and stop.
3. If using a docutils (i.e. single-page) build, then stop here and emit a `myst.xref_missing` warning.
4. If using sphinx, then create a `pending_xref` node and hand off to sphinx's "any" resolver, which takes effect once all documents have been read:
   - This first tries to resolve against a local (to the project) reference and, if matching, stops.
   - Otherwise, it also tries to match against any intersphinx reference.
   - Otherwise, it emits a ~~`myst.ref`~~ `myst.xref_missing` warning.

If the text is explicit, e.g. `[text](#id)`, that text is used; otherwise a determination of implicit text is attempted, e.g. based on the section title or figure caption.
1 parent f02d40f commit 1096e52

File tree

16 files changed

+725
-151
lines changed

16 files changed

+725
-151
lines changed

codecov.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ coverage:
22
status:
33
project:
44
default:
5-
target: 90%
5+
target: 89%
66
threshold: 0.5%
77
patch:
88
default:

myst_parser/mdit_to_docutils/base.py

Lines changed: 25 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -248,36 +248,14 @@ def _render_initialise(self) -> None:
248248
def _render_finalise(self) -> None:
249249
"""Finalise the render of the document."""
250250

251-
# attempt to replace id_link references with internal links
252-
for refnode in findall(self.document)(nodes.reference):
253-
if not refnode.get("id_link"):
254-
continue
255-
target = refnode["refuri"][1:]
256-
if target in self._slug_to_section:
257-
section_node = self._slug_to_section[target]
258-
refnode["refid"] = section_node["ids"][0]
259-
260-
if not refnode.children:
261-
implicit_text = clean_astext(section_node[0])
262-
refnode += nodes.inline(
263-
implicit_text, implicit_text, classes=["std", "std-ref"]
264-
)
265-
else:
266-
self.create_warning(
267-
f"local id not found: {refnode['refuri']!r}",
268-
MystWarnings.XREF_MISSING,
269-
line=refnode.line,
270-
append_to=refnode,
271-
)
272-
refnode["refid"] = target
273-
del refnode["refuri"]
274-
275-
if self._slug_to_section and self.sphinx_env:
276-
# save for later reference resolution
277-
self.sphinx_env.metadata[self.sphinx_env.docname]["myst_slugs"] = {
278-
slug: (snode["ids"][0], clean_astext(snode[0]))
279-
for slug, snode in self._slug_to_section.items()
280-
}
251+
# save for later reference resolution
252+
slugs = {
253+
slug: (snode.line, snode["ids"][0], clean_astext(snode[0]))
254+
for slug, snode in self._slug_to_section.items()
255+
}
256+
self.document.myst_slugs = slugs
257+
if slugs and self.sphinx_env:
258+
self.sphinx_env.metadata[self.sphinx_env.docname]["myst_slugs"] = slugs
281259

282260
# log warnings for duplicate reference definitions
283261
# "duplicate_refs": [{"href": "ijk", "label": "B", "map": [4, 5], "title": ""}],
@@ -795,7 +773,7 @@ def render_heading(self, token: SyntaxTreeNode) -> None:
795773
token.attrs.get("toc", None) == "false"
796774
or self.md_env.get("match_titles", None) is False
797775
):
798-
if self.md_env.get("match_titles", None) is False:
776+
if token.attrs.get("toc", None) != "false":
799777
# this can occur if a nested parse is performed by a directive
800778
# (such as an admonition) which contains a header.
801779
# this would break the document structure
@@ -984,8 +962,10 @@ def render_id_link(self, token: SyntaxTreeNode) -> None:
984962
self.copy_attributes(
985963
token, ref_node, ("class", "id", "reftitle"), aliases={"title": "reftitle"}
986964
)
987-
with self.current_node_context(ref_node, append=True):
988-
self.render_children(token)
965+
self.current_node.append(ref_node)
966+
if token.info != "auto" and not is_ellipsis(token):
967+
with self.current_node_context(ref_node):
968+
self.render_children(token)
989969

990970
def render_internal_link(self, token: SyntaxTreeNode) -> None:
991971
"""Render link token `[text](link "title")`,
@@ -1024,7 +1004,9 @@ def render_inventory_link(self, token: SyntaxTreeNode) -> None:
10241004
href = self.md.normalizeLinkText(cast(str, token.attrGet("href") or ""))
10251005

10261006
# note if the link had explicit text or not (autolinks are always implicit)
1027-
explicit = False if token.info == "auto" else bool(token.children)
1007+
explicit = (
1008+
(token.info != "auto") and bool(token.children) and not is_ellipsis(token)
1009+
)
10281010

10291011
# split the href up into parts
10301012
uri_parts = urlparse(href)
@@ -1897,3 +1879,12 @@ def compute_unique_slug(
18971879
slug = f"{slug}-{i}"
18981880
i += 1
18991881
return slug
1882+
1883+
1884+
def is_ellipsis(token: SyntaxTreeNode) -> bool:
    """Check if a token content only contains an ellipsis.

    A link whose only child is a text node reading ``...`` (three full stops)
    or ``…`` (the single ellipsis character) is reported as an ellipsis.

    :param token: the syntax tree node whose children are inspected
    :returns: True only if there is exactly one child, it is of type
        ``"text"``, and its content is an ellipsis; False otherwise,
        including when the token has no children at all
    """
    # tolerate a missing/empty child list, mirroring the
    # ``token.children or []`` guard used where link explicitness is computed
    children = token.children or []
    if len(children) != 1:
        return False
    only_child = children[0]
    return only_child.type == "text" and only_child.content in ("...", "…")

myst_parser/mdit_to_docutils/sphinx_.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from sphinx.util import logging
1717

1818
from myst_parser import inventory
19-
from myst_parser.mdit_to_docutils.base import DocutilsRenderer
19+
from myst_parser.mdit_to_docutils.base import DocutilsRenderer, is_ellipsis
2020

2121
LOGGER = logging.getLogger(__name__)
2222

@@ -46,25 +46,33 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None:
4646
destination = os.path.relpath(
4747
os.path.join(include_dir, os.path.normpath(destination)), source_dir
4848
)
49+
explicit = len(token.children or []) > 0 and not is_ellipsis(token)
4950
kwargs = {
5051
"refdoc": self.sphinx_env.docname,
5152
"reftype": "myst",
52-
"refexplicit": len(token.children or []) > 0,
53+
"refexplicit": explicit,
5354
}
5455
path_dest, *_path_ids = destination.split("#", maxsplit=1)
5556
path_id = _path_ids[0] if _path_ids else None
56-
potential_path = (
57-
Path(self.sphinx_env.doc2path(self.sphinx_env.docname)).parent / path_dest
58-
if self.sphinx_env.srcdir # not set in some test situations
59-
else None
60-
)
61-
if path_dest == "./":
57+
58+
potential_path: None | Path
59+
if path_dest.startswith("/"):
60+
# here we are referencing a file relative to the source directory
61+
potential_path = Path(self.sphinx_env.srcdir) / path_dest[1:]
62+
elif path_dest == "./":
6263
# this is a special case, where we want to reference the current document
6364
potential_path = (
6465
Path(self.sphinx_env.doc2path(self.sphinx_env.docname))
6566
if self.sphinx_env.srcdir
6667
else None
6768
)
69+
else:
70+
potential_path = (
71+
Path(self.sphinx_env.doc2path(self.sphinx_env.docname)).parent
72+
/ path_dest
73+
if self.sphinx_env.srcdir # not set in some test situations
74+
else None
75+
)
6876
if potential_path and potential_path.is_file():
6977
docname = self.sphinx_env.path2doc(str(potential_path))
7078
if docname:
@@ -92,8 +100,9 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None:
92100

93101
inner_node = nodes.inline("", text, classes=classes)
94102
wrap_node.append(inner_node)
95-
with self.current_node_context(inner_node):
96-
self.render_children(token)
103+
if explicit:
104+
with self.current_node_context(inner_node):
105+
self.render_children(token)
97106

98107
def get_inventory_matches(
99108
self,
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
"""Transforms that can be applied to both Sphinx and docutils."""
2+
from __future__ import annotations
3+
4+
import typing as t
5+
6+
from docutils import nodes
7+
from docutils.transforms import Transform
8+
9+
from myst_parser._compat import findall
10+
from myst_parser.mdit_to_docutils.base import clean_astext
11+
from myst_parser.warnings_ import MystWarnings, create_warning
12+
13+
14+
class ResolveAnchorIds(Transform):
    """Transform for resolving `[name](#id)` type links.

    Every ``reference`` node flagged with ``id_link`` is resolved,
    in order of specificity, against:

    1. explicit targets of the current document,
    2. implicit heading slugs of the current document (``document.myst_slugs``),
    3. if the document settings carry an ``env`` (i.e. a Sphinx build),
       a ``pending_xref`` node of type ``myst`` that replaces the reference,
    4. otherwise a ``myst.xref_missing`` warning is emitted and the raw
       target is used as the ``refid``.

    If the reference has no children, implicit link text is added where
    it can be determined (e.g. from a section title or a caption).
    """

    default_priority = 879  # this is the same as Sphinx's StandardDomain.process_doc

    def apply(self, **kwargs: t.Any) -> None:
        """Apply the transform."""
        # gather the implicit heading slugs
        # slug -> (line, section id, title)
        slugs: dict[str, tuple[int, str, str]] = getattr(
            self.document, "myst_slugs", {}
        )

        # gather the explicit targets
        # name -> (id, implicit title or None)
        explicit = self._gather_explicit_targets()

        # materialise the traversal first, since unresolved references may be
        # replaced in the tree (by a pending_xref) while we loop over them
        for refnode in list(findall(self.document)(nodes.reference)):
            if not refnode.get("id_link"):
                continue

            target = refnode["refuri"][1:]
            del refnode["refuri"]

            # search explicit first
            if target in explicit:
                ref_id, implicit_title = explicit[target]
                refnode["refid"] = ref_id
                if not refnode.children:
                    # no title could be determined: fall back to the raw target
                    text = implicit_title or "#" + target
                    refnode += nodes.inline(text, text, classes=["std", "std-ref"])
                continue

            # now search implicit
            if target in slugs:
                _, sect_id, implicit_title = slugs[target]
                refnode["refid"] = sect_id
                if not refnode.children and implicit_title:
                    refnode += nodes.inline(
                        implicit_title, implicit_title, classes=["std", "std-ref"]
                    )
                continue

            # if still not found, and using sphinx, then create a pending_xref
            if hasattr(self.document.settings, "env"):
                from sphinx import addnodes

                pending = addnodes.pending_xref(
                    refdoc=self.document.settings.env.docname,
                    refdomain=None,
                    reftype="myst",
                    reftarget=target,
                    refwarn=True,
                    refexplicit=bool(refnode.children),
                )
                inner_node = nodes.inline(
                    "", "", classes=["xref", "myst"] + refnode["classes"]
                )
                for attr in ("ids", "names", "dupnames"):
                    inner_node[attr] = refnode[attr]
                inner_node += refnode.children
                pending += inner_node
                refnode.parent.replace(refnode, pending)
                continue

            # if still not found, and using docutils, then create a warning
            # and simply output as a url

            create_warning(
                self.document,
                f"'myst' reference target not found: {target!r}",
                MystWarnings.XREF_MISSING,
                line=refnode.line,
                append_to=refnode,
            )
            refnode["refid"] = target
            if not refnode.children:
                refnode += nodes.inline(
                    "#" + target, "#" + target, classes=["std", "std-ref"]
                )

    def _gather_explicit_targets(self) -> dict[str, tuple[str, None | str]]:
        """Collect the explicit targets of the document.

        This follows the same logic as Sphinx's StandardDomain.process_doc.

        :returns: mapping of target name to ``(id, implicit title or None)``
        """
        explicit: dict[str, tuple[str, None | str]] = {}
        for name, is_explicit in self.document.nametypes.items():
            if not is_explicit:
                continue
            labelid = self.document.nameids[name]
            if labelid is None:
                continue
            node = self.document.ids[labelid]
            if isinstance(node, nodes.target) and "refid" in node:
                # indirect hyperlink targets
                node = self.document.ids.get(node["refid"])
                if node is None or not node["names"]:
                    # the indirect target points at an unknown or unnamed node,
                    # so there is nothing that can be linked to
                    continue
                labelid = node["names"][0]
            if (
                node.tagname == "footnote"
                or "refuri" in node
                or node.tagname.startswith("desc_")
            ):
                # ignore footnote labels, labels automatically generated from a
                # link and object descriptions
                continue
            explicit[name] = (labelid, self._implicit_title(node))
        return explicit

    @staticmethod
    def _implicit_title(node: nodes.Element) -> None | str:
        """Determine the implicit link text for a target node, if any.

        :returns: the cleaned text of the rubric, of the first title/caption
            child, or of the first term/field name of a definition/field list;
            None if none of these apply
        """
        if node.tagname == "rubric":
            return clean_astext(node)

        # handle sections and other captioned elements
        for subnode in node:
            if isinstance(subnode, (nodes.caption, nodes.title)):
                return clean_astext(subnode)

        # handle definition lists and field lists,
        # by descending to the first term / field name
        if isinstance(node, (nodes.definition_list, nodes.field_list)) and node.children:
            node = node[0]
        if isinstance(node, (nodes.field, nodes.definition_list_item)) and node.children:
            node = node[0]
        if isinstance(node, (nodes.term, nodes.field_name)):
            return clean_astext(node)

        return None

myst_parser/parsers/docutils_.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
read_topmatter,
2727
)
2828
from myst_parser.mdit_to_docutils.base import DocutilsRenderer
29+
from myst_parser.mdit_to_docutils.transforms import ResolveAnchorIds
2930
from myst_parser.parsers.mdit import create_md_parser
3031
from myst_parser.warnings_ import MystWarnings, create_warning
3132

@@ -247,6 +248,9 @@ class Parser(RstParser):
247248
config_section_dependencies = ("parsers",)
248249
translate_section_name = None
249250

251+
def get_transforms(self):
    """Return the parent parser's transforms, followed by `ResolveAnchorIds`."""
    transforms = list(super().get_transforms())
    transforms.append(ResolveAnchorIds)
    return transforms
253+
250254
def parse(self, inputstring: str, document: nodes.document) -> None:
251255
"""Parse source text.
252256

myst_parser/parsers/sphinx_.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
read_topmatter,
1414
)
1515
from myst_parser.mdit_to_docutils.sphinx_ import SphinxRenderer
16+
from myst_parser.mdit_to_docutils.transforms import ResolveAnchorIds
1617
from myst_parser.parsers.mdit import create_md_parser
1718
from myst_parser.warnings_ import create_warning
1819

@@ -43,6 +44,9 @@ class MystParser(SphinxParser):
4344
config_section_dependencies = ("parsers",)
4445
translate_section_name = None
4546

47+
def get_transforms(self):
    """Return the parent parser's transforms, followed by `ResolveAnchorIds`."""
    transforms = list(super().get_transforms())
    transforms.append(ResolveAnchorIds)
    return transforms
49+
4650
def parse(self, inputstring: str, document: nodes.document) -> None:
4751
"""Parse source text.
4852

0 commit comments

Comments
 (0)