Skip to content

Commit 164bef9

Browse files
committed
Process notebooks with pypandoc and a custom pandoc filter
1 parent c703e69 commit 164bef9

File tree

4 files changed

+120
-2
lines changed

4 files changed

+120
-2
lines changed

.github/workflows/build-tutorials.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,8 @@ jobs:
187187
echo "rm /opt/cache/bin/*" | docker exec -u root -i "${container_name}" bash
188188
189189
docker exec -t "${container_name}" sh -c ".jenkins/build.sh"
190-
190+
docker exec -t "${container_name}" python "./post_process_notebooks.py"
191+
191192
- name: Upload docs preview
192193
uses: seemethere/upload-artifact-s3@v5
193194
if: ${{ github.event_name == 'pull_request' }}

conf.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,10 @@ def reset_seeds(gallery_conf, fname):
107107
"# https://pytorch.org/tutorials/beginner/colab\n"
108108
"%matplotlib inline"),
109109
'reset_modules': (reset_seeds),
110-
'ignore_pattern': r'_torch_export_nightly_tutorial.py'
110+
'ignore_pattern': r'_torch_export_nightly_tutorial.py',
111+
'pypandoc': {'extra_args': ['--mathjax'],
112+
'filters': ['./custom_pandoc_filter.py'],
113+
},
111114
}
112115

113116
if os.getenv('GALLERY_PATTERN'):

custom_pandoc_filter.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
from pandocfilters import toJSONFilter, Div, RawBlock, Para, Str, Space, Link, Code, CodeBlock
2+
import markdown
3+
import re
4+
5+
def to_markdown(item):
    """Render a single pandoc AST element as a Markdown fragment.

    Args:
        item: A pandoc JSON AST node: a dict with a ``'t'`` (type) key and,
            for every type handled here except ``Space``, a ``'c'``
            (content) key.

    Returns:
        The Markdown text for ``Str``, ``Space``, ``Link``, ``Code`` and
        ``CodeBlock`` nodes, or ``None`` for any other node type.
    """
    kind = item['t']
    if kind == 'Str':
        return item['c']
    if kind == 'Space':
        return ' '
    if kind == 'Link':
        # The link text is a list of inline nodes, not a single string, so
        # render every child; using only the first one truncates
        # multi-word link text to its first word. Children of a type this
        # function does not handle contribute nothing.
        link_text = ''.join(to_markdown(child) or '' for child in item['c'][1])
        link_target = item['c'][2][0]
        return f"[{link_text}]({link_target})"
    if kind == 'Code':
        return f"`{item['c'][1]}`"
    if kind == 'CodeBlock':
        return f"```\n{item['c'][1]}\n```"
    # Unsupported node types yield None, as before.
    return None
17+
18+
# (div class, banner colour, banner label), checked in this order.
_ADMONITION_STYLES = (
    ('note', '#54c7ec', 'NOTE:'),
    ('tip', '#6bcebb', 'TIP:'),
    ('warning', '#e94f3b', 'WARNING:'),
)

# Inline node types that to_markdown() knows how to render.
_RENDERED_INLINES = ('Str', 'Space', 'Link', 'Code')

# Line-break nodes; rendered as a space so neighbouring words stay apart.
_BREAK_INLINES = ('SoftBreak', 'LineBreak')


def _raw_html(html):
    """Wrap an HTML string in a pandoc ``RawBlock`` JSON node."""
    return {'t': 'RawBlock', 'c': ['html', html]}


def process_admonitions(key, value, format, meta):
    """Pandoc filter action: render note/tip/warning divs as styled HTML.

    A ``Div`` whose classes include ``note``, ``tip`` or ``warning`` is
    replaced by a coloured banner followed by a grey box holding the
    admonition body. The body is rebuilt as Markdown from the div's
    paragraphs and code blocks and converted to HTML with
    ``markdown.markdown``.

    Args:
        key: Type name of the AST node being visited (e.g. ``'Div'``).
        value: Content of the node; for a ``Div`` this is
            ``[[ident, classes, keyvals], contents]``.
        format: Output format passed in by pandocfilters (unused).
        meta: Document metadata passed in by pandocfilters (unused).

    Returns:
        A list of four ``RawBlock`` nodes for a recognised admonition.
        ``None`` for everything else, which leaves the node unchanged;
        this includes raw HTML such as embedded video iframes, which this
        filter does not rewrite.
    """
    if key != 'Div':
        return None

    [[_ident, classes, _keyvals], contents] = value
    for css_class, color, label in _ADMONITION_STYLES:
        if css_class in classes:
            break
    else:
        # Not an admonition div: leave it alone.
        return None

    rendered_blocks = []
    for block in contents:
        block_type = block.get('t')
        if block_type == 'Para':
            pieces = []
            for item in block['c']:
                if item['t'] in _RENDERED_INLINES:
                    pieces.append(to_markdown(item))
                elif item['t'] in _BREAK_INLINES:
                    # Dropping the break would glue together the last word
                    # of one source line and the first word of the next.
                    pieces.append(' ')
            paragraph = ''.join(pieces)
            if paragraph:
                rendered_blocks.append(paragraph)
        elif block_type == 'CodeBlock':
            rendered_blocks.append(to_markdown(block))

    # Blank lines keep separate paragraphs and code blocks from fusing
    # into one run of text.
    note_content_md = '\n\n'.join(rendered_blocks)
    html_content = markdown.markdown(note_content_md)

    banner_html = (
        f'<div style="background-color: {color}; color: #fff; '
        'font-weight: 700; padding-left: 10px; padding-top: 5px; '
        f'padding-bottom: 5px">{label}</div>'
    )
    body_open_html = (
        '<div style="background-color: #f3f4f7; padding-left: 10px; '
        'padding-top: 10px; padding-bottom: 10px; padding-right: 10px">'
    )
    return [
        _raw_html(banner_html),
        _raw_html(body_open_html),
        _raw_html(html_content),
        _raw_html('</div>'),
    ]


if __name__ == "__main__":
    toJSONFilter(process_admonitions)

post_process_notebooks.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import nbformat as nbf
2+
import os
3+
import re
4+
5+
def get_gallery_dirs(conf_path):
    """Execute the conf.py file and return the gallery directories.

    Args:
        conf_path: Path to a Sphinx ``conf.py`` that defines a
            ``sphinx_gallery_conf`` dict with a ``'gallery_dirs'`` entry.

    Returns:
        The configured gallery directories. A single directory given as a
        plain string is returned as a one-element list so that callers can
        always iterate over directories rather than over characters.

    Raises:
        KeyError: If ``sphinx_gallery_conf`` or its ``'gallery_dirs'``
            entry is missing.
    """
    # Read through a context manager so the file handle is closed promptly.
    with open(conf_path, encoding='utf-8') as conf_file:
        conf_source = conf_file.read()

    namespace = {}
    # NOTE: this runs the configuration file as code; only point it at a
    # trusted conf.py. Compiling with the real filename keeps tracebacks
    # from the configuration readable.
    exec(compile(conf_source, conf_path, 'exec'), namespace)

    gallery_dirs = namespace['sphinx_gallery_conf']['gallery_dirs']
    if isinstance(gallery_dirs, str):
        gallery_dirs = [gallery_dirs]
    print(f"Processing directories: {', '.join(gallery_dirs)}")
    return gallery_dirs
12+
13+
def process_notebook(notebook_path):
    """Clean every Markdown cell of one notebook and save it in place.

    The notebook at ``notebook_path`` is read as nbformat version 4, the
    source of each Markdown cell is passed through ``process_content``, and
    the result is written back to the same path.
    """
    print(f'Processing file: {notebook_path}')
    nb = nbf.read(notebook_path, as_version=4)
    markdown_cells = (c for c in nb.cells if c.cell_type == 'markdown')
    for md_cell in markdown_cells:
        md_cell.source = process_content(md_cell.source)
    nbf.write(nb, notebook_path)
21+
22+
def process_content(content):
    """Strip raw-HTML fence markers from the text of a Markdown cell.

    The substitutions run in the listed order, each one on the output of
    the previous one, and the fully rewritten text is returned.
    """
    substitutions = (
        (r'```{=html}\n<div', '<div'),
        (r'">\n```', '">'),
        (r'<\/div>\n```', '</div>\n'),
        (r'```{=html}\n</div>\n```', '</div>\n'),
        (r'```{=html}', ''),
        (r'</p>\n```', '</p>'),
    )
    cleaned = content
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
31+
32+
def process_directory(notebook_dir):
    """Run ``process_notebook`` on each ``.ipynb`` file under a directory.

    The directory tree rooted at ``notebook_dir`` is walked recursively.
    """
    for current_dir, _subdirs, entries in os.walk(notebook_dir):
        notebook_names = [name for name in entries if name.endswith('.ipynb')]
        for name in notebook_names:
            process_notebook(os.path.join(current_dir, name))
38+
39+
def main():
    """Process the notebooks in every gallery directory listed by conf.py.

    The configuration is read from ``conf.py`` in the current working
    directory.
    """
    gallery_dirs = get_gallery_dirs('conf.py')
    for gallery_dir in gallery_dirs:
        process_directory(gallery_dir)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)