Skip to content

Commit 4f03f03

Browse files
committed
Auto merge of #2643 - JohnTitor:code-block-annot, r=jtgeibel
Shrink code block annotation after a comma. Rust-specific code block annotations such as `no_run` are used for doc testing through `doc-comment` and similar crates, while other services seem to ignore the characters after a comma. This change removes the comma and everything after it from each code block annotation so that syntax highlighting still works. Since this is additional work, it may affect performance slightly, but according to the benchmarks the difference looks acceptable. This also allows the `language-toml` class, as it is expected to be widely used. Small input: ``` test bench_orig ... bench: 1,363 ns/iter (+/- 29) test bench_tweaked ... bench: 1,498 ns/iter (+/- 28) ``` Large input (about 2k COL): ``` test bench_orig ... bench: 634,254 ns/iter (+/- 10,553) test bench_tweaked ... bench: 636,279 ns/iter (+/- 13,216) ``` Fixes #1150 r? @jtgeibel
2 parents 03ac789 + 7cba38b commit 4f03f03

File tree

1 file changed

+48
-3
lines changed

1 file changed

+48
-3
lines changed

src/render.rs

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! Render README files to HTML.
22
33
use ammonia::{Builder, UrlRelative, UrlRelativeEvaluate};
4+
use comrak::nodes::{AstNode, NodeValue};
45
use htmlescape::encode_minimal;
56
use std::borrow::Cow;
67
use std::path::Path;
@@ -38,6 +39,7 @@ impl<'a> MarkdownRenderer<'a> {
3839
"language-rust",
3940
"language-scss",
4041
"language-sql",
42+
"language-toml",
4143
"yaml",
4244
]),
4345
)]);
@@ -57,7 +59,10 @@ impl<'a> MarkdownRenderer<'a> {
5759

5860
/// Renders the given markdown to HTML using the current settings.
5961
fn to_html(&self, text: &str) -> String {
60-
use comrak::{ComrakExtensionOptions, ComrakOptions, ComrakRenderOptions};
62+
use comrak::{
63+
format_html, parse_document, Arena, ComrakExtensionOptions, ComrakOptions,
64+
ComrakRenderOptions,
65+
};
6166

6267
let options = ComrakOptions {
6368
render: ComrakRenderOptions {
@@ -75,11 +80,42 @@ impl<'a> MarkdownRenderer<'a> {
7580
},
7681
..ComrakOptions::default()
7782
};
78-
let rendered = comrak::markdown_to_html(text, &options);
83+
84+
let arena = Arena::new();
85+
let root = parse_document(&arena, text, &options);
86+
87+
// Tweak annotations of code blocks.
88+
iter_nodes(root, &|node| {
89+
if let NodeValue::CodeBlock(ref mut ncb) = node.data.borrow_mut().value {
90+
// If the annotation is not valid UTF-8, leave it unchanged.
91+
if let Ok(mut orig_annot) = String::from_utf8(ncb.info.to_vec()) {
92+
// Drop the first comma and everything after it so that syntax highlighting works correctly.
93+
if let Some(offset) = orig_annot.find(',') {
94+
let _ = orig_annot.drain(offset..orig_annot.len());
95+
ncb.info = orig_annot.as_bytes().to_vec();
96+
}
97+
}
98+
}
99+
});
100+
101+
let mut html = Vec::new();
102+
format_html(root, &options, &mut html).unwrap();
103+
let rendered = String::from_utf8(html).unwrap();
79104
self.html_sanitizer.clean(&rendered).to_string()
80105
}
81106
}
82107

108+
/// Recursively visit `node` and all of its descendants in comrak's CommonMark AST, calling `f` on each node before its children.
109+
fn iter_nodes<'a, F>(node: &'a AstNode<'a>, f: &F)
110+
where
111+
F: Fn(&'a AstNode<'a>),
112+
{
113+
f(node);
114+
for c in node.children() {
115+
iter_nodes(c, f);
116+
}
117+
}
118+
83119
/// Add trailing slash and remove `.git` suffix of base URL.
84120
fn canon_base_url(mut base_url: String) -> String {
85121
if !base_url.ends_with('/') {
@@ -117,7 +153,7 @@ struct MediaUrl {
117153
add_sanitize_query: bool,
118154
}
119155

120-
/// Determine whether the given URL has a media file externsion.
156+
/// Determine whether the given URL has a media file extension.
121157
/// Also check if `sanitize=true` must be added to the query string,
122158
/// which is required to load SVGs properly from GitHub.
123159
fn is_media_url(url: &str) -> MediaUrl {
@@ -334,6 +370,15 @@ mod tests {
334370
assert!(result.contains("<code class=\"language-rust\">"));
335371
}
336372

373+
#[test]
374+
fn code_block_with_syntax_highlighting_even_if_annot_has_no_run() {
375+
let code_block = r#"```rust , no_run \
376+
println!("Hello World"); \
377+
```"#;
378+
let result = markdown_to_html(code_block, None);
379+
assert!(result.contains("<code class=\"language-rust\">"));
380+
}
381+
337382
#[test]
338383
fn text_with_forbidden_class_attribute() {
339384
let text = "<p class='bad-class'>Hello World!</p>";

0 commit comments

Comments
 (0)