Skip to content

Commit e0e57b9

Browse files
Merge #1131
1131: Relative links in README r=carols10cents This is a proposed fix for #974. Currently implemented is GitHub/GitLab/Bitbucket-style (insert `/blob/master/` between the repository and the link destination), with a caveat.
2 parents 630e151 + 3f64fd3 commit e0e57b9

File tree

5 files changed

+159
-35
lines changed

5 files changed

+159
-35
lines changed

Cargo.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ serde = "1.0.0"
5656
clippy = { version = "=0.0.162", optional = true }
5757
chrono = { version = "0.4.0", features = ["serde"] }
5858
comrak = { version = "0.2.3", default-features = false }
59-
ammonia = { git = "https://github.com/notriddle/ammonia" }
59+
ammonia = { git = "https://github.com/notriddle/ammonia", rev = "3d4e4073f8cdb7c60203b9036c09f4385a2fdbbd" }
6060
docopt = "0.8.1"
6161
itertools = "0.6.0"
6262
lettre = "0.6"

src/bin/render-readmes.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,15 @@ fn get_readme(config: &Config, version: &Version, krate_name: &str) -> Option<St
255255
manifest.package.readme.unwrap()
256256
);
257257
let contents = find_file_by_path(&mut entries, Path::new(&path), &version, &krate_name);
258-
readme_to_html(&contents, &path).expect(&format!(
258+
readme_to_html(
259+
&contents,
260+
manifest
261+
.package
262+
.readme_file
263+
.as_ref()
264+
.map_or("README.md", |e| &**e),
265+
manifest.package.repository.as_ref().map(|e| &**e),
266+
).expect(&format!(
259267
"[{}-{}] Couldn't render README",
260268
krate_name,
261269
version.num
@@ -265,6 +273,8 @@ fn get_readme(config: &Config, version: &Version, krate_name: &str) -> Option<St
265273
#[derive(Deserialize)]
266274
struct Package {
267275
readme: Option<String>,
276+
readme_file: Option<String>,
277+
repository: Option<String>,
268278
}
269279
#[derive(Deserialize)]
270280
struct Manifest {

src/krate/publish.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ pub fn publish(req: &mut Request) -> CargoResult<Response> {
3636

3737
let name = &*new_crate.name;
3838
let vers = &*new_crate.vers;
39+
let repo = new_crate.repository.as_ref().map(|s| &**s);
3940
let features = new_crate
4041
.features
4142
.iter()
@@ -67,7 +68,7 @@ pub fn publish(req: &mut Request) -> CargoResult<Response> {
6768
documentation: new_crate.documentation.as_ref().map(|s| &**s),
6869
readme: new_crate.readme.as_ref().map(|s| &**s),
6970
readme_file: new_crate.readme_file.as_ref().map(|s| &**s),
70-
repository: new_crate.repository.as_ref().map(|s| &**s),
71+
repository: repo,
7172
license: new_crate.license.as_ref().map(|s| &**s),
7273
max_upload_size: None,
7374
};
@@ -128,6 +129,7 @@ pub fn publish(req: &mut Request) -> CargoResult<Response> {
128129
Some(readme) => Some(render::readme_to_html(
129130
&**readme,
130131
new_crate.readme_file.as_ref().map_or("README.md", |s| &**s),
132+
repo,
131133
)?),
132134
None => None,
133135
};

src/render.rs

Lines changed: 140 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
use ammonia::Builder;
1+
use ammonia::{Builder, UrlRelative};
22
use comrak;
33
use htmlescape::encode_minimal;
4+
use std::borrow::Cow;
5+
use url::Url;
46

57
use util::CargoResult;
68

@@ -12,7 +14,10 @@ struct MarkdownRenderer<'a> {
1214

1315
impl<'a> MarkdownRenderer<'a> {
1416
/// Creates a new renderer instance.
15-
fn new() -> MarkdownRenderer<'a> {
17+
///
18+
/// Per `readme_to_html`, `base_url` is the base URL prepended to any
19+
/// relative links in the input document. See that function for more detail.
20+
fn new(base_url: Option<&'a str>) -> MarkdownRenderer<'a> {
1621
let tags = [
1722
"a",
1823
"b",
@@ -94,13 +99,68 @@ impl<'a> MarkdownRenderer<'a> {
9499
].iter()
95100
.cloned()
96101
.collect();
102+
103+
let sanitizer_base_url = base_url.map(|s| s.to_string());
104+
105+
// Constrain the type of the closures given to the HTML sanitizer.
106+
fn constrain_closure<F>(f: F) -> F
107+
where
108+
F: for<'a> Fn(&'a str) -> Option<Cow<'a, str>> + Send + Sync,
109+
{
110+
f
111+
}
112+
113+
let unrelative_url_sanitizer = constrain_closure(|url| {
114+
// We have no base URL; allow fragment links only.
115+
if url.starts_with('#') {
116+
return Some(Cow::Borrowed(url));
117+
}
118+
119+
None
120+
});
121+
122+
let relative_url_sanitizer = constrain_closure(move |url| {
123+
// sanitizer_base_url is Some(String); use it to fix the relative URL.
124+
if url.starts_with('#') {
125+
return Some(Cow::Borrowed(url));
126+
}
127+
128+
let mut new_url = sanitizer_base_url.clone().unwrap();
129+
if !new_url.ends_with('/') {
130+
new_url.push('/');
131+
}
132+
new_url += "blob/master";
133+
if !url.starts_with('/') {
134+
new_url.push('/');
135+
}
136+
new_url += url;
137+
Some(Cow::Owned(new_url))
138+
});
139+
140+
let use_relative = if let Some(base_url) = base_url {
141+
if let Ok(url) = Url::parse(base_url) {
142+
url.host_str() == Some("github.com") || url.host_str() == Some("gitlab.com")
143+
|| url.host_str() == Some("bitbucket.org")
144+
} else {
145+
false
146+
}
147+
} else {
148+
false
149+
};
150+
97151
let mut html_sanitizer = Builder::new();
98152
html_sanitizer
99153
.link_rel(Some("nofollow noopener noreferrer"))
100154
.tags(tags)
101155
.tag_attributes(tag_attributes)
102156
.allowed_classes(allowed_classes)
157+
.url_relative(if use_relative {
158+
UrlRelative::Custom(Box::new(relative_url_sanitizer))
159+
} else {
160+
UrlRelative::Custom(Box::new(unrelative_url_sanitizer))
161+
})
103162
.id_prefix(Some("user-content-"));
163+
104164
MarkdownRenderer {
105165
html_sanitizer: html_sanitizer,
106166
}
@@ -122,15 +182,10 @@ impl<'a> MarkdownRenderer<'a> {
122182
}
123183
}
124184

125-
impl<'a> Default for MarkdownRenderer<'a> {
126-
fn default() -> Self {
127-
Self::new()
128-
}
129-
}
130-
131-
/// Renders Markdown text to sanitized HTML.
132-
fn markdown_to_html(text: &str) -> CargoResult<String> {
133-
let renderer = MarkdownRenderer::new();
185+
/// Renders Markdown text to sanitized HTML with a given `base_url`.
186+
/// See `readme_to_html` for the interpretation of `base_url`.
187+
fn markdown_to_html(text: &str, base_url: Option<&str>) -> CargoResult<String> {
188+
let renderer = MarkdownRenderer::new(base_url);
134189
renderer.to_html(text)
135190
}
136191

@@ -147,24 +202,29 @@ static MARKDOWN_EXTENSIONS: [&'static str; 7] = [
147202
];
148203

149204
/// Renders a readme to sanitized HTML. An appropriate rendering method is chosen depending
150-
/// on the extension of the supplied filename.
205+
/// on the extension of the supplied `filename`.
151206
///
152-
/// The returned text should not contain any harmful HTML tag or attribute (such as iframe,
207+
/// The returned text will not contain any harmful HTML tag or attribute (such as iframe,
153208
/// onclick, onmouseover, etc.).
154209
///
210+
/// The `base_url` parameter will be used as the base for any relative links found in the
211+
/// Markdown, as long as its host part is github.com, gitlab.com, or bitbucket.org. The
212+
/// supplied URL will be used as a directory base whether or not the relative link is
213+
/// prefixed with '/'. If `None` is passed, relative links will be omitted.
214+
///
155215
/// # Examples
156216
///
157217
/// ```
158218
/// use render::render_to_html;
159219
///
160220
/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!";
161-
/// let rendered = readme_to_html(text, "README.md")?;
221+
/// let rendered = readme_to_html(text, "README.md", None)?;
162222
/// ```
163-
pub fn readme_to_html(text: &str, filename: &str) -> CargoResult<String> {
223+
pub fn readme_to_html(text: &str, filename: &str, base_url: Option<&str>) -> CargoResult<String> {
164224
let filename = filename.to_lowercase();
165225

166226
if !filename.contains('.') || MARKDOWN_EXTENSIONS.iter().any(|e| filename.ends_with(e)) {
167-
return markdown_to_html(text);
227+
return markdown_to_html(text, base_url);
168228
}
169229

170230
Ok(encode_minimal(text).replace("\n", "<br>\n"))
@@ -177,14 +237,14 @@ mod tests {
177237
#[test]
178238
fn empty_text() {
179239
let text = "";
180-
let result = markdown_to_html(text).unwrap();
240+
let result = markdown_to_html(text, None).unwrap();
181241
assert_eq!(result, "");
182242
}
183243

184244
#[test]
185245
fn text_with_script_tag() {
186246
let text = "foo_readme\n\n<script>alert('Hello World')</script>";
187-
let result = markdown_to_html(text).unwrap();
247+
let result = markdown_to_html(text, None).unwrap();
188248
assert_eq!(
189249
result,
190250
"<p>foo_readme</p>\n&lt;script&gt;alert(\'Hello World\')&lt;/script&gt;\n"
@@ -194,7 +254,7 @@ mod tests {
194254
#[test]
195255
fn text_with_iframe_tag() {
196256
let text = "foo_readme\n\n<iframe>alert('Hello World')</iframe>";
197-
let result = markdown_to_html(text).unwrap();
257+
let result = markdown_to_html(text, None).unwrap();
198258
assert_eq!(
199259
result,
200260
"<p>foo_readme</p>\n&lt;iframe&gt;alert(\'Hello World\')&lt;/iframe&gt;\n"
@@ -204,14 +264,14 @@ mod tests {
204264
#[test]
205265
fn text_with_unknown_tag() {
206266
let text = "foo_readme\n\n<unknown>alert('Hello World')</unknown>";
207-
let result = markdown_to_html(text).unwrap();
267+
let result = markdown_to_html(text, None).unwrap();
208268
assert_eq!(result, "<p>foo_readme</p>\n<p>alert(\'Hello World\')</p>\n");
209269
}
210270

211271
#[test]
212272
fn text_with_inline_javascript() {
213273
let text = r#"foo_readme\n\n<a href="https://crates.io/crates/cargo-registry" onclick="window.alert('Got you')">Crate page</a>"#;
214-
let result = markdown_to_html(text).unwrap();
274+
let result = markdown_to_html(text, None).unwrap();
215275
assert_eq!(
216276
result,
217277
"<p>foo_readme\\n\\n<a href=\"https://crates.io/crates/cargo-registry\" rel=\"nofollow noopener noreferrer\">Crate page</a></p>\n"
@@ -223,7 +283,7 @@ mod tests {
223283
#[test]
224284
fn text_with_fancy_single_quotes() {
225285
let text = r#"wb’"#;
226-
let result = markdown_to_html(text).unwrap();
286+
let result = markdown_to_html(text, None).unwrap();
227287
assert_eq!(result, "<p>wb’</p>\n");
228288
}
229289

@@ -232,22 +292,74 @@ mod tests {
232292
let code_block = r#"```rust \
233293
println!("Hello World"); \
234294
```"#;
235-
let result = markdown_to_html(code_block).unwrap();
295+
let result = markdown_to_html(code_block, None).unwrap();
236296
assert!(result.contains("<code class=\"language-rust\">"));
237297
}
238298

239299
#[test]
240300
fn text_with_forbidden_class_attribute() {
241301
let text = "<p class='bad-class'>Hello World!</p>";
242-
let result = markdown_to_html(text).unwrap();
302+
let result = markdown_to_html(text, None).unwrap();
243303
assert_eq!(result, "<p>Hello World!</p>\n");
244304
}
245305

306+
#[test]
307+
fn relative_links() {
308+
let absolute = "[hi](/hi)";
309+
let relative = "[there](there)";
310+
311+
for host in &["github.com", "gitlab.com", "bitbucket.org"] {
312+
for &extra_slash in &[true, false] {
313+
let url = format!(
314+
"https://{}/rust-lang/test{}",
315+
host,
316+
if extra_slash { "/" } else { "" }
317+
);
318+
319+
let result = markdown_to_html(absolute, Some(&url)).unwrap();
320+
assert_eq!(
321+
result,
322+
format!(
323+
"<p><a href=\"https://{}/rust-lang/test/blob/master/hi\" rel=\"nofollow noopener noreferrer\">hi</a></p>\n",
324+
host
325+
)
326+
);
327+
328+
let result = markdown_to_html(relative, Some(&url)).unwrap();
329+
assert_eq!(
330+
result,
331+
format!(
332+
"<p><a href=\"https://{}/rust-lang/test/blob/master/there\" rel=\"nofollow noopener noreferrer\">there</a></p>\n",
333+
host
334+
)
335+
);
336+
}
337+
}
338+
339+
let result = markdown_to_html(absolute, Some("https://google.com/")).unwrap();
340+
assert_eq!(
341+
result,
342+
"<p><a rel=\"nofollow noopener noreferrer\">hi</a></p>\n"
343+
);
344+
}
345+
346+
#[test]
347+
fn absolute_links_dont_get_resolved() {
348+
let readme_text = "[![Crates.io](https://img.shields.io/crates/v/clap.svg)](https://crates.io/crates/clap)";
349+
let repository = "https://github.com/kbknapp/clap-rs/";
350+
let result = markdown_to_html(readme_text, Some(&repository)).unwrap();
351+
352+
assert_eq!(
353+
result,
354+
"<p><a href=\"https://crates.io/crates/clap\" rel=\"nofollow noopener noreferrer\"><img src=\"https://img.shields.io/crates/v/clap.svg\" alt=\"Crates.io\"></a></p>\n"
355+
);
356+
}
357+
246358
#[test]
247359
fn readme_to_html_renders_markdown() {
248360
for f in &["README", "readme.md", "README.MARKDOWN", "whatever.mkd"] {
249361
assert_eq!(
250-
readme_to_html("*lobster*", f).unwrap(),
362+
readme_to_html("*lobster*", f, None).unwrap(),
251363
"<p><em>lobster</em></p>\n"
252364
);
253365
}
@@ -257,7 +369,7 @@ mod tests {
257369
fn readme_to_html_renders_other_things() {
258370
for f in &["readme.exe", "readem.org", "blah.adoc"] {
259371
assert_eq!(
260-
readme_to_html("<script>lobster</script>\n\nis my friend\n", f).unwrap(),
372+
readme_to_html("<script>lobster</script>\n\nis my friend\n", f, None).unwrap(),
261373
"&lt;script&gt;lobster&lt;/script&gt;<br>\n<br>\nis my friend<br>\n"
262374
);
263375
}
@@ -266,7 +378,7 @@ mod tests {
266378
#[test]
267379
fn header_has_tags() {
268380
let text = "# My crate\n\nHello, world!\n";
269-
let result = markdown_to_html(text).unwrap();
381+
let result = markdown_to_html(text, None).unwrap();
270382
assert_eq!(
271383
result,
272384
"<h1><a href=\"#my-crate\" id=\"user-content-my-crate\" rel=\"nofollow noopener noreferrer\"></a>My crate</h1>\n<p>Hello, world!</p>\n"
@@ -276,7 +388,7 @@ mod tests {
276388
#[test]
277389
fn manual_anchor_is_sanitized() {
278390
let text = "<h1><a href=\"#my-crate\" id=\"my-crate\"></a>My crate</h1>\n<p>Hello, world!</p>\n";
279-
let result = markdown_to_html(text).unwrap();
391+
let result = markdown_to_html(text, None).unwrap();
280392
assert_eq!(
281393
result,
282394
"<h1><a href=\"#my-crate\" id=\"user-content-my-crate\" rel=\"nofollow noopener noreferrer\"></a>My crate</h1>\n<p>Hello, world!</p>\n"

0 commit comments

Comments
 (0)