Skip to content

Relative links in README #1131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Oct 29, 2017
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ serde = "1.0.0"
clippy = { version = "=0.0.162", optional = true }
chrono = { version = "0.4.0", features = ["serde"] }
comrak = { version = "0.1.9", default-features = false }
ammonia = "0.7.0"
ammonia = { git = "https://github.com/notriddle/ammonia" }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this dependency be specified as { git = "https://github.com/notriddle/ammonia", rev = "46054ff" } to ensure reproducible builds ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea! I was thinking Cargo.lock storing the rev would be sufficient, but this is better.

docopt = "0.8.1"
itertools = "0.6.0"
lettre = "0.6"
Expand Down
6 changes: 5 additions & 1 deletion src/bin/render-readmes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,10 @@ fn get_readme(config: &Config, version: &Version, krate_name: &str) -> Option<St
manifest.package.readme.unwrap()
);
let contents = find_file_by_path(&mut entries, Path::new(&path), &version, &krate_name);
markdown_to_html(&contents).expect(&format!(
markdown_to_html(
&contents,
manifest.package.repository.as_ref().map(|e| &**e),
).expect(&format!(
"[{}-{}] Couldn't render README",
krate_name,
version.num
Expand All @@ -265,6 +268,7 @@ fn get_readme(config: &Config, version: &Version, krate_name: &str) -> Option<St
#[derive(Deserialize)]
struct Package {
readme: Option<String>,
repository: Option<String>,
}
#[derive(Deserialize)]
struct Manifest {
Expand Down
5 changes: 3 additions & 2 deletions src/krate/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -940,6 +940,7 @@ pub fn new(req: &mut Request) -> CargoResult<Response> {

let name = &*new_crate.name;
let vers = &*new_crate.vers;
let repo = new_crate.repository.as_ref().map(|s| &**s);
let features = new_crate
.features
.iter()
Expand Down Expand Up @@ -970,7 +971,7 @@ pub fn new(req: &mut Request) -> CargoResult<Response> {
homepage: new_crate.homepage.as_ref().map(|s| &**s),
documentation: new_crate.documentation.as_ref().map(|s| &**s),
readme: new_crate.readme.as_ref().map(|s| &**s),
repository: new_crate.repository.as_ref().map(|s| &**s),
repository: repo,
license: new_crate.license.as_ref().map(|s| &**s),
max_upload_size: None,
};
Expand Down Expand Up @@ -1028,7 +1029,7 @@ pub fn new(req: &mut Request) -> CargoResult<Response> {

// Render the README for this crate
let readme = match new_crate.readme.as_ref() {
Some(readme) => Some(render::markdown_to_html(&**readme)?),
Some(readme) => Some(render::markdown_to_html(&**readme, repo)?),
None => None,
};

Expand Down
130 changes: 100 additions & 30 deletions src/render.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
use ammonia::Ammonia;
use ammonia::{Builder, UrlRelative};
use comrak;
use std::borrow::Cow;
use url::Url;

use util::CargoResult;

/// Context for markdown to HTML rendering.
#[allow(missing_debug_implementations)]
pub struct MarkdownRenderer<'a> {
html_sanitizer: Ammonia<'a>,
html_sanitizer: Builder<'a>,
}

impl<'a> MarkdownRenderer<'a> {
/// Creates a new renderer instance.
pub fn new() -> MarkdownRenderer<'a> {
///
/// Per `markdown_to_html`, `base_url` is the base URL prepended to any
/// relative links in the input document. See that function for more detail.
fn new(base_url: Option<&'a str>) -> MarkdownRenderer<'a> {
let tags = [
"a",
"b",
Expand Down Expand Up @@ -54,7 +59,6 @@ impl<'a> MarkdownRenderer<'a> {
.collect();
let tag_attributes = [
("a", ["href", "target"].iter().cloned().collect()),
("code", ["class"].iter().cloned().collect()),
(
"img",
["width", "height", "src", "alt", "align"]
Expand Down Expand Up @@ -94,21 +98,59 @@ impl<'a> MarkdownRenderer<'a> {
].iter()
.cloned()
.collect();
let html_sanitizer = Ammonia {
link_rel: Some("nofollow noopener noreferrer"),
keep_cleaned_elements: true,
tags: tags,
tag_attributes: tag_attributes,
allowed_classes: allowed_classes,
..Ammonia::default()

let sanitizer_base_url = base_url.map(|s| s.to_string());

fn constrain_closure<F>(f: F) -> F
where
F: for<'a> Fn(&'a str) -> Option<Cow<'a, str>> + Send + Sync,
{
f
}

let relative_url_sanitizer = constrain_closure(move |url| {
let mut new_url = sanitizer_base_url.clone().unwrap();
if !new_url.ends_with('/') {
new_url.push('/');
}
new_url += "blob/master";
if !url.starts_with('/') {
new_url.push('/');
}
new_url += url;
Some(Cow::Owned(new_url))
});

let use_relative = if let Some(base_url) = base_url {
if let Ok(url) = Url::parse(base_url) {
url.host_str() == Some("github.com") || url.host_str() == Some("gitlab.com")
|| url.host_str() == Some("bitbucket.org")
} else {
false
}
} else {
false
};

let mut html_sanitizer = Builder::new();
html_sanitizer
.link_rel(Some("nofollow noopener noreferrer"))
.tags(tags)
.tag_attributes(tag_attributes)
.allowed_classes(allowed_classes)
.url_relative(if use_relative {
UrlRelative::Custom(Box::new(relative_url_sanitizer))
} else {
UrlRelative::Deny
});

MarkdownRenderer {
html_sanitizer: html_sanitizer,
}
}

/// Renders the given markdown to HTML using the current settings.
pub fn to_html(&self, text: &str) -> CargoResult<String> {
fn to_html(&self, text: &str) -> CargoResult<String> {
let options = comrak::ComrakOptions {
ext_autolink: true,
ext_strikethrough: true,
Expand All @@ -118,13 +160,7 @@ impl<'a> MarkdownRenderer<'a> {
..comrak::ComrakOptions::default()
};
let rendered = comrak::markdown_to_html(text, &options);
Ok(self.html_sanitizer.clean(&rendered))
}
}

impl<'a> Default for MarkdownRenderer<'a> {
fn default() -> Self {
Self::new()
Ok(self.html_sanitizer.clean(&rendered).to_string())
}
}

Expand All @@ -133,16 +169,21 @@ impl<'a> Default for MarkdownRenderer<'a> {
/// The returned text should not contain any harmful HTML tag or attribute (such as iframe,
/// onclick, onmouseover, etc.).
///
/// The `base_url` parameter will be used as the base for any relative links found in the
/// Markdown, as long as its host part is github.com, gitlab.com, or bitbucket.org. The
/// supplied URL will be used as a directory base whether or not the relative link is
/// prefixed with '/'. If `None` is passed, relative links will be omitted.
///
/// # Examples
///
/// ```
/// use render::markdown_to_html;
///
/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!";
/// let rendered = markdown_to_html(text)?;
/// let rendered = markdown_to_html(text, None)?;
/// ```
pub fn markdown_to_html(text: &str) -> CargoResult<String> {
let renderer = MarkdownRenderer::new();
pub fn markdown_to_html(text: &str, base_url: Option<&str>) -> CargoResult<String> {
let renderer = MarkdownRenderer::new(base_url);
renderer.to_html(text)
}

Expand All @@ -153,14 +194,14 @@ mod tests {
#[test]
fn empty_text() {
let text = "";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(result, "");
}

#[test]
fn text_with_script_tag() {
let text = "foo_readme\n\n<script>alert('Hello World')</script>";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(
result,
"<p>foo_readme</p>\n&lt;script&gt;alert(\'Hello World\')&lt;/script&gt;\n"
Expand All @@ -170,7 +211,7 @@ mod tests {
#[test]
fn text_with_iframe_tag() {
let text = "foo_readme\n\n<iframe>alert('Hello World')</iframe>";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(
result,
"<p>foo_readme</p>\n&lt;iframe&gt;alert(\'Hello World\')&lt;/iframe&gt;\n"
Expand All @@ -180,14 +221,14 @@ mod tests {
#[test]
fn text_with_unknown_tag() {
let text = "foo_readme\n\n<unknown>alert('Hello World')</unknown>";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(result, "<p>foo_readme</p>\n<p>alert(\'Hello World\')</p>\n");
}

#[test]
fn text_with_inline_javascript() {
let text = r#"foo_readme\n\n<a href="https://crates.io/crates/cargo-registry" onclick="window.alert('Got you')">Crate page</a>"#;
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(
result,
"<p>foo_readme\\n\\n<a href=\"https://crates.io/crates/cargo-registry\" rel=\"nofollow noopener noreferrer\">Crate page</a></p>\n"
Expand All @@ -199,7 +240,7 @@ mod tests {
#[test]
fn text_with_fancy_single_quotes() {
let text = r#"wb’"#;
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(result, "<p>wb’</p>\n");
}

Expand All @@ -208,14 +249,43 @@ mod tests {
let code_block = r#"```rust \
println!("Hello World"); \
```"#;
let result = markdown_to_html(code_block).unwrap();
let result = markdown_to_html(code_block, None).unwrap();
assert!(result.contains("<code class=\"language-rust\">"));
}

#[test]
fn text_with_forbidden_class_attribute() {
let text = "<p class='bad-class'>Hello World!</p>";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(result, "<p>Hello World!</p>\n");
}

#[test]
fn relative_links() {
let absolute = "[hi](/hi)";
let relative = "[there](there)";

for url in &[
"https://github.com/rust-lang/test",
"https://github.com/rust-lang/test/",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we have gitlab.com & bitbucket.org tests in here to avoid regressions ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, let's.

] {
let result = markdown_to_html(absolute, Some(url)).unwrap();
assert_eq!(
result,
"<p><a href=\"https://github.com/rust-lang/test/blob/master/hi\" rel=\"nofollow noopener noreferrer\">hi</a></p>\n"
);

let result = markdown_to_html(relative, Some(url)).unwrap();
assert_eq!(
result,
"<p><a href=\"https://github.com/rust-lang/test/blob/master/there\" rel=\"nofollow noopener noreferrer\">there</a></p>\n"
);
}

let result = markdown_to_html(absolute, Some("https://google.com/")).unwrap();
assert_eq!(
result,
"<p><a rel=\"nofollow noopener noreferrer\">hi</a></p>\n"
);
}
}