Skip to content

Relative links in README #1131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Oct 29, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ serde = "1.0.0"
clippy = { version = "=0.0.162", optional = true }
chrono = { version = "0.4.0", features = ["serde"] }
comrak = { version = "0.2.3", default-features = false }
ammonia = { git = "https://github.com/notriddle/ammonia" }
ammonia = { git = "https://github.com/notriddle/ammonia", rev = "3d4e4073f8cdb7c60203b9036c09f4385a2fdbbd" }
docopt = "0.8.1"
itertools = "0.6.0"
lettre = "0.6"
Expand Down
12 changes: 11 additions & 1 deletion src/bin/render-readmes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,15 @@ fn get_readme(config: &Config, version: &Version, krate_name: &str) -> Option<St
manifest.package.readme.unwrap()
);
let contents = find_file_by_path(&mut entries, Path::new(&path), &version, &krate_name);
readme_to_html(&contents, &path).expect(&format!(
readme_to_html(
&contents,
manifest
.package
.readme_file
.as_ref()
.map_or("README.md", |e| &**e),
manifest.package.repository.as_ref().map(|e| &**e),
).expect(&format!(
"[{}-{}] Couldn't render README",
krate_name,
version.num
Expand All @@ -265,6 +273,8 @@ fn get_readme(config: &Config, version: &Version, krate_name: &str) -> Option<St
/// The fields of a crate manifest's package section that `get_readme` reads
/// when rendering a README.
#[derive(Deserialize)]
struct Package {
/// README location from the manifest; `get_readme` unwraps it while building
/// the path it hands to `find_file_by_path`.
readme: Option<String>,
/// File name passed to `readme_to_html`; `"README.md"` is substituted when absent.
readme_file: Option<String>,
/// Repository URL passed to `readme_to_html` as the base for relative links.
repository: Option<String>,
}
#[derive(Deserialize)]
struct Manifest {
Expand Down
4 changes: 3 additions & 1 deletion src/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub fn publish(req: &mut Request) -> CargoResult<Response> {

let name = &*new_crate.name;
let vers = &*new_crate.vers;
let repo = new_crate.repository.as_ref().map(|s| &**s);
let features = new_crate
.features
.iter()
Expand Down Expand Up @@ -67,7 +68,7 @@ pub fn publish(req: &mut Request) -> CargoResult<Response> {
documentation: new_crate.documentation.as_ref().map(|s| &**s),
readme: new_crate.readme.as_ref().map(|s| &**s),
readme_file: new_crate.readme_file.as_ref().map(|s| &**s),
repository: new_crate.repository.as_ref().map(|s| &**s),
repository: repo,
license: new_crate.license.as_ref().map(|s| &**s),
max_upload_size: None,
};
Expand Down Expand Up @@ -128,6 +129,7 @@ pub fn publish(req: &mut Request) -> CargoResult<Response> {
Some(readme) => Some(render::readme_to_html(
&**readme,
new_crate.readme_file.as_ref().map_or("README.md", |s| &**s),
repo,
)?),
None => None,
};
Expand Down
168 changes: 140 additions & 28 deletions src/render.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use ammonia::Builder;
use ammonia::{Builder, UrlRelative};
use comrak;
use htmlescape::encode_minimal;
use std::borrow::Cow;
use url::Url;

use util::CargoResult;

Expand All @@ -12,7 +14,10 @@ struct MarkdownRenderer<'a> {

impl<'a> MarkdownRenderer<'a> {
/// Creates a new renderer instance.
fn new() -> MarkdownRenderer<'a> {
///
/// Per `readme_to_html`, `base_url` is the base URL prepended to any
/// relative links in the input document. See that function for more detail.
fn new(base_url: Option<&'a str>) -> MarkdownRenderer<'a> {
let tags = [
"a",
"b",
Expand Down Expand Up @@ -94,13 +99,68 @@ impl<'a> MarkdownRenderer<'a> {
].iter()
.cloned()
.collect();

let sanitizer_base_url = base_url.map(|s| s.to_string());

// Identity helper: passing a closure through it pins the closure's type to
// the bound below, which is the shape the HTML sanitizer callbacks must have.
fn constrain_closure<C>(closure: C) -> C
where
    C: for<'u> Fn(&'u str) -> Option<Cow<'u, str>> + Send + Sync,
{
    closure
}

// Callback used when there is no base URL to resolve against: same-page
// fragment links (`#...`) pass through untouched, every other relative URL
// is rejected.
let unrelative_url_sanitizer = constrain_closure(|url| {
    if url.starts_with('#') {
        Some(Cow::Borrowed(url))
    } else {
        None
    }
});

// Callback used when a base URL is available. Fragment links are left alone;
// any other relative URL is rewritten to `<base>/blob/master/<url>`, with
// exactly one '/' on each side of the `blob/master` segment.
// NOTE(review): `blob/master` is hard-coded for every accepted host — confirm
// that this path layout resolves on all of them.
let relative_url_sanitizer = constrain_closure(move |url| {
    if url.starts_with('#') {
        return Some(Cow::Borrowed(url));
    }

    // The captured value is expected to be `Some` whenever this callback is
    // installed; a `None` here panics, exactly as before.
    let base = sanitizer_base_url.as_ref().unwrap();
    let base_separator = if base.ends_with('/') { "" } else { "/" };
    let path_separator = if url.starts_with('/') { "" } else { "/" };

    Some(Cow::Owned(format!(
        "{}{}blob/master{}{}",
        base, base_separator, path_separator, url
    )))
});

// Relative links are only rewritten when a base URL was supplied, it parses,
// and its host is one of the three accepted code-hosting sites. A missing or
// unparsable URL, or any other host, turns rewriting off.
let use_relative = base_url
    .and_then(|raw| Url::parse(raw).ok())
    .map_or(false, |parsed| match parsed.host_str() {
        Some("github.com") | Some("gitlab.com") | Some("bitbucket.org") => true,
        _ => false,
    });

// Configure the ammonia sanitizer that is stored in the renderer.
let mut html_sanitizer = Builder::new();
html_sanitizer
// `rel` value the sanitizer attaches to links in the output.
.link_rel(Some("nofollow noopener noreferrer"))
.tags(tags)
.tag_attributes(tag_attributes)
.allowed_classes(allowed_classes)
// Relative URLs always go through a custom callback: the rewriting one when
// the base URL's host was accepted (`use_relative`), otherwise the one that
// keeps only `#fragment` links.
.url_relative(if use_relative {
UrlRelative::Custom(Box::new(relative_url_sanitizer))
} else {
UrlRelative::Custom(Box::new(unrelative_url_sanitizer))
})
// Prefix applied to `id` attributes, e.g. `my-crate` becomes `user-content-my-crate`.
.id_prefix(Some("user-content-"));

MarkdownRenderer {
html_sanitizer: html_sanitizer,
}
Expand All @@ -122,15 +182,10 @@ impl<'a> MarkdownRenderer<'a> {
}
}

impl<'a> Default for MarkdownRenderer<'a> {
fn default() -> Self {
Self::new()
}
}

/// Renders Markdown text to sanitized HTML.
fn markdown_to_html(text: &str) -> CargoResult<String> {
let renderer = MarkdownRenderer::new();
/// Renders Markdown text to sanitized HTML with a given `base_url`.
/// See `readme_to_html` for the interpretation of `base_url`.
fn markdown_to_html(text: &str, base_url: Option<&str>) -> CargoResult<String> {
let renderer = MarkdownRenderer::new(base_url);
renderer.to_html(text)
}

Expand All @@ -147,24 +202,29 @@ static MARKDOWN_EXTENSIONS: [&'static str; 7] = [
];

/// Renders a readme to sanitized HTML. An appropriate rendering method is chosen depending
/// on the extension of the supplied filename.
/// on the extension of the supplied `filename`.
///
/// The returned text should not contain any harmful HTML tag or attribute (such as iframe,
/// The returned text will not contain any harmful HTML tag or attribute (such as iframe,
/// onclick, onmouseover, etc.).
///
/// The `base_url` parameter will be used as the base for any relative links found in the
/// Markdown, as long as its host part is github.com, gitlab.com, or bitbucket.org. The
/// supplied URL will be used as a directory base whether or not the relative link is
/// prefixed with '/'. If `None` is passed, or the host is not one of those three, the
/// target of each relative link is removed (the link text is kept). Links that start
/// with '#' (same-page fragments) are always preserved unchanged.
///
/// # Examples
///
/// ```
/// use render::readme_to_html;
///
/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!";
/// let rendered = readme_to_html(text, "README.md")?;
/// let rendered = readme_to_html(text, "README.md", None)?;
/// ```
pub fn readme_to_html(text: &str, filename: &str) -> CargoResult<String> {
pub fn readme_to_html(text: &str, filename: &str, base_url: Option<&str>) -> CargoResult<String> {
let filename = filename.to_lowercase();

if !filename.contains('.') || MARKDOWN_EXTENSIONS.iter().any(|e| filename.ends_with(e)) {
return markdown_to_html(text);
return markdown_to_html(text, base_url);
}

Ok(encode_minimal(text).replace("\n", "<br>\n"))
Expand All @@ -177,14 +237,14 @@ mod tests {
#[test]
fn empty_text() {
let text = "";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(result, "");
}

#[test]
fn text_with_script_tag() {
let text = "foo_readme\n\n<script>alert('Hello World')</script>";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(
result,
"<p>foo_readme</p>\n&lt;script&gt;alert(\'Hello World\')&lt;/script&gt;\n"
Expand All @@ -194,7 +254,7 @@ mod tests {
#[test]
fn text_with_iframe_tag() {
let text = "foo_readme\n\n<iframe>alert('Hello World')</iframe>";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(
result,
"<p>foo_readme</p>\n&lt;iframe&gt;alert(\'Hello World\')&lt;/iframe&gt;\n"
Expand All @@ -204,14 +264,14 @@ mod tests {
#[test]
fn text_with_unknown_tag() {
let text = "foo_readme\n\n<unknown>alert('Hello World')</unknown>";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(result, "<p>foo_readme</p>\n<p>alert(\'Hello World\')</p>\n");
}

#[test]
fn text_with_inline_javascript() {
let text = r#"foo_readme\n\n<a href="https://crates.io/crates/cargo-registry" onclick="window.alert('Got you')">Crate page</a>"#;
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(
result,
"<p>foo_readme\\n\\n<a href=\"https://crates.io/crates/cargo-registry\" rel=\"nofollow noopener noreferrer\">Crate page</a></p>\n"
Expand All @@ -223,7 +283,7 @@ mod tests {
#[test]
fn text_with_fancy_single_quotes() {
let text = r#"wb’"#;
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(result, "<p>wb’</p>\n");
}

Expand All @@ -232,22 +292,74 @@ mod tests {
let code_block = r#"```rust \
println!("Hello World"); \
```"#;
let result = markdown_to_html(code_block).unwrap();
let result = markdown_to_html(code_block, None).unwrap();
assert!(result.contains("<code class=\"language-rust\">"));
}

#[test]
fn text_with_forbidden_class_attribute() {
let text = "<p class='bad-class'>Hello World!</p>";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(result, "<p>Hello World!</p>\n");
}

/// Relative links are resolved against the repository URL for each accepted
/// host, with or without a trailing slash on that URL, and lose their `href`
/// for any other host.
#[test]
fn relative_links() {
    // One root-relative link and one plain relative link.
    let absolute = "[hi](/hi)";
    let relative = "[there](there)";

    let hosts = ["github.com", "gitlab.com", "bitbucket.org"];
    // Repository URL first with, then without, a trailing slash.
    let trailing = ["/", ""];

    for host in hosts.iter() {
        for suffix in trailing.iter() {
            let url = format!("https://{}/rust-lang/test{}", host, suffix);

            let expected_absolute = format!(
                "<p><a href=\"https://{}/rust-lang/test/blob/master/hi\" rel=\"nofollow noopener noreferrer\">hi</a></p>\n",
                host
            );
            let result = markdown_to_html(absolute, Some(&url)).unwrap();
            assert_eq!(result, expected_absolute);

            let expected_relative = format!(
                "<p><a href=\"https://{}/rust-lang/test/blob/master/there\" rel=\"nofollow noopener noreferrer\">there</a></p>\n",
                host
            );
            let result = markdown_to_html(relative, Some(&url)).unwrap();
            assert_eq!(result, expected_relative);
        }
    }

    // A host outside the accepted list: the anchor survives without an href.
    let result = markdown_to_html(absolute, Some("https://google.com/")).unwrap();
    assert_eq!(
        result,
        "<p><a rel=\"nofollow noopener noreferrer\">hi</a></p>\n"
    );
}

/// Fully-qualified link and image URLs must come out untouched even when a
/// repository base URL is supplied.
#[test]
fn absolute_links_dont_get_resolved() {
    let expected = "<p><a href=\"https://crates.io/crates/clap\" rel=\"nofollow noopener noreferrer\"><img src=\"https://img.shields.io/crates/v/clap.svg\" alt=\"Crates.io\"></a></p>\n";

    let rendered = markdown_to_html(
        "[![Crates.io](https://img.shields.io/crates/v/clap.svg)](https://crates.io/crates/clap)",
        Some("https://github.com/kbknapp/clap-rs/"),
    ).unwrap();

    assert_eq!(rendered, expected);
}

#[test]
fn readme_to_html_renders_markdown() {
for f in &["README", "readme.md", "README.MARKDOWN", "whatever.mkd"] {
assert_eq!(
readme_to_html("*lobster*", f).unwrap(),
readme_to_html("*lobster*", f, None).unwrap(),
"<p><em>lobster</em></p>\n"
);
}
Expand All @@ -257,7 +369,7 @@ mod tests {
fn readme_to_html_renders_other_things() {
for f in &["readme.exe", "readem.org", "blah.adoc"] {
assert_eq!(
readme_to_html("<script>lobster</script>\n\nis my friend\n", f).unwrap(),
readme_to_html("<script>lobster</script>\n\nis my friend\n", f, None).unwrap(),
"&lt;script&gt;lobster&lt;/script&gt;<br>\n<br>\nis my friend<br>\n"
);
}
Expand All @@ -266,7 +378,7 @@ mod tests {
#[test]
fn header_has_tags() {
let text = "# My crate\n\nHello, world!\n";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(
result,
"<h1><a href=\"#my-crate\" id=\"user-content-my-crate\" rel=\"nofollow noopener noreferrer\"></a>My crate</h1>\n<p>Hello, world!</p>\n"
Expand All @@ -276,7 +388,7 @@ mod tests {
#[test]
fn manual_anchor_is_sanitized() {
let text = "<h1><a href=\"#my-crate\" id=\"my-crate\"></a>My crate</h1>\n<p>Hello, world!</p>\n";
let result = markdown_to_html(text).unwrap();
let result = markdown_to_html(text, None).unwrap();
assert_eq!(
result,
"<h1><a href=\"#my-crate\" id=\"user-content-my-crate\" rel=\"nofollow noopener noreferrer\"></a>My crate</h1>\n<p>Hello, world!</p>\n"
Expand Down