1
- use ammonia:: Builder ;
1
+ use ammonia:: { Builder , UrlRelative } ;
2
2
use comrak;
3
3
use htmlescape:: encode_minimal;
4
+ use std:: borrow:: Cow ;
5
+ use url:: Url ;
4
6
5
7
use util:: CargoResult ;
6
8
@@ -12,7 +14,10 @@ struct MarkdownRenderer<'a> {
12
14
13
15
impl < ' a > MarkdownRenderer < ' a > {
14
16
/// Creates a new renderer instance.
15
- fn new ( ) -> MarkdownRenderer < ' a > {
17
+ ///
18
+ /// Per `readme_to_html`, `base_url` is the base URL prepended to any
19
+ /// relative links in the input document. See that function for more detail.
20
+ fn new ( base_url : Option < & ' a str > ) -> MarkdownRenderer < ' a > {
16
21
let tags = [
17
22
"a" ,
18
23
"b" ,
@@ -94,13 +99,68 @@ impl<'a> MarkdownRenderer<'a> {
94
99
] . iter ( )
95
100
. cloned ( )
96
101
. collect ( ) ;
102
+
103
+ let sanitizer_base_url = base_url. map ( |s| s. to_string ( ) ) ;
104
+
105
// Identity helper that pins a closure to the signature used for the HTML
// sanitizer's URL callbacks: it takes a borrowed URL of any lifetime and
// optionally returns a borrowed or owned replacement. Passing a closure
// through here makes inference choose that higher-ranked signature.
fn constrain_closure<F>(closure: F) -> F
where
    F: for<'url> Fn(&'url str) -> Option<Cow<'url, str>> + Send + Sync,
{
    closure
}
112
+
113
+ let unrelative_url_sanitizer = constrain_closure ( |url| {
114
+ // We have no base URL; allow fragment links only.
115
+ if url. starts_with ( '#' ) {
116
+ return Some ( Cow :: Borrowed ( url) ) ;
117
+ }
118
+
119
+ None
120
+ } ) ;
121
+
122
+ let relative_url_sanitizer = constrain_closure ( move |url| {
123
+ // sanitizer_base_url is Some(String); use it to fix the relative URL.
124
+ if url. starts_with ( '#' ) {
125
+ return Some ( Cow :: Borrowed ( url) ) ;
126
+ }
127
+
128
+ let mut new_url = sanitizer_base_url. clone ( ) . unwrap ( ) ;
129
+ if !new_url. ends_with ( '/' ) {
130
+ new_url. push ( '/' ) ;
131
+ }
132
+ new_url += "blob/master" ;
133
+ if !url. starts_with ( '/' ) {
134
+ new_url. push ( '/' ) ;
135
+ }
136
+ new_url += url;
137
+ Some ( Cow :: Owned ( new_url) )
138
+ } ) ;
139
+
140
+ let use_relative = if let Some ( base_url) = base_url {
141
+ if let Ok ( url) = Url :: parse ( base_url) {
142
+ url. host_str ( ) == Some ( "github.com" ) || url. host_str ( ) == Some ( "gitlab.com" )
143
+ || url. host_str ( ) == Some ( "bitbucket.org" )
144
+ } else {
145
+ false
146
+ }
147
+ } else {
148
+ false
149
+ } ;
150
+
97
151
let mut html_sanitizer = Builder :: new ( ) ;
98
152
html_sanitizer
99
153
. link_rel ( Some ( "nofollow noopener noreferrer" ) )
100
154
. tags ( tags)
101
155
. tag_attributes ( tag_attributes)
102
156
. allowed_classes ( allowed_classes)
157
+ . url_relative ( if use_relative {
158
+ UrlRelative :: Custom ( Box :: new ( relative_url_sanitizer) )
159
+ } else {
160
+ UrlRelative :: Custom ( Box :: new ( unrelative_url_sanitizer) )
161
+ } )
103
162
. id_prefix ( Some ( "user-content-" ) ) ;
163
+
104
164
MarkdownRenderer {
105
165
html_sanitizer : html_sanitizer,
106
166
}
@@ -122,15 +182,10 @@ impl<'a> MarkdownRenderer<'a> {
122
182
}
123
183
}
124
184
125
- impl < ' a > Default for MarkdownRenderer < ' a > {
126
- fn default ( ) -> Self {
127
- Self :: new ( )
128
- }
129
- }
130
-
131
- /// Renders Markdown text to sanitized HTML.
132
- fn markdown_to_html ( text : & str ) -> CargoResult < String > {
133
- let renderer = MarkdownRenderer :: new ( ) ;
185
+ /// Renders Markdown text to sanitized HTML with a given `base_url`.
186
+ /// See `readme_to_html` for the interpretation of `base_url`.
187
+ fn markdown_to_html ( text : & str , base_url : Option < & str > ) -> CargoResult < String > {
188
+ let renderer = MarkdownRenderer :: new ( base_url) ;
134
189
renderer. to_html ( text)
135
190
}
136
191
@@ -147,24 +202,29 @@ static MARKDOWN_EXTENSIONS: [&'static str; 7] = [
147
202
] ;
148
203
149
204
/// Renders a readme to sanitized HTML. An appropriate rendering method is chosen depending
150
- /// on the extension of the supplied filename.
205
+ /// on the extension of the supplied `filename`.
151
206
///
152
- /// The returned text should not contain any harmful HTML tag or attribute (such as iframe,
207
+ /// The returned text will not contain any harmful HTML tag or attribute (such as iframe,
153
208
/// onclick, onmouseover, etc.).
154
209
///
210
+ /// The `base_url` parameter will be used as the base for any relative links found in the
211
+ /// Markdown, as long as its host part is github.com, gitlab.com, or bitbucket.org. The
212
+ /// supplied URL will be used as a directory base whether or not the relative link is
213
+ /// prefixed with '/'. If `None` is passed, relative links will be omitted.
214
+ ///
155
215
/// # Examples
156
216
///
157
217
/// ```
158
218
/// use render::readme_to_html;
159
219
///
160
220
/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!";
161
- /// let rendered = readme_to_html(text, "README.md")?;
221
+ /// let rendered = readme_to_html(text, "README.md", None )?;
162
222
/// ```
163
- pub fn readme_to_html ( text : & str , filename : & str ) -> CargoResult < String > {
223
+ pub fn readme_to_html ( text : & str , filename : & str , base_url : Option < & str > ) -> CargoResult < String > {
164
224
let filename = filename. to_lowercase ( ) ;
165
225
166
226
if !filename. contains ( '.' ) || MARKDOWN_EXTENSIONS . iter ( ) . any ( |e| filename. ends_with ( e) ) {
167
- return markdown_to_html ( text) ;
227
+ return markdown_to_html ( text, base_url ) ;
168
228
}
169
229
170
230
Ok ( encode_minimal ( text) . replace ( "\n " , "<br>\n " ) )
@@ -177,14 +237,14 @@ mod tests {
177
237
#[test]
fn empty_text() {
    // An empty document must render to an empty string.
    let rendered = markdown_to_html("", None).unwrap();
    assert_eq!(rendered, "");
}
183
243
184
244
#[ test]
185
245
fn text_with_script_tag ( ) {
186
246
let text = "foo_readme\n \n <script>alert('Hello World')</script>" ;
187
- let result = markdown_to_html ( text) . unwrap ( ) ;
247
+ let result = markdown_to_html ( text, None ) . unwrap ( ) ;
188
248
assert_eq ! (
189
249
result,
190
250
"<p>foo_readme</p>\n <script>alert(\' Hello World\' )</script>\n "
@@ -194,7 +254,7 @@ mod tests {
194
254
#[ test]
195
255
fn text_with_iframe_tag ( ) {
196
256
let text = "foo_readme\n \n <iframe>alert('Hello World')</iframe>" ;
197
- let result = markdown_to_html ( text) . unwrap ( ) ;
257
+ let result = markdown_to_html ( text, None ) . unwrap ( ) ;
198
258
assert_eq ! (
199
259
result,
200
260
"<p>foo_readme</p>\n <iframe>alert(\' Hello World\' )</iframe>\n "
@@ -204,14 +264,14 @@ mod tests {
204
264
#[test]
fn text_with_unknown_tag() {
    // The unrecognized `<unknown>` element is expected to be stripped while its
    // text content survives inside a paragraph.
    let text = "foo_readme\n\n<unknown>alert('Hello World')</unknown>";
    let result = markdown_to_html(text, None).unwrap();
    assert_eq!(result, "<p>foo_readme</p>\n<p>alert(\'Hello World\')</p>\n");
}
210
270
211
271
#[ test]
212
272
fn text_with_inline_javascript ( ) {
213
273
let text = r#"foo_readme\n\n<a href="https://crates.io/crates/cargo-registry" onclick="window.alert('Got you')">Crate page</a>"# ;
214
- let result = markdown_to_html ( text) . unwrap ( ) ;
274
+ let result = markdown_to_html ( text, None ) . unwrap ( ) ;
215
275
assert_eq ! (
216
276
result,
217
277
"<p>foo_readme\\ n\\ n<a href=\" https://crates.io/crates/cargo-registry\" rel=\" nofollow noopener noreferrer\" >Crate page</a></p>\n "
@@ -223,7 +283,7 @@ mod tests {
223
283
#[test]
fn text_with_fancy_single_quotes() {
    // A typographic apostrophe (U+2019) is expected to pass through unchanged.
    let text = r#"wb’"#;
    let result = markdown_to_html(text, None).unwrap();
    assert_eq!(result, "<p>wb’</p>\n");
}
229
289
@@ -232,22 +292,74 @@ mod tests {
232
292
let code_block = r#"```rust \
233
293
println!("Hello World"); \
234
294
```"# ;
235
- let result = markdown_to_html ( code_block) . unwrap ( ) ;
295
+ let result = markdown_to_html ( code_block, None ) . unwrap ( ) ;
236
296
assert ! ( result. contains( "<code class=\" language-rust\" >" ) ) ;
237
297
}
238
298
239
299
#[test]
fn text_with_forbidden_class_attribute() {
    // The `bad-class` class is expected to be removed from the paragraph tag.
    let text = "<p class='bad-class'>Hello World!</p>";
    let result = markdown_to_html(text, None).unwrap();
    assert_eq!(result, "<p>Hello World!</p>\n");
}
245
305
306
#[test]
fn relative_links() {
    let absolute = "[hi](/hi)";
    let relative = "[there](there)";

    // For each supported host, with and without a trailing slash on the base
    // URL, both link forms are expected to resolve under `<base>/blob/master/`.
    for host in &["github.com", "gitlab.com", "bitbucket.org"] {
        for &extra_slash in &[true, false] {
            let url = format!(
                "https://{}/rust-lang/test{}",
                host,
                if extra_slash { "/" } else { "" }
            );

            for &(markdown, label) in &[(absolute, "hi"), (relative, "there")] {
                let result = markdown_to_html(markdown, Some(url.as_str())).unwrap();
                assert_eq!(
                    result,
                    format!(
                        "<p><a href=\"https://{host}/rust-lang/test/blob/master/{label}\" rel=\"nofollow noopener noreferrer\">{label}</a></p>\n",
                        host = host,
                        label = label
                    )
                );
            }
        }
    }

    // With a base URL on any other host, the link is expected to lose its `href`.
    let result = markdown_to_html(absolute, Some("https://google.com/")).unwrap();
    assert_eq!(
        result,
        "<p><a rel=\"nofollow noopener noreferrer\">hi</a></p>\n"
    );
}
345
+
346
#[test]
fn absolute_links_dont_get_resolved() {
    // A badge image wrapped in a link. Both the link target and the image
    // source are absolute URLs and are expected to come out unchanged even
    // though a repository base URL is supplied.
    let readme_text =
        "[![Crates.io](https://img.shields.io/crates/v/clap.svg)](https://crates.io/crates/clap)";
    let repository = "https://github.com/kbknapp/clap-rs/";
    let result = markdown_to_html(readme_text, Some(repository)).unwrap();

    assert_eq!(
        result,
        "<p><a href=\"https://crates.io/crates/clap\" rel=\"nofollow noopener noreferrer\"><img src=\"https://img.shields.io/crates/v/clap.svg\" alt=\"Crates.io\"></a></p>\n"
    );
}
357
+
246
358
#[ test]
247
359
fn readme_to_html_renders_markdown ( ) {
248
360
for f in & [ "README" , "readme.md" , "README.MARKDOWN" , "whatever.mkd" ] {
249
361
assert_eq ! (
250
- readme_to_html( "*lobster*" , f) . unwrap( ) ,
362
+ readme_to_html( "*lobster*" , f, None ) . unwrap( ) ,
251
363
"<p><em>lobster</em></p>\n "
252
364
) ;
253
365
}
@@ -257,7 +369,7 @@ mod tests {
257
369
fn readme_to_html_renders_other_things ( ) {
258
370
for f in & [ "readme.exe" , "readem.org" , "blah.adoc" ] {
259
371
assert_eq ! (
260
- readme_to_html( "<script>lobster</script>\n \n is my friend\n " , f) . unwrap( ) ,
372
+ readme_to_html( "<script>lobster</script>\n \n is my friend\n " , f, None ) . unwrap( ) ,
261
373
"<script>lobster</script><br>\n <br>\n is my friend<br>\n "
262
374
) ;
263
375
}
@@ -266,7 +378,7 @@ mod tests {
266
378
#[ test]
267
379
fn header_has_tags ( ) {
268
380
let text = "# My crate\n \n Hello, world!\n " ;
269
- let result = markdown_to_html ( text) . unwrap ( ) ;
381
+ let result = markdown_to_html ( text, None ) . unwrap ( ) ;
270
382
assert_eq ! (
271
383
result,
272
384
"<h1><a href=\" #my-crate\" id=\" user-content-my-crate\" rel=\" nofollow noopener noreferrer\" ></a>My crate</h1>\n <p>Hello, world!</p>\n "
@@ -276,7 +388,7 @@ mod tests {
276
388
#[ test]
277
389
fn manual_anchor_is_sanitized ( ) {
278
390
let text = "<h1><a href=\" #my-crate\" id=\" my-crate\" ></a>My crate</h1>\n <p>Hello, world!</p>\n " ;
279
- let result = markdown_to_html ( text) . unwrap ( ) ;
391
+ let result = markdown_to_html ( text, None ) . unwrap ( ) ;
280
392
assert_eq ! (
281
393
result,
282
394
"<h1><a href=\" #my-crate\" id=\" user-content-my-crate\" rel=\" nofollow noopener noreferrer\" ></a>My crate</h1>\n <p>Hello, world!</p>\n "
0 commit comments