Skip to content

Commit 273254e

Browse files
committed
rustdoc: use smarter encoding for playground URL
The old way would compress okay with DEFLATE, but this version makes uncompressed docs smaller, which matters for memory usage and stuff like `cargo doc`. Try it out: <https://play.rust-lang.org/?code=fn+main()+{%0Alet+mut+v+=+Vec::new();%0Av.push(1+/+1);%0Aprintln!(%22{}%22,+v[0]);%0A}> In local testing, this change shrinks sample pages by anywhere between 0.45% and 5.0%. $ du -b after.dir/std/vec/struct.Vec.html before.dir/std/vec/struct.Vec.html 753023 after.dir/std/vec/struct.Vec.html 781842 before.dir/std/vec/struct.Vec.html 100*((753023-781842)/781842)=-3.7 $ du -b after.dir/std/num/struct.Wrapping.html before.dir/std/num/struct.Wrapping.html 3189989 after.dir/std/num/struct.Wrapping.html 3204351 before.dir/std/num/struct.Wrapping.html 100*((3189989-3204351)/3204351)=-0.45 $ du -b after.dir/std/keyword.match.html before.dir/std/keyword.match.html 8067 after.dir/std/keyword.match.html 8495 before.dir/std/keyword.match.html 100*((8067-8495)/8495)=-5.0 Gzipped tarball sizes seem shrunk, but not by much. $ du -s before.tar.gz after.tar.gz 69600 before.tar.gz 69492 after.tar.gz 100*((69492-69600)/69600)=-0.16
1 parent c8e6a9e commit 273254e

File tree

1 file changed

+26
-1
lines changed

1 file changed

+26
-1
lines changed

src/librustdoc/html/markdown.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,25 +296,50 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
296296
let channel = if test.contains("#![feature(") { "&amp;version=nightly" } else { "" };
297297

298298
// Returns `true` for bytes that can be copied into the playground URL's
// query string as-is; every other byte must be escaped by the caller.
//
// The accepted set is ASCII letters and digits plus a fixed list of
// punctuation bytes.
// See https://url.spec.whatwg.org/#query-percent-encode-set
// and https://url.spec.whatwg.org/#urlencoded-parsing
fn dont_escape(c: u8) -> bool {
    c.is_ascii_alphanumeric()
        || matches!(
            c,
            b'-' | b'_' | b'.' | b',' | b'~' | b'!' | b'\''
                | b'(' | b')' | b'[' | b']' | b'{' | b'}'
                | b'*' | b'/' | b'|' | b'^' | b'\\'
                | b';' | b':' | b'?' | b'<' | b'>'
                // As described in urlencoded-parsing, the first `=` is the
                // one that separates key from value. Following `=`s are
                // part of the value.
                | b'='
        )
}
314333
let mut test_escaped = String::new();
315334
for b in test.bytes() {
316335
if dont_escape(b) {
317336
test_escaped.push(char::from(b));
337+
} else if b == b' ' {
338+
// URL queries are decoded with + replaced with SP
339+
test_escaped.push('+');
340+
} else if b == b'%' {
341+
test_escaped.push('%');
342+
test_escaped.push('%');
318343
} else {
319344
write!(test_escaped, "%{:02X}", b).unwrap();
320345
}

0 commit comments

Comments
 (0)