Skip to content

Commit 20ac5f6

Browse files
authored
Prefer more spec compliant escaping (#959)
1 parent 8e31198 commit 20ac5f6

File tree

2 files changed

+46
-17
lines changed

2 files changed

+46
-17
lines changed

google-http-client/src/main/java/com/google/api/client/util/escape/PercentEscaper.java

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/**
1818
* A {@code UnicodeEscaper} that escapes some set of Java characters using the URI percent encoding
19-
* scheme. The set of safe characters (those which remain unescaped) can be specified on
19+
* scheme. The set of safe characters (those which remain unescaped) is specified on
2020
* construction.
2121
*
2222
* <p>For details on escaping URIs for use in web pages, see <a
@@ -29,25 +29,28 @@
2929
* <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the
3030
* same.
3131
* <li>Any additionally specified safe characters remain the same.
32-
* <li>If {@code plusForSpace} was specified, the space character " " is converted into a plus
32+
* <li>If {@code plusForSpace} is true, the space character " " is converted into a plus
3333
* sign "+".
34-
* <li>All other characters are converted into one or more bytes using UTF-8 encoding and each
34+
* <li>All other characters are converted into one or more bytes using UTF-8 encoding. Each
3535
* byte is then represented by the 3-character string "%XY", where "XY" is the two-digit,
3636
* uppercase, hexadecimal representation of the byte value.
3737
* </ul>
3838
*
39-
* <p>RFC 2396 specifies the set of unreserved characters as "-", "_", ".", "!", "~", "*", "'", "("
40-
* and ")". It goes on to state:
39+
* <p>RFC 3986 defines the unreserved characters as ALPHA, DIGIT, "-", "_", "~", and ".".
40+
* It goes on to state:
4141
*
42-
* <p><i>Unreserved characters can be escaped without changing the semantics of the URI, but this
43-
* should not be done unless the URI is being used in a context that does not allow the unescaped
44-
* character to appear.</i>
45-
*
46-
* <p>For performance reasons the only currently supported character encoding of this class is
47-
* UTF-8.
42+
* <p><q>URIs that differ in the replacement of an unreserved character with
43+
* its corresponding percent-encoded US-ASCII octet are equivalent: they
44+
* identify the same resource. However, URI comparison implementations
45+
* do not always perform normalization prior to comparison (see Section
46+
* 6). For consistency, percent-encoded octets in the ranges of ALPHA
47+
* (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
48+
* underscore (%5F), or tilde (%7E) should not be created by URI
49+
* producers and, when found in a URI, should be decoded to their
50+
* corresponding unreserved characters by URI normalizers.</q>
4851
*
4952
* <p><b>Note</b>: This escaper produces uppercase hexadecimal sequences. From <a
50-
* href="http://tools.ietf.org/html/rfc3986">RFC 3986</a>:<br>
53+
* href="https://tools.ietf.org/html/rfc3986">RFC 3986</a>:<br>
5154
* <i>"URI producers and normalizers should use uppercase hexadecimal digits for all
5255
* percent-encodings."</i>
5356
*
@@ -100,21 +103,39 @@ public class PercentEscaper extends UnicodeEscaper {
100103
* escaped.
101104
*/
102105
private final boolean[] safeOctets;
103-
106+
104107
/**
105-
* Constructs a URI escaper with the specified safe characters and optional handling of the space
106-
* character.
108+
* Constructs a URI escaper with the specified safe characters. The space
109+
* character is escaped to %20 in accordance with the URI specification.
107110
*
108111
* @param safeChars a non null string specifying additional safe characters for this escaper (the
109112
* ranges 0..9, a..z and A..Z are always safe and should not be specified here)
113+
* @throws IllegalArgumentException if any of the parameters are invalid
114+
*/
115+
public PercentEscaper(String safeChars) {
116+
this(safeChars, false);
117+
}
118+
119+
/**
120+
* Constructs a URI escaper that converts all but the specified safe characters
121+
* into hexadecimal percent escapes. Optionally space characters can be converted into
122+
* a plus sign {@code +} instead of {@code %20}.
123+
*
124+
* @param safeChars a non null string specifying additional safe characters for this escaper. The
125+
* ranges 0..9, a..z and A..Z are always safe and should not be specified here.
110126
* @param plusForSpace true if ASCII space should be escaped to {@code +} rather than {@code %20}
111-
* @throws IllegalArgumentException if any of the parameters were invalid
127+
* @throws IllegalArgumentException if safeChars includes characters that are always safe or
128+
* characters that must always be escaped
129+
* @deprecated use {@code PercentEscaper(String safeChars)} instead, which is the same as invoking
130+
* this constructor with plusForSpace set to false. Escaping spaces as plus signs does not
131+
* conform to the URI specification.
112132
*/
133+
@Deprecated
113134
public PercentEscaper(String safeChars, boolean plusForSpace) {
114135
// Avoid any misunderstandings about the behavior of this escaper
115136
if (safeChars.matches(".*[0-9A-Za-z].*")) {
116137
throw new IllegalArgumentException(
117-
"Alphanumeric characters are always 'safe' and should not be " + "explicitly specified");
138+
"Alphanumeric ASCII characters are always 'safe' and should not be " + "escaped.");
118139
}
119140
// Avoid ambiguous parameters. Safe characters are never modified so if
120141
// space is a safe character then setting plusForSpace is meaningless.

google-http-client/src/test/java/com/google/api/client/util/escape/PercentEscaperTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,12 @@ public void testEscapeSpace() {
2626
String actual = escaper.escape("Hello there");
2727
Assert.assertEquals("Hello%20there", actual);
2828
}
29+
30+
@Test
31+
public void testEscapeSpaceDefault() {
32+
PercentEscaper escaper =
33+
new PercentEscaper(PercentEscaper.SAFE_PLUS_RESERVED_CHARS_URLENCODER);
34+
String actual = escaper.escape("Hello there");
35+
Assert.assertEquals("Hello%20there", actual);
36+
}
2937
}

0 commit comments

Comments
 (0)