|
16 | 16 |
|
17 | 17 | /**
|
18 | 18 | * A {@code UnicodeEscaper} that escapes some set of Java characters using the URI percent encoding
|
19 |
| - * scheme. The set of safe characters (those which remain unescaped) can be specified on |
| 19 | + * scheme. The set of safe characters (those which remain unescaped) is specified on |
20 | 20 | * construction.
|
21 | 21 | *
|
22 | 22 | * <p>For details on escaping URIs for use in web pages, see <a
|
|
29 | 29 | * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the
|
30 | 30 | * same.
|
31 | 31 | * <li>Any additionally specified safe characters remain the same.
|
32 |
| - * <li>If {@code plusForSpace} was specified, the space character " " is converted into a plus |
| 32 | + * <li>If {@code plusForSpace} is true, the space character " " is converted into a plus |
33 | 33 | * sign "+".
|
34 |
| - * <li>All other characters are converted into one or more bytes using UTF-8 encoding and each |
| 34 | + * <li>All other characters are converted into one or more bytes using UTF-8 encoding. Each |
35 | 35 | * byte is then represented by the 3-character string "%XY", where "XY" is the two-digit,
|
36 | 36 | * uppercase, hexadecimal representation of the byte value.
|
37 | 37 | * </ul>
|
38 | 38 | *
|
39 |
| - * <p>RFC 2396 specifies the set of unreserved characters as "-", "_", ".", "!", "~", "*", "'", "(" |
40 |
| - * and ")". It goes on to state: |
| 39 | + * <p>RFC 3986 defines the set of unreserved characters as "-", "_", "~", and ".". |
| 40 | + * It goes on to state: |
41 | 41 | *
|
42 |
| - * <p><i>Unreserved characters can be escaped without changing the semantics of the URI, but this |
43 |
| - * should not be done unless the URI is being used in a context that does not allow the unescaped |
44 |
| - * character to appear.</i> |
45 |
| - * |
46 |
| - * <p>For performance reasons the only currently supported character encoding of this class is |
47 |
| - * UTF-8. |
| 42 | + * <p><q>URIs that differ in the replacement of an unreserved character with |
| 43 | + * its corresponding percent-encoded US-ASCII octet are equivalent: they |
| 44 | + * identify the same resource. However, URI comparison implementations |
| 45 | + * do not always perform normalization prior to comparison (see Section |
| 46 | + * 6). For consistency, percent-encoded octets in the ranges of ALPHA |
| 47 | + * (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E), |
| 48 | + * underscore (%5F), or tilde (%7E) should not be created by URI |
| 49 | + * producers and, when found in a URI, should be decoded to their |
| 50 | + * corresponding unreserved characters by URI normalizers.</q> |
48 | 51 | *
|
49 | 52 | * <p><b>Note</b>: This escaper produces uppercase hexadecimal sequences. From <a
|
50 |
| - * href="http://tools.ietf.org/html/rfc3986">RFC 3986</a>:<br> |
| 53 | + * href="https://tools.ietf.org/html/rfc3986">RFC 3986</a>:<br> |
51 | 54 | * <i>"URI producers and normalizers should use uppercase hexadecimal digits for all
|
52 | 55 | * percent-encodings."</i>
|
53 | 56 | *
|
@@ -100,21 +103,39 @@ public class PercentEscaper extends UnicodeEscaper {
|
100 | 103 | * escaped.
|
101 | 104 | */
|
102 | 105 | private final boolean[] safeOctets;
|
103 |
| - |
| 106 | + |
104 | 107 | /**
|
105 |
| - * Constructs a URI escaper with the specified safe characters and optional handling of the space |
106 |
| - * character. |
| 108 | + * Constructs a URI escaper with the specified safe characters. The space |
| 109 | + * character is escaped to %20 in accordance with the URI specification. |
107 | 110 | *
|
108 | 111 | * @param safeChars a non null string specifying additional safe characters for this escaper (the
|
109 | 112 | * ranges 0..9, a..z and A..Z are always safe and should not be specified here)
|
| 113 | + * @throws IllegalArgumentException if any of the parameters are invalid |
| 114 | + */ |
| 115 | + public PercentEscaper(String safeChars) { |
| 116 | + this(safeChars, false); |
| 117 | + } |
| 118 | + |
| 119 | + /** |
| 120 | + * Constructs a URI escaper that converts all but the specified safe characters |
| 121 | + * into hexadecimal percent escapes. Optionally space characters can be converted into |
| 122 | + * a plus sign {@code +} instead of {@code %20}.
| 123 | + * |
| 124 | + * @param safeChars a non null string specifying additional safe characters for this escaper. The |
| 125 | + * ranges 0..9, a..z and A..Z are always safe and should not be specified here. |
110 | 126 | * @param plusForSpace true if ASCII space should be escaped to {@code +} rather than {@code %20}
|
111 |
| - * @throws IllegalArgumentException if any of the parameters were invalid |
| 127 | + * @throws IllegalArgumentException if safeChars includes characters that are always safe or |
| 128 | + * characters that must always be escaped |
| 129 | + * @deprecated use {@code PercentEscaper(String safeChars)} instead which is the same as invoking |
| 130 | + * this method with plusForSpace set to false. Escaping spaces as plus signs does not |
| 131 | + * conform to the URI specification. |
112 | 132 | */
|
| 133 | + @Deprecated |
113 | 134 | public PercentEscaper(String safeChars, boolean plusForSpace) {
|
114 | 135 | // Avoid any misunderstandings about the behavior of this escaper
|
115 | 136 | if (safeChars.matches(".*[0-9A-Za-z].*")) {
|
116 | 137 | throw new IllegalArgumentException(
|
117 |
| - "Alphanumeric characters are always 'safe' and should not be " + "explicitly specified"); |
| 138 | + "Alphanumeric ASCII characters are always 'safe' and should not be " + "escaped."); |
118 | 139 | }
|
119 | 140 | // Avoid ambiguous parameters. Safe characters are never modified so if
|
120 | 141 | // space is a safe character then setting plusForSpace is meaningless.
|
|
0 commit comments