Skip to content

Commit 7d4a048

Browse files
codyoss authored and chingor13 committed
feat: decode uri path components correctly (#913)
* feat: decode uri path components correctly. The old implementation was incorrectly treating '+' as a space. Now the only things that get decoded in the path are URI-escaped sequences. Fixes #398
* tweak javadoc
* remove hardcoded string
1 parent 853ab4b commit 7d4a048

File tree

5 files changed

+106
-22
lines changed

5 files changed

+106
-22
lines changed

google-http-client/src/main/java/com/google/api/client/http/GenericUrl.java

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,10 @@ public class GenericUrl extends GenericData {
8181
private String fragment;
8282

8383
/**
84-
* If true, the URL string originally given is used as is (without encoding, decoding and
85-
* escaping) whenever referenced; otherwise, part of the URL string may be encoded or decoded as
86-
* deemed appropriate or necessary.
87-
*/
84+
* If true, the URL string originally given is used as is (without encoding, decoding and
85+
* escaping) whenever referenced; otherwise, part of the URL string may be encoded or decoded as
86+
* deemed appropriate or necessary.
87+
*/
8888
private boolean verbatim;
8989

9090
public GenericUrl() {}
@@ -112,20 +112,20 @@ public GenericUrl(String encodedUrl) {
112112
/**
113113
* Constructs from an encoded URL.
114114
*
115-
* <p>Any known query parameters with pre-defined fields as data keys are parsed based on
116-
* their data type. Any unrecognized query parameter are always parsed as a string.
115+
* <p>Any known query parameters with pre-defined fields as data keys are parsed based on their
116+
* data type. Any unrecognized query parameters are always parsed as strings.
117117
*
118118
* <p>Any {@link MalformedURLException} is wrapped in an {@link IllegalArgumentException}.
119119
*
120120
* @param encodedUrl encoded URL, including any existing query parameters that should be parsed
121-
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and escaping)
121+
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and
122+
* escaping)
122123
* @throws IllegalArgumentException if URL has a syntax error
123124
*/
124125
public GenericUrl(String encodedUrl, boolean verbatim) {
125126
this(parseURL(encodedUrl), verbatim);
126127
}
127128

128-
129129
/**
130130
* Constructs from a URI.
131131
*
@@ -140,7 +140,8 @@ public GenericUrl(URI uri) {
140140
* Constructs from a URI.
141141
*
142142
* @param uri URI
143-
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and escaping)
143+
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and
144+
* escaping)
144145
*/
145146
public GenericUrl(URI uri, boolean verbatim) {
146147
this(
@@ -168,7 +169,8 @@ public GenericUrl(URL url) {
168169
* Constructs from a URL.
169170
*
170171
* @param url URL
171-
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and escaping)
172+
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and
173+
* escaping)
172174
* @since 1.14
173175
*/
174176
public GenericUrl(URL url, boolean verbatim) {
@@ -209,7 +211,7 @@ private GenericUrl(
209211
UrlEncodedParser.parse(query, this);
210212
}
211213
this.userInfo = userInfo != null ? CharEscapers.decodeUri(userInfo) : null;
212-
}
214+
}
213215
}
214216

215217
@Override
@@ -567,10 +569,11 @@ public static List<String> toPathParts(String encodedPath) {
567569
*
568570
* @param encodedPath slash-prefixed encoded path, for example {@code
569571
* "/m8/feeds/contacts/default/full"}
570-
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and escaping)
571-
* @return path parts (decoded if not {@code verbatim}), with each part assumed to be preceded by a {@code '/'}, for example
572-
* {@code "", "m8", "feeds", "contacts", "default", "full"}, or {@code null} for {@code null}
573-
* or {@code ""} input
572+
* @param verbatim flag, to specify if URL should be used as is (without encoding, decoding and
573+
* escaping)
574+
* @return path parts (decoded if not {@code verbatim}), with each part assumed to be preceded by
575+
* a {@code '/'}, for example {@code "", "m8", "feeds", "contacts", "default", "full"}, or
576+
* {@code null} for {@code null} or {@code ""} input
574577
*/
575578
public static List<String> toPathParts(String encodedPath, boolean verbatim) {
576579
if (encodedPath == null || encodedPath.length() == 0) {
@@ -588,7 +591,7 @@ public static List<String> toPathParts(String encodedPath, boolean verbatim) {
588591
} else {
589592
sub = encodedPath.substring(cur);
590593
}
591-
result.add(verbatim ? sub : CharEscapers.decodeUri(sub));
594+
result.add(verbatim ? sub : CharEscapers.decodeUriPath(sub));
592595
cur = slash + 1;
593596
}
594597
return result;
@@ -608,13 +611,17 @@ private void appendRawPathFromParts(StringBuilder buf) {
608611
}
609612

610613
/** Adds query parameters from the provided entrySet into the buffer. */
611-
static void addQueryParams(Set<Entry<String, Object>> entrySet, StringBuilder buf, boolean verbatim) {
614+
static void addQueryParams(
615+
Set<Entry<String, Object>> entrySet, StringBuilder buf, boolean verbatim) {
612616
// (similar to UrlEncodedContent)
613617
boolean first = true;
614618
for (Map.Entry<String, Object> nameValueEntry : entrySet) {
615619
Object value = nameValueEntry.getValue();
616620
if (value != null) {
617-
String name = verbatim ? nameValueEntry.getKey() : CharEscapers.escapeUriQuery(nameValueEntry.getKey());
621+
String name =
622+
verbatim
623+
? nameValueEntry.getKey()
624+
: CharEscapers.escapeUriQuery(nameValueEntry.getKey());
618625
if (value instanceof Collection<?>) {
619626
Collection<?> collectionValue = (Collection<?>) value;
620627
for (Object repeatedValue : collectionValue) {
@@ -627,15 +634,17 @@ static void addQueryParams(Set<Entry<String, Object>> entrySet, StringBuilder bu
627634
}
628635
}
629636

630-
private static boolean appendParam(boolean first, StringBuilder buf, String name, Object value, boolean verbatim) {
637+
private static boolean appendParam(
638+
boolean first, StringBuilder buf, String name, Object value, boolean verbatim) {
631639
if (first) {
632640
first = false;
633641
buf.append('?');
634642
} else {
635643
buf.append('&');
636644
}
637645
buf.append(name);
638-
String stringValue = verbatim ? value.toString() : CharEscapers.escapeUriQuery(value.toString());
646+
String stringValue =
647+
verbatim ? value.toString() : CharEscapers.escapeUriQuery(value.toString());
639648
if (stringValue.length() != 0) {
640649
buf.append('=').append(stringValue);
641650
}

google-http-client/src/main/java/com/google/api/client/util/escape/CharEscapers.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
import java.io.UnsupportedEncodingException;
1818
import java.net.URLDecoder;
19+
import java.nio.ByteBuffer;
20+
import java.nio.charset.StandardCharsets;
1921

2022
/**
2123
* Utility functions for dealing with {@code CharEscaper}s, and some commonly used {@code
@@ -83,7 +85,29 @@ public static String escapeUri(String value) {
8385
*/
8486
public static String decodeUri(String uri) {
8587
try {
86-
return URLDecoder.decode(uri, "UTF-8");
88+
return URLDecoder.decode(uri, StandardCharsets.UTF_8.name());
89+
} catch (UnsupportedEncodingException e) {
90+
// UTF-8 encoding guaranteed to be supported by JVM
91+
throw new RuntimeException(e);
92+
}
93+
}
94+
95+
/**
96+
* Decodes the path component of a URI. This must be done via a method that does not try to
97+
* convert + into spaces (the behavior of {@link java.net.URLDecoder#decode(String, String)}). This
98+
* method transforms URI encoded values into their decoded symbols.
99+
*
100+
* <p>e.g. {@code decodeUriPath("%3Co%3E")} would return {@code "<o>"}
101+
*
102+
* @param path the value to be decoded
103+
* @return decoded version of {@code path}
104+
*/
105+
public static String decodeUriPath(String path) {
106+
if (path == null) {
107+
return null;
108+
}
109+
try {
110+
return URLDecoder.decode(path.replace("+", "%2B"), StandardCharsets.UTF_8.name());
87111
} catch (UnsupportedEncodingException e) {
88112
// UTF-8 encoding guaranteed to be supported by JVM
89113
throw new RuntimeException(e);

google-http-client/src/main/java/com/google/api/client/util/escape/PercentEscaper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ public class PercentEscaper extends UnicodeEscaper {
6262
* specified in RFC 3986. Note that some of these characters do need to be escaped when used in
6363
* other parts of the URI.
6464
*/
65-
public static final String SAFEPATHCHARS_URLENCODER = "-_.!~*'()@:$&,;=";
65+
public static final String SAFEPATHCHARS_URLENCODER = "-_.!~*'()@:$&,;=+";
6666

6767
/**
6868
* Contains the safe characters plus all reserved characters. This happens to be the safe path

google-http-client/src/test/java/com/google/api/client/http/GenericUrlTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,8 @@ public void testToPathParts() {
480480
subtestToPathParts("/path/to/resource", "", "path", "to", "resource");
481481
subtestToPathParts("/path/to/resource/", "", "path", "to", "resource", "");
482482
subtestToPathParts("/Go%3D%23%2F%25%26%20?%3Co%3Egle/2nd", "", "Go=#/%& ?<o>gle", "2nd");
483+
subtestToPathParts("/plus+test/resource", "", "plus+test", "resource");
484+
subtestToPathParts("/plus%2Btest/resource", "", "plus+test", "resource");
483485
}
484486

485487
private void subtestToPathParts(String encodedPath, String... expectedDecodedParts) {
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*
2+
* Copyright 2019 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5+
* in compliance with the License. You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software distributed under the License
10+
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11+
* or implied. See the License for the specific language governing permissions and limitations under
12+
* the License.
13+
*/
14+
15+
package com.google.api.client.util.escape;
16+
17+
import junit.framework.TestCase;
18+
19+
public class CharEscapersTest extends TestCase {
20+
21+
public void testDecodeUriPath() {
22+
subtestDecodeUriPath(null, null);
23+
subtestDecodeUriPath("", "");
24+
subtestDecodeUriPath("abc", "abc");
25+
subtestDecodeUriPath("a+b%2Bc", "a+b+c");
26+
subtestDecodeUriPath("Go%3D%23%2F%25%26%20?%3Co%3Egle", "Go=#/%& ?<o>gle");
27+
}
28+
29+
private void subtestDecodeUriPath(String input, String expected) {
30+
String actual = CharEscapers.decodeUriPath(input);
31+
assertEquals(expected, actual);
32+
}
33+
34+
public void testDecodeUri_IllegalArgumentException() {
35+
subtestDecodeUri_IllegalArgumentException("abc%-1abc");
36+
subtestDecodeUri_IllegalArgumentException("%JJ");
37+
subtestDecodeUri_IllegalArgumentException("abc%0");
38+
}
39+
40+
private void subtestDecodeUri_IllegalArgumentException(String input) {
41+
boolean thrown = false;
42+
try {
43+
CharEscapers.decodeUriPath(input);
44+
} catch (IllegalArgumentException e) {
45+
thrown = true;
46+
}
47+
assertTrue(thrown);
48+
}
49+
}

0 commit comments

Comments
 (0)