Skip to content

Commit 7b18bd7

Browse files
committed
Enforce percent encoding
Partially copy file https://github.com/ninenines/cowlib/blob/optimise-urldecode/src/cow_uri.erl We use this copy because: 1. uri_string:unquote/1 is lax: It doesn't validate that characters that are required to be percent encoded are indeed percent encoded. In RabbitMQ, we want to enforce that proper percent encoding is done by AMQP clients. 2. uri_string:unquote/1 and cow_uri:urldecode/1 in cowlib v2.13.0 are both slow because they allocate a new binary for the common case where no character was percent encoded. When a new cowlib version is released, we should make app rabbit depend on app cowlib calling cow_uri:urldecode/1 and delete this file (rabbit_uri.erl).
1 parent 0de9591 commit 7b18bd7

File tree

6 files changed

+198
-40
lines changed

6 files changed

+198
-40
lines changed

deps/rabbit/app.bzl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ def all_beam_files(name = "all_beam_files"):
223223
"src/rabbit_trace.erl",
224224
"src/rabbit_tracking_store.erl",
225225
"src/rabbit_upgrade_preparation.erl",
226+
"src/rabbit_uri.erl",
226227
"src/rabbit_variable_queue.erl",
227228
"src/rabbit_version.erl",
228229
"src/rabbit_vhost.erl",
@@ -481,6 +482,7 @@ def all_test_beam_files(name = "all_test_beam_files"):
481482
"src/rabbit_trace.erl",
482483
"src/rabbit_tracking_store.erl",
483484
"src/rabbit_upgrade_preparation.erl",
485+
"src/rabbit_uri.erl",
484486
"src/rabbit_variable_queue.erl",
485487
"src/rabbit_version.erl",
486488
"src/rabbit_vhost.erl",
@@ -762,6 +764,7 @@ def all_srcs(name = "all_srcs"):
762764
"src/rabbit_tracking.erl",
763765
"src/rabbit_tracking_store.erl",
764766
"src/rabbit_upgrade_preparation.erl",
767+
"src/rabbit_uri.erl",
765768
"src/rabbit_variable_queue.erl",
766769
"src/rabbit_version.erl",
767770
"src/rabbit_vhost.erl",

deps/rabbit/src/rabbit_amqp_management.erl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ handle_http_req(<<"GET">>,
8080
_User,
8181
_ConnPid,
8282
PermCaches) ->
83-
QNameBin = uri_string:unquote(QNameBinQuoted),
83+
QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
8484
QName = queue_resource(Vhost, QNameBin),
8585
case rabbit_amqqueue:with(
8686
QName,
@@ -110,7 +110,7 @@ handle_http_req(HttpMethod = <<"PUT">>,
110110
exclusive := Exclusive,
111111
arguments := QArgs0
112112
} = decode_queue(ReqPayload),
113-
QNameBin = uri_string:unquote(QNameBinQuoted),
113+
QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
114114
Owner = case Exclusive of
115115
true -> ConnPid;
116116
false -> none
@@ -185,7 +185,7 @@ handle_http_req(<<"PUT">>,
185185
User = #user{username = Username},
186186
_ConnPid,
187187
{PermCache0, TopicPermCache}) ->
188-
XNameBin = uri_string:unquote(XNameBinQuoted),
188+
XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
189189
#{type := XTypeBin,
190190
durable := Durable,
191191
auto_delete := AutoDelete,
@@ -226,7 +226,7 @@ handle_http_req(<<"DELETE">>,
226226
User,
227227
ConnPid,
228228
{PermCache0, TopicPermCache}) ->
229-
QNameBin = uri_string:unquote(QNameBinQuoted),
229+
QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
230230
QName = queue_resource(Vhost, QNameBin),
231231
PermCache = check_resource_access(QName, read, User, PermCache0),
232232
try rabbit_amqqueue:with_exclusive_access_or_die(
@@ -254,7 +254,7 @@ handle_http_req(<<"DELETE">>,
254254
User = #user{username = Username},
255255
ConnPid,
256256
{PermCache0, TopicPermCache}) ->
257-
QNameBin = uri_string:unquote(QNameBinQuoted),
257+
QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
258258
QName = queue_resource(Vhost, QNameBin),
259259
ok = prohibit_cr_lf(QNameBin),
260260
PermCache = check_resource_access(QName, configure, User, PermCache0),
@@ -274,7 +274,7 @@ handle_http_req(<<"DELETE">>,
274274
User = #user{username = Username},
275275
_ConnPid,
276276
{PermCache0, TopicPermCache}) ->
277-
XNameBin = uri_string:unquote(XNameBinQuoted),
277+
XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
278278
XName = exchange_resource(Vhost, XNameBin),
279279
ok = prohibit_cr_lf(XNameBin),
280280
ok = prohibit_default_exchange(XName),
@@ -594,9 +594,9 @@ decode_binding_path_segment(Segment) ->
594594
end,
595595
case re:run(Segment, MP, [{capture, all_but_first, binary}]) of
596596
{match, [SrcQ, <<DstKindChar>>, DstQ, KeyQ, ArgsHash]} ->
597-
Src = uri_string:unquote(SrcQ),
598-
Dst = uri_string:unquote(DstQ),
599-
Key = uri_string:unquote(KeyQ),
597+
Src = rabbit_uri:urldecode(SrcQ),
598+
Dst = rabbit_uri:urldecode(DstQ),
599+
Key = rabbit_uri:urldecode(KeyQ),
600600
DstKind = destination_char_to_kind(DstKindChar),
601601
{Src, DstKind, Dst, Key, ArgsHash};
602602
nomatch ->

deps/rabbit/src/rabbit_amqp_session.erl

Lines changed: 22 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2430,10 +2430,14 @@ ensure_source(#'v1_0.source'{address = Address,
24302430
{utf8, <<"/q/", QNameBinQuoted/binary>>} ->
24312431
%% The only possible v2 source address format is:
24322432
%% /q/:queue
2433-
QNameBin = unquote(QNameBinQuoted),
2434-
QName = queue_resource(Vhost, QNameBin),
2435-
ok = exit_if_absent(QName),
2436-
{ok, QName, PermCache, TopicPermCache};
2433+
try rabbit_uri:urldecode(QNameBinQuoted) of
2434+
QNameBin ->
2435+
QName = queue_resource(Vhost, QNameBin),
2436+
ok = exit_if_absent(QName),
2437+
{ok, QName, PermCache, TopicPermCache}
2438+
catch error:_ ->
2439+
{error, {bad_address, Address}}
2440+
end;
24372441
{utf8, SourceAddr} ->
24382442
case address_v1_permitted() of
24392443
true ->
@@ -2576,7 +2580,13 @@ ensure_target_v2(undefined, _) ->
25762580
%% https://docs.oasis-open.org/amqp/anonterm/v1.0/cs01/anonterm-v1.0-cs01.html#doc-anonymous-relay
25772581
{ok, to, to, undefined}.
25782582

2579-
parse_target_v2_string(<<"/e/", Rest/binary>>) ->
2583+
parse_target_v2_string(String) ->
2584+
try parse_target_v2_string0(String)
2585+
catch error:_ ->
2586+
{error, bad_address}
2587+
end.
2588+
2589+
parse_target_v2_string0(<<"/e/", Rest/binary>>) ->
25802590
Key = cp_slash,
25812591
Pattern = try persistent_term:get(Key)
25822592
catch error:badarg ->
@@ -2590,22 +2600,22 @@ parse_target_v2_string(<<"/e/", Rest/binary>>) ->
25902600
[<<"amq.default">> | _] ->
25912601
{error, bad_address};
25922602
[XNameBinQuoted] ->
2593-
XNameBin = unquote(XNameBinQuoted),
2603+
XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
25942604
{ok, XNameBin, <<>>, undefined};
25952605
[XNameBinQuoted, RKeyQuoted] ->
2596-
XNameBin = unquote(XNameBinQuoted),
2597-
RKey = unquote(RKeyQuoted),
2606+
XNameBin = rabbit_uri:urldecode(XNameBinQuoted),
2607+
RKey = rabbit_uri:urldecode(RKeyQuoted),
25982608
{ok, XNameBin, RKey, undefined};
25992609
_ ->
26002610
{error, bad_address}
26012611
end;
2602-
parse_target_v2_string(<<"/q/">>) ->
2612+
parse_target_v2_string0(<<"/q/">>) ->
26032613
%% empty queue name is invalid
26042614
{error, bad_address};
2605-
parse_target_v2_string(<<"/q/", QNameBinQuoted/binary>>) ->
2606-
QNameBin = unquote(QNameBinQuoted),
2615+
parse_target_v2_string0(<<"/q/", QNameBinQuoted/binary>>) ->
2616+
QNameBin = rabbit_uri:urldecode(QNameBinQuoted),
26072617
{ok, ?DEFAULT_EXCHANGE_NAME, QNameBin, QNameBin};
2608-
parse_target_v2_string(_) ->
2618+
parse_target_v2_string0(_) ->
26092619
{error, bad_address}.
26102620

26112621
ensure_target_v1({utf8, Address}, Vhost, User, Durable, PermCache0) ->
@@ -2627,24 +2637,6 @@ ensure_target_v1({utf8, Address}, Vhost, User, Durable, PermCache0) ->
26272637
ensure_target_v1(Address, _, _, _, _) ->
26282638
{error, {bad_address, Address}}.
26292639

2630-
%% uri_string:unquote/1 is implemented inefficiently because it always creates
2631-
%% a new binary. We optimise for the common case: When no character is percent
2632-
%% encoded, we avoid a new binary being created.
2633-
unquote(Bin) ->
2634-
case is_quoted(Bin) of
2635-
true ->
2636-
uri_string:unquote(Bin);
2637-
false ->
2638-
Bin
2639-
end.
2640-
2641-
is_quoted(<<>>) ->
2642-
false;
2643-
is_quoted(<<$%, _/binary>>) ->
2644-
true;
2645-
is_quoted(<<_, Rest/binary>>) ->
2646-
is_quoted(Rest).
2647-
26482640
handle_outgoing_mgmt_link_flow_control(
26492641
#management_link{delivery_count = DeliveryCountSnd} = Link0,
26502642
#'v1_0.flow'{handle = Handle = ?UINT(HandleInt),

deps/rabbit/src/rabbit_uri.erl

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
%% Copyright (c) 2016-2024, Loïc Hoguin <[email protected]>
2+
%%
3+
%% Permission to use, copy, modify, and/or distribute this software for any
4+
%% purpose with or without fee is hereby granted, provided that the above
5+
%% copyright notice and this permission notice appear in all copies.
6+
%%
7+
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8+
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9+
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10+
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11+
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12+
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13+
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14+
15+
%% ------------------------------------------------------------------------- %%
16+
%% This file is a partial copy of
17+
%% https://github.com/ninenines/cowlib/blob/optimise-urldecode/src/cow_uri.erl
18+
%% We use this copy because:
19+
%% 1. uri_string:unquote/1 is lax: It doesn't validate that characters that are
20+
%% required to be percent encoded are indeed percent encoded. In RabbitMQ,
21+
%% we want to enforce that proper percent encoding is done by AMQP clients.
22+
%% 2. uri_string:unquote/1 and cow_uri:urldecode/1 in cowlib v2.13.0 are both
23+
%% slow because they allocate a new binary for the common case where no
24+
%% character was percent encoded.
25+
%% When a new cowlib version is released, we should make app rabbit depend on
26+
%% app cowlib calling cow_uri:urldecode/1 and delete this file (rabbit_uri.erl).
27+
%% ------------------------------------------------------------------------- %%
28+
29+
-module(rabbit_uri).
30+
31+
-export([urldecode/1]).
32+
33+
-define(UNHEX(H, L), (?UNHEX(H) bsl 4 bor ?UNHEX(L))).
34+
35+
-define(UNHEX(C),
36+
case C of
37+
$0 -> 0;
38+
$1 -> 1;
39+
$2 -> 2;
40+
$3 -> 3;
41+
$4 -> 4;
42+
$5 -> 5;
43+
$6 -> 6;
44+
$7 -> 7;
45+
$8 -> 8;
46+
$9 -> 9;
47+
$A -> 10;
48+
$B -> 11;
49+
$C -> 12;
50+
$D -> 13;
51+
$E -> 14;
52+
$F -> 15;
53+
$a -> 10;
54+
$b -> 11;
55+
$c -> 12;
56+
$d -> 13;
57+
$e -> 14;
58+
$f -> 15
59+
end
60+
).
61+
62+
%% Decode a percent encoded string. (RFC3986 2.1)
63+
%%
64+
%% Inspiration for some of the optimisations done here come
65+
%% from the new `json` module as it was in mid-2024.
66+
%%
67+
%% Possible input includes:
68+
%%
69+
%% * nothing encoded (no % character):
70+
%% We want to return the binary as-is to avoid an allocation.
71+
%%
72+
%% * small number of encoded characters:
73+
%% We can "skip" words of text.
74+
%%
75+
%% * mostly encoded characters (non-ascii languages)
76+
%% We can decode characters in bulk.
77+
78+
-define(IS_PLAIN(C), (
79+
(C =:= $!) orelse (C =:= $$) orelse (C =:= $&) orelse (C =:= $') orelse
80+
(C =:= $() orelse (C =:= $)) orelse (C =:= $*) orelse (C =:= $+) orelse
81+
(C =:= $,) orelse (C =:= $-) orelse (C =:= $.) orelse (C =:= $0) orelse
82+
(C =:= $1) orelse (C =:= $2) orelse (C =:= $3) orelse (C =:= $4) orelse
83+
(C =:= $5) orelse (C =:= $6) orelse (C =:= $7) orelse (C =:= $8) orelse
84+
(C =:= $9) orelse (C =:= $:) orelse (C =:= $;) orelse (C =:= $=) orelse
85+
(C =:= $@) orelse (C =:= $A) orelse (C =:= $B) orelse (C =:= $C) orelse
86+
(C =:= $D) orelse (C =:= $E) orelse (C =:= $F) orelse (C =:= $G) orelse
87+
(C =:= $H) orelse (C =:= $I) orelse (C =:= $J) orelse (C =:= $K) orelse
88+
(C =:= $L) orelse (C =:= $M) orelse (C =:= $N) orelse (C =:= $O) orelse
89+
(C =:= $P) orelse (C =:= $Q) orelse (C =:= $R) orelse (C =:= $S) orelse
90+
(C =:= $T) orelse (C =:= $U) orelse (C =:= $V) orelse (C =:= $W) orelse
91+
(C =:= $X) orelse (C =:= $Y) orelse (C =:= $Z) orelse (C =:= $_) orelse
92+
(C =:= $a) orelse (C =:= $b) orelse (C =:= $c) orelse (C =:= $d) orelse
93+
(C =:= $e) orelse (C =:= $f) orelse (C =:= $g) orelse (C =:= $h) orelse
94+
(C =:= $i) orelse (C =:= $j) orelse (C =:= $k) orelse (C =:= $l) orelse
95+
(C =:= $m) orelse (C =:= $n) orelse (C =:= $o) orelse (C =:= $p) orelse
96+
(C =:= $q) orelse (C =:= $r) orelse (C =:= $s) orelse (C =:= $t) orelse
97+
(C =:= $u) orelse (C =:= $v) orelse (C =:= $w) orelse (C =:= $x) orelse
98+
(C =:= $y) orelse (C =:= $z) orelse (C =:= $~)
99+
)).
100+
101+
urldecode(Binary) ->
102+
skip_dec(Binary, Binary, 0).
103+
104+
%% This functions helps avoid a binary allocation when
105+
%% there is nothing to decode.
106+
skip_dec(Binary, Orig, Len) ->
107+
case Binary of
108+
<<C1, C2, C3, C4, Rest/bits>>
109+
when ?IS_PLAIN(C1) andalso ?IS_PLAIN(C2)
110+
andalso ?IS_PLAIN(C3) andalso ?IS_PLAIN(C4) ->
111+
skip_dec(Rest, Orig, Len + 4);
112+
_ ->
113+
dec(Binary, [], Orig, 0, Len)
114+
end.
115+
116+
-dialyzer({no_improper_lists, [dec/5]}).
117+
%% This clause helps speed up decoding of highly encoded values.
118+
dec(<<$%, H1, L1, $%, H2, L2, $%, H3, L3, $%, H4, L4, Rest/bits>>, Acc, Orig, Skip, Len) ->
119+
C1 = ?UNHEX(H1, L1),
120+
C2 = ?UNHEX(H2, L2),
121+
C3 = ?UNHEX(H3, L3),
122+
C4 = ?UNHEX(H4, L4),
123+
case Len of
124+
0 ->
125+
dec(Rest, [Acc|<<C1, C2, C3, C4>>], Orig, Skip + 12, 0);
126+
_ ->
127+
Part = binary_part(Orig, Skip, Len),
128+
dec(Rest, [Acc, Part|<<C1, C2, C3, C4>>], Orig, Skip + Len + 12, 0)
129+
end;
130+
dec(<<$%, H, L, Rest/bits>>, Acc, Orig, Skip, Len) ->
131+
C = ?UNHEX(H, L),
132+
case Len of
133+
0 ->
134+
dec(Rest, [Acc|<<C>>], Orig, Skip + 3, 0);
135+
_ ->
136+
Part = binary_part(Orig, Skip, Len),
137+
dec(Rest, [Acc, Part|<<C>>], Orig, Skip + Len + 3, 0)
138+
end;
139+
%% This clause helps speed up decoding of barely encoded values.
140+
dec(<<C1, C2, C3, C4, Rest/bits>>, Acc, Orig, Skip, Len)
141+
when ?IS_PLAIN(C1) andalso ?IS_PLAIN(C2)
142+
andalso ?IS_PLAIN(C3) andalso ?IS_PLAIN(C4) ->
143+
dec(Rest, Acc, Orig, Skip, Len + 4);
144+
dec(<<C, Rest/bits>>, Acc, Orig, Skip, Len) when ?IS_PLAIN(C) ->
145+
dec(Rest, Acc, Orig, Skip, Len + 1);
146+
dec(<<>>, _, Orig, 0, _) ->
147+
Orig;
148+
dec(<<>>, Acc, _, _, 0) ->
149+
iolist_to_binary(Acc);
150+
dec(<<>>, Acc, Orig, Skip, Len) ->
151+
Part = binary_part(Orig, Skip, Len),
152+
iolist_to_binary([Acc|Part]);
153+
dec(_, _, Orig, Skip, Len) ->
154+
error({invalid_byte, binary:at(Orig, Skip + Len)}).

deps/rabbit/test/amqp_address_SUITE.erl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,15 @@ bad_v2_addresses() ->
436436
<<"/exchange//key/">>,
437437
<<"/exchange//key/mykey">>,
438438
<<"/exchange/amq.default/key/">>,
439-
<<"/exchange/amq.default/key/mykey">>
439+
<<"/exchange/amq.default/key/mykey">>,
440+
%% The following addresses should be percent encoded, but aren't.
441+
<<"/q/missing%encoding">>,
442+
<<"/q/missing/encoding">>,
443+
<<"/q/✋"/utf8>>,
444+
<<"/e/missing%encoding">>,
445+
<<"/e/missing/encoding/routingkey">>,
446+
<<"/e/exchange/missing%encoding">>,
447+
<<"/e/✋"/utf8>>
440448
].
441449

442450
%% Test v2 target address 'null' with an invalid 'to' addresses.

moduleindex.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,7 @@ rabbit:
723723
- rabbit_tracking
724724
- rabbit_tracking_store
725725
- rabbit_upgrade_preparation
726+
- rabbit_uri
726727
- rabbit_variable_queue
727728
- rabbit_version
728729
- rabbit_vhost

0 commit comments

Comments
 (0)