Skip to content

Commit 1441957

Browse files
kjnilssonmergify[bot]
authored andcommitted
mc: increase utf8 scanning limit for longstr conversions.
The AMQP 0.9.1 longstr type is problematic as it can contain arbitrary binary data but is typically used for utf8 by users. The current conversion into AMQP avoids scanning arbitrarily large longstr to see if they only contain valid utf8 by treating all longstr data longer than 255 bytes as binary. This is in hindsight too strict and thus this commit increases the scanning limit to 4096 bytes - enough to cover the vast majority of AMQP 0.9.1 header values. This change also conversts the AMQP binary types into longstr to ensure that existing data (held in streams for example) is converted to an AMQP 0.9.1 type most likely what the user intended. (cherry picked from commit 131379a)
1 parent fdd441b commit 1441957

File tree

2 files changed

+12
-8
lines changed

2 files changed

+12
-8
lines changed

deps/rabbit/src/mc_amqpl.erl

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
-define(AMQP10_MESSAGE_ANNOTATIONS_HEADER, <<"x-amqp-1.0-message-annotations">>).
4141
-define(PROTOMOD, rabbit_framing_amqp_0_9_1).
4242
-define(CLASS_ID, 60).
43+
-define(LONGSTR_UTF8_LIMIT, 4096).
4344

4445
-opaque state() :: #content{}.
4546

@@ -534,16 +535,19 @@ wrap(_Type, undefined) ->
534535
wrap(Type, Val) ->
535536
{Type, Val}.
536537

537-
from_091(longstr, V) ->
538-
case mc_util:is_valid_shortstr(V) of
538+
from_091(longstr, V)
539+
when is_binary(V) andalso
540+
byte_size(V) =< ?LONGSTR_UTF8_LIMIT ->
541+
%% if a longstr is longer than 4096 bytes we just assume it is binary
542+
%% it _may_ still be valid utf8 but checking this for every longstr header
543+
%% value is going to be excessively slow
544+
case mc_util:is_utf8_no_null(V) of
539545
true ->
540546
{utf8, V};
541547
false ->
542-
%% if a string is longer than 255 bytes we just assume it is binary
543-
%% it _may_ still be valid utf8 but checking this is going to be
544-
%% excessively slow
545548
{binary, V}
546549
end;
550+
from_091(longstr, V) -> {binary, V};
547551
from_091(long, V) -> {long, V};
548552
from_091(unsignedbyte, V) -> {ubyte, V};
549553
from_091(short, V) -> {short, V};
@@ -614,7 +618,7 @@ to_091(Key, {int, V}) -> {Key, signedint, V};
614618
to_091(Key, {double, V}) -> {Key, double, V};
615619
to_091(Key, {float, V}) -> {Key, float, V};
616620
to_091(Key, {timestamp, V}) -> {Key, timestamp, V div 1000};
617-
to_091(Key, {binary, V}) -> {Key, binary, V};
621+
to_091(Key, {binary, V}) -> {Key, longstr, V};
618622
to_091(Key, {boolean, V}) -> {Key, bool, V};
619623
to_091(Key, true) -> {Key, bool, true};
620624
to_091(Key, false) -> {Key, bool, false};
@@ -637,7 +641,7 @@ to_091({int, V}) -> {signedint, V};
637641
to_091({double, V}) -> {double, V};
638642
to_091({float, V}) -> {float, V};
639643
to_091({timestamp, V}) -> {timestamp, V div 1000};
640-
to_091({binary, V}) -> {binary, V};
644+
to_091({binary, V}) -> {longstr, V};
641645
to_091({boolean, V}) -> {bool, V};
642646
to_091(true) -> {bool, true};
643647
to_091(false) -> {bool, false};

deps/rabbit/test/mc_unit_SUITE.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ amqp_amqpl(_Config) ->
663663
?assertMatch({_, long, 5}, header(<<"long">>, HL)),
664664
?assertMatch({_, long, 5}, header(<<"ulong">>, HL)),
665665
?assertMatch({_, longstr, <<"a-string">>}, header(<<"utf8">>, HL)),
666-
?assertMatch({_, binary, <<"data">>}, header(<<"binary">>, HL)),
666+
?assertMatch({_, longstr, <<"data">>}, header(<<"binary">>, HL)),
667667
?assertMatch({_, longstr, <<"symbol">>}, header(<<"symbol">>, HL)),
668668
?assertMatch({_, unsignedbyte, 1}, header(<<"ubyte">>, HL)),
669669
?assertMatch({_, short, 2}, header(<<"short">>, HL)),

0 commit comments

Comments
 (0)