Skip to content

Commit 1e6c9b0

Browse files
mraleph authored and Commit Queue committed
[vm/libs] Improve JsonUtf8Decoder performance.
This CL focuses on improving parsing of white space (between JSON tokens) and of simple strings which don't contain escape sequences.

Improvements are achieved by changing the code to a table-driven implementation instead of an if-cascade: we have a 256-element table which stores attributes for characters (e.g. whether a character is white space or a terminal token for a simple string) and use this table to decide whether to advance through characters or to stop the loop and do something else.

We also suppress bounds checks and interrupt checks in tight loops - in tight loops like these, bounds checks can cost 30% in overhead.

This CL brings a 28% geomean improvement on benchmarks from the linked issue. (All measurements are done in X64 Product AOT.)

Individual measurements are:

| Input JSON | ms/iter | vs HEAD | vs V8 |
| ---------- | ------- | ------- | ----- |
| apache_builds.json | 0.44 | 61.06% | 136.86% |
| canada.json | 31.04 | 96.54% | 187.15% |
| citm_catalog.json | 6.43 | 64.44% | 93.94% |
| github_events.json | 0.23 | 59.02% | 128.86% |
| google_maps_api_compact_response.json | 0.10 | 82.12% | 133.83% |
| google_maps_api_response.json | 0.12 | 68.79% | 140.07% |
| gsoc-2018.json | 9.25 | 44.89% | 147.43% |
| instruments.json | 1.08 | 70.18% | 167.38% |
| marine_ik.json | 21.07 | 88.25% | 142.58% |
| mesh.json | 4.51 | 94.56% | 136.57% |
| mesh.pretty.json | 9.97 | 83.76% | 193.79% |
| numbers.json | 0.57 | 91.88% | 83.37% |
| random.json | 3.79 | 78.32% | 107.18% |
| repeat.json | 0.06 | 71.51% | 118.47% |
| semanticscholar-corpus.json | 37.65 | 54.82% | 57.81% |
| tree-pretty.json | 0.17 | 68.68% | 162.33% |
| twitter_api_compact_response.json | 0.06 | 75.23% | 126.11% |
| twitter_api_response.json | 0.08 | 70.64% | 123.60% |
| twitterescaped.json | 3.88 | 84.66% | 177.94% |
| twitter.json | 3.54 | 73.01% | 105.33% |
| twitter_timeline.json | 0.37 | 81.52% | 271.37% |
| update-center.json | 2.85 | 66.75% | 89.01% |

vs HEAD (geomean): 72.94%
vs V8 (geomean): 130.99%
HEAD vs V8 (geomean): 179.59%

Issue #55522

TEST=covered by co19

Change-Id: Id673118c19250ab7781cc98c7656b972debc60ff
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/365803
Reviewed-by: Alexander Markov <[email protected]>
Commit-Queue: Slava Egorov <[email protected]>
1 parent da4bf60 commit 1e6c9b0

File tree

2 files changed

+126
-26
lines changed

2 files changed

+126
-26
lines changed

sdk/lib/_internal/vm/lib/convert_patch.dart

Lines changed: 118 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ import "dart:_internal"
1717
POWERS_OF_TEN,
1818
unsafeCast,
1919
writeIntoOneByteString,
20-
writeIntoTwoByteString;
20+
writeIntoTwoByteString,
21+
createOneByteStringFromCharacters;
2122

2223
import "dart:typed_data" show Uint8List, Uint16List;
2324

@@ -114,7 +115,7 @@ class _JsonListener {
114115
void popContainer() {
115116
value = currentContainer;
116117
currentContainer = stack.removeLast();
117-
if (currentContainer is Map) key = stack.removeLast() as String;
118+
if (currentContainer is Map) key = unsafeCast<String>(stack.removeLast());
118119
}
119120

120121
void handleString(String value) {
@@ -139,12 +140,12 @@ class _JsonListener {
139140
}
140141

141142
void propertyName() {
142-
key = value as String;
143+
key = unsafeCast<String>(value);
143144
value = null;
144145
}
145146

146147
void propertyValue() {
147-
var map = currentContainer as Map;
148+
var map = unsafeCast<Map>(currentContainer);
148149
var reviver = this.reviver;
149150
if (reviver != null) {
150151
value = reviver(key, value);
@@ -164,7 +165,7 @@ class _JsonListener {
164165
}
165166

166167
void arrayElement() {
167-
var list = currentContainer as List;
168+
var list = unsafeCast<List>(currentContainer);
168169
var reviver = this.reviver;
169170
if (reviver != null) {
170171
value = reviver(list.length, value);
@@ -535,6 +536,13 @@ mixin _ChunkedJsonParser<T> on _JsonParserWithListener {
535536
*/
536537
int getChar(int index);
537538

539+
/**
540+
* Returns [true] if [getChar] is returning UTF16 code units.
541+
*
542+
* Otherwise it is expected that [getChar] is returning UTF8 bytes.
543+
*/
544+
bool get isUtf16Input;
545+
538546
/**
539547
* Copy ASCII characters from start to end of chunk into a list.
540548
*
@@ -813,22 +821,35 @@ mixin _ChunkedJsonParser<T> on _JsonParserWithListener {
813821
* Starts parsing at [position] and continues until [chunkEnd].
814822
* Continues parsing where the previous chunk (if any) ended.
815823
*/
824+
@pragma('vm:unsafe:no-interrupts')
825+
@pragma('vm:unsafe:no-bounds-checks')
816826
void parse(int position) {
817827
int length = chunkEnd;
818828
if (partialState != NO_PARTIAL) {
819829
position = parsePartial(position);
820830
if (position == length) return;
821831
}
832+
final charAttributes = _characterAttributes;
833+
822834
int state = this.state;
835+
outer:
823836
while (position < length) {
824-
int char = getChar(position);
825-
switch (char) {
826-
case SPACE:
827-
case CARRIAGE_RETURN:
828-
case NEWLINE:
829-
case TAB:
830-
position++;
837+
int char = 0;
838+
do {
839+
char = getChar(position);
840+
if (isUtf16Input && char > 0xFF) {
831841
break;
842+
}
843+
if ((charAttributes.codeUnitAt(char) & CHAR_WHITESPACE) == 0) {
844+
break;
845+
}
846+
position++;
847+
if (position >= length) {
848+
break outer;
849+
}
850+
} while (true);
851+
852+
switch (char) {
832853
case QUOTE:
833854
if ((state & ALLOW_STRING_MASK) != 0) fail(position);
834855
state |= VALUE_READ_BITS;
@@ -988,35 +1009,80 @@ mixin _ChunkedJsonParser<T> on _JsonParserWithListener {
9881009
return length;
9891010
}
9901011

1012+
static const int CHAR_SIMPLE_STRING_END = 1;
1013+
static const int CHAR_WHITESPACE = 2;
1014+
1015+
/**
1016+
* [_characterAttributes] string was generated using the following code:
1017+
*
1018+
* ```
1019+
* int $(String ch) => ch.codeUnitAt(0);
1020+
* final list = Uint8List(256);
1021+
* for (var i = 0; i < $(' '); i++) {
1022+
* list[i] |= CHAR_SIMPLE_STRING_END;
1023+
* }
1024+
* list[$('"')] |= CHAR_SIMPLE_STRING_END;
1025+
* list[$('\\')] |= CHAR_SIMPLE_STRING_END;
1026+
* list[$(' ')] |= CHAR_WHITESPACE;
1027+
* list[$('\r')] |= CHAR_WHITESPACE;
1028+
* list[$('\n')] |= CHAR_WHITESPACE;
1029+
* list[$('\t')] |= CHAR_WHITESPACE;
1030+
* for (var i = 0; i < 256; i += 64) {
1031+
* print("'${String.fromCharCodes([
1032+
* for (var v in list.skip(i).take(64)) v + $(' '),
1033+
* ])}'");
1034+
* }
1035+
* ```
1036+
*/
1037+
static const String _characterAttributes =
1038+
'!!!!!!!!!##!!#!!!!!!!!!!!!!!!!!!" ! '
1039+
' ! '
1040+
' '
1041+
' ';
1042+
9911043
/**
9921044
* Parses a string value.
9931045
*
9941046
* Initial [position] is right after the initial quote.
9951047
* Returned position right after the final quote.
9961048
*/
1049+
@pragma('vm:unsafe:no-interrupts')
1050+
@pragma('vm:unsafe:no-bounds-checks')
9971051
int parseString(int position) {
1052+
final charAttributes = _characterAttributes;
1053+
9981054
// Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"'
9991055
// Initial position is right after first '"'.
10001056
int start = position;
10011057
int end = chunkEnd;
10021058
int bits = 0;
1003-
while (position < end) {
1004-
int char = getChar(position++);
1005-
bits |= char; // Includes final '"', but that never matters.
1006-
// BACKSLASH is larger than QUOTE and SPACE.
1007-
if (char > BACKSLASH) {
1008-
continue;
1059+
int char = 0;
1060+
if (position < end) {
1061+
do {
1062+
// Caveat: do not combine the following two lines together. It helps
1063+
// compiler to generate better code (it currently can't reorder operations
1064+
// to reduce register pressure).
1065+
char = getChar(position);
1066+
position++;
1067+
bits |= char; // Includes final '"', but that never matters.
1068+
if (isUtf16Input && char > 0xFF) {
1069+
continue;
1070+
}
1071+
if ((charAttributes.codeUnitAt(char) & CHAR_SIMPLE_STRING_END) != 0) {
1072+
break;
1073+
}
1074+
} while (position < end);
1075+
if (char == QUOTE) {
1076+
int sliceEnd = position - 1;
1077+
listener.handleString(getString(start, sliceEnd, bits));
1078+
return sliceEnd + 1;
10091079
}
10101080
if (char == BACKSLASH) {
1011-
beginString();
10121081
int sliceEnd = position - 1;
1082+
beginString();
10131083
if (start < sliceEnd) addSliceToString(start, sliceEnd);
10141084
return parseStringToBuffer(sliceEnd);
10151085
}
1016-
if (char == QUOTE) {
1017-
listener.handleString(getString(start, position - 1, bits));
1018-
return position;
1019-
}
10201086
if (char < SPACE) {
10211087
fail(position - 1, "Control character in string");
10221088
}
@@ -1065,7 +1131,11 @@ mixin _ChunkedJsonParser<T> on _JsonParserWithListener {
10651131
* This function scans through the string literal for escapes, and copies
10661132
* slices of non-escape characters using [addSliceToString].
10671133
*/
1134+
@pragma('vm:unsafe:no-interrupts')
1135+
@pragma('vm:unsafe:no-bounds-checks')
10681136
int parseStringToBuffer(int position) {
1137+
final charAttributes = _characterAttributes;
1138+
10691139
int end = chunkEnd;
10701140
int start = position;
10711141
while (true) {
@@ -1075,11 +1145,23 @@ mixin _ChunkedJsonParser<T> on _JsonParserWithListener {
10751145
}
10761146
return chunkString(STR_PLAIN);
10771147
}
1078-
int char = getChar(position++);
1079-
if (char > BACKSLASH) continue;
1148+
1149+
int char = 0;
1150+
do {
1151+
char = getChar(position);
1152+
position++;
1153+
if (isUtf16Input && char > 0xFF) {
1154+
continue;
1155+
}
1156+
if ((charAttributes.codeUnitAt(char) & CHAR_SIMPLE_STRING_END) != 0) {
1157+
break;
1158+
}
1159+
} while (position < end);
1160+
10801161
if (char < SPACE) {
10811162
fail(position - 1); // Control character in string.
10821163
}
1164+
10831165
if (char == QUOTE) {
10841166
int quotePosition = position - 1;
10851167
if (quotePosition > start) {
@@ -1088,13 +1170,16 @@ mixin _ChunkedJsonParser<T> on _JsonParserWithListener {
10881170
listener.handleString(endString());
10891171
return position;
10901172
}
1173+
10911174
if (char != BACKSLASH) {
10921175
continue;
10931176
}
1177+
10941178
// Handle escape.
10951179
if (position - 1 > start) {
10961180
addSliceToString(start, position - 1);
10971181
}
1182+
10981183
if (position == end) return chunkString(STR_ESCAPE);
10991184
position = parseStringEscape(position);
11001185
if (position == end) return position;
@@ -1391,6 +1476,10 @@ class _JsonStringParser extends _JsonParserWithListener
13911476

13921477
_JsonStringParser(_JsonListener listener) : super(listener);
13931478

1479+
@pragma('vm:prefer-inline')
1480+
bool get isUtf16Input => true;
1481+
1482+
@pragma('vm:prefer-inline')
13941483
int getChar(int position) => chunk.codeUnitAt(position);
13951484

13961485
String getString(int start, int end, int bits) {
@@ -1512,13 +1601,16 @@ class _JsonUtf8Parser extends _JsonParserWithListener
15121601
parse(start);
15131602
}
15141603

1604+
@pragma('vm:prefer-inline')
1605+
bool get isUtf16Input => false;
1606+
15151607
@pragma('vm:prefer-inline')
15161608
int getChar(int position) => chunk[position];
15171609

15181610
String getString(int start, int end, int bits) {
15191611
const int maxAsciiChar = 0x7f;
15201612
if (bits <= maxAsciiChar) {
1521-
return new String.fromCharCodes(chunk, start, end);
1613+
return createOneByteStringFromCharacters(chunk, start, end);
15221614
}
15231615
beginString();
15241616
if (start < end) addSliceToString(start, end);

sdk/lib/_internal/vm/lib/internal_patch.dart

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ void copyRangeFromUint8ListToOneByteString(
6262
}
6363
}
6464

65+
@pragma("vm:prefer-inline")
66+
String createOneByteStringFromCharacters(Uint8List bytes, int start, int end) {
67+
final len = end - start;
68+
final s = allocateOneByteString(len);
69+
copyRangeFromUint8ListToOneByteString(bytes, s, start, 0, len);
70+
return s;
71+
}
72+
6573
/// The returned string is a [_TwoByteString] with uninitialized content.
6674
@pragma("vm:recognized", "asm-intrinsic")
6775
@pragma("vm:external-name", "Internal_allocateTwoByteString")

0 commit comments

Comments
 (0)