Skip to content

Commit 875a7ba

Browse files
authored
Lazy initialization of score and list objects in search. (#8809)
1 parent 61ed497 commit 875a7ba

File tree

6 files changed

+115
-106
lines changed

6 files changed

+115
-106
lines changed

app/lib/search/mem_index.dart

Lines changed: 79 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import 'package:meta/meta.dart';
1212
import 'package:pub_dev/service/topics/models.dart';
1313
import 'package:pub_dev/third_party/bit_array/bit_array.dart';
1414

15-
import '../shared/utils.dart' show boundedList;
1615
import 'models.dart';
1716
import 'search_service.dart';
1817
import 'text_utils.dart';
@@ -142,9 +141,9 @@ class InMemoryPackageIndex {
142141
return PackageSearchResult.empty();
143142
}
144143
return _bitArrayPool.withPoolItem(fn: (array) {
145-
return _scorePool.withPoolItem(
146-
fn: (score) {
147-
return _search(query, array, score);
144+
return _scorePool.withItemGetter(
145+
(scoreFn) {
146+
return _search(query, array, scoreFn);
148147
},
149148
);
150149
});
@@ -220,88 +219,107 @@ class InMemoryPackageIndex {
220219
PackageSearchResult _search(
221220
ServiceSearchQuery query,
222221
BitArray packages,
223-
IndexedScore<String> packageScores,
222+
IndexedScore<String> Function() scoreFn,
224223
) {
225224
final predicateFilterCount = _filterOnPredicates(query, packages);
226225
if (predicateFilterCount <= query.offset) {
227226
return PackageSearchResult.empty();
228227
}
229-
230-
// TODO: find a better way to handle predicate-only filtering and scoring
231-
for (final index in packages.asIntIterable()) {
232-
if (index >= _documents.length) break;
233-
packageScores.setValue(index, 1.0);
234-
}
228+
final bestNameMatch = _bestNameMatch(query);
229+
final bestNameIndex =
230+
bestNameMatch == null ? null : _nameToIndex[bestNameMatch];
235231

236232
// do text matching
237233
final parsedQueryText = query.parsedQuery.text;
238-
final textResults = _searchText(
239-
packageScores,
240-
packages,
241-
parsedQueryText,
242-
textMatchExtent: query.textMatchExtent ?? TextMatchExtent.api,
243-
);
234+
_TextResults? textResults;
235+
IndexedScore<String>? packageScores;
236+
237+
if (parsedQueryText != null && parsedQueryText.isNotEmpty) {
238+
packageScores = scoreFn();
239+
textResults = _searchText(
240+
packageScores,
241+
packages,
242+
parsedQueryText,
243+
textMatchExtent: query.textMatchExtent ?? TextMatchExtent.api,
244+
);
245+
if (textResults.hasNoMatch) {
246+
return textResults.errorMessage == null
247+
? PackageSearchResult.empty()
248+
: PackageSearchResult.error(
249+
errorMessage: textResults.errorMessage,
250+
statusCode: 500,
251+
);
252+
}
253+
}
244254

245-
final bestNameMatch = _bestNameMatch(query);
255+
// The function takes the document index as a parameter and returns whether
256+
// it should be in the result set. When text search is applied, the
257+
// [packageScores] contains the scores of the results; otherwise we are
258+
// using the bitarray index of the filtering.
259+
final selectFn = packageScores?.isPositive ?? packages.isSet;
260+
261+
// We know the total count at this point, so we don't need to build the fully
262+
// sorted result list to get the number. The best name match may insert an
263+
// extra item, which will be addressed after the ranking score is determined.
264+
var totalCount = packageScores?.positiveCount() ?? predicateFilterCount;
246265

247-
List<IndexedPackageHit> indexedHits;
248-
switch (query.effectiveOrder ?? SearchOrder.top) {
266+
Iterable<IndexedPackageHit> indexedHits;
267+
switch (query.effectiveOrder) {
249268
case SearchOrder.top:
250-
if (textResults == null) {
251-
indexedHits = _overallOrderedHits.whereInScores(packageScores);
269+
case SearchOrder.text:
270+
if (packageScores == null) {
271+
indexedHits = _overallOrderedHits.whereInScores(selectFn);
252272
break;
253273
}
254274

255-
/// Adjusted score takes the overall score and transforms
256-
/// it linearly into the [0.4-1.0] range, to allow better
257-
/// multiplication outcomes.
258-
packageScores.multiplyAllFromValues(_adjustedOverallScores);
259-
indexedHits = _rankWithValues(
260-
packageScores,
261-
requiredLengthThreshold: query.offset,
262-
bestNameMatch: bestNameMatch,
263-
);
264-
break;
265-
case SearchOrder.text:
275+
if (query.effectiveOrder == SearchOrder.top) {
276+
/// Adjusted score takes the overall score and transforms
277+
/// it linearly into the [0.4-1.0] range, to allow better
278+
/// multiplication outcomes.
279+
packageScores.multiplyAllFromValues(_adjustedOverallScores);
280+
}
281+
// Check whether the best name match will increase the total item count.
282+
if (bestNameIndex != null &&
283+
packageScores.getValue(bestNameIndex) <= 0.0) {
284+
totalCount++;
285+
}
266286
indexedHits = _rankWithValues(
267287
packageScores,
268288
requiredLengthThreshold: query.offset,
269-
bestNameMatch: bestNameMatch,
289+
bestNameIndex: bestNameIndex ?? -1,
270290
);
271291
break;
272292
case SearchOrder.created:
273-
indexedHits = _createdOrderedHits.whereInScores(packageScores);
293+
indexedHits = _createdOrderedHits.whereInScores(selectFn);
274294
break;
275295
case SearchOrder.updated:
276-
indexedHits = _updatedOrderedHits.whereInScores(packageScores);
296+
indexedHits = _updatedOrderedHits.whereInScores(selectFn);
277297
break;
278298
// ignore: deprecated_member_use
279299
case SearchOrder.popularity:
280300
case SearchOrder.downloads:
281-
indexedHits = _downloadsOrderedHits.whereInScores(packageScores);
301+
indexedHits = _downloadsOrderedHits.whereInScores(selectFn);
282302
break;
283303
case SearchOrder.like:
284-
indexedHits = _likesOrderedHits.whereInScores(packageScores);
304+
indexedHits = _likesOrderedHits.whereInScores(selectFn);
285305
break;
286306
case SearchOrder.points:
287-
indexedHits = _pointsOrderedHits.whereInScores(packageScores);
307+
indexedHits = _pointsOrderedHits.whereInScores(selectFn);
288308
break;
289309
case SearchOrder.trending:
290-
indexedHits = _trendingOrderedHits.whereInScores(packageScores);
310+
indexedHits = _trendingOrderedHits.whereInScores(selectFn);
291311
break;
292312
}
293313

294-
// bound by offset and limit (or randomize items)
295-
final totalCount = indexedHits.length;
296-
indexedHits =
297-
boundedList(indexedHits, offset: query.offset, limit: query.limit);
314+
// bound by offset and limit
315+
indexedHits = indexedHits.skip(query.offset).take(query.limit);
298316

299317
late List<PackageHit> packageHits;
300318
if ((query.textMatchExtent ?? TextMatchExtent.api).shouldMatchApi() &&
301319
textResults != null &&
302320
(textResults.topApiPages?.isNotEmpty ?? false)) {
303321
packageHits = indexedHits.map((ps) {
304-
final apiPages = textResults.topApiPages?[ps.index]
322+
final apiPages = textResults!.topApiPages?[ps.index]
305323
// TODO(https://github.com/dart-lang/pub-dev/issues/7106): extract title for the page
306324
?.map((MapEntry<String, double> e) => ApiPageRef(path: e.key))
307325
.toList();
@@ -380,33 +398,30 @@ class InMemoryPackageIndex {
380398
}).toList();
381399
}
382400

383-
_TextResults? _searchText(
401+
_TextResults _searchText(
384402
IndexedScore<String> packageScores,
385403
BitArray packages,
386-
String? text, {
404+
String text, {
387405
required TextMatchExtent textMatchExtent,
388406
}) {
389-
if (text == null || text.isEmpty) {
390-
return null;
391-
}
392-
393407
final sw = Stopwatch()..start();
394408
final words = splitForQuery(text);
395409
if (words.isEmpty) {
396-
// packages.clearAll();
397-
packageScores.fillRange(0, packageScores.length, 0);
398410
return _TextResults.empty();
399411
}
400412

401413
final matchName = textMatchExtent.shouldMatchName();
402414
if (!matchName) {
403-
// packages.clearAll();
404-
packageScores.fillRange(0, packageScores.length, 0);
405415
return _TextResults.empty(
406416
errorMessage:
407417
'Search index in reduced mode: unable to match query text.');
408418
}
409419

420+
for (final index in packages.asIntIterable()) {
421+
if (index >= _documents.length) break;
422+
packageScores.setValue(index, 1.0);
423+
}
424+
410425
bool aborted = false;
411426
bool checkAborted() {
412427
if (!aborted && sw.elapsed > _textSearchTimeout) {
@@ -500,19 +515,18 @@ class InMemoryPackageIndex {
500515
List<IndexedPackageHit> _rankWithValues(
501516
IndexedScore<String> score, {
502517
// If the item count is lower than this threshold, an empty list will be returned.
503-
int? requiredLengthThreshold,
504-
String? bestNameMatch,
518+
required int requiredLengthThreshold,
519+
// When no best name match is applied, this parameter will be `-1`
520+
required int bestNameIndex,
505521
}) {
506522
final list = <IndexedPackageHit>[];
507-
final bestNameIndex =
508-
bestNameMatch == null ? null : _nameToIndex[bestNameMatch];
509523
for (var i = 0; i < score.length; i++) {
510524
final value = score.getValue(i);
511525
if (value <= 0.0 && i != bestNameIndex) continue;
512526
list.add(IndexedPackageHit(
513527
i, PackageHit(package: score.keys[i], score: value)));
514528
}
515-
if ((requiredLengthThreshold ?? 0) > list.length) {
529+
if (requiredLengthThreshold > list.length) {
516530
// There is no point in sorting or even keeping the results, as the search query offset ignores these anyway.
517531
return [];
518532
}
@@ -582,19 +596,22 @@ class InMemoryPackageIndex {
582596
}
583597

584598
class _TextResults {
599+
final bool hasNoMatch;
585600
final List<List<MapEntry<String, double>>?>? topApiPages;
586601
final String? errorMessage;
587602

588603
factory _TextResults.empty({String? errorMessage}) {
589604
return _TextResults(
590605
null,
591606
errorMessage: errorMessage,
607+
hasNoMatch: true,
592608
);
593609
}
594610

595611
_TextResults(
596612
this.topApiPages, {
597613
this.errorMessage,
614+
this.hasNoMatch = false,
598615
});
599616
}
600617

@@ -713,8 +730,8 @@ class _PkgNameData {
713730
}
714731

715732
extension on List<IndexedPackageHit> {
716-
List<IndexedPackageHit> whereInScores(IndexedScore scores) {
717-
return where((h) => scores.isPositive(h.index)).toList();
733+
Iterable<IndexedPackageHit> whereInScores(bool Function(int index) select) {
734+
return where((h) => select(h.index));
718735
}
719736
}
720737

app/lib/search/search_service.dart

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -282,13 +282,11 @@ class ServiceSearchQuery {
282282
/// - URL query sort [order] is used as a fallback.
283283
///
284284
/// TODO: remove this field when [order] is removed.
285-
late final effectiveOrder = parsedQuery.order ?? order;
285+
late final effectiveOrder = parsedQuery.order ?? order ?? SearchOrder.top;
286286
bool get _hasQuery => query != null && query!.isNotEmpty;
287287
bool get _hasOnlyFreeText => _hasQuery && parsedQuery.hasOnlyFreeText;
288288
bool get isNaturalOrder =>
289-
effectiveOrder == null ||
290-
effectiveOrder == SearchOrder.top ||
291-
effectiveOrder == SearchOrder.text;
289+
effectiveOrder == SearchOrder.top || effectiveOrder == SearchOrder.text;
292290
bool get _hasNoOwnershipScope => publisherId == null;
293291
bool get _isFlutterFavorite =>
294292
tagsPredicate.hasTag(PackageTags.isFlutterFavorite);

app/lib/search/token_index.dart

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,8 @@ abstract class _AllocationPool<T> {
177177
_pool.add(item);
178178
}
179179

180+
/// Executes [fn] and provides a pool item in the callback.
181+
/// The item will be released to the pool after [fn] completes.
180182
R withPoolItem<R>({
181183
required R Function(T array) fn,
182184
}) {
@@ -185,6 +187,28 @@ abstract class _AllocationPool<T> {
185187
_release(item);
186188
return r;
187189
}
190+
191+
/// Executes [fn] and provides a getter function that can be used to
192+
/// acquire new pool items while [fn] is being executed. The
193+
/// acquired items will be released back to the pool after [fn] completes.
194+
R withItemGetter<R>(R Function(T Function() itemFn) fn) {
195+
List<T>? items;
196+
T itemFn() {
197+
items ??= <T>[];
198+
final item = _acquire();
199+
items!.add(item);
200+
return item;
201+
}
202+
203+
final r = fn(itemFn);
204+
205+
if (items != null) {
206+
for (final item in items!) {
207+
_release(item);
208+
}
209+
}
210+
return r;
211+
}
188212
}
189213

190214
/// A reusable pool for [IndexedScore] instances to spare some memory allocation.
@@ -225,6 +249,14 @@ class IndexedScore<K> {
225249
List<K> get keys => _keys;
226250
late final length = _values.length;
227251

252+
int positiveCount() {
253+
var count = 0;
254+
for (var i = 0; i < length; i++) {
255+
if (isPositive(i)) count++;
256+
}
257+
return count;
258+
}
259+
228260
bool isPositive(int index) {
229261
return _values[index] > 0.0;
230262
}

app/lib/shared/utils.dart

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -154,22 +154,6 @@ String contentType(String name) {
154154
return mime.defaultExtensionMap[ext] ?? 'application/octet-stream';
155155
}
156156

157-
/// Returns a subset of the list, bounded by [offset] and [limit].
158-
List<T> boundedList<T>(List<T> list, {int offset = 0, int limit = 0}) {
159-
Iterable<T> iterable = list;
160-
if (offset > 0) {
161-
if (offset >= list.length) {
162-
return <T>[];
163-
} else {
164-
iterable = iterable.skip(offset);
165-
}
166-
}
167-
if (limit > 0) {
168-
iterable = iterable.take(limit);
169-
}
170-
return iterable.toList();
171-
}
172-
173157
/// Returns a UUID in v4 format as a `String`.
174158
///
175159
/// If [bytes] is provided, it must be length 16 and have values between `0` and

app/lib/third_party/bit_array/bit_array.dart

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ class BitArray extends BitSet {
6363
return BitArray._(data);
6464
}
6565

66+
bool isSet(int index) => this[index];
67+
6668
/// The value of the bit with the specified [index].
6769
@override
6870
bool operator [](int index) {

0 commit comments

Comments
 (0)