Skip to content

Add IndexFreeQueryEngine #2169

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Sep 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 13 additions & 22 deletions packages/firestore/src/core/firestore_client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,6 @@ import { LocalStore } from '../local/local_store';
import { MemoryPersistence } from '../local/memory_persistence';
import { Persistence } from '../local/persistence';
import { SimpleQueryEngine } from '../local/simple_query_engine';
import {
DocumentKeySet,
documentKeySet,
DocumentMap
} from '../model/collections';
import { Document, MaybeDocument, NoDocument } from '../model/document';
import { DocumentKey } from '../model/document_key';
import { Mutation } from '../model/mutation';
Expand All @@ -45,7 +40,7 @@ import {
QueryListener
} from './event_manager';
import { SyncEngine } from './sync_engine';
import { View, ViewDocumentChanges } from './view';
import { View } from './view';

import {
LruGarbageCollector,
Expand Down Expand Up @@ -584,22 +579,18 @@ export class FirestoreClient {

getDocumentsFromLocalCache(query: Query): Promise<ViewSnapshot> {
this.verifyNotTerminated();
return this.asyncQueue
.enqueue(() => {
return this.localStore.executeQuery(query);
})
.then((docs: DocumentMap) => {
const remoteKeys: DocumentKeySet = documentKeySet();

const view = new View(query, remoteKeys);
const viewDocChanges: ViewDocumentChanges = view.computeDocChanges(
docs
);
return view.applyChanges(
viewDocChanges,
/* updateLimboDocuments= */ false
).snapshot!;
});
return this.asyncQueue.enqueue(async () => {
const queryResult = await this.localStore.executeQuery(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated this to all run on the async queue.

query,
/* usePreviousResults= */ true
);
const view = new View(query, queryResult.remoteKeys);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

queryResult.remoteKeys is used now instead of null, which seems more correct, but doesn't change the state of the fromCache flag since it is set to true either way.

const viewDocChanges = view.computeDocChanges(queryResult.documents);
return view.applyChanges(
viewDocChanges,
/* updateLimboDocuments= */ false
).snapshot!;
});
}

write(mutations: Mutation[]): Promise<void> {
Expand Down
31 changes: 17 additions & 14 deletions packages/firestore/src/core/sync_engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -243,13 +243,12 @@ export class SyncEngine implements RemoteSyncer, SharedClientStateSyncer {
current: boolean
): Promise<ViewSnapshot> {
const query = queryData.query;
const docs = await this.localStore.executeQuery(query);
const remoteKeys = await this.localStore.remoteDocumentKeys(
queryData.targetId
const queryResult = await this.localStore.executeQuery(
query,
/* usePreviousResults= */ true
);

const view = new View(query, remoteKeys);
const viewDocChanges = view.computeDocChanges(docs);
const view = new View(query, queryResult.remoteKeys);
const viewDocChanges = view.computeDocChanges(queryResult.documents);
const synthesizedTargetChange = TargetChange.createSynthesizedTargetChangeForCurrentChange(
queryData.targetId,
current && this.onlineState !== OnlineState.Offline
Expand Down Expand Up @@ -282,13 +281,12 @@ export class SyncEngine implements RemoteSyncer, SharedClientStateSyncer {
private async synchronizeViewAndComputeSnapshot(
queryView: QueryView
): Promise<ViewChange> {
const docs = await this.localStore.executeQuery(queryView.query);
const remoteKeys = await this.localStore.remoteDocumentKeys(
queryView.targetId
const queryResult = await this.localStore.executeQuery(
queryView.query,
/* usePreviousResults= */ true
);
const viewSnapshot = queryView.view.synchronizeWithPersistedState(
docs,
remoteKeys
queryResult
);
if (this.isPrimary) {
this.updateTrackedLimbos(queryView.targetId, viewSnapshot.limboChanges);
Expand Down Expand Up @@ -786,9 +784,14 @@ export class SyncEngine implements RemoteSyncer, SharedClientStateSyncer {
// The query has a limit and some docs were removed, so we need
// to re-run the query against the local store to make sure we
// didn't lose any good docs that had been past the limit.
return this.localStore.executeQuery(queryView.query).then(docs => {
return queryView.view.computeDocChanges(docs, viewDocChanges);
});
return this.localStore
.executeQuery(queryView.query, /* usePreviousResults= */ false)
.then(({ documents }) => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: I verified that the transpiler unpacks this (in cjs and esm mode).

return queryView.view.computeDocChanges(
documents,
viewDocChanges
);
});
})
.then((viewDocChanges: ViewDocumentChanges) => {
const targetChange =
Expand Down
18 changes: 8 additions & 10 deletions packages/firestore/src/core/view.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* limitations under the License.
*/

import { QueryResult } from '../local/local_store';
import {
documentKeySet,
DocumentKeySet,
Expand Down Expand Up @@ -433,21 +434,18 @@ export class View {
* of `syncedDocuments` since secondary clients update their query views
* based purely on synthesized RemoteEvents.
*
* @param localDocs - The documents that match the query according to the
* LocalStore.
* @param remoteKeys - The keys of the documents that match the query
* according to the backend.
* @param queryResult.documents - The documents that match the query according
* to the LocalStore.
* @param queryResult.remoteKeys - The keys of the documents that match the
* query according to the backend.
*
* @return The ViewChange that resulted from this synchronization.
*/
// PORTING NOTE: Multi-tab only.
synchronizeWithPersistedState(
localDocs: MaybeDocumentMap,
remoteKeys: DocumentKeySet
): ViewChange {
this._syncedDocuments = remoteKeys;
synchronizeWithPersistedState(queryResult: QueryResult): ViewChange {
this._syncedDocuments = queryResult.remoteKeys;
this.limboDocuments = documentKeySet();
const docChanges = this.computeDocChanges(localDocs);
const docChanges = this.computeDocChanges(queryResult.documents);
return this.applyChanges(docChanges, /*updateLimboDocuments=*/ true);
}

Expand Down
206 changes: 206 additions & 0 deletions packages/firestore/src/local/index_free_query_engine.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
/**
* @license
* Copyright 2019 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { QueryEngine } from './query_engine';
import { LocalDocumentsView } from './local_documents_view';
import { PersistenceTransaction } from './persistence';
import { PersistencePromise } from './persistence_promise';
import { Query } from '../core/query';
import { SnapshotVersion } from '../core/snapshot_version';
import {
DocumentKeySet,
DocumentMap,
MaybeDocumentMap
} from '../model/collections';
import { Document } from '../model/document';
import { assert } from '../util/assert';
import { debug, getLogLevel, LogLevel } from '../util/log';
import { SortedSet } from '../util/sorted_set';

// TOOD(b/140938512): Drop SimpleQueryEngine and rename IndexFreeQueryEngine.

/**
* A query engine that takes advantage of the target document mapping in the
* QueryCache. The IndexFreeQueryEngine optimizes query execution by only
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mentioned this before, but I think "index-free" is not a great name to have in our code, since 1) index-free explains what it doesn't do rather than what it does, 2) to understand the name you need to have context about a feature that doesn't exist yet, 3) it still uses indexes.

Do you think we can drop all references to "index-free" once this becomes the one and only QueryEngine? If so, can you add a TODO comment to that effect (maybe linking to a bug about migrating 100% from SimpleQueryEngine to IndexFreeQueryEngine)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think of "ThreadLocalFreeQueryEngine"? It sounds even more advanced and the implementation is actually free of them. :)

In all honesty, I can't come up with a better name right now. I filed b/140938512 and will tackle this once we remove SimpleQueryEngine.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The better name I am hoping for eventually is "QueryEngine" 😄

* reading the documents that previously matched a query plus any documents that were
* edited after the query was last listened to.
*
* There are some cases where Index-Free queries are not guaranteed to produce
* the same results as full collection scans. In these cases, the
* IndexFreeQueryEngine falls back to full query processing. These cases are:
*
* - Limit queries where a document that matched the query previously no longer
* matches the query.
*
* - Limit queries where a document edit may cause the document to sort below
* another document that is in the local cache.
*
* - Queries that have never been CURRENT or free of Limbo documents.
*/
export class IndexFreeQueryEngine implements QueryEngine {
private localDocumentsView: LocalDocumentsView | undefined;

setLocalDocumentsView(localDocuments: LocalDocumentsView): void {
this.localDocumentsView = localDocuments;
}

getDocumentsMatchingQuery(
transaction: PersistenceTransaction,
query: Query,
lastLimboFreeSnapshotVersion: SnapshotVersion,
remoteKeys: DocumentKeySet
): PersistencePromise<DocumentMap> {
assert(
this.localDocumentsView !== undefined,
'setLocalDocumentsView() not called'
);

// Queries that match all document don't benefit from using
// IndexFreeQueries. It is more efficient to scan all documents in a
// collection, rather than to perform individual lookups.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW- This may not be universally true. If you have a collection with 10 documents, but those docs contain subcollections with 10k documents, we'll end up iterating over (but fortunately not deserializing) the 10k documents, instead of just reading the 10 that match. I don't suggest we optimize for this case though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack. A parentPath index would help here.

if (query.matchesAllDocuments()) {
return this.executeFullCollectionScan(transaction, query);
}

// Queries that have never seen a snapshot without limbo free documents
// should also be run as a full collection scan.
if (lastLimboFreeSnapshotVersion.isEqual(SnapshotVersion.MIN)) {
return this.executeFullCollectionScan(transaction, query);
}

return this.localDocumentsView!.getDocuments(transaction, remoteKeys).next(
documents => {
const previousResults = this.applyQuery(query, documents);

if (
query.hasLimit() &&
this.needsRefill(
previousResults,
remoteKeys,
lastLimboFreeSnapshotVersion
)
) {
return this.executeFullCollectionScan(transaction, query);
}

if (getLogLevel() <= LogLevel.DEBUG) {
debug(
'IndexFreeQueryEngine',
'Re-using previous result from %s to execute query: %s',
lastLimboFreeSnapshotVersion.toString(),
query.toString()
);
}

// Retrieve all results for documents that were updated since the last
// limbo-document free remote snapshot.
return this.localDocumentsView!.getDocumentsMatchingQuery(
transaction,
query,
lastLimboFreeSnapshotVersion
).next(updatedResults => {
// We merge `previousResults` into `updateResults`, since
// `updateResults` is already a DocumentMap. If a document is
// contained in both lists, then its contents are the same.
previousResults.forEach(doc => {
updatedResults = updatedResults.insert(doc.key, doc);
});
return updatedResults;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this code looks backwards to me (we're overlaying previousResults on top of updatedResults instead of vice-versa). But it matches Android. And presumably if it was backwards, we'd have failing tests... So I'm kinda' confused?

UPDATE: I realized that it doesn't matter which one we overlay on which, because if a document exists in both sets, it'll be identical (since both sets would be populated from the same cached document).

So I have two thoughts:

  1. Since it doesn't matter, maybe we should overlay updatedResults on top of previousResults since that's conceptually more accurate.
  2. If we are doing it this way intentionally (maybe we're assuming that previousResults will be a smaller set and so it's cheaper), we should probably comment why the code may look backwards but is actually okay.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

100% agreed that the other order would be more natural. The reason it is written this way is that previousResults is a SortedSet<Document>, but we need to return a DocumentMap.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh! I see. Probably not a big deal, but a comment would still save me some head-scratching. Or you could add an "optimization" that also removes the concern I had:

if (updatedResults.get(doc.key) === null) {
  updatedResults = updatedResults.insert(doc.key, doc);
}

I also don't care too strongly if you want to leave it as-is though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a comment, but left the code as is.

});
}
);
}

/** Applies the query filter and sorting to the provided documents. */
private applyQuery(
query: Query,
documents: MaybeDocumentMap
): SortedSet<Document> {
// Sort the documents and re-apply the query filter since previously
// matching documents do not necessarily still match the query.
let queyrResults = new SortedSet<Document>((d1, d2) =>
query.docComparator(d1, d2)
);
documents.forEach((_, maybeDoc) => {
if (maybeDoc instanceof Document && query.matches(maybeDoc)) {
queyrResults = queyrResults.add(maybeDoc);
}
});
return queyrResults;
}

/**
* Determines if a limit query needs to be refilled from cache, making it
* ineligible for index-free execution.
*
* @param sortedPreviousResults The documents that matched the query when it
* was last synchronized, sorted by the query's comparator.
* @param remoteKeys The document keys that matched the query at the last
* snapshot.
* @param limboFreeSnapshotVersion The version of the snapshot when the query
* was last synchronized.
*/
private needsRefill(
sortedPreviousResults: SortedSet<Document>,
remoteKeys: DocumentKeySet,
limboFreeSnapshotVersion: SnapshotVersion
): boolean {
// The query needs to be refilled if a previously matching document no
// longer matches.
if (remoteKeys.size !== sortedPreviousResults.size) {
return true;
}

// Limit queries are not eligible for index-free query execution if there is
// a potential that an older document from cache now sorts before a document
// that was previously part of the limit. This, however, can only happen if
// the last document of the limit sorts lower than it did when the query was
// last synchronized. If a document that is not the limit boundary sorts
// differently, the boundary of the limit itself did not change and
// documents from cache will continue to be "rejected" by this boundary.
// Therefore, we can ignore any modifications that don't affect the last
// document.
const lastDocumentInLimit = sortedPreviousResults.last();
if (!lastDocumentInLimit) {
// We don't need to refill the query if there were already no documents.
return false;
}
return (
lastDocumentInLimit.hasPendingWrites ||
lastDocumentInLimit.version.compareTo(limboFreeSnapshotVersion) > 0
);
}

private executeFullCollectionScan(
transaction: PersistenceTransaction,
query: Query
): PersistencePromise<DocumentMap> {
if (getLogLevel() <= LogLevel.DEBUG) {
debug(
'IndexFreeQueryEngine',
'Using full collection scan to execute query: %s',
query.toString()
);
}

return this.localDocumentsView!.getDocumentsMatchingQuery(
transaction,
query,
SnapshotVersion.MIN
);
}
}
Loading