Use sequence numbers instead of read time in collection group #3111


Closed · wants to merge 4 commits
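In outline: instead of stamping each collection group with the read time of its last backfill pass and ordering groups by timestamp, the backfiller now keeps one integer sequence number per collection group. A newly added group is seeded with -1 so it sorts ahead of everything already indexed, and a group is moved to MAX(sequence_number) + 1 only once it has been fully indexed, which yields a round-robin over collection groups. The in-memory sketch below mirrors the semantics of the SQLite-backed implementation in the diff; the class and its names are illustrative, not part of the SDK.

    import java.util.HashMap;
    import java.util.Map;

    // Illustrative stand-in for the collection_group_update_times table.
    final class SequenceNumberSketch {
      private static final int NEW_COLLECTION_GROUP_SEQUENCE_NUMBER = -1;
      private final Map<String, Integer> sequenceNumbers = new HashMap<>();

      // Mirrors addFieldIndex(): a brand-new group is seeded with -1 so it
      // sorts ahead of every group that has already been indexed.
      void addCollectionGroup(String collectionGroup) {
        sequenceNumbers.put(collectionGroup, NEW_COLLECTION_GROUP_SEQUENCE_NUMBER);
      }

      // Mirrors "ORDER BY sequence_number LIMIT 1": the group with the
      // lowest sequence number is the most stale and is processed next.
      String getNextCollectionGroupToUpdate() {
        return sequenceNumbers.entrySet().stream()
            .min(Map.Entry.comparingByValue())
            .map(Map.Entry::getKey)
            .orElse(null);
      }

      // Mirrors markCollectionGroupIndexed(): a fully indexed group moves
      // to MAX(sequence_number) + 1, i.e. to the back of the queue.
      void markCollectionGroupIndexed(String collectionGroup) {
        int max = sequenceNumbers.values().stream()
            .mapToInt(Integer::intValue)
            .max()
            .orElse(NEW_COLLECTION_GROUP_SEQUENCE_NUMBER);
        sequenceNumbers.put(collectionGroup, max + 1);
      }
    }

With sequence numbers {coll1=2, coll2=3, coll3=1}, the next-group order is coll3, coll1, coll2, the same ordering asserted by testBackfillUpdatesCollectionGroups in the test diff below.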
2 changes: 1 addition & 1 deletion firebase-crashlytics-ndk/src/third_party/crashpad
Submodule crashpad updated from e5e47b to 36d4bb
2 changes: 1 addition & 1 deletion firebase-crashlytics-ndk/src/third_party/mini_chromium
Submodule mini_chromium updated from 0e22ee to 76a9bb
@@ -18,7 +18,6 @@
 import androidx.annotation.Nullable;
 import androidx.annotation.VisibleForTesting;
-import com.google.firebase.Timestamp;
 import com.google.firebase.database.collection.ImmutableSortedMap;
 import com.google.firebase.firestore.core.Query;
 import com.google.firebase.firestore.model.Document;
@@ -140,24 +139,30 @@ public Results backfill() {
   /** Writes index entries until the cap is reached. Returns the number of documents processed. */
   private int writeIndexEntries(LocalDocumentsView localDocumentsView) {
     int documentsProcessed = 0;
-    Timestamp startingTimestamp = Timestamp.now();
 
-    while (documentsProcessed < maxDocumentsToProcess) {
+    // Track the starting collection group to ensure that the backfill stops after looping through
+    // all collections a single time.
+    String startingCollectionGroup = indexManager.getNextCollectionGroupToUpdate();
+
+    // TODO(indexing): Handle pausing and resuming from the correct document if backfilling hits the
+    // max doc limit while processing docs for a certain read time.
  [Review comment]
  Contributor: That's a very big todo :)
  Author: Just backfilling it in from before :) Will tackle this in a subsequent PR
+    String collectionGroup = startingCollectionGroup;
+    while (collectionGroup != null && documentsProcessed < maxDocumentsToProcess) {
       int documentsRemaining = maxDocumentsToProcess - documentsProcessed;
-      String collectionGroup = indexManager.getNextCollectionGroupToUpdate(startingTimestamp);
-      if (collectionGroup == null) {
-        break;
-      }
       documentsProcessed +=
           writeEntriesForCollectionGroup(localDocumentsView, collectionGroup, documentsRemaining);
+      collectionGroup = indexManager.getNextCollectionGroupToUpdate();
+      if (collectionGroup == null || collectionGroup.equals(startingCollectionGroup)) {
+        break;
+      }
     }
 
     return documentsProcessed;
   }
 
   /** Writes entries for the fetched field indexes. */
   private int writeEntriesForCollectionGroup(
-      LocalDocumentsView localDocumentsView, String collectionGroup, int entriesRemainingUnderCap) {
+      LocalDocumentsView localDocumentsView, String collectionGroup, int documentsRemaining) {
     Query query = new Query(ResourcePath.EMPTY, collectionGroup);
 
     // Use the earliest updateTime of all field indexes as the base updateTime.
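Note what the new loop structure implies: a group that was only partially indexed keeps its old sequence number, so getNextCollectionGroupToUpdate() would hand it back immediately; the pass ends either because the document cap is reached or because the iteration wraps around to startingCollectionGroup. A hand-worked trace, with illustrative sequence numbers that are not from this PR:

    // Groups at loop start: {coll3=-1, coll1=4, coll2=5};
    // startingCollectionGroup = "coll3".
    //
    //   coll3 fully indexed -> re-sequenced to 6; lowest is now coll1
    //   coll1 fully indexed -> re-sequenced to 7; lowest is now coll2
    //   coll2 fully indexed -> re-sequenced to 8; lowest is coll3 again,
    //                          which equals startingCollectionGroup -> break
    //
    // Had the cap been hit partway through a group, that group would keep
    // its old sequence number and the while-condition would end the pass.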
@@ -169,8 +174,15 @@ private int writeEntriesForCollectionGroup(
     ImmutableSortedMap<DocumentKey, Document> documents =
         localDocumentsView.getDocumentsMatchingQuery(query, earliestUpdateTime);
 
-    Queue<Document> oldestDocuments = getOldestDocuments(documents, entriesRemainingUnderCap);
+    Queue<Document> oldestDocuments = getOldestDocuments(documents, documentsRemaining);
     indexManager.updateIndexEntries(oldestDocuments);
+
+    // Mark the collection group as fully indexed if all documents in the collection have been
+    // indexed during this backfill iteration.
+    if (documentsRemaining >= documents.size()) {
+      indexManager.markCollectionGroupIndexed(collectionGroup);
  [Review comment]
  Contributor: This is different from how I implemented it. I always increment the sequence number regardless of whether I am done or not - it's a tiny bit simpler but may not be as optimal as what you are doing. We should discuss.
+    }
 
     return oldestDocuments.size();
   }
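To make the trade-off raised in the thread above concrete: under this PR a group is re-sequenced only when fully indexed, so a partially indexed group stays at the front of the queue and is resumed by the next pass; under the reviewer's always-increment variant it would move to the back and its remaining documents would wait one extra round-robin cycle. A sketch of the difference, with hypothetical values:

    // This PR: {coll1=4, coll2=5}, cap hit halfway through coll1.
    //   coll1 keeps 4  -> the next backfill() picks coll1 first and resumes it.
    //
    // Always-increment variant, same scenario:
    //   coll1 moves to 6 -> the next backfill() picks coll2 first, and
    //                       coll1's remaining documents wait one more cycle.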

@@ -15,7 +15,6 @@
 package com.google.firebase.firestore.local;
 
 import androidx.annotation.Nullable;
-import com.google.firebase.Timestamp;
 import com.google.firebase.firestore.core.Target;
 import com.google.firebase.firestore.model.Document;
 import com.google.firebase.firestore.model.DocumentKey;
@@ -87,9 +86,18 @@ public interface IndexManager {
   /** Returns the documents that match the given target based on the provided index. */
   Set<DocumentKey> getDocumentsMatchingTarget(FieldIndex fieldIndex, Target target);
 
-  /** Returns the next collection group to update. */
+  /**
+   * Returns the next collection group to update based on the collection with the lowest sequence
+   * number. Returns null if no collection groups are found.
+   */
   @Nullable
-  String getNextCollectionGroupToUpdate(Timestamp lastUpdateTime);
+  String getNextCollectionGroupToUpdate();
+
+  /**
+   * Marks the collection group as indexed by updating the sequence counter to the next highest
+   * value.
+   */
+  void markCollectionGroupIndexed(String collectionGroup);
 
   /**
    * Updates the index entries for the provided documents and corresponding field indexes until the
@@ -16,7 +16,6 @@
 import static com.google.firebase.firestore.util.Assert.hardAssert;
 
 import androidx.annotation.Nullable;
-import com.google.firebase.Timestamp;
 import com.google.firebase.firestore.core.Target;
 import com.google.firebase.firestore.model.Document;
 import com.google.firebase.firestore.model.DocumentKey;
@@ -80,11 +79,16 @@ public Set<DocumentKey> getDocumentsMatchingTarget(FieldIndex fieldIndex, Target
   }
 
   @Override
-  public String getNextCollectionGroupToUpdate(Timestamp startingTimestamp) {
+  public String getNextCollectionGroupToUpdate() {
     // Field indices are not supported with memory persistence.
     return null;
   }
 
+  @Override
+  public void markCollectionGroupIndexed(String collectionGroup) {
+    // Field indices are not supported with memory persistence.
+  }
+
   @Override
   public Collection<FieldIndex> getFieldIndexes(String collectionGroup) {
     // Field indices are not supported with memory persistence.
@@ -55,6 +55,7 @@
 /** A persisted implementation of IndexManager. */
 final class SQLiteIndexManager implements IndexManager {
   private static final String TAG = SQLiteIndexManager.class.getSimpleName();
+  private static final int NEW_COLLECTION_GROUP_SEQUENCE_NUMBER = -1;
 
   /**
    * An in-memory copy of the index entries we've already written since the SDK launched. Used to
@@ -165,8 +166,7 @@ public void addFieldIndex(FieldIndex index) {
         index.getUpdateTime().getTimestamp().getSeconds(),
         index.getUpdateTime().getTimestamp().getNanoseconds());
 
-    // TODO(indexing): Use a 0-counter rather than the timestamp.
-    setCollectionGroupUpdateTime(index.getCollectionGroup(), SnapshotVersion.NONE.getTimestamp());
+    setCollectionGroup(index.getCollectionGroup(), NEW_COLLECTION_GROUP_SEQUENCE_NUMBER);
 
     memoizeIndex(index);
   }
@@ -199,19 +199,16 @@ private void updateFieldIndex(FieldIndex index) {
     memoizeIndex(index);
   }
 
-  // TODO(indexing): Use a counter rather than the starting timestamp.
   @Override
-  public @Nullable String getNextCollectionGroupToUpdate(Timestamp startingTimestamp) {
+  public @Nullable String getNextCollectionGroupToUpdate() {
     hardAssert(started, "IndexManager not started");
 
     final String[] nextCollectionGroup = {null};
     db.query(
             "SELECT collection_group "
                 + "FROM collection_group_update_times "
-                + "WHERE update_time_seconds <= ? AND update_time_nanos <= ?"
-                + "ORDER BY update_time_seconds, update_time_nanos "
+                + "ORDER BY sequence_number "
                 + "LIMIT 1")
-        .binding(startingTimestamp.getSeconds(), startingTimestamp.getNanoseconds())
         .firstValue(
             row -> {
               nextCollectionGroup[0] = row.getString(0);
@@ -222,14 +219,15 @@ private void updateFieldIndex(FieldIndex index) {
       return null;
     }
 
-    // Store that this collection group will updated.
-    // TODO(indexing): Store progress with a counter rather than a timestamp. If using a timestamp,
-    // use the read time of the last document read in the loop.
-    setCollectionGroupUpdateTime(nextCollectionGroup[0], Timestamp.now());
-
     return nextCollectionGroup[0];
   }
 
+  @Override
+  public void markCollectionGroupIndexed(String collectionGroup) {
+    int currentMaxSequenceNumber = getMaxCollectionGroupSequenceNumber();
+    setCollectionGroup(collectionGroup, currentMaxSequenceNumber + 1);
+  }
+
   @Override
   public void updateIndexEntries(Collection<Document> documents) {
     hardAssert(started, "IndexManager not started");
@@ -253,8 +251,6 @@ public void updateIndexEntries(Collection<Document> documents) {
       }
     }
 
-    // TODO(indexing): Use RemoteDocumentCache's readTime version rather than the document version.
-    // This will require plumbing out the RDC's readTime into the IndexBackfiller.
     for (FieldIndex updatedFieldIndex : updatedFieldIndexes.values()) {
       updateFieldIndex(updatedFieldIndex);
     }
@@ -664,13 +660,20 @@ private byte[] encodeSegments(FieldIndex fieldIndex) {
   }
 
   @VisibleForTesting
-  void setCollectionGroupUpdateTime(String collectionGroup, Timestamp updateTime) {
+  void setCollectionGroup(String collectionGroup, int sequenceNumber) {
     db.execute(
         "INSERT OR REPLACE INTO collection_group_update_times "
-            + "(collection_group, update_time_seconds, update_time_nanos) "
-            + "VALUES (?, ?, ?)",
+            + "(collection_group, sequence_number) "
+            + "VALUES (?, ?)",
         collectionGroup,
-        updateTime.getSeconds(),
-        updateTime.getNanoseconds());
+        sequenceNumber);
   }
+
+  /** Returns the highest collection group sequence number in the table. */
+  private int getMaxCollectionGroupSequenceNumber() {
+    final int[] finalValue = {NEW_COLLECTION_GROUP_SEQUENCE_NUMBER};
+    db.query("SELECT MAX(sequence_number) FROM collection_group_update_times")
+        .first(value -> finalValue[0] = value.getInt(0));
+    return finalValue[0];
+  }
 }
@@ -401,8 +401,7 @@ private void createCollectionGroupsTable() {
   db.execSQL(
       "CREATE TABLE collection_group_update_times ("
           + "collection_group TEXT, " // Name of the collection group.
-          + "update_time_seconds INTEGER," // Time of last index backfill update
-          + "update_time_nanos INTEGER,"
+          + "sequence_number INTEGER, " // Used to establish ordering when indexing.
           + "PRIMARY KEY (collection_group))");
 });
@@ -15,7 +15,7 @@
 package com.google.firebase.firestore.local;
 
 import static com.google.common.truth.Truth.assertThat;
-import static com.google.firebase.firestore.local.SQLiteIndexManagerTest.getCollectionGroupsOrderByUpdateTime;
+import static com.google.firebase.firestore.local.SQLiteIndexManagerTest.getCollectionGroupsOrderBySequenceNumber;
 import static com.google.firebase.firestore.testutil.TestUtil.doc;
 import static com.google.firebase.firestore.testutil.TestUtil.fieldIndex;
 import static com.google.firebase.firestore.testutil.TestUtil.key;
@@ -25,7 +25,6 @@
 import static com.google.firebase.firestore.testutil.TestUtil.version;
 import static junit.framework.TestCase.assertEquals;
 
-import com.google.firebase.Timestamp;
 import com.google.firebase.firestore.auth.User;
 import com.google.firebase.firestore.core.Target;
 import com.google.firebase.firestore.model.DocumentKey;
@@ -40,7 +39,6 @@
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
-import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestName;
@@ -95,9 +93,7 @@ public void tearDown() {
     persistence.shutdown();
   }
 
-  // TODO(indexing): Re-enable this test once we have counters implemented.
   @Test
-  @Ignore
   public void testBackfillWritesLatestReadTimeToFieldIndexOnCompletion() {
     addFieldIndex("coll1", "foo");
     addFieldIndex("coll2", "bar");
@@ -127,8 +123,6 @@ public void testBackfillWritesLatestReadTimeToFieldIndexOnCompletion() {
   }
 
   @Test
-  @Ignore("Flaky")
-  // TODO(indexing): This test is flaky. Fix.
   public void testBackfillFetchesDocumentsAfterEarliestReadTime() {
     addFieldIndex("coll1", "foo", version(10, 0));
     addFieldIndex("coll1", "boo", version(20, 0));
@@ -192,15 +186,13 @@ public void testBackfillWritesOldestDocumentFirst() {
   }
 
   @Test
-  @Ignore("Flaky")
-  // TODO(indexing): This test is flaky. Fix.
   public void testBackfillUpdatesCollectionGroups() {
     addFieldIndex("coll1", "foo");
     addFieldIndex("coll2", "foo");
     addFieldIndex("coll3", "foo");
-    addCollectionGroup("coll1", new Timestamp(30, 0));
-    addCollectionGroup("coll2", new Timestamp(30, 30));
-    addCollectionGroup("coll3", new Timestamp(10, 0));
+    addCollectionGroup("coll1", 2);
+    addCollectionGroup("coll2", 3);
+    addCollectionGroup("coll3", 1);
     addDoc("coll1/docA", "foo", version(10, 0));
     addDoc("coll2/docA", "foo", version(10, 0));
     addDoc("coll3/docA", "foo", version(10, 0));
@@ -210,31 +202,29 @@
 
     // Check that index entries are written in order of the collection group update times by
     // verifying the collection group update times have been updated in the correct order.
-    List<String> collectionGroups = getCollectionGroupsOrderByUpdateTime(persistence);
+    List<String> collectionGroups = getCollectionGroupsOrderBySequenceNumber(persistence);
     assertEquals(3, collectionGroups.size());
     assertEquals("coll3", collectionGroups.get(0));
     assertEquals("coll1", collectionGroups.get(1));
     assertEquals("coll2", collectionGroups.get(2));
   }

   @Test
-  @Ignore("Flaky")
-  // TODO(indexing): This test is flaky. Fix.
   public void testBackfillPrioritizesNewCollectionGroups() {
     // In this test case, `coll3` is a new collection group that hasn't been indexed, so it should
     // be processed ahead of the other collection groups.
     addFieldIndex("coll1", "foo");
     addFieldIndex("coll2", "foo");
-    addCollectionGroup("coll1", new Timestamp(1, 0));
-    addCollectionGroup("coll2", new Timestamp(2, 0));
+    addCollectionGroup("coll1", 1);
+    addCollectionGroup("coll2", 2);
     addFieldIndex("coll3", "foo");
 
     IndexBackfiller.Results results = backfiller.backfill();
     assertEquals(0, results.getDocumentsProcessed());
 
     // Check that index entries are written in order of the collection group update times by
     // verifying the collection group update times have been updated in the correct order.
-    List<String> collectionGroups = getCollectionGroupsOrderByUpdateTime(persistence);
+    List<String> collectionGroups = getCollectionGroupsOrderBySequenceNumber(persistence);
     assertEquals(3, collectionGroups.size());
     assertEquals("coll3", collectionGroups.get(0));
     assertEquals("coll1", collectionGroups.get(1));
@@ -246,7 +236,11 @@ public void testBackfillWritesUntilCap() {
     backfiller.setMaxDocumentsToProcess(3);
     addFieldIndex("coll1", "foo");
     addFieldIndex("coll2", "foo");
-    addCollectionGroup("coll1", new Timestamp(1, 0));
+    addCollectionGroup("coll1", 1);
+    List<String> collectionGroup = getCollectionGroupsOrderBySequenceNumber(persistence);
+    assertEquals(2, collectionGroup.size());
+    assertEquals("coll2", collectionGroup.get(0));
+    assertEquals("coll1", collectionGroup.get(1));
     addDoc("coll1/docA", "foo", version(10, 0));
     addDoc("coll1/docB", "foo", version(10, 0));
     addDoc("coll2/docA", "foo", version(10, 0));
@@ -256,12 +250,38 @@
     assertEquals(3, results.getDocumentsProcessed());
 
     // Check that collection groups are updated even if the backfiller hits the write cap. Since
-    // `coll1` was already in the table, `coll2` should be processed first, and thus appear first
-    // in the ordering.
-    List<String> collectionGroups = getCollectionGroupsOrderByUpdateTime(persistence);
+    // `coll1` was already in the table, `coll2` should be fully indexed first, and thus appear
+    // later in the collection groups table.
+    List<String> collectionGroups = getCollectionGroupsOrderBySequenceNumber(persistence);
     assertEquals(2, collectionGroups.size());
     assertEquals("coll1", collectionGroups.get(0));
     assertEquals("coll2", collectionGroups.get(1));
   }
 
+  @Test
+  public void testBackfillPrioritizesInProgressIndexes() {
+    backfiller.setMaxDocumentsToProcess(1);
+    addFieldIndex("coll1", "foo");
+    addFieldIndex("coll2", "foo");
+    addCollectionGroup("coll1", 1);
+    addDoc("coll1/docA", "foo", version(10, 0));
+    addDoc("coll1/docB", "foo", version(10, 0));
+    addDoc("coll2/docA", "foo", version(10, 0));
+    addDoc("coll2/docB", "foo", version(10, 0));
+
+    // The backfiller should prioritize `coll2` since `coll1` is already in the collections table.
+    // Since the backfill was not completed, `coll2` should still come first in the ordering.
+    backfiller.backfill();
+    List<String> collectionGroups = getCollectionGroupsOrderBySequenceNumber(persistence);
+    assertEquals("coll2", collectionGroups.get(0));
+    assertEquals("coll1", collectionGroups.get(1));
+
+    // The 2nd loop should complete the backfill for `coll2` and assign it to the next highest
+    // collection seq number.
+    backfiller.backfill();
+    collectionGroups = getCollectionGroupsOrderBySequenceNumber(persistence);
+    assertEquals("coll1", collectionGroups.get(0));
+    assertEquals("coll2", collectionGroups.get(1));
+  }
 
   private void addFieldIndex(String collectionGroup, String fieldName) {
@@ -276,8 +296,8 @@ private void addFieldIndex(String collectionGroup, String fieldName, SnapshotVer
     indexManager.addFieldIndex(fieldIndex);
   }
 
-  private void addCollectionGroup(String collectionGroup, Timestamp updateTime) {
-    indexManager.setCollectionGroupUpdateTime(collectionGroup, updateTime);
+  private void addCollectionGroup(String collectionGroup, int sequenceNumber) {
+    indexManager.setCollectionGroup(collectionGroup, sequenceNumber);
   }
 
   /** Creates a document and adds it to the RemoteDocumentCache. */
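Tracing the new testBackfillPrioritizesInProgressIndexes through the sequence-number scheme, with the MAX + 1 assignment from SQLiteIndexManager assumed, gives:

    // Setup: addCollectionGroup("coll1", 1); addFieldIndex seeds coll2 at -1.
    //
    // backfill() #1 (cap = 1 document):
    //   coll2 has the lowest sequence number (-1) and is picked first, but
    //   only one of its two documents fits under the cap -> coll2 is not
    //   marked indexed, keeps -1, and still orders before coll1 (1).
    //
    // backfill() #2:
    //   coll2's remaining document is indexed -> coll2 moves to
    //   MAX(1, -1) + 1 = 2, so the final order is coll1 (1), coll2 (2).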