
Commit 21c1c42

Migrate remote_documents table to use dedicated collection_path column
1 parent 486abaf commit 21c1c42
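
Conceptually, this commit replaces the single encoded `path` primary key on remote_documents with a composite (collection_path, document_id) key plus a read-time index. The sketch below summarizes the before/after table shape; it is an approximation, not code from this commit: the pre-migration DDL is inferred from the migration's INSERT ... SELECT, and the wrapper class and method names are illustrative only.

import android.database.sqlite.SQLiteDatabase;

final class RemoteDocumentsSchemaSketch {
  // Approximate pre-migration shape: rows keyed by one encoded document path.
  static void createOldTable(SQLiteDatabase db) {
    db.execSQL(
        "CREATE TABLE remote_documents ("
            + "path TEXT PRIMARY KEY, "
            + "read_time_seconds INTEGER, "
            + "read_time_nanos INTEGER, "
            + "contents BLOB)");
  }

  // Post-migration shape, matching SQLiteSchema.rewriteDocumentKeys() in this commit.
  static void createNewTable(SQLiteDatabase db) {
    db.execSQL(
        "CREATE TABLE remote_documents ("
            + "collection_path TEXT, "
            + "document_id TEXT, "
            + "read_time_nanos INTEGER, "
            + "read_time_seconds INTEGER, "
            + "contents BLOB, "
            + "PRIMARY KEY (collection_path, document_id))");
    db.execSQL(
        "CREATE INDEX remote_documents_read_time "
            + "ON remote_documents (read_time_nanos, read_time_seconds)");
  }
}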

File tree

4 files changed: 149 additions & 78 deletions


firebase-firestore/src/main/java/com/google/firebase/firestore/local/EncodedPath.java

Lines changed: 0 additions & 5 deletions
@@ -17,7 +17,6 @@
 import static com.google.firebase.firestore.util.Assert.hardAssert;
 
 import com.google.firebase.firestore.model.BasePath;
-import com.google.firebase.firestore.model.FieldPath;
 import com.google.firebase.firestore.model.ResourcePath;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -118,10 +117,6 @@ static ResourcePath decodeResourcePath(String path) {
     return ResourcePath.fromSegments(decode(path));
   }
 
-  static FieldPath decodeFieldPath(String path) {
-    return FieldPath.fromSegments(decode(path));
-  }
-
   private static List<String> decode(String path) {
     // Even the empty path must encode as a path of at least length 2. A path with length of exactly
     // 2 must be the empty path.

firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteRemoteDocumentCache.java

Lines changed: 56 additions & 65 deletions
@@ -30,6 +30,7 @@
 import com.google.protobuf.InvalidProtocolBufferException;
 import com.google.protobuf.MessageLite;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -57,15 +58,16 @@ public void add(MutableDocument document, SnapshotVersion readTime) {
         !readTime.equals(SnapshotVersion.NONE),
         "Cannot add document to the RemoteDocumentCache with a read time of zero");
 
-    String path = pathForKey(document.getKey());
+    DocumentKey documentKey = document.getKey();
     Timestamp timestamp = readTime.getTimestamp();
     MessageLite message = serializer.encodeMaybeDocument(document);
 
     db.execute(
         "INSERT OR REPLACE INTO remote_documents "
-            + "(path, read_time_seconds, read_time_nanos, contents) "
-            + "VALUES (?, ?, ?, ?)",
-        path,
+            + "(collection_path, document_id, read_time_seconds, read_time_nanos, contents) "
+            + "VALUES (?, ?, ?, ?, ?)",
+        collectionForKey(documentKey),
+        documentKey.getPath().getLastSegment(),
         timestamp.getSeconds(),
         timestamp.getNanoseconds(),
         message.toByteArray());
@@ -75,56 +77,63 @@ public void add(MutableDocument document, SnapshotVersion readTime) {
 
   @Override
   public void remove(DocumentKey documentKey) {
-    String path = pathForKey(documentKey);
-
-    db.execute("DELETE FROM remote_documents WHERE path = ?", path);
+    db.execute(
+        "DELETE FROM remote_documents WHERE collection_path = ? AND document_id = ?",
+        collectionForKey(documentKey),
+        documentKey.getPath().getLastSegment());
   }
 
   @Override
   public MutableDocument get(DocumentKey documentKey) {
-    String path = pathForKey(documentKey);
-
     MutableDocument document =
         db.query(
-                "SELECT contents, read_time_seconds, read_time_nanos "
-                    + "FROM remote_documents "
-                    + "WHERE path = ?")
-            .binding(path)
+                "SELECT contents, read_time_seconds, read_time_nanos FROM remote_documents "
+                    + "WHERE collection_path = ? AND document_id = ?")
+            .binding(collectionForKey(documentKey), documentKey.getPath().getLastSegment())
            .firstValue(row -> decodeMaybeDocument(row.getBlob(0), row.getInt(1), row.getInt(2)));
    return document != null ? document : MutableDocument.newInvalidDocument(documentKey);
  }
 
   @Override
   public Map<DocumentKey, MutableDocument> getAll(Iterable<DocumentKey> documentKeys) {
-    List<Object> args = new ArrayList<>();
-    for (DocumentKey key : documentKeys) {
-      args.add(EncodedPath.encode(key.getPath()));
-    }
-
     Map<DocumentKey, MutableDocument> results = new HashMap<>();
+
+    // We issue one query by collection, so first we have to sort the keys into collection buckets.
+    Map<ResourcePath, List<Object>> collectionToDocumentIds = new HashMap<>();
     for (DocumentKey key : documentKeys) {
+      ResourcePath path = key.getPath();
+      List<Object> documentIds = collectionToDocumentIds.get(path.popLast());
+      if (documentIds == null) {
+        documentIds = new ArrayList<>();
+        collectionToDocumentIds.put(path.popLast(), documentIds);
+      }
+      documentIds.add(path.getLastSegment());
+
       // Make sure each key has a corresponding entry, which is null in case the document is not
       // found.
       results.put(key, MutableDocument.newInvalidDocument(key));
     }
 
-    SQLitePersistence.LongQuery longQuery =
-        new SQLitePersistence.LongQuery(
-            db,
-            "SELECT contents, read_time_seconds, read_time_nanos FROM remote_documents "
-                + "WHERE path IN (",
-            args,
-            ") ORDER BY path");
-
-    while (longQuery.hasMoreSubqueries()) {
-      longQuery
-          .performNextSubquery()
-          .forEach(
-              row -> {
-                MutableDocument decoded =
-                    decodeMaybeDocument(row.getBlob(0), row.getInt(1), row.getInt(2));
-                results.put(decoded.getKey(), decoded);
-              });
+    for (Map.Entry<ResourcePath, List<Object>> entry : collectionToDocumentIds.entrySet()) {
+      SQLitePersistence.LongQuery longQuery =
+          new SQLitePersistence.LongQuery(
+              db,
+              "SELECT contents, read_time_seconds, read_time_nanos FROM remote_documents "
+                  + "WHERE collection_path = ? AND document_id IN (",
+              Collections.singletonList(EncodedPath.encode(entry.getKey())),
+              entry.getValue(),
+              ")");
+
+      while (longQuery.hasMoreSubqueries()) {
+        longQuery
+            .performNextSubquery()
+            .forEach(
+                row -> {
+                  MutableDocument decoded =
                      decodeMaybeDocument(row.getBlob(0), row.getInt(1), row.getInt(2));
+                  results.put(decoded.getKey(), decoded);
+                });
+      }
     }
 
     return results;
@@ -137,12 +146,7 @@ public ImmutableSortedMap<DocumentKey, MutableDocument> getAllDocumentsMatchingQ
         !query.isCollectionGroupQuery(),
         "CollectionGroup queries should be handled in LocalDocumentsView");
 
-    // Use the query path as a prefix for testing if a document matches the query.
-    ResourcePath prefix = query.getPath();
-    int immediateChildrenPathLength = prefix.length() + 1;
-
-    String prefixPath = EncodedPath.encode(prefix);
-    String prefixSuccessorPath = EncodedPath.prefixSuccessor(prefixPath);
+    String collectionPath = EncodedPath.encode(query.getPath());
     Timestamp readTime = sinceReadTime.getTimestamp();
 
     BackgroundQueue backgroundQueue = new BackgroundQueue();
@@ -155,43 +159,30 @@ public ImmutableSortedMap<DocumentKey, MutableDocument> getAllDocumentsMatchingQ
     if (sinceReadTime.equals(SnapshotVersion.NONE)) {
       sqlQuery =
           db.query(
-                  "SELECT path, contents, read_time_seconds, read_time_nanos "
-                      + "FROM remote_documents WHERE path >= ? AND path < ?")
-              .binding(prefixPath, prefixSuccessorPath);
+                  "SELECT contents, read_time_seconds, read_time_nanos "
+                      + "FROM remote_documents WHERE collection_path = ?")
+              .binding(collectionPath);
     } else {
       // Execute an index-free query and filter by read time. This is safe since all document
       // changes to queries that have a lastLimboFreeSnapshotVersion (`sinceReadTime`) have a read
      // time set.
       sqlQuery =
           db.query(
-                  "SELECT path, contents, read_time_seconds, read_time_nanos "
-                      + "FROM remote_documents WHERE path >= ? AND path < ?"
+                  "SELECT contents, read_time_seconds, read_time_nanos "
+                      + "FROM remote_documents WHERE collection_path = ? "
                       + "AND (read_time_seconds > ? OR (read_time_seconds = ? AND read_time_nanos > ?))")
              .binding(
-                  prefixPath,
-                  prefixSuccessorPath,
+                  collectionPath,
                   readTime.getSeconds(),
                   readTime.getSeconds(),
                   readTime.getNanoseconds());
     }
     sqlQuery.forEach(
         row -> {
-          // TODO: Actually implement a single-collection query
-          //
-          // The query is actually returning any path that starts with the query path prefix
-          // which may include documents in subcollections. For example, a query on 'rooms'
-          // will return rooms/abc/messages/xyx but we shouldn't match it. Fix this by
-          // discarding rows with document keys more than one segment longer than the query
-          // path.
-          ResourcePath path = EncodedPath.decodeResourcePath(row.getString(0));
-          if (path.length() != immediateChildrenPathLength) {
-            return;
-          }
-
           // Store row values in array entries to provide the correct context inside the executor.
-          final byte[] rawDocument = row.getBlob(1);
-          final int[] readTimeSeconds = {row.getInt(2)};
-          final int[] readTimeNanos = {row.getInt(3)};
+          final byte[] rawDocument = row.getBlob(0);
+          final int[] readTimeSeconds = {row.getInt(1)};
+          final int[] readTimeNanos = {row.getInt(2)};
 
           // Since scheduling background tasks incurs overhead, we only dispatch to a
           // background thread if there are still some documents remaining.
@@ -228,8 +219,8 @@ public SnapshotVersion getLatestReadTime() {
     return latestReadTime != null ? latestReadTime : SnapshotVersion.NONE;
   }
 
-  private String pathForKey(DocumentKey key) {
-    return EncodedPath.encode(key.getPath());
+  private String collectionForKey(DocumentKey key) {
+    return EncodedPath.encode(key.getPath().popLast());
   }
 
   private MutableDocument decodeMaybeDocument(
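
With the new columns, SQLiteRemoteDocumentCache addresses a row by its parent collection and final path segment instead of the fully encoded document path. Below is a minimal sketch of that split for a hypothetical document key, assuming the local package context; collectionForKey is the private helper added above, while the key value, the wrapper method, and the use of DocumentKey.fromPathString are illustrative only.

import com.google.firebase.firestore.model.DocumentKey;

// Sketch only: splits a hypothetical key the same way the rewritten cache methods
// do before binding their SQL parameters.
static void illustrateKeySplit() {
  DocumentKey documentKey = DocumentKey.fromPathString("rooms/abc/messages/xyz");
  String collectionPath = EncodedPath.encode(documentKey.getPath().popLast()); // encoded "rooms/abc/messages"
  String documentId = documentKey.getPath().getLastSegment();                  // "xyz"
  // A point lookup then becomes:
  //   SELECT ... FROM remote_documents WHERE collection_path = ? AND document_id = ?
}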

firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteSchema.java

Lines changed: 63 additions & 7 deletions
@@ -49,21 +49,21 @@ class SQLiteSchema {
    * The version of the schema. Increase this by one for each migration added to runMigrations
    * below.
    */
-  static final int VERSION = 12;
+  static final int VERSION = 13;
 
   static final int OVERLAY_SUPPORT_VERSION = VERSION + 1;
 
   // TODO(indexing): Remove this constant and increment VERSION to enable indexing support
   static final int INDEXING_SUPPORT_VERSION = OVERLAY_SUPPORT_VERSION + 1;
 
   /**
-   * The batch size for the sequence number migration in `ensureSequenceNumbers()`.
+   * The batch size for data migrations.
    *
    * <p>This addresses https://github.com/firebase/firebase-android-sdk/issues/370, where a customer
    * reported that schema migrations failed for clients with thousands of documents. The number has
    * been chosen based on manual experiments.
    */
-  private static final int SEQUENCE_NUMBER_BATCH_SIZE = 100;
+  private static final int MIGRATION_BATCH_SIZE = 100;
 
   private final SQLiteDatabase db;
 
@@ -170,6 +170,11 @@ void runSchemaUpgrades(int fromVersion, int toVersion) {
     if (fromVersion < 12 && toVersion >= 12) {
       createBundleCache();
     }
+
+    if (fromVersion < 13 && toVersion >= 13) {
+      rewriteDocumentKeys();
+    }
+
     /*
      * Adding a new migration? READ THIS FIRST!
      *
@@ -394,9 +399,6 @@ private void createFieldIndex() {
                   + "directional_value BLOB, " // index values for equality and inequalities
                   + "document_name TEXT, "
                   + "PRIMARY KEY (index_id, uid, array_value, directional_value, document_name))");
-
-          db.execSQL(
-              "CREATE INDEX read_time ON remote_documents(read_time_seconds, read_time_nanos)");
         });
   }
 
@@ -489,7 +491,7 @@ private void ensureSequenceNumbers() {
                     + "SELECT TD.path FROM target_documents AS TD "
                     + "WHERE RD.path = TD.path AND TD.target_id = 0"
                     + ") LIMIT ?")
-            .binding(SEQUENCE_NUMBER_BATCH_SIZE);
+            .binding(MIGRATION_BATCH_SIZE);
 
     boolean[] resultsRemaining = new boolean[1];
 
@@ -604,6 +606,60 @@ private void rewriteCanonicalIds() {
         });
   }
 
+  /**
+   * Migrates the remote_documents table to contain a distinct column for each entry's collection
+   * path and document id.
+   */
+  private void rewriteDocumentKeys() {
+    // SQLite does not support dropping a primary key. To create a new primary key on
+    // collection_path and document_id we need to create a new table :(
+    db.execSQL("ALTER TABLE remote_documents RENAME TO tmp;");
+    db.execSQL(
+        "CREATE TABLE remote_documents ("
+            + "collection_path TEXT, "
+            + "document_id TEXT, "
+            + "read_time_nanos INTEGER, "
+            + "read_time_seconds INTEGER, "
+            + "contents BLOB, "
+            + "PRIMARY KEY (collection_path, document_id))");
+    db.execSQL(
+        "CREATE INDEX remote_documents_read_time ON remote_documents (read_time_nanos, read_time_seconds)");
+    db.execSQL(
+        "INSERT INTO remote_documents (collection_path, read_time_nanos, read_time_seconds, contents) "
+            + "SELECT path AS collection_path, read_time_nanos, read_time_seconds, contents FROM tmp");
+    db.execSQL("DROP TABLE tmp;");
+
+    // Process each entry to split the document key into collection_path and document_id
+    SQLitePersistence.Query documentsToMigrate =
+        new SQLitePersistence.Query(
+                db,
+                "SELECT collection_path FROM remote_documents WHERE document_id IS NULL LIMIT ?")
+            .binding(MIGRATION_BATCH_SIZE);
+    SQLiteStatement insertKey =
+        db.compileStatement(
+            "UPDATE remote_documents SET collection_path = ?, document_id = ? WHERE collection_path = ?");
+
+    boolean[] resultsRemaining = new boolean[1];
+
+    do {
+      resultsRemaining[0] = false;
+
+      documentsToMigrate.forEach(
+          row -> {
+            resultsRemaining[0] = true;
+
+            String encodedPath = row.getString(0);
+            ResourcePath decodedPath = EncodedPath.decodeResourcePath(encodedPath);
+
+            insertKey.clearBindings();
+            insertKey.bindString(1, EncodedPath.encode(decodedPath.popLast()));
+            insertKey.bindString(2, decodedPath.getLastSegment());
+            insertKey.bindString(3, encodedPath);
+            hardAssert(insertKey.executeUpdateDelete() != -1, "Failed to update document path");
+          });
+    } while (resultsRemaining[0]);
+  }
+
   private void createBundleCache() {
     ifTablesDontExist(
         new String[] {"bundles", "named_queries"},
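
The migration runs in two phases: a table rebuild that parks each old encoded path in collection_path with a NULL document_id, followed by a batched backfill that splits those paths in groups of MIGRATION_BATCH_SIZE. A small sketch of what the backfill computes for one hypothetical row follows; the wrapper method is illustrative, while the helpers are the ones used in rewriteDocumentKeys above.

import com.google.firebase.firestore.model.ResourcePath;

// Sketch only: the per-row split performed by the batched UPDATE in rewriteDocumentKeys().
static void illustrateBackfillRow() {
  // After the INSERT ... SELECT, this row still carries the full encoded document
  // path in collection_path and a NULL document_id.
  String encodedPath = EncodedPath.encode(ResourcePath.fromString("coll/docA"));

  ResourcePath decodedPath = EncodedPath.decodeResourcePath(encodedPath);
  String newCollectionPath = EncodedPath.encode(decodedPath.popLast()); // encoded "coll"
  String newDocumentId = decodedPath.getLastSegment();                  // "docA"

  // These two values are then written back with
  //   UPDATE remote_documents SET collection_path = ?, document_id = ? WHERE collection_path = ?
  // in batches, until no rows with a NULL document_id remain.
}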

firebase-firestore/src/test/java/com/google/firebase/firestore/local/SQLiteSchemaTest.java

Lines changed: 30 additions & 1 deletion
@@ -548,7 +548,7 @@ public void rewritesCanonicalIds() {
         QueryPurpose.LISTEN);
 
     db.execSQL(
-        "INSERT INTO targets (target_id, canonical_id, target_proto) VALUES (?,?, ?)",
+        "INSERT INTO targets (target_id, canonical_id, target_proto) VALUES (? ,?, ?)",
         new Object[] {
           2, "invalid_canonical_id", serializer.encodeTargetData(initialTargetData).toByteArray()
         });
@@ -571,6 +571,35 @@ public void rewritesCanonicalIds() {
         });
   }
 
+  @Test
+  public void rewritesDocumentKeys() {
+    schema.runSchemaUpgrades(0, 12);
+
+    ResourcePath path = path("coll/docA");
+    db.execSQL(
+        "INSERT INTO remote_documents (path, read_time_seconds, read_time_nanos, contents) VALUES (?,?, ?, ?)",
+        new Object[] {EncodedPath.encode(path), 1, 2, new byte[] {3}});
+
+    schema.runSchemaUpgrades(12, 13);
+    new SQLitePersistence.Query(
+            db,
+            "SELECT collection_path, document_id, read_time_seconds, read_time_nanos, contents FROM remote_documents")
+        .forEach(
+            cursor -> {
+              String encodedCollectionPath = cursor.getString(0);
+              String documentId = cursor.getString(1);
+              long readTimeSeconds = cursor.getLong(2);
+              int readTimeNanos = cursor.getInt(3);
+              byte[] contents = cursor.getBlob(4);
+
+              assertEquals(path("coll"), EncodedPath.decodeResourcePath(encodedCollectionPath));
+              assertEquals("docA", documentId);
+              assertEquals(1, readTimeSeconds);
+              assertEquals(2, readTimeNanos);
+              assertArrayEquals(new byte[] {3}, contents);
+            });
+  }
+
   @Test
   public void createsBundlesTable() {
     schema.runSchemaUpgrades();
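
The new test covers a document in a top-level collection. A hypothetical follow-up case, not part of this commit, could exercise the same split for a nested subcollection document using the test's existing db, schema, and path() helpers:

// Hypothetical additional case (illustrative only): a document in a nested
// subcollection should keep everything but its last segment in collection_path.
schema.runSchemaUpgrades(0, 12);
ResourcePath nested = path("rooms/abc/messages/xyz");
db.execSQL(
    "INSERT INTO remote_documents (path, read_time_seconds, read_time_nanos, contents) VALUES (?, ?, ?, ?)",
    new Object[] {EncodedPath.encode(nested), 1, 2, new byte[] {3}});
schema.runSchemaUpgrades(12, 13);
// Expected after the upgrade:
//   collection_path -> EncodedPath.encode(path("rooms/abc/messages"))
//   document_id     -> "xyz"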
