Skip to content

Commit da30e52

Browse files
vbabanin, rozza, and jyemin
authored
Optimize GridFS throughput by removing redundant byte array cloning. (#1402)
JAVA-5485

Co-authored-by: Ross Lawley <[email protected]>
Co-authored-by: Jeff Yemin <[email protected]>
1 parent 339bd2c commit da30e52

File tree

6 files changed: 108 additions (+108) and 79 deletions (-79).

driver-sync/src/main/com/mongodb/client/gridfs/GridFSBucketImpl.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ final class GridFSBucketImpl implements GridFSBucket {
5757
private final String bucketName;
5858
private final int chunkSizeBytes;
5959
private final MongoCollection<GridFSFile> filesCollection;
60-
private final MongoCollection<Document> chunksCollection;
60+
private final MongoCollection<BsonDocument> chunksCollection;
6161
private volatile boolean checkedIndexes;
6262

6363
GridFSBucketImpl(final MongoDatabase database) {
@@ -71,7 +71,7 @@ final class GridFSBucketImpl implements GridFSBucket {
7171
}
7272

7373
GridFSBucketImpl(final String bucketName, final int chunkSizeBytes, final MongoCollection<GridFSFile> filesCollection,
74-
final MongoCollection<Document> chunksCollection) {
74+
final MongoCollection<BsonDocument> chunksCollection) {
7575
this.bucketName = notNull("bucketName", bucketName);
7676
this.chunkSizeBytes = chunkSizeBytes;
7777
this.filesCollection = notNull("filesCollection", filesCollection);
@@ -459,8 +459,8 @@ private static MongoCollection<GridFSFile> getFilesCollection(final MongoDatabas
459459
);
460460
}
461461

462-
private static MongoCollection<Document> getChunksCollection(final MongoDatabase database, final String bucketName) {
463-
return database.getCollection(bucketName + ".chunks").withCodecRegistry(MongoClientSettings.getDefaultCodecRegistry());
462+
private static MongoCollection<BsonDocument> getChunksCollection(final MongoDatabase database, final String bucketName) {
463+
return database.getCollection(bucketName + ".chunks", BsonDocument.class).withCodecRegistry(MongoClientSettings.getDefaultCodecRegistry());
464464
}
465465

466466
private void checkCreateIndex(@Nullable final ClientSession clientSession) {

driver-sync/src/main/com/mongodb/client/gridfs/GridFSDownloadStreamImpl.java

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,10 @@
2323
import com.mongodb.client.MongoCursor;
2424
import com.mongodb.client.gridfs.model.GridFSFile;
2525
import com.mongodb.lang.Nullable;
26+
import org.bson.BsonBinary;
27+
import org.bson.BsonDocument;
28+
import org.bson.BsonInt32;
2629
import org.bson.BsonValue;
27-
import org.bson.Document;
28-
import org.bson.types.Binary;
2930

3031
import java.util.concurrent.locks.ReentrantLock;
3132

@@ -37,12 +38,12 @@
3738
class GridFSDownloadStreamImpl extends GridFSDownloadStream {
3839
private final ClientSession clientSession;
3940
private final GridFSFile fileInfo;
40-
private final MongoCollection<Document> chunksCollection;
41+
private final MongoCollection<BsonDocument> chunksCollection;
4142
private final BsonValue fileId;
4243
private final long length;
4344
private final int chunkSizeInBytes;
4445
private final int numberOfChunks;
45-
private MongoCursor<Document> cursor;
46+
private MongoCursor<BsonDocument> cursor;
4647
private int batchSize;
4748
private int chunkIndex;
4849
private int bufferOffset;
@@ -55,10 +56,10 @@ class GridFSDownloadStreamImpl extends GridFSDownloadStream {
5556
private boolean closed = false;
5657

5758
GridFSDownloadStreamImpl(@Nullable final ClientSession clientSession, final GridFSFile fileInfo,
58-
final MongoCollection<Document> chunksCollection) {
59+
final MongoCollection<BsonDocument> chunksCollection) {
5960
this.clientSession = clientSession;
6061
this.fileInfo = notNull("file information", fileInfo);
61-
this.chunksCollection = notNull("chunks collection", chunksCollection);
62+
this.chunksCollection = notNull("chunks collection", chunksCollection);
6263

6364
fileId = fileInfo.getId();
6465
length = fileInfo.getLength();
@@ -213,17 +214,17 @@ private void discardCursor() {
213214
}
214215

215216
@Nullable
216-
private Document getChunk(final int startChunkIndex) {
217+
private BsonDocument getChunk(final int startChunkIndex) {
217218
if (cursor == null) {
218219
cursor = getCursor(startChunkIndex);
219220
}
220-
Document chunk = null;
221+
BsonDocument chunk = null;
221222
if (cursor.hasNext()) {
222223
chunk = cursor.next();
223224
if (batchSize == 1) {
224225
discardCursor();
225226
}
226-
if (chunk.getInteger("n") != startChunkIndex) {
227+
if (chunk.getInt32("n").getValue() != startChunkIndex) {
227228
throw new MongoGridFSException(format("Could not find file chunk for file_id: %s at chunk index %s.",
228229
fileId, startChunkIndex));
229230
}
@@ -232,28 +233,28 @@ private Document getChunk(final int startChunkIndex) {
232233
return chunk;
233234
}
234235

235-
private MongoCursor<Document> getCursor(final int startChunkIndex) {
236-
FindIterable<Document> findIterable;
237-
Document filter = new Document("files_id", fileId).append("n", new Document("$gte", startChunkIndex));
236+
private MongoCursor<BsonDocument> getCursor(final int startChunkIndex) {
237+
FindIterable<BsonDocument> findIterable;
238+
BsonDocument filter = new BsonDocument("files_id", fileId).append("n", new BsonDocument("$gte", new BsonInt32(startChunkIndex)));
238239
if (clientSession != null) {
239240
findIterable = chunksCollection.find(clientSession, filter);
240241
} else {
241242
findIterable = chunksCollection.find(filter);
242243
}
243-
return findIterable.batchSize(batchSize).sort(new Document("n", 1)).iterator();
244+
return findIterable.batchSize(batchSize).sort(new BsonDocument("n", new BsonInt32(1))).iterator();
244245
}
245246

246-
private byte[] getBufferFromChunk(@Nullable final Document chunk, final int expectedChunkIndex) {
247+
private byte[] getBufferFromChunk(@Nullable final BsonDocument chunk, final int expectedChunkIndex) {
247248

248-
if (chunk == null || chunk.getInteger("n") != expectedChunkIndex) {
249+
if (chunk == null || chunk.getInt32("n").getValue() != expectedChunkIndex) {
249250
throw new MongoGridFSException(format("Could not find file chunk for file_id: %s at chunk index %s.",
250251
fileId, expectedChunkIndex));
251252
}
252253

253-
if (!(chunk.get("data") instanceof Binary)) {
254+
if (!(chunk.get("data") instanceof BsonBinary)) {
254255
throw new MongoGridFSException("Unexpected data format for the chunk");
255256
}
256-
byte[] data = chunk.get("data", Binary.class).getData();
257+
byte[] data = chunk.getBinary("data").getData();
257258

258259
long expectedDataLength = 0;
259260
boolean extraChunk = false;

driver-sync/src/main/com/mongodb/client/gridfs/GridFSUploadStreamImpl.java

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,11 @@
2121
import com.mongodb.client.MongoCollection;
2222
import com.mongodb.client.gridfs.model.GridFSFile;
2323
import com.mongodb.lang.Nullable;
24+
import org.bson.BsonBinary;
25+
import org.bson.BsonDocument;
26+
import org.bson.BsonInt32;
2427
import org.bson.BsonValue;
2528
import org.bson.Document;
26-
import org.bson.types.Binary;
2729
import org.bson.types.ObjectId;
2830

2931
import java.util.Date;
@@ -35,7 +37,7 @@
3537
final class GridFSUploadStreamImpl extends GridFSUploadStream {
3638
private final ClientSession clientSession;
3739
private final MongoCollection<GridFSFile> filesCollection;
38-
private final MongoCollection<Document> chunksCollection;
40+
private final MongoCollection<BsonDocument> chunksCollection;
3941
private final BsonValue fileId;
4042
private final String filename;
4143
private final int chunkSizeBytes;
@@ -49,7 +51,7 @@ final class GridFSUploadStreamImpl extends GridFSUploadStream {
4951
private boolean closed = false;
5052

5153
GridFSUploadStreamImpl(@Nullable final ClientSession clientSession, final MongoCollection<GridFSFile> filesCollection,
52-
final MongoCollection<Document> chunksCollection, final BsonValue fileId, final String filename,
54+
final MongoCollection<BsonDocument> chunksCollection, final BsonValue fileId, final String filename,
5355
final int chunkSizeBytes, @Nullable final Document metadata) {
5456
this.clientSession = clientSession;
5557
this.filesCollection = notNull("files collection", filesCollection);
@@ -160,23 +162,23 @@ public void close() {
160162
private void writeChunk() {
161163
if (bufferOffset > 0) {
162164
if (clientSession != null) {
163-
chunksCollection.insertOne(clientSession, new Document("files_id", fileId).append("n", chunkIndex)
165+
chunksCollection.insertOne(clientSession, new BsonDocument("files_id", fileId).append("n", new BsonInt32(chunkIndex))
164166
.append("data", getData()));
165167
} else {
166-
chunksCollection.insertOne(new Document("files_id", fileId).append("n", chunkIndex).append("data", getData()));
168+
chunksCollection.insertOne(new BsonDocument("files_id", fileId).append("n", new BsonInt32(chunkIndex)).append("data", getData()));
167169
}
168170
chunkIndex++;
169171
bufferOffset = 0;
170172
}
171173
}
172174

173-
private Binary getData() {
175+
private BsonBinary getData() {
174176
if (bufferOffset < chunkSizeBytes) {
175177
byte[] sizedBuffer = new byte[bufferOffset];
176178
System.arraycopy(buffer, 0, sizedBuffer, 0, bufferOffset);
177179
buffer = sizedBuffer;
178180
}
179-
return new Binary(buffer);
181+
return new BsonBinary(buffer);
180182
}
181183

182184
private void checkClosed() {

driver-sync/src/test/unit/com/mongodb/client/gridfs/GridFSBucketSpecification.groovy

Lines changed: 20 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -35,12 +35,13 @@ import com.mongodb.client.result.DeleteResult
3535
import com.mongodb.client.result.UpdateResult
3636
import com.mongodb.internal.operation.BatchCursor
3737
import com.mongodb.internal.operation.FindOperation
38+
import org.bson.BsonBinary
3839
import org.bson.BsonDocument
40+
import org.bson.BsonInt32
3941
import org.bson.BsonObjectId
4042
import org.bson.BsonString
4143
import org.bson.Document
4244
import org.bson.codecs.DocumentCodecProvider
43-
import org.bson.types.Binary
4445
import org.bson.types.ObjectId
4546
import spock.lang.Specification
4647
import spock.lang.Unroll
@@ -327,7 +328,9 @@ class GridFSBucketSpecification extends Specification {
327328
def findIterable = Mock(FindIterable)
328329
def filesCollection = Mock(MongoCollection)
329330
def tenBytes = new byte[10]
330-
def chunkDocument = new Document('files_id', fileInfo.getId()).append('n', 0).append('data', new Binary(tenBytes))
331+
def chunkDocument = new BsonDocument('files_id', fileInfo.getId())
332+
.append('n', new BsonInt32(0))
333+
.append('data', new BsonBinary(tenBytes))
331334
def chunksCollection = Mock(MongoCollection)
332335
def gridFSBucket = new GridFSBucketImpl('fs', 255, filesCollection, chunksCollection)
333336
def outputStream = new ByteArrayOutputStream(10)
@@ -346,7 +349,7 @@ class GridFSBucketSpecification extends Specification {
346349
} else {
347350
1 * filesCollection.find() >> findIterable
348351
}
349-
1 * findIterable.filter(new Document('_id', bsonFileId)) >> findIterable
352+
1 * findIterable.filter(new BsonDocument('_id', bsonFileId)) >> findIterable
350353
1 * findIterable.first() >> fileInfo
351354

352355
then:
@@ -376,7 +379,9 @@ class GridFSBucketSpecification extends Specification {
376379
def findIterable = Mock(FindIterable)
377380
def filesCollection = Mock(MongoCollection)
378381
def tenBytes = new byte[10]
379-
def chunkDocument = new Document('files_id', fileInfo.getId()).append('n', 0).append('data', new Binary(tenBytes))
382+
def chunkDocument = new BsonDocument('files_id', fileInfo.getId())
383+
.append('n', new BsonInt32(0))
384+
.append('data', new BsonBinary(tenBytes))
380385
def chunksCollection = Mock(MongoCollection)
381386
def gridFSBucket = new GridFSBucketImpl('fs', 255, filesCollection, chunksCollection)
382387
def outputStream = new ByteArrayOutputStream(10)
@@ -395,7 +400,7 @@ class GridFSBucketSpecification extends Specification {
395400
} else {
396401
1 * filesCollection.find() >> findIterable
397402
}
398-
1 * findIterable.filter(new Document('_id', bsonFileId)) >> findIterable
403+
1 * findIterable.filter(new BsonDocument('_id', bsonFileId)) >> findIterable
399404
1 * findIterable.first() >> fileInfo
400405

401406
then:
@@ -424,11 +429,13 @@ class GridFSBucketSpecification extends Specification {
424429
def bsonFileId = new BsonObjectId(fileId)
425430
def fileInfo = new GridFSFile(bsonFileId, filename, 10, 255, new Date(), new Document())
426431
def mongoCursor = Mock(MongoCursor)
427-
def findIterable = Mock(FindIterable)
432+
def gridFsFileFindIterable = Mock(FindIterable)
428433
def findChunkIterable = Mock(FindIterable)
429434
def filesCollection = Mock(MongoCollection)
430435
def tenBytes = new byte[10]
431-
def chunkDocument = new Document('files_id', fileInfo.getId()).append('n', 0).append('data', new Binary(tenBytes))
436+
def chunkDocument = new BsonDocument('files_id', fileInfo.getId())
437+
.append('n', new BsonInt32(0))
438+
.append('data', new BsonBinary(tenBytes))
432439
def chunksCollection = Mock(MongoCollection)
433440
def gridFSBucket = new GridFSBucketImpl('fs', 255, filesCollection, chunksCollection)
434441
def outputStream = new ByteArrayOutputStream(10)
@@ -443,14 +450,14 @@ class GridFSBucketSpecification extends Specification {
443450

444451
then:
445452
if (clientSession != null) {
446-
1 * filesCollection.find(clientSession) >> findIterable
453+
1 * filesCollection.find(clientSession) >> gridFsFileFindIterable
447454
} else {
448-
1 * filesCollection.find() >> findIterable
455+
1 * filesCollection.find() >> gridFsFileFindIterable
449456
}
450-
1 * findIterable.filter(new Document('filename', filename)) >> findIterable
451-
1 * findIterable.skip(_) >> findIterable
452-
1 * findIterable.sort(_) >> findIterable
453-
1 * findIterable.first() >> fileInfo
457+
1 * gridFsFileFindIterable.filter(new Document('filename', filename)) >> gridFsFileFindIterable
458+
1 * gridFsFileFindIterable.skip(_) >> gridFsFileFindIterable
459+
1 * gridFsFileFindIterable.sort(_) >> gridFsFileFindIterable
460+
1 * gridFsFileFindIterable.first() >> fileInfo
454461

455462
if (clientSession != null) {
456463
1 * chunksCollection.find(clientSession, _) >> findChunkIterable

0 commit comments

Comments
 (0)