Skip to content

Commit fa9531b

Browse files
authored
PYTHON-2824 Make GridOut implement full io.IOBase spec (#677)
Make GridOut inherit from io.IOBase to be a fully "file-like" object (https://docs.python.org/3/glossary.html#term-file-like-object). Implement missing methods `readlines`, `writelines`, `writable`, `fileno`, `flush`, `isatty`, `truncate`, and property `closed`, following the spec (https://docs.python.org/3/library/io.html#io.IOBase.writable). Iterating over GridOut previously returned chunks, but IOBase specifies that lines should be returned. Thus, the `GridOutIterator` returning chunks is removed and GridOut simply uses the existing IOBase iterator implementation (returning `self` in `__iter__` and using `readline` in `__next__`). Additionally, iterating over GridOut previously did not move the "file pointer" along, i.e. `next(iter(some_grid_out_object))` always gave the same result (the first chunk of the file) as it would create a new iterator starting at the top of the file. This is now fixed as well, so a first call to `next(iter(some_grid_out_object))` gives the first line, and subsequent calls return the subsequent lines.
1 parent 9055bb0 commit fa9531b

File tree

4 files changed

+142
-19
lines changed

4 files changed

+142
-19
lines changed

doc/changelog.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ Changes in Version 3.12.0
126126

127127
- PyMongoCrypt 1.1.0 or later is now required for client side field level
128128
encryption support.
129+
- Iterating over :class:`gridfs.grid_file.GridOut` now moves through
130+
the file line by line instead of chunk by chunk, and does not
131+
restart at the top for subsequent iterations on the same object.
132+
Call ``seek(0)`` to reset the iterator.
129133

130134
Notable improvements
131135
....................

doc/contributors.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,4 +88,5 @@ The following is a list of people who have contributed to
8888
- Terence Honles (terencehonles)
8989
- Paul Fisher (thetorpedodog)
9090
- Julius Park (juliusgeo)
91-
- Khanh Nguyen (KN99HN)
91+
- Khanh Nguyen (KN99HN)
92+
- Henri Froese (henrifroese)

gridfs/grid_file.py

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
421421
return False
422422

423423

424-
class GridOut(object):
424+
class GridOut(io.IOBase):
425425
"""Class to read data out of GridFS.
426426
"""
427427
def __init__(self, root_collection, file_id=None, file_document=None,
@@ -465,6 +465,8 @@ def __init__(self, root_collection, file_id=None, file_document=None,
465465

466466
root_collection = _clear_entity_type_registry(root_collection)
467467

468+
super().__init__()
469+
468470
self.__chunks = root_collection.chunks
469471
self.__files = root_collection.files
470472
self.__file_id = file_id
@@ -656,33 +658,40 @@ def seekable(self):
656658
def __iter__(self):
657659
"""Return an iterator over all of this file's data.
658660
659-
The iterator will return chunk-sized instances of
660-
:class:`str` (:class:`bytes` in python 3). This can be
661-
useful when serving files using a webserver that handles
662-
such an iterator efficiently.
663-
664-
.. note::
665-
This is different from :py:class:`io.IOBase` which iterates over
666-
*lines* in the file. Use :meth:`GridOut.readline` to read line by
667-
line instead of chunk by chunk.
661+
The iterator will return lines (delimited by ``b'\\n'``) of
662+
:class:`bytes`. This can be useful when serving files
663+
using a webserver that handles such an iterator efficiently.
668664
669665
.. versionchanged:: 3.8
670666
The iterator now raises :class:`CorruptGridFile` when encountering
671667
any truncated, missing, or extra chunk in a file. The previous
672668
behavior was to only raise :class:`CorruptGridFile` on a missing
673669
chunk.
670+
671+
.. versionchanged:: 4.0
672+
The iterator now iterates over *lines* in the file, instead
673+
of chunks, to conform to the base class :py:class:`io.IOBase`.
674+
Use :meth:`GridOut.readchunk` to read chunk by chunk instead
675+
of line by line.
674676
"""
675-
return GridOutIterator(self, self.__chunks, self._session)
677+
return self
676678

677679
def close(self):
678680
"""Make GridOut more generically file-like."""
679681
if self.__chunk_iter:
680682
self.__chunk_iter.close()
681683
self.__chunk_iter = None
684+
super().close()
682685

683686
def write(self, value):
684687
raise io.UnsupportedOperation('write')
685688

689+
def writelines(self, lines):
690+
raise io.UnsupportedOperation('writelines')
691+
692+
def writable(self):
693+
return False
694+
686695
def __enter__(self):
687696
"""Makes it possible to use :class:`GridOut` files
688697
with the context manager protocol.
@@ -696,6 +705,27 @@ def __exit__(self, exc_type, exc_val, exc_tb):
696705
self.close()
697706
return False
698707

708+
def fileno(self):
709+
raise io.UnsupportedOperation('fileno')
710+
711+
def flush(self):
712+
# GridOut is read-only, so flush does nothing.
713+
pass
714+
715+
def isatty(self):
716+
return False
717+
718+
def truncate(self, size=None):
719+
# See https://docs.python.org/3/library/io.html#io.IOBase.writable
720+
# for why truncate has to raise.
721+
raise io.UnsupportedOperation('truncate')
722+
723+
# Override IOBase.__del__; otherwise it will lead to __getattr__ on
724+
# __IOBase_closed which calls _ensure_file and potentially performs I/O.
725+
# We cannot do I/O in __del__ since it can lead to a deadlock.
726+
def __del__(self):
727+
pass
728+
699729

700730
class _GridOutChunkIterator(object):
701731
"""Iterates over a file's chunks using a single cursor.

test/test_grid_file.py

Lines changed: 95 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"""
1919

2020
import datetime
21+
import io
2122
import sys
2223
import zipfile
2324

@@ -323,6 +324,20 @@ def test_close(self):
323324
self.assertRaises(ValueError, f.write, "test")
324325
f.close()
325326

327+
def test_closed(self):
328+
f = GridIn(self.db.fs, chunkSize=5)
329+
f.write(b"Hello world.\nHow are you?")
330+
f.close()
331+
332+
g = GridOut(self.db.fs, f._id)
333+
self.assertFalse(g.closed)
334+
g.read(1)
335+
self.assertFalse(g.closed)
336+
g.read(100)
337+
self.assertFalse(g.closed)
338+
g.close()
339+
self.assertTrue(g.closed)
340+
326341
def test_multi_chunk_file(self):
327342
random_string = b'a' * (DEFAULT_CHUNK_SIZE + 1000)
328343

@@ -447,27 +462,85 @@ def test_readline(self):
447462
self.assertEqual(b"e", g.readline(1))
448463
self.assertEqual(b"llo world,\n", g.readline())
449464

465+
def test_readlines(self):
466+
f = GridIn(self.db.fs, chunkSize=5)
467+
f.write((b"""Hello world,
468+
How are you?
469+
Hope all is well.
470+
Bye"""))
471+
f.close()
472+
473+
# Try read(), then readlines().
474+
g = GridOut(self.db.fs, f._id)
475+
self.assertEqual(b"He", g.read(2))
476+
self.assertEqual([b"llo world,\n", b"How are you?\n"], g.readlines(11))
477+
self.assertEqual([b"Hope all is well.\n", b"Bye"], g.readlines())
478+
self.assertEqual([], g.readlines())
479+
480+
# Try readline(), then readlines().
481+
g = GridOut(self.db.fs, f._id)
482+
self.assertEqual(b"Hello world,\n", g.readline())
483+
self.assertEqual([b"How are you?\n", b"Hope all is well.\n"], g.readlines(13))
484+
self.assertEqual(b"Bye", g.readline())
485+
self.assertEqual([], g.readlines())
486+
487+
# Only readlines().
488+
g = GridOut(self.db.fs, f._id)
489+
self.assertEqual(
490+
[b"Hello world,\n", b"How are you?\n", b"Hope all is well.\n", b"Bye"],
491+
g.readlines())
492+
493+
g = GridOut(self.db.fs, f._id)
494+
self.assertEqual(
495+
[b"Hello world,\n", b"How are you?\n", b"Hope all is well.\n", b"Bye"],
496+
g.readlines(0))
497+
498+
g = GridOut(self.db.fs, f._id)
499+
self.assertEqual([b"Hello world,\n"], g.readlines(1))
500+
self.assertEqual([b"How are you?\n"], g.readlines(12))
501+
self.assertEqual([b"Hope all is well.\n", b"Bye"], g.readlines(18))
502+
503+
# Try readlines() first, then read().
504+
g = GridOut(self.db.fs, f._id)
505+
self.assertEqual([b"Hello world,\n"], g.readlines(1))
506+
self.assertEqual(b"H", g.read(1))
507+
self.assertEqual([b"ow are you?\n", b"Hope all is well.\n"], g.readlines(29))
508+
self.assertEqual([b"Bye"], g.readlines(1))
509+
510+
# Try readlines() first, then readline().
511+
g = GridOut(self.db.fs, f._id)
512+
self.assertEqual([b"Hello world,\n"], g.readlines(1))
513+
self.assertEqual(b"How are you?\n", g.readline())
514+
self.assertEqual([b"Hope all is well.\n"], g.readlines(17))
515+
self.assertEqual(b"Bye", g.readline())
516+
450517
def test_iterator(self):
451518
f = GridIn(self.db.fs)
452519
f.close()
453520
g = GridOut(self.db.fs, f._id)
454521
self.assertEqual([], list(g))
455522

456523
f = GridIn(self.db.fs)
457-
f.write(b"hello world")
524+
f.write(b"hello world\nhere are\nsome lines.")
458525
f.close()
459526
g = GridOut(self.db.fs, f._id)
460-
self.assertEqual([b"hello world"], list(g))
461-
self.assertEqual(b"hello", g.read(5))
462-
self.assertEqual([b"hello world"], list(g))
463-
self.assertEqual(b" worl", g.read(5))
527+
self.assertEqual([b"hello world\n", b"here are\n", b"some lines."], list(g))
528+
self.assertEqual(b"", g.read(5))
529+
self.assertEqual([], list(g))
530+
531+
g = GridOut(self.db.fs, f._id)
532+
self.assertEqual(b"hello world\n", next(iter(g)))
533+
self.assertEqual(b"here", g.read(4))
534+
self.assertEqual(b" are\n", next(iter(g)))
535+
self.assertEqual(b"some lines", g.read(10))
536+
self.assertEqual(b".", next(iter(g)))
537+
self.assertRaises(StopIteration, iter(g).__next__)
464538

465539
f = GridIn(self.db.fs, chunk_size=2)
466540
f.write(b"hello world")
467541
f.close()
468542
g = GridOut(self.db.fs, f._id)
469-
self.assertEqual([b"he", b"ll", b"o ",
470-
b"wo", b"rl", b"d"], list(g))
543+
self.assertEqual([b"hello world"], list(g))
471544

472545
def test_read_unaligned_buffer_size(self):
473546
in_data = (b"This is a text that doesn't "
@@ -665,6 +738,21 @@ def test_zip(self):
665738
self.assertSequenceEqual(z.namelist(), ["test.txt"])
666739
self.assertEqual(z.read("test.txt"), b"hello world")
667740

741+
def test_grid_out_unsupported_operations(self):
742+
f = GridIn(self.db.fs, chunkSize=3)
743+
f.write(b"hello world")
744+
f.close()
745+
746+
g = GridOut(self.db.fs, f._id)
747+
748+
self.assertRaises(io.UnsupportedOperation, g.writelines, [b"some", b"lines"])
749+
self.assertRaises(io.UnsupportedOperation, g.write, b"some text")
750+
self.assertRaises(io.UnsupportedOperation, g.fileno)
751+
self.assertRaises(io.UnsupportedOperation, g.truncate)
752+
753+
self.assertFalse(g.writable())
754+
self.assertFalse(g.isatty())
755+
668756

669757
if __name__ == "__main__":
670758
unittest.main()

0 commit comments

Comments
 (0)