python · methane · Feb 20, 2017
diff --git a/Doc/library/io.rst b/Doc/library/io.rst
@@ -901,13 +901,26 @@ Text I/O
       locale encoding using :func:`locale.setlocale`, use the current locale
       encoding instead of the user preferred encoding.
 
-   :class:`TextIOWrapper` provides one attribute in addition to those of
+   :class:`TextIOWrapper` provides these members in addition to those of
    :class:`TextIOBase` and its parents:
 
    .. attribute:: line_buffering
 
       Whether line buffering is enabled.
 
+   .. method:: set_encoding(encoding=None, errors=None[, newline])
+
+      Change the encoding, error handler, and newline handling.
+      If *encoding* is ``None`` or *newline* is unspecified, the existing
+      setting is retained.  If *errors* is ``None``, the default depends on
+      *encoding*: if *encoding* is also ``None``, the existing error handler
+      is retained; otherwise it is reset to ``'strict'``.
+
+      It is not possible to change the encoding once data has been read
+      from the stream; attempting to do so raises :exc:`UnsupportedOperation`.
+
+      .. versionadded:: 3.7
+
 
 .. class:: StringIO(initial_value='', newline='\\n')
 

diff --git a/Lib/_pyio.py b/Lib/_pyio.py
@@ -1946,11 +1946,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None,
         self._line_buffering = line_buffering
         self._encoding = encoding
         self._errors = errors
-        self._readuniversal = not newline
-        self._readtranslate = newline is None
-        self._readnl = newline
-        self._writetranslate = newline != ''
-        self._writenl = newline or os.linesep
+        self._set_newline(newline)
         self._encoder = None
         self._decoder = None
         self._decoded_chars = ''  # buffer for text returned from decoder
@@ -1995,6 +1991,90 @@ def __repr__(self):
             result += " mode={0!r}".format(mode)
         return result + " encoding={0!r}>".format(self.encoding)
 
+    def set_encoding(self, encoding=None, errors=None, newline=Ellipsis):
+        """Change the encoding, error handler and newline handling.
+
+        If *encoding* is None, the current encoding is kept.  If *errors*
+        is None, the current error handler is kept when *encoding* is also
+        None and reset to 'strict' otherwise.  If *newline* is omitted, the
+        current newline handling is kept.
+
+        Pending writes are flushed before the new settings take effect.
+
+        Raises ValueError or TypeError for an invalid argument, and
+        UnsupportedOperation if a setting would change once a decoder
+        exists, i.e. after the first read.
+        """
+        old_codec = codecs.lookup(self._encoding).name
+        if encoding is None:
+            # Keep the spelling the stream already reports instead of
+            # replacing it with the normalized codec name.
+            encoding = self._encoding
+            new_codec = old_codec
+            if errors is None:
+                errors = self._errors
+        else:
+            if not isinstance(encoding, str):
+                # Wrap in a tuple so that a tuple argument is reported
+                # rather than consumed by the % operator.
+                raise ValueError("invalid encoding: %r" % (encoding,))
+            if errors is None:
+                errors = 'strict'
+            encoding = new_codec = codecs.lookup(encoding).name
+
+        if not isinstance(errors, str):
+            raise ValueError("invalid errors: %r" % (errors,))
+
+        if newline is Ellipsis:
+            newline = self._readnl
+        elif newline is not None and not isinstance(newline, str):
+            raise TypeError("illegal newline type: %r" % (type(newline),))
+        elif newline not in (None, "", "\n", "\r", "\r\n"):
+            raise ValueError("illegal newline value: %r" % (newline,))
+
+        if (new_codec == old_codec and errors == self._errors
+                and newline == self._readnl):
+            # no change
+            return
+
+        if self._decoder is not None:
+            raise UnsupportedOperation(
+                "It is not possible to set the encoding of stream after "
+                "the first read")
+
+        # flush write buffer
+        self.flush()
+
+        # reset attributes
+        self._encoding = encoding
+        self._errors = errors
+        self._encoder = None
+        self._decoder = None
+        self._b2cratio = 0.0
+        self._set_newline(newline)
+
+        # don't write a BOM in the middle of a file
+        if self._seekable and self.writable():
+            position = self.buffer.tell()
+            if position != 0:
+                try:
+                    self._get_encoder().setstate(0)
+                except LookupError:
+                    # Sometimes the encoder doesn't exist
+                    pass
+
+    def _set_newline(self, newline):
+        """Derive the newline-handling attributes from *newline*.
+
+        *newline* is stored as given in ``_readnl``; ``_writenl`` falls back
+        to os.linesep when *newline* is None or ''.
+        """
+        self._readuniversal = not newline
+        self._readtranslate = newline is None
+        self._readnl = newline
+        self._writetranslate = newline != ''
+        self._writenl = newline or os.linesep
+
     @property
     def encoding(self):
         return self._encoding

diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
@@ -3242,6 +3242,134 @@ def seekable(self): return True
         F.tell = lambda x: 0
         t = self.TextIOWrapper(F(), encoding='utf-8')
 
+    def test_set_encoding_same_codec(self):
+        payload = 'foobar\n'.encode('latin1')
+        buf = self.BytesIO(payload)
+        wrapper = self.TextIOWrapper(buf, encoding='latin1')
+        self.assertEqual(wrapper.encoding, 'latin1')
+
+        # ISO-8859-1 is just an alias of latin1: must be treated as no change
+        wrapper.set_encoding('ISO-8859-1')
+        self.assertEqual(wrapper.encoding, 'latin1')
+
+        # iso8859-15 is a different codec, so this one is an actual change
+        wrapper.set_encoding('iso8859-15')
+        self.assertEqual(wrapper.encoding, 'iso8859-15')
+
+    def test_set_encoding_read(self):
+        # latin1 -> utf8 is refused once something has been read
+        # (latin1 can decode utf-8 encoded bytes, so readline() succeeds)
+        payload = 'abc\xe9\n'.encode('latin1') + 'd\xe9f\n'.encode('utf8')
+        buf = self.BytesIO(payload)
+        wrapper = self.TextIOWrapper(buf, encoding='latin1', newline='\n')
+        self.assertEqual(wrapper.readline(), 'abc\xe9\n')
+        with self.assertRaises(self.UnsupportedOperation):
+            wrapper.set_encoding('utf-8')
+
+    def test_set_encoding_write_fromascii(self):
+        # The C implementation caches a dedicated encodefunc for ascii
+        # but has none for utf-8-sig.  Check that the cached encodefunc
+        # is dropped when the encoder is switched.
+        buf = self.BytesIO()
+        wrapper = self.TextIOWrapper(buf, encoding='ascii', newline='\n')
+        wrapper.write('foo\n')
+        wrapper.set_encoding('utf-8-sig')
+        wrapper.write('\xe9\n')
+        wrapper.flush()
+        self.assertEqual(buf.getvalue(), b'foo\n\xc3\xa9\n')
+
+    def test_set_encoding_write(self):
+        # latin1 -> utf-8: earlier output keeps its original encoding
+        buf = self.BytesIO()
+        wrapper = self.TextIOWrapper(buf, encoding='latin1', newline='\n')
+        wrapper.write('abc\xe9\n')
+        wrapper.set_encoding('utf-8')
+        self.assertEqual(buf.getvalue(), b'abc\xe9\n')
+        wrapper.write('d\xe9f\n')
+        wrapper.flush()
+        self.assertEqual(buf.getvalue(), b'abc\xe9\nd\xc3\xa9f\n')
+
+        # ascii -> utf-8-sig: switching codecs part-way through the file
+        # must not insert a BOM
+        buf = self.BytesIO()
+        wrapper = self.TextIOWrapper(buf, encoding='ascii', newline='\n')
+        wrapper.write('abc\n')
+        wrapper.set_encoding('utf-8-sig')
+        wrapper.write('d\xe9f\n')
+        wrapper.flush()
+        self.assertEqual(buf.getvalue(), b'abc\nd\xc3\xa9f\n')
+
+    def test_set_encoding_write_non_seekable(self):
+        buf = self.BytesIO()
+        buf.seekable = lambda: False
+        buf.seek = None
+        wrapper = self.TextIOWrapper(buf, encoding='ascii', newline='\n')
+        wrapper.write('abc\n')
+        wrapper.set_encoding('utf-8-sig')
+        wrapper.write('d\xe9f\n')
+        wrapper.flush()
+
+        # Without a seekable raw stream the BOM is written mid-stream
+        self.assertEqual(buf.getvalue(), b'abc\n\xef\xbb\xbfd\xc3\xa9f\n')
+
+    def test_set_encoding_defaults(self):
+        wrapper = self.TextIOWrapper(self.BytesIO(), 'ascii', 'replace', '\n')
+        wrapper.set_encoding(None, None)
+        self.assertEqual(wrapper.encoding, 'ascii')
+        self.assertEqual(wrapper.errors, 'replace')
+        wrapper.write('LF\n')
+        # Changing only the newline keeps encoding and error handler
+        wrapper.set_encoding(newline='\r\n')
+        self.assertEqual(wrapper.encoding, 'ascii')
+        self.assertEqual(wrapper.errors, 'replace')
+        # Changing only the error handler keeps the encoding
+        wrapper.set_encoding(errors='ignore')
+        self.assertEqual(wrapper.encoding, 'ascii')
+        wrapper.write('CRLF\n')
+        # A new encoding without explicit errors resets them to 'strict'
+        wrapper.set_encoding(encoding='utf-8', newline=None)
+        self.assertEqual(wrapper.errors, 'strict')
+        wrapper.seek(0)
+        self.assertEqual(wrapper.read(), 'LF\nCRLF\n')
+        # Each write used the newline that was in effect at the time
+        self.assertEqual(wrapper.detach().getvalue(), b'LF\nCRLF\r\n')
+
+    def test_set_encoding_newline(self):
+        buf = self.BytesIO(b'CR\rEOF')
+        wrapper = self.TextIOWrapper(buf, 'ascii', newline='\n')
+        wrapper.set_encoding(newline=None)
+        self.assertEqual(wrapper.readline(), 'CR\n')
+        buf = self.BytesIO(b'CR\rEOF')
+        wrapper = self.TextIOWrapper(buf, 'ascii', newline='\n')
+        wrapper.set_encoding(newline='')
+        self.assertEqual(wrapper.readline(), 'CR\r')
+        buf = self.BytesIO(b'CR\rLF\nEOF')
+        wrapper = self.TextIOWrapper(buf, 'ascii', newline='\r')
+        wrapper.set_encoding(newline='\n')
+        self.assertEqual(wrapper.readline(), 'CR\rLF\n')
+        buf = self.BytesIO(b'LF\nCR\rEOF')
+        wrapper = self.TextIOWrapper(buf, 'ascii', newline='\n')
+        wrapper.set_encoding(newline='\r')
+        self.assertEqual(wrapper.readline(), 'LF\nCR\r')
+        buf = self.BytesIO(b'CR\rCRLF\r\nEOF')
+        wrapper = self.TextIOWrapper(buf, 'ascii', newline='\r')
+        wrapper.set_encoding(newline='\r\n')
+        self.assertEqual(wrapper.readline(), 'CR\rCRLF\r\n')
+        # Each write is translated with the newline in effect at that time
+        wrapper = self.TextIOWrapper(self.BytesIO(), 'ascii', newline='\r')
+        wrapper.set_encoding(newline=None)
+        wrapper.write('linesep\n')
+        wrapper.set_encoding(newline='')
+        wrapper.write('LF\n')
+        wrapper.set_encoding(newline='\n')
+        wrapper.write('LF\n')
+        wrapper.set_encoding(newline='\r')
+        wrapper.write('CR\n')
+        wrapper.set_encoding(newline='\r\n')
+        wrapper.write('CRLF\n')
+        want = 'linesep' + os.linesep + 'LF\nLF\nCR\rCRLF\r\n'
+        self.assertEqual(wrapper.detach().getvalue().decode('ascii'), want)
+
 
 class MemviewBytesIO(io.BytesIO):
     '''A BytesIO object whose read method returns memoryviews