@@ -674,8 +674,8 @@ typedef struct
674
674
*/
675
675
PyObject * decoded_chars ; /* buffer for text returned from decoder */
676
676
Py_ssize_t decoded_chars_used ; /* offset into _decoded_chars for read() */
677
- PyObject * pending_bytes ; /* list of bytes objects waiting to be
678
- written , or NULL */
677
+ PyObject * pending_bytes ; // data waiting to be written.
678
+ // ascii unicode, bytes , or list of them.
679
679
Py_ssize_t pending_bytes_count ;
680
680
681
681
/* snapshot is either NULL, or a tuple (dec_flags, next_input) where
@@ -777,6 +777,15 @@ latin1_encode(textio *self, PyObject *text)
777
777
return _PyUnicode_AsLatin1String (text , PyUnicode_AsUTF8 (self -> errors ));
778
778
}
779
779
780
// Return true when `f` is one of ascii_encode, latin1_encode or
// utf8_encode, i.e. an encoder for which the encoding step can be
// skipped when the text is ASCII. Any other encoder yields false.
781
+ static inline int
782
+ is_asciicompat_encoding (encodefunc_t f )
783
+ {
784
+ return f == (encodefunc_t ) ascii_encode
785
+ || f == (encodefunc_t ) latin1_encode
786
+ || f == (encodefunc_t ) utf8_encode ;
787
+ }
788
+
780
789
/* Map normalized encoding names onto the specialized encoding funcs */
781
790
782
791
typedef struct {
@@ -1489,21 +1498,62 @@ _io_TextIOWrapper_detach_impl(textio *self)
1489
1498
static int
1490
1499
_textiowrapper_writeflush (textio * self )
1491
1500
{
1492
- PyObject * pending , * b , * ret ;
1493
-
1494
1501
if (self -> pending_bytes == NULL )
1495
1502
return 0 ;
1496
1503
1497
- pending = self -> pending_bytes ;
1498
- Py_INCREF (pending );
1499
- self -> pending_bytes_count = 0 ;
1500
- Py_CLEAR (self -> pending_bytes );
1504
+ PyObject * pending = self -> pending_bytes ;
1505
+ PyObject * b ;
1501
1506
1502
- b = _PyBytes_Join (_PyIO_empty_bytes , pending );
1507
+ if (PyBytes_Check (pending )) {
1508
+ b = pending ;
1509
+ Py_INCREF (b );
1510
+ }
1511
+ else if (PyUnicode_Check (pending )) {
1512
+ assert (PyUnicode_IS_ASCII (pending ));
1513
+ assert (PyUnicode_GET_LENGTH (pending ) == self -> pending_bytes_count );
1514
+ b = PyBytes_FromStringAndSize (
1515
+ PyUnicode_DATA (pending ), PyUnicode_GET_LENGTH (pending ));
1516
+ if (b == NULL ) {
1517
+ return -1 ;
1518
+ }
1519
+ }
1520
+ else {
1521
+ assert (PyList_Check (pending ));
1522
+ b = PyBytes_FromStringAndSize (NULL , self -> pending_bytes_count );
1523
+ if (b == NULL ) {
1524
+ return -1 ;
1525
+ }
1526
+
1527
+ char * buf = PyBytes_AsString (b );
1528
+ Py_ssize_t pos = 0 ;
1529
+
1530
+ for (Py_ssize_t i = 0 ; i < PyList_GET_SIZE (pending ); i ++ ) {
1531
+ PyObject * obj = PyList_GET_ITEM (pending , i );
1532
+ char * src ;
1533
+ Py_ssize_t len ;
1534
+ if (PyUnicode_Check (obj )) {
1535
+ assert (PyUnicode_IS_ASCII (obj ));
1536
+ src = PyUnicode_DATA (obj );
1537
+ len = PyUnicode_GET_LENGTH (obj );
1538
+ }
1539
+ else {
1540
+ assert (PyBytes_Check (obj ));
1541
+ if (PyBytes_AsStringAndSize (obj , & src , & len ) < 0 ) {
1542
+ Py_DECREF (b );
1543
+ return -1 ;
1544
+ }
1545
+ }
1546
+ memcpy (buf + pos , src , len );
1547
+ pos += len ;
1548
+ }
1549
+ assert (pos == self -> pending_bytes_count );
1550
+ }
1551
+
1552
+ self -> pending_bytes_count = 0 ;
1553
+ self -> pending_bytes = NULL ;
1503
1554
Py_DECREF (pending );
1504
- if (b == NULL )
1505
- return -1 ;
1506
- ret = NULL ;
1555
+
1556
+ PyObject * ret ;
1507
1557
do {
1508
1558
ret = PyObject_CallMethodObjArgs (self -> buffer ,
1509
1559
_PyIO_str_write , b , NULL );
@@ -1566,37 +1616,61 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1566
1616
1567
1617
/* XXX What if we were just reading? */
1568
1618
if (self -> encodefunc != NULL ) {
1569
- b = (* self -> encodefunc )((PyObject * ) self , text );
1619
+ if (PyUnicode_IS_ASCII (text ) && is_asciicompat_encoding (self -> encodefunc )) {
1620
+ b = text ;
1621
+ Py_INCREF (b );
1622
+ }
1623
+ else {
1624
+ b = (* self -> encodefunc )((PyObject * ) self , text );
1625
+ }
1570
1626
self -> encoding_start_of_stream = 0 ;
1571
1627
}
1572
1628
else
1573
1629
b = PyObject_CallMethodObjArgs (self -> encoder ,
1574
1630
_PyIO_str_encode , text , NULL );
1631
+
1575
1632
Py_DECREF (text );
1576
1633
if (b == NULL )
1577
1634
return NULL ;
1578
- if (!PyBytes_Check (b )) {
1635
+ if (b != text && !PyBytes_Check (b )) {
1579
1636
PyErr_Format (PyExc_TypeError ,
1580
1637
"encoder should return a bytes object, not '%.200s'" ,
1581
1638
Py_TYPE (b )-> tp_name );
1582
1639
Py_DECREF (b );
1583
1640
return NULL ;
1584
1641
}
1585
1642
1643
+ Py_ssize_t bytes_len ;
1644
+ if (b == text ) {
1645
+ bytes_len = PyUnicode_GET_LENGTH (b );
1646
+ }
1647
+ else {
1648
+ bytes_len = PyBytes_GET_SIZE (b );
1649
+ }
1650
+
1586
1651
if (self -> pending_bytes == NULL ) {
1587
- self -> pending_bytes = PyList_New (0 );
1588
- if (self -> pending_bytes == NULL ) {
1652
+ self -> pending_bytes_count = 0 ;
1653
+ self -> pending_bytes = b ;
1654
+ }
1655
+ else if (!PyList_CheckExact (self -> pending_bytes )) {
1656
+ PyObject * list = PyList_New (2 );
1657
+ if (list == NULL ) {
1589
1658
Py_DECREF (b );
1590
1659
return NULL ;
1591
1660
}
1592
- self -> pending_bytes_count = 0 ;
1661
+ PyList_SET_ITEM (list , 0 , self -> pending_bytes );
1662
+ PyList_SET_ITEM (list , 1 , b );
1663
+ self -> pending_bytes = list ;
1593
1664
}
1594
- if (PyList_Append (self -> pending_bytes , b ) < 0 ) {
1665
+ else {
1666
+ if (PyList_Append (self -> pending_bytes , b ) < 0 ) {
1667
+ Py_DECREF (b );
1668
+ return NULL ;
1669
+ }
1595
1670
Py_DECREF (b );
1596
- return NULL ;
1597
1671
}
1598
- self -> pending_bytes_count += PyBytes_GET_SIZE ( b );
1599
- Py_DECREF ( b ) ;
1672
+
1673
+ self -> pending_bytes_count += bytes_len ;
1600
1674
if (self -> pending_bytes_count > self -> chunk_size || needflush ||
1601
1675
text_needflush ) {
1602
1676
if (_textiowrapper_writeflush (self ) < 0 )
0 commit comments