Merge pull request #274 from python/master

sthagen · web-flow · commit 0c59e55532e0 · 2020-02-27T08:01:09.000+01:00
Sync Fork from Upstream Repo
diff --git a/.azure-pipelines/macos-steps.yml b/.azure-pipelines/macos-steps.yml
@@ -6,7 +6,7 @@ steps:
 - script: ./configure --with-pydebug --with-openssl=/usr/local/opt/openssl --prefix=/opt/python-azdev
   displayName: 'Configure CPython (debug)'
 
-- script: make -s -j4
+- script: make -j4
   displayName: 'Build CPython'
 
 - script: make pythoninfo
diff --git a/.azure-pipelines/posix-steps.yml b/.azure-pipelines/posix-steps.yml
@@ -20,7 +20,7 @@ steps:
 - script: ./configure --with-pydebug
   displayName: 'Configure CPython (debug)'
 
-- script: make -s -j4
+- script: make -j4
   displayName: 'Build CPython'
 
 - ${{ if eq(parameters.coverage, 'true') }}:
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -55,7 +55,7 @@ jobs:
     - name: Configure CPython
       run: ./configure --with-pydebug --with-openssl=/usr/local/opt/openssl --prefix=/opt/python-dev
     - name: Build CPython
-      run: make -s -j4
+      run: make -j4
     - name: Display build info
       run: make pythoninfo
     - name: Tests
@@ -82,7 +82,7 @@ jobs:
     - name: Configure CPython
       run: ./configure --with-pydebug --with-openssl=$PWD/multissl/openssl/$OPENSSL_VER
     - name: Build CPython
-      run: make -s -j4
+      run: make -j4
     - name: Display build info
       run: make pythoninfo
     - name: Tests
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
@@ -40,7 +40,7 @@ jobs:
     - name: Configure CPython
       run: ./configure --with-openssl=$PWD/multissl/openssl/$OPENSSL_VER
     - name: Build CPython
-      run: make -s -j4
+      run: make -j4
     - name: Display build info
       run: make pythoninfo
     - name: 'Coverage Preparation'
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
@@ -28,7 +28,7 @@ jobs:
     - name: 'Configure CPython'
       run: ./configure --with-pydebug
     - name: 'Build CPython'
-      run: make -s -j4
+      run: make -j4
     - name: 'Install build dependencies'
       run: make -C Doc/ PYTHON=../python venv
     - name: 'Build documentation'
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
@@ -4392,6 +4392,22 @@ pairs within braces, for example: ``{'jack': 4098, 'sjoerd': 4127}`` or ``{4098:
          >>> d.values() == d.values()
          False
 
+   .. describe:: d | other
+
+      Create a new dictionary with the merged keys and values of *d* and
+      *other*, which must both be dictionaries. The values of *other* take
+      priority when *d* and *other* share keys.
+
+      .. versionadded:: 3.9
+
+   .. describe:: d |= other
+
+      Update the dictionary *d* with keys and values from *other*, which may be
+      either a :term:`mapping` or an :term:`iterable` of key/value pairs. The
+      values of *other* take priority when *d* and *other* share keys.
+
+      .. versionadded:: 3.9
+
    Dictionaries compare equal if and only if they have the same ``(key,
    value)`` pairs (regardless of ordering). Order comparisons ('<', '<=', '>=', '>') raise
    :exc:`TypeError`.
diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst
@@ -70,6 +70,12 @@ Summary -- Release highlights
 New Features
 ============
 
+Dictionary Merge & Update Operators
+-----------------------------------
+
+Merge (``|``) and update (``|=``) operators have been added to the built-in
+:class:`dict` class.  See :pep:`584` for a full description.
+(Contributed by Brandt Bucher in :issue:`36144`.)
 
 
 Other Language Changes
diff --git a/Lib/test/test_asyncio/test_subprocess.py b/Lib/test/test_asyncio/test_subprocess.py
@@ -477,12 +477,19 @@ def kill():
             proc.kill = kill
             returncode = transport.get_returncode()
             transport.close()
-            await transport._wait()
+            await asyncio.wait_for(transport._wait(), 5)
             return (returncode, kill_called)
 
         # Ignore "Close running child process: kill ..." log
         with test_utils.disable_logger():
-            returncode, killed = self.loop.run_until_complete(kill_running())
+            try:
+                returncode, killed = self.loop.run_until_complete(
+                    kill_running()
+                )
+            except asyncio.TimeoutError:
+                self.skipTest(
+                    "Timeout failure on waiting for subprocess stopping"
+                )
         self.assertIsNone(returncode)
 
         # transport.close() must kill the process if it is still running
diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-02-03-21-12-39.bpo-39087.YnbUpL.rst b/Misc/NEWS.d/next/Core and Builtins/2020-02-03-21-12-39.bpo-39087.YnbUpL.rst
@@ -0,0 +1,2 @@
+Optimize :c:func:`PyUnicode_AsUTF8` and :c:func:`PyUnicode_AsUTF8AndSize`
+slightly when they need to create internal UTF-8 cache.
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
@@ -256,8 +256,9 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
 /* UTF-8 encoder specialized for a Unicode kind to avoid the slow
    PyUnicode_READ() macro. Delete some parts of the code depending on the kind:
    UCS-1 strings don't need to handle surrogates for example. */
-Py_LOCAL_INLINE(PyObject *)
-STRINGLIB(utf8_encoder)(PyObject *unicode,
+Py_LOCAL_INLINE(char *)
+STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
+                        PyObject *unicode,
                         STRINGLIB_CHAR *data,
                         Py_ssize_t size,
                         _Py_error_handler error_handler,
@@ -277,17 +278,16 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
 #else /*  STRINGLIB_SIZEOF_CHAR == 4 */
     const Py_ssize_t max_char_size = 4;
 #endif
-    _PyBytesWriter writer;
 
     assert(size >= 0);
-    _PyBytesWriter_Init(&writer);
-
     if (size > PY_SSIZE_T_MAX / max_char_size) {
         /* integer overflow */
-        return PyErr_NoMemory();
+        PyErr_NoMemory();
+        return NULL;
     }
 
-    p = _PyBytesWriter_Alloc(&writer, size * max_char_size);
+    _PyBytesWriter_Init(writer);
+    p = _PyBytesWriter_Alloc(writer, size * max_char_size);
     if (p == NULL)
         return NULL;
 
@@ -323,7 +323,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                 endpos++;
 
             /* Only overallocate the buffer if it's not the last write */
-            writer.overallocate = (endpos < size);
+            writer->overallocate = (endpos < size);
 
             switch (error_handler)
             {
@@ -347,8 +347,8 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
 
             case _Py_ERROR_BACKSLASHREPLACE:
                 /* subtract preallocated bytes */
-                writer.min_size -= max_char_size * (endpos - startpos);
-                p = backslashreplace(&writer, p,
+                writer->min_size -= max_char_size * (endpos - startpos);
+                p = backslashreplace(writer, p,
                                      unicode, startpos, endpos);
                 if (p == NULL)
                     goto error;
@@ -357,8 +357,8 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
 
             case _Py_ERROR_XMLCHARREFREPLACE:
                 /* subtract preallocated bytes */
-                writer.min_size -= max_char_size * (endpos - startpos);
-                p = xmlcharrefreplace(&writer, p,
+                writer->min_size -= max_char_size * (endpos - startpos);
+                p = xmlcharrefreplace(writer, p,
                                       unicode, startpos, endpos);
                 if (p == NULL)
                     goto error;
@@ -387,10 +387,10 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                     goto error;
 
                 /* subtract preallocated bytes */
-                writer.min_size -= max_char_size * (newpos - startpos);
+                writer->min_size -= max_char_size * (newpos - startpos);
 
                 if (PyBytes_Check(rep)) {
-                    p = _PyBytesWriter_WriteBytes(&writer, p,
+                    p = _PyBytesWriter_WriteBytes(writer, p,
                                                   PyBytes_AS_STRING(rep),
                                                   PyBytes_GET_SIZE(rep));
                 }
@@ -406,7 +406,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
                         goto error;
                     }
 
-                    p = _PyBytesWriter_WriteBytes(&writer, p,
+                    p = _PyBytesWriter_WriteBytes(writer, p,
                                                   PyUnicode_DATA(rep),
                                                   PyUnicode_GET_LENGTH(rep));
                 }
@@ -420,7 +420,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
 
             /* If overallocation was disabled, ensure that it was the last
                write. Otherwise, we missed an optimization */
-            assert(writer.overallocate || i == size);
+            assert(writer->overallocate || i == size);
         }
         else
 #if STRINGLIB_SIZEOF_CHAR > 2
@@ -449,14 +449,13 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
     Py_XDECREF(error_handler_obj);
     Py_XDECREF(exc);
 #endif
-    return _PyBytesWriter_Finish(&writer, p);
+    return p;
 
 #if STRINGLIB_SIZEOF_CHAR > 1
  error:
     Py_XDECREF(rep);
     Py_XDECREF(error_handler_obj);
     Py_XDECREF(exc);
-    _PyBytesWriter_Dealloc(&writer);
     return NULL;
 #endif
 }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
@@ -3991,11 +3991,11 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
 }
 
 
+static int unicode_fill_utf8(PyObject *unicode);
+
 const char *
 PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
 {
-    PyObject *bytes;
-
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
         return NULL;
@@ -4004,21 +4004,9 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
         return NULL;
 
     if (PyUnicode_UTF8(unicode) == NULL) {
-        assert(!PyUnicode_IS_COMPACT_ASCII(unicode));
-        bytes = _PyUnicode_AsUTF8String(unicode, NULL);
-        if (bytes == NULL)
-            return NULL;
-        _PyUnicode_UTF8(unicode) = PyObject_MALLOC(PyBytes_GET_SIZE(bytes) + 1);
-        if (_PyUnicode_UTF8(unicode) == NULL) {
-            PyErr_NoMemory();
-            Py_DECREF(bytes);
+        if (unicode_fill_utf8(unicode) == -1) {
             return NULL;
         }
-        _PyUnicode_UTF8_LENGTH(unicode) = PyBytes_GET_SIZE(bytes);
-        memcpy(_PyUnicode_UTF8(unicode),
-                  PyBytes_AS_STRING(bytes),
-                  _PyUnicode_UTF8_LENGTH(unicode) + 1);
-        Py_DECREF(bytes);
     }
 
     if (psize)
@@ -5381,10 +5369,6 @@ static PyObject *
 unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
                     const char *errors)
 {
-    enum PyUnicode_Kind kind;
-    void *data;
-    Py_ssize_t size;
-
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
         return NULL;
@@ -5397,22 +5381,86 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
         return PyBytes_FromStringAndSize(PyUnicode_UTF8(unicode),
                                          PyUnicode_UTF8_LENGTH(unicode));
 
-    kind = PyUnicode_KIND(unicode);
-    data = PyUnicode_DATA(unicode);
-    size = PyUnicode_GET_LENGTH(unicode);
+    enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+    void *data = PyUnicode_DATA(unicode);
+    Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
+
+    _PyBytesWriter writer;
+    char *end;
 
     switch (kind) {
     default:
         Py_UNREACHABLE();
     case PyUnicode_1BYTE_KIND:
         /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
         assert(!PyUnicode_IS_ASCII(unicode));
-        return ucs1lib_utf8_encoder(unicode, data, size, error_handler, errors);
+        end = ucs1lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+        break;
+    case PyUnicode_2BYTE_KIND:
+        end = ucs2lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+        break;
+    case PyUnicode_4BYTE_KIND:
+        end = ucs4lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+        break;
+    }
+
+    if (end == NULL) {
+        _PyBytesWriter_Dealloc(&writer);
+        return NULL;
+    }
+    return _PyBytesWriter_Finish(&writer, end);
+}
+
+static int
+unicode_fill_utf8(PyObject *unicode)
+{
+    /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
+    assert(!PyUnicode_IS_ASCII(unicode));
+
+    enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+    void *data = PyUnicode_DATA(unicode);
+    Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
+
+    _PyBytesWriter writer;
+    char *end;
+
+    switch (kind) {
+    default:
+        Py_UNREACHABLE();
+    case PyUnicode_1BYTE_KIND:
+        end = ucs1lib_utf8_encoder(&writer, unicode, data, size,
+                                   _Py_ERROR_STRICT, NULL);
+        break;
     case PyUnicode_2BYTE_KIND:
-        return ucs2lib_utf8_encoder(unicode, data, size, error_handler, errors);
+        end = ucs2lib_utf8_encoder(&writer, unicode, data, size,
+                                   _Py_ERROR_STRICT, NULL);
+        break;
     case PyUnicode_4BYTE_KIND:
-        return ucs4lib_utf8_encoder(unicode, data, size, error_handler, errors);
+        end = ucs4lib_utf8_encoder(&writer, unicode, data, size,
+                                   _Py_ERROR_STRICT, NULL);
+        break;
+    }
+    if (end == NULL) {
+        _PyBytesWriter_Dealloc(&writer);
+        return -1;
+    }
+
+    char *start = writer.use_small_buffer ? writer.small_buffer :
+                    PyBytes_AS_STRING(writer.buffer);
+    Py_ssize_t len = end - start;
+
+    char *cache = PyObject_MALLOC(len + 1);
+    if (cache == NULL) {
+        _PyBytesWriter_Dealloc(&writer);
+        PyErr_NoMemory();
+        return -1;
     }
+    _PyUnicode_UTF8(unicode) = cache;
+    _PyUnicode_UTF8_LENGTH(unicode) = len;
+    memcpy(cache, start, len);
+    cache[len] = '\0';
+    _PyBytesWriter_Dealloc(&writer);
+    return 0;
 }
 
 PyObject *

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	+Optimize :c:func:`PyUnicode_AsUTF8` and :c:func:`PyUnicode_AsUTF8AndSize`
	`2`	`+slightly when they need to create internal UTF-8 cache.`