Skip to content

Sync Fork from Upstream Repo #102

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Mar 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@

# The theme to use for HTML and HTML Help pages. Major themes that come with
# Sphinx are currently 'default' and 'sphinxdoc'.
html_theme = "pandas_sphinx_theme"
html_theme = "pydata_sphinx_theme"

# The style sheet to use for HTML and HTML Help pages. A file of that name
# must exist either in Sphinx' static/ path, or in one of the custom paths
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ name of the column of interest.
</ul>

Each column in a :class:`DataFrame` is a :class:`Series`. As a single column is
selected, the returned object is a pandas :class:`DataFrame`. We can verify this
selected, the returned object is a pandas :class:`Series`. We can verify this
by checking the type of the output:

.. ipython:: python
Expand Down
1 change: 1 addition & 0 deletions doc/source/user_guide/scale.rst
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.

.. ipython:: python
:okwarning:

import dask.dataframe as dd

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ Other
- Fixed :func:`pandas.testing.assert_series_equal` to correctly raise if left object is a different subclass with ``check_series_type=True`` (:issue:`32670`).
- :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:`32538`)
- Fixed bug in :func:`pandas.testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`)
- Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`)

.. ---------------------------------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,5 @@ dependencies:
- pyreadstat # pandas.read_spss
- tabulate>=0.8.3 # DataFrame.to_markdown
- pip:
- git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
- git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master
- git+https://github.com/numpy/numpydoc
10 changes: 8 additions & 2 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
from pandas._libs.tslibs.util cimport get_c_string
from pandas._libs.missing cimport C_NA

cdef extern from "Python.h":
void PyErr_Clear()

{{py:

# name, dtype, c_type
Expand Down Expand Up @@ -193,7 +196,7 @@ cdef class StringVector:

append_data_string(self.data, x)

cdef extend(self, ndarray[:] x):
cdef extend(self, ndarray[object] x):
for i in range(len(x)):
self.append(x[i])

Expand Down Expand Up @@ -238,7 +241,7 @@ cdef class ObjectVector:
self.external_view_exists = True
return self.ao

cdef extend(self, ndarray[:] x):
cdef extend(self, ndarray[object] x):
for i in range(len(x)):
self.append(x[i])

Expand Down Expand Up @@ -790,6 +793,9 @@ cdef class StringHashTable(HashTable):
else:
# if ignore_na is False, we also stringify NaN/None/etc.
v = get_c_string(<str>val)
if v == NULL:
PyErr_Clear()
v = get_c_string(<str>repr(val))
vecs[i] = v

# compute
Expand Down
25 changes: 12 additions & 13 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -378,25 +378,23 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):

object blkno
object group_dict = defaultdict(list)
int64_t[:] res_view

n = blknos.shape[0]

if n == 0:
return

result = list()
start = 0
cur_blkno = blknos[start]

if group is False:
if n == 0:
pass
elif group is False:
for i in range(1, n):
if blknos[i] != cur_blkno:
yield cur_blkno, slice(start, i)
result.append((cur_blkno, slice(start, i)))

start = i
cur_blkno = blknos[i]

yield cur_blkno, slice(start, n)
result.append((cur_blkno, slice(start, n)))
else:
for i in range(1, n):
if blknos[i] != cur_blkno:
Expand All @@ -409,19 +407,20 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):

for blkno, slices in group_dict.items():
if len(slices) == 1:
yield blkno, slice(slices[0][0], slices[0][1])
result.append((blkno, slice(slices[0][0], slices[0][1])))
else:
tot_len = sum(stop - start for start, stop in slices)
result = np.empty(tot_len, dtype=np.int64)
res_view = result
arr = np.empty(tot_len, dtype=np.int64)

i = 0
for start, stop in slices:
for diff in range(start, stop):
res_view[i] = diff
arr[i] = diff
i += 1

yield blkno, result
result.append((blkno, arr))

return result


def get_blkno_placements(blknos, group: bool = True):
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -792,7 +792,6 @@ cdef class TextReader:
self._tokenize_rows(1)

header = [ self.names ]
data_line = 0

if self.parser.lines < 1:
field_count = len(header[0])
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/src/ujson/python/date_conversions.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) {
}

/* Convert PyDatetime To ISO C-string. mutates len */
char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,
size_t *len) {
npy_datetimestruct dts;
int ret;
Expand Down Expand Up @@ -98,7 +98,7 @@ char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
return result;
}

npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base) {
npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) {
npy_datetimestruct dts;
int ret;

Expand Down
5 changes: 2 additions & 3 deletions pandas/_libs/src/ujson/python/date_conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <numpy/ndarraytypes.h>
#include "datetime.h"

// Scales value inplace from nanosecond resolution to unit resolution
int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit);
Expand All @@ -23,10 +22,10 @@ npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base);
// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z
// while base="ns" yields "2020-01-01T00:00:00.000000000Z"
// len is mutated to save the length of the returned string
char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, size_t *len);
char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, size_t *len);

// Convert a Python Date/Datetime to Unix epoch with resolution base
npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base);
npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base);

char *int64ToIsoDuration(int64_t value, size_t *len);

Expand Down
9 changes: 4 additions & 5 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -1451,7 +1451,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
} else {
// datetime.* objects don't follow above rules
nanosecVal =
PyDateTimeToEpoch((PyDateTime_Date *)item, NPY_FR_ns);
PyDateTimeToEpoch(item, NPY_FR_ns);
}
}
}
Expand All @@ -1469,8 +1469,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
if (type_num == NPY_DATETIME) {
cLabel = int64ToIso(nanosecVal, base, &len);
} else {
cLabel = PyDateTimeToIso((PyDateTime_Date *)item,
base, &len);
cLabel = PyDateTimeToIso(item, base, &len);
}
}
if (cLabel == NULL) {
Expand Down Expand Up @@ -1683,7 +1682,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
NPY_DATETIMEUNIT base =
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
GET_TC(tc)->longValue =
PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
PyDateTimeToEpoch(obj, base);
tc->type = JT_LONG;
}
return;
Expand All @@ -1710,7 +1709,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
NPY_DATETIMEUNIT base =
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
GET_TC(tc)->longValue =
PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
PyDateTimeToEpoch(obj, base);
tc->type = JT_LONG;
}
return;
Expand Down
10 changes: 4 additions & 6 deletions pandas/_libs/tslibs/src/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
#endif // NPY_NO_DEPRECATED_API

#include <Python.h>
#include <datetime.h>

#include <numpy/arrayobject.h>
#include <numpy/arrayscalars.h>
Expand Down Expand Up @@ -313,15 +312,14 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
* object into a NumPy npy_datetimestruct. Uses tzinfo (if present)
* to convert to UTC time.
*
* While the C API has PyDate_* and PyDateTime_* functions, the following
* implementation just asks for attributes, and thus supports
* datetime duck typing. The tzinfo time zone conversion would require
* this style of access anyway.
* The following implementation just asks for attributes, and thus
* supports datetime duck typing. The tzinfo time zone conversion
* requires this style of access as well.
*
* Returns -1 on error, 0 on success, and 1 (with no error set)
* if obj doesn't have the needed date or datetime attributes.
*/
int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
npy_datetimestruct *out) {
// Assumes that obj is a valid datetime object
PyObject *tmp;
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/tslibs/src/datetime/np_datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
#endif // NPY_NO_DEPRECATED_API

#include <numpy/ndarraytypes.h>
#include <datetime.h>

typedef struct {
npy_int64 days;
Expand All @@ -35,7 +34,7 @@ extern const npy_datetimestruct _NS_MAX_DTS;
// stuff pandas needs
// ----------------------------------------------------------------------------

int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
npy_datetimestruct *out);

npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
Expand Down
Loading