Skip to content

Sync Fork from Upstream Repo #102

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Mar 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@

# The theme to use for HTML and HTML Help pages. Major themes that come with
# Sphinx are currently 'default' and 'sphinxdoc'.
html_theme = "pandas_sphinx_theme"
html_theme = "pydata_sphinx_theme"

# The style sheet to use for HTML and HTML Help pages. A file of that name
# must exist either in Sphinx' static/ path, or in one of the custom paths
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ name of the column of interest.
</ul>

Each column in a :class:`DataFrame` is a :class:`Series`. As a single column is
selected, the returned object is a pandas :class:`DataFrame`. We can verify this
selected, the returned object is a pandas :class:`Series`. We can verify this
by checking the type of the output:

.. ipython:: python
Expand Down
1 change: 1 addition & 0 deletions doc/source/user_guide/scale.rst
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.

.. ipython:: python
:okwarning:

import dask.dataframe as dd

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ Other
- Fixed :func:`pandas.testing.assert_series_equal` to correctly raise if left object is a different subclass with ``check_series_type=True`` (:issue:`32670`).
- :meth:`IntegerArray.astype` now supports ``datetime64`` dtype (:issue:`32538`)
- Fixed bug in :func:`pandas.testing.assert_series_equal` where dtypes were checked for ``Interval`` and ``ExtensionArray`` operands when ``check_dtype`` was ``False`` (:issue:`32747`)
- Bug in :meth:`DataFrame.__dir__` caused a segfault when using unicode surrogates in a column name (:issue:`25509`)

.. ---------------------------------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,5 @@ dependencies:
- pyreadstat # pandas.read_spss
- tabulate>=0.8.3 # DataFrame.to_markdown
- pip:
- git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
- git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master
- git+https://github.com/numpy/numpydoc
10 changes: 8 additions & 2 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
from pandas._libs.tslibs.util cimport get_c_string
from pandas._libs.missing cimport C_NA

cdef extern from "Python.h":
void PyErr_Clear()

{{py:

# name, dtype, c_type
Expand Down Expand Up @@ -193,7 +196,7 @@ cdef class StringVector:

append_data_string(self.data, x)

cdef extend(self, ndarray[:] x):
cdef extend(self, ndarray[object] x):
for i in range(len(x)):
self.append(x[i])

Expand Down Expand Up @@ -238,7 +241,7 @@ cdef class ObjectVector:
self.external_view_exists = True
return self.ao

cdef extend(self, ndarray[:] x):
cdef extend(self, ndarray[object] x):
for i in range(len(x)):
self.append(x[i])

Expand Down Expand Up @@ -790,6 +793,9 @@ cdef class StringHashTable(HashTable):
else:
# if ignore_na is False, we also stringify NaN/None/etc.
v = get_c_string(<str>val)
if v == NULL:
PyErr_Clear()
v = get_c_string(<str>repr(val))
vecs[i] = v

# compute
Expand Down
25 changes: 12 additions & 13 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -378,25 +378,23 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):

object blkno
object group_dict = defaultdict(list)
int64_t[:] res_view

n = blknos.shape[0]

if n == 0:
return

result = list()
start = 0
cur_blkno = blknos[start]

if group is False:
if n == 0:
pass
elif group is False:
for i in range(1, n):
if blknos[i] != cur_blkno:
yield cur_blkno, slice(start, i)
result.append((cur_blkno, slice(start, i)))

start = i
cur_blkno = blknos[i]

yield cur_blkno, slice(start, n)
result.append((cur_blkno, slice(start, n)))
else:
for i in range(1, n):
if blknos[i] != cur_blkno:
Expand All @@ -409,19 +407,20 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):

for blkno, slices in group_dict.items():
if len(slices) == 1:
yield blkno, slice(slices[0][0], slices[0][1])
result.append((blkno, slice(slices[0][0], slices[0][1])))
else:
tot_len = sum(stop - start for start, stop in slices)
result = np.empty(tot_len, dtype=np.int64)
res_view = result
arr = np.empty(tot_len, dtype=np.int64)

i = 0
for start, stop in slices:
for diff in range(start, stop):
res_view[i] = diff
arr[i] = diff
i += 1

yield blkno, result
result.append((blkno, arr))

return result


def get_blkno_placements(blknos, group: bool = True):
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -792,7 +792,6 @@ cdef class TextReader:
self._tokenize_rows(1)

header = [ self.names ]
data_line = 0

if self.parser.lines < 1:
field_count = len(header[0])
Expand Down
4 changes: 2 additions & 2 deletions pandas/_libs/src/ujson/python/date_conversions.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) {
}

/* Convert PyDatetime To ISO C-string. mutates len */
char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base,
size_t *len) {
npy_datetimestruct dts;
int ret;
Expand Down Expand Up @@ -98,7 +98,7 @@ char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base,
return result;
}

npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base) {
npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base) {
npy_datetimestruct dts;
int ret;

Expand Down
5 changes: 2 additions & 3 deletions pandas/_libs/src/ujson/python/date_conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <numpy/ndarraytypes.h>
#include "datetime.h"

// Scales value inplace from nanosecond resolution to unit resolution
int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit);
Expand All @@ -23,10 +22,10 @@ npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base);
// up to precision `base` e.g. base="s" yields 2020-01-03T00:00:00Z
// while base="ns" yields "2020-01-01T00:00:00.000000000Z"
// len is mutated to save the length of the returned string
char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, size_t *len);
char *PyDateTimeToIso(PyObject *obj, NPY_DATETIMEUNIT base, size_t *len);

// Convert a Python Date/Datetime to Unix epoch with resolution base
npy_datetime PyDateTimeToEpoch(PyDateTime_Date *dt, NPY_DATETIMEUNIT base);
npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base);

char *int64ToIsoDuration(int64_t value, size_t *len);

Expand Down
9 changes: 4 additions & 5 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -1451,7 +1451,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
} else {
// datetime.* objects don't follow above rules
nanosecVal =
PyDateTimeToEpoch((PyDateTime_Date *)item, NPY_FR_ns);
PyDateTimeToEpoch(item, NPY_FR_ns);
}
}
}
Expand All @@ -1469,8 +1469,7 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
if (type_num == NPY_DATETIME) {
cLabel = int64ToIso(nanosecVal, base, &len);
} else {
cLabel = PyDateTimeToIso((PyDateTime_Date *)item,
base, &len);
cLabel = PyDateTimeToIso(item, base, &len);
}
}
if (cLabel == NULL) {
Expand Down Expand Up @@ -1683,7 +1682,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
NPY_DATETIMEUNIT base =
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
GET_TC(tc)->longValue =
PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
PyDateTimeToEpoch(obj, base);
tc->type = JT_LONG;
}
return;
Expand All @@ -1710,7 +1709,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
NPY_DATETIMEUNIT base =
((PyObjectEncoder *)tc->encoder)->datetimeUnit;
GET_TC(tc)->longValue =
PyDateTimeToEpoch((PyDateTime_Date *)obj, base);
PyDateTimeToEpoch(obj, base);
tc->type = JT_LONG;
}
return;
Expand Down
10 changes: 4 additions & 6 deletions pandas/_libs/tslibs/src/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
#endif // NPY_NO_DEPRECATED_API

#include <Python.h>
#include <datetime.h>

#include <numpy/arrayobject.h>
#include <numpy/arrayscalars.h>
Expand Down Expand Up @@ -313,15 +312,14 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
* object into a NumPy npy_datetimestruct. Uses tzinfo (if present)
* to convert to UTC time.
*
* While the C API has PyDate_* and PyDateTime_* functions, the following
* implementation just asks for attributes, and thus supports
* datetime duck typing. The tzinfo time zone conversion would require
* this style of access anyway.
* The following implementation just asks for attributes, and thus
* supports datetime duck typing. The tzinfo time zone conversion
* requires this style of access as well.
*
* Returns -1 on error, 0 on success, and 1 (with no error set)
* if obj doesn't have the needed date or datetime attributes.
*/
int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
npy_datetimestruct *out) {
// Assumes that obj is a valid datetime object
PyObject *tmp;
Expand Down
3 changes: 1 addition & 2 deletions pandas/_libs/tslibs/src/datetime/np_datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
#endif // NPY_NO_DEPRECATED_API

#include <numpy/ndarraytypes.h>
#include <datetime.h>

typedef struct {
npy_int64 days;
Expand All @@ -35,7 +34,7 @@ extern const npy_datetimestruct _NS_MAX_DTS;
// stuff pandas needs
// ----------------------------------------------------------------------------

int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
int convert_pydatetime_to_datetimestruct(PyObject *dtobj,
npy_datetimestruct *out);

npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
Expand Down
Loading