Skip to content

Commit 497c7b7

Browse files
authored
Merge pull request #1 from brute4s99/master
rebasing
2 parents af146f4 + 5551bcf commit 497c7b7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+1426
-812
lines changed

.travis.yml

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -53,18 +53,20 @@ matrix:
5353
- dist: trusty
5454
env:
5555
- JOB="3.6, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true DOCTEST=true
56-
# In allow_failures
57-
- dist: trusty
58-
env:
59-
- JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
60-
# In allow_failures
56+
6157
- dist: trusty
6258
env:
6359
- JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
6460
addons:
6561
apt:
6662
packages:
6763
- xsel
64+
65+
# In allow_failures
66+
- dist: trusty
67+
env:
68+
- JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
69+
6870
# In allow_failures
6971
- dist: trusty
7072
env:
@@ -73,13 +75,6 @@ matrix:
7375
- dist: trusty
7476
env:
7577
- JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
76-
- dist: trusty
77-
env:
78-
- JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
79-
addons:
80-
apt:
81-
packages:
82-
- xsel
8378
- dist: trusty
8479
env:
8580
- JOB="3.6, doc" ENV_FILE="ci/travis-36-doc.yaml" DOC=true

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ easy and intuitive. It aims to be the fundamental high-level building block for
9797
doing practical, **real world** data analysis in Python. Additionally, it has
9898
the broader goal of becoming **the most powerful and flexible open source data
9999
analysis / manipulation tool available in any language**. It is already well on
100-
its way toward this goal.
100+
its way towards this goal.
101101

102102
## Main Features
103103
Here are just a few of the things that pandas does well:

asv_bench/benchmarks/indexing.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
import numpy as np
44
import pandas.util.testing as tm
5-
from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index,
6-
IntervalIndex, CategoricalIndex,
7-
IndexSlice, concat, date_range)
8-
from .pandas_vb_common import setup, Panel # noqa
5+
from pandas import (Series, DataFrame, MultiIndex, Panel,
6+
Int64Index, Float64Index, IntervalIndex,
7+
CategoricalIndex, IndexSlice, concat, date_range)
8+
from .pandas_vb_common import setup # noqa
99

1010

1111
class NumericSeriesIndexing(object):

asv_bench/benchmarks/join_merge.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@
33

44
import numpy as np
55
import pandas.util.testing as tm
6-
from pandas import (DataFrame, Series, MultiIndex, date_range, concat, merge,
7-
merge_asof)
6+
from pandas import (DataFrame, Series, Panel, MultiIndex,
7+
date_range, concat, merge, merge_asof)
8+
89
try:
910
from pandas import merge_ordered
1011
except ImportError:
1112
from pandas import ordered_merge as merge_ordered
1213

13-
from .pandas_vb_common import Panel, setup # noqa
14+
from .pandas_vb_common import setup # noqa
1415

1516

1617
class Append(object):

asv_bench/benchmarks/panel_ctor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import warnings
22
from datetime import datetime, timedelta
33

4-
from pandas import DataFrame, DatetimeIndex, date_range
4+
from pandas import DataFrame, Panel, DatetimeIndex, date_range
55

6-
from .pandas_vb_common import Panel, setup # noqa
6+
from .pandas_vb_common import setup # noqa
77

88

99
class DifferentIndexes(object):

asv_bench/benchmarks/panel_methods.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import warnings
22

33
import numpy as np
4+
from pandas import Panel
45

5-
from .pandas_vb_common import Panel, setup # noqa
6+
from .pandas_vb_common import setup # noqa
67

78

89
class PanelMethods(object):

ci/travis-27.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ dependencies:
4444
# universal
4545
- pytest
4646
- pytest-xdist
47-
- moto
47+
- moto==1.3.4
4848
- hypothesis>=3.58.0
4949
- pip:
5050
- backports.lzma

ci/travis-36-doc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ dependencies:
1212
- html5lib
1313
- hypothesis>=3.58.0
1414
- ipykernel
15-
- ipython
15+
- ipython==6.5.0
1616
- ipywidgets
1717
- lxml
1818
- matplotlib

doc/source/contributing.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -880,7 +880,7 @@ If your change involves checking that a warning is actually emitted, use
880880

881881
.. code-block:: python
882882
883-
with tm.assert_prodcues_warning(FutureWarning):
883+
with tm.assert_produces_warning(FutureWarning):
884884
df.some_operation()
885885
886886
We prefer this to the ``pytest.warns`` context manager because ours checks that the warning's

doc/source/whatsnew/v0.24.0.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ Other Enhancements
199199

200200
Backwards incompatible API changes
201201
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
202+
- A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`)
202203

203204

204205
.. _whatsnew_0240.api_breaking.interval_values:
@@ -506,6 +507,7 @@ ExtensionType Changes
506507
- :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185`).
507508
- Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
508509
- Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`)
510+
- Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
509511

510512
.. _whatsnew_0240.api.incompatibilities:
511513

@@ -533,6 +535,35 @@ Current Behavior:
533535
...
534536
OverflowError: Trying to coerce negative values to unsigned integers
535537

538+
.. _whatsnew_0240.api.crosstab_dtypes:
539+
540+
Crosstab Preserves Dtypes
541+
^^^^^^^^^^^^^^^^^^^^^^^^^
542+
543+
:func:`crosstab` will now preserve dtypes in some cases that previously would
544+
cast from integer dtype to floating dtype (:issue:`22019`)
545+
546+
Previous Behavior:
547+
548+
.. code-block:: ipython
549+
550+
In [3]: df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
551+
...: 'c': [1, 1, np.nan, 1, 1]})
552+
In [4]: pd.crosstab(df.a, df.b, normalize='columns')
553+
Out[4]:
554+
b 3 4
555+
a
556+
1 0.5 0.0
557+
2 0.5 1.0
558+
559+
Current Behavior:
560+
561+
.. code-block:: ipython
562+
563+
In [3]: df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4],
564+
...: 'c': [1, 1, np.nan, 1, 1]})
565+
In [4]: pd.crosstab(df.a, df.b, normalize='columns')
566+
536567
Datetimelike API Changes
537568
^^^^^^^^^^^^^^^^^^^^^^^^
538569

@@ -795,6 +826,7 @@ Groupby/Resample/Rolling
795826
- Bug in :meth:`Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`).
796827
- Bug in :meth:`SeriesGroupBy.mean` when values were integral but could not fit inside of int64, overflowing instead. (:issue:`22487`)
797828
- :func:`RollingGroupby.agg` and :func:`ExpandingGroupby.agg` now support multiple aggregation functions as parameters (:issue:`15072`)
829+
- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` when resampling by a weekly offset (``'W'``) across a DST transition (:issue:`9119`, :issue:`21459`)
798830

799831
Sparse
800832
^^^^^^
@@ -818,6 +850,7 @@ Reshaping
818850
- Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`)
819851
- Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`)
820852
- Bug in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`)
853+
- Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`)
821854

822855
Build Changes
823856
^^^^^^^^^^^^^

pandas/_libs/algos_common_helper.pxi.in

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
Template for each `dtype` helper function using 1-d template
33

44
# 1-d template
5-
- map_indices
65
- pad
76
- pad_1d
87
- pad_2d

pandas/_libs/algos_rank_helper.pxi.in

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,8 @@ dtypes = [('object', 'object', 'Infinity()', 'NegInfinity()'),
2424

2525
@cython.wraparound(False)
2626
@cython.boundscheck(False)
27-
{{if dtype == 'object'}}
28-
29-
3027
def rank_1d_{{dtype}}(object in_arr, ties_method='average',
3128
ascending=True, na_option='keep', pct=False):
32-
{{else}}
33-
34-
35-
def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
36-
na_option='keep', pct=False):
37-
{{endif}}
3829
"""
3930
Fast NaN-friendly version of scipy.stats.rankdata
4031
"""

pandas/_libs/algos_take_helper.pxi.in

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -260,33 +260,39 @@ def take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
260260

261261
{{endfor}}
262262

263-
#----------------------------------------------------------------------
263+
# ----------------------------------------------------------------------
264264
# take_2d internal function
265-
#----------------------------------------------------------------------
265+
# ----------------------------------------------------------------------
266266

267-
{{py:
268-
269-
# dtype, ctype, init_result
270-
dtypes = [('float64', 'float64_t', 'np.empty_like(values)'),
271-
('uint64', 'uint64_t', 'np.empty_like(values)'),
272-
('object', 'object', 'values.copy()'),
273-
('int64', 'int64_t', 'np.empty_like(values)')]
274-
}}
267+
ctypedef fused take_t:
268+
float64_t
269+
uint64_t
270+
int64_t
271+
object
275272

276-
{{for dtype, ctype, init_result in dtypes}}
277273

278-
cdef _take_2d_{{dtype}}(ndarray[{{ctype}}, ndim=2] values, object idx):
274+
cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
279275
cdef:
280276
Py_ssize_t i, j, N, K
281277
ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx
282-
ndarray[{{ctype}}, ndim=2] result
278+
ndarray[take_t, ndim=2] result
283279
object val
284280

285281
N, K = (<object> values).shape
286-
result = {{init_result}}
282+
283+
if take_t is object:
284+
# evaluated at compile-time
285+
result = values.copy()
286+
else:
287+
result = np.empty_like(values)
288+
287289
for i in range(N):
288290
for j in range(K):
289291
result[i, j] = values[i, indexer[i, j]]
290292
return result
291293

292-
{{endfor}}
294+
295+
_take_2d_object = _take_2d[object]
296+
_take_2d_float64 = _take_2d[float64_t]
297+
_take_2d_int64 = _take_2d[int64_t]
298+
_take_2d_uint64 = _take_2d[uint64_t]

pandas/_libs/join_func_helper.pxi.in

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -68,21 +68,21 @@ def asof_join_backward_{{on_dtype}}_by_{{by_dtype}}(
6868

6969
# find last position in right whose value is less than left's
7070
if allow_exact_matches:
71-
while right_pos < right_size and\
72-
right_values[right_pos] <= left_values[left_pos]:
71+
while (right_pos < right_size and
72+
right_values[right_pos] <= left_values[left_pos]):
7373
hash_table.set_item(right_by_values[right_pos], right_pos)
7474
right_pos += 1
7575
else:
76-
while right_pos < right_size and\
77-
right_values[right_pos] < left_values[left_pos]:
76+
while (right_pos < right_size and
77+
right_values[right_pos] < left_values[left_pos]):
7878
hash_table.set_item(right_by_values[right_pos], right_pos)
7979
right_pos += 1
8080
right_pos -= 1
8181

8282
# save positions as the desired index
8383
by_value = left_by_values[left_pos]
84-
found_right_pos = hash_table.get_item(by_value)\
85-
if by_value in hash_table else -1
84+
found_right_pos = (hash_table.get_item(by_value)
85+
if by_value in hash_table else -1)
8686
left_indexer[left_pos] = left_pos
8787
right_indexer[left_pos] = found_right_pos
8888

@@ -133,21 +133,21 @@ def asof_join_forward_{{on_dtype}}_by_{{by_dtype}}(
133133

134134
# find first position in right whose value is greater than left's
135135
if allow_exact_matches:
136-
while right_pos >= 0 and\
137-
right_values[right_pos] >= left_values[left_pos]:
136+
while (right_pos >= 0 and
137+
right_values[right_pos] >= left_values[left_pos]):
138138
hash_table.set_item(right_by_values[right_pos], right_pos)
139139
right_pos -= 1
140140
else:
141-
while right_pos >= 0 and\
142-
right_values[right_pos] > left_values[left_pos]:
141+
while (right_pos >= 0 and
142+
right_values[right_pos] > left_values[left_pos]):
143143
hash_table.set_item(right_by_values[right_pos], right_pos)
144144
right_pos -= 1
145145
right_pos += 1
146146

147147
# save positions as the desired index
148148
by_value = left_by_values[left_pos]
149-
found_right_pos = hash_table.get_item(by_value)\
150-
if by_value in hash_table else -1
149+
found_right_pos = (hash_table.get_item(by_value)
150+
if by_value in hash_table else -1)
151151
left_indexer[left_pos] = left_pos
152152
right_indexer[left_pos] = found_right_pos
153153

@@ -259,12 +259,12 @@ def asof_join_backward_{{on_dtype}}(
259259

260260
# find last position in right whose value is less than left's
261261
if allow_exact_matches:
262-
while right_pos < right_size and\
263-
right_values[right_pos] <= left_values[left_pos]:
262+
while (right_pos < right_size and
263+
right_values[right_pos] <= left_values[left_pos]):
264264
right_pos += 1
265265
else:
266-
while right_pos < right_size and\
267-
right_values[right_pos] < left_values[left_pos]:
266+
while (right_pos < right_size and
267+
right_values[right_pos] < left_values[left_pos]):
268268
right_pos += 1
269269
right_pos -= 1
270270

@@ -313,19 +313,19 @@ def asof_join_forward_{{on_dtype}}(
313313

314314
# find first position in right whose value is greater than left's
315315
if allow_exact_matches:
316-
while right_pos >= 0 and\
317-
right_values[right_pos] >= left_values[left_pos]:
316+
while (right_pos >= 0 and
317+
right_values[right_pos] >= left_values[left_pos]):
318318
right_pos -= 1
319319
else:
320-
while right_pos >= 0 and\
321-
right_values[right_pos] > left_values[left_pos]:
320+
while (right_pos >= 0 and
321+
right_values[right_pos] > left_values[left_pos]):
322322
right_pos -= 1
323323
right_pos += 1
324324

325325
# save positions as the desired index
326326
left_indexer[left_pos] = left_pos
327-
right_indexer[left_pos] = right_pos\
328-
if right_pos != right_size else -1
327+
right_indexer[left_pos] = (right_pos
328+
if right_pos != right_size else -1)
329329

330330
# if needed, verify that tolerance is met
331331
if has_tolerance and right_pos != right_size:

0 commit comments

Comments
 (0)