Skip to content

Commit 6f05b90

Browse files
authored
Merge pull request #169 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents ad987b2 + 36b4990 commit 6f05b90

File tree

24 files changed

+337
-247
lines changed

24 files changed

+337
-247
lines changed

pandas/_testing/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@
100100
)
101101
from pandas.core.arrays import (
102102
DatetimeArray,
103+
PandasArray,
103104
PeriodArray,
104105
TimedeltaArray,
105106
period_array,
@@ -204,7 +205,11 @@ def box_expected(expected, box_cls, transpose=True):
204205
subclass of box_cls
205206
"""
206207
if box_cls is pd.array:
207-
expected = pd.array(expected)
208+
if isinstance(expected, RangeIndex):
209+
# pd.array would return an IntegerArray
210+
expected = PandasArray(np.asarray(expected._values))
211+
else:
212+
expected = pd.array(expected)
208213
elif box_cls is Index:
209214
expected = Index(expected)
210215
elif box_cls is Series:

pandas/core/array_algos/take.py

Lines changed: 45 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -100,13 +100,14 @@ def take_nd(
100100
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
101101

102102
arr = np.asarray(arr)
103-
return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill)
103+
return _take_nd_ndarray(arr, indexer, axis, None, fill_value, allow_fill)
104104

105105

106106
def _take_nd_ndarray(
107107
arr: np.ndarray,
108108
indexer,
109109
axis: int,
110+
out: np.ndarray | None,
110111
fill_value,
111112
allow_fill: bool,
112113
) -> np.ndarray:
@@ -117,11 +118,8 @@ def _take_nd_ndarray(
117118
else:
118119
indexer = ensure_platform_int(indexer)
119120

120-
if not allow_fill:
121-
return arr.take(indexer, axis=axis)
122-
123-
dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
124-
arr, indexer, fill_value
121+
indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
122+
arr, indexer, out, fill_value, allow_fill
125123
)
126124

127125
flip_order = False
@@ -131,20 +129,23 @@ def _take_nd_ndarray(
131129
if flip_order:
132130
arr = arr.T
133131
axis = arr.ndim - axis - 1
132+
if out is not None:
133+
out = out.T
134134

135135
# at this point, it's guaranteed that dtype can hold both the arr values
136136
# and the fill_value
137-
out_shape_ = list(arr.shape)
138-
out_shape_[axis] = len(indexer)
139-
out_shape = tuple(out_shape_)
140-
if arr.flags.f_contiguous and axis == arr.ndim - 1:
141-
# minor tweak that can make an order-of-magnitude difference
142-
# for dataframes initialized directly from 2-d ndarrays
143-
# (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its
144-
# f-contiguous transpose)
145-
out = np.empty(out_shape, dtype=dtype, order="F")
146-
else:
147-
out = np.empty(out_shape, dtype=dtype)
137+
if out is None:
138+
out_shape_ = list(arr.shape)
139+
out_shape_[axis] = len(indexer)
140+
out_shape = tuple(out_shape_)
141+
if arr.flags.f_contiguous and axis == arr.ndim - 1:
142+
# minor tweak that can make an order-of-magnitude difference
143+
# for dataframes initialized directly from 2-d ndarrays
144+
# (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its
145+
# f-contiguous transpose)
146+
out = np.empty(out_shape, dtype=dtype, order="F")
147+
else:
148+
out = np.empty(out_shape, dtype=dtype)
148149

149150
func = _get_take_nd_function(
150151
arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info
@@ -188,8 +189,8 @@ def take_1d(
188189
if not allow_fill:
189190
return arr.take(indexer)
190191

191-
dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
192-
arr, indexer, fill_value
192+
indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
193+
arr, indexer, None, fill_value, True
193194
)
194195

195196
# at this point, it's guaranteed that dtype can hold both the arr values
@@ -515,22 +516,32 @@ def _take_2d_multi_object(
515516
def _take_preprocess_indexer_and_fill_value(
516517
arr: np.ndarray,
517518
indexer: np.ndarray,
519+
out: np.ndarray | None,
518520
fill_value,
521+
allow_fill: bool,
519522
):
520523
mask_info = None
521524

522-
# check for promotion based on types only (do this first because
523-
# it's faster than computing a mask)
524-
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
525-
if dtype != arr.dtype:
526-
# check if promotion is actually required based on indexer
527-
mask = indexer == -1
528-
needs_masking = mask.any()
529-
mask_info = mask, needs_masking
530-
if not needs_masking:
531-
# if not, then depromote, set fill_value to dummy
532-
# (it won't be used but we don't want the cython code
533-
# to crash when trying to cast it to dtype)
534-
dtype, fill_value = arr.dtype, arr.dtype.type()
535-
536-
return dtype, fill_value, mask_info
525+
if not allow_fill:
526+
dtype, fill_value = arr.dtype, arr.dtype.type()
527+
mask_info = None, False
528+
else:
529+
# check for promotion based on types only (do this first because
530+
# it's faster than computing a mask)
531+
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
532+
if dtype != arr.dtype:
533+
# check if promotion is actually required based on indexer
534+
mask = indexer == -1
535+
needs_masking = mask.any()
536+
mask_info = mask, needs_masking
537+
if needs_masking:
538+
if out is not None and out.dtype != dtype:
539+
raise TypeError("Incompatible type for fill_value")
540+
else:
541+
# if not, then depromote, set fill_value to dummy
542+
# (it won't be used but we don't want the cython code
543+
# to crash when trying to cast it to dtype)
544+
dtype, fill_value = arr.dtype, arr.dtype.type()
545+
546+
indexer = ensure_platform_int(indexer)
547+
return indexer, dtype, fill_value, mask_info

pandas/core/construction.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@
5858
ABCExtensionArray,
5959
ABCIndex,
6060
ABCPandasArray,
61+
ABCRangeIndex,
6162
ABCSeries,
6263
)
6364
from pandas.core.dtypes.missing import isna
@@ -367,7 +368,9 @@ def array(
367368
return PandasArray._from_sequence(data, dtype=dtype, copy=copy)
368369

369370

370-
def extract_array(obj: object, extract_numpy: bool = False) -> Any | ArrayLike:
371+
def extract_array(
372+
obj: object, extract_numpy: bool = False, extract_range: bool = False
373+
) -> Any | ArrayLike:
371374
"""
372375
Extract the ndarray or ExtensionArray from a Series or Index.
373376
@@ -382,6 +385,10 @@ def extract_array(obj: object, extract_numpy: bool = False) -> Any | ArrayLike:
382385
extract_numpy : bool, default False
383386
Whether to extract the ndarray from a PandasArray
384387
388+
extract_range : bool, default False
389+
If we have a RangeIndex, return range._values if True
390+
(which is a materialized integer ndarray), otherwise return unchanged.
391+
385392
Returns
386393
-------
387394
arr : object
@@ -410,6 +417,11 @@ def extract_array(obj: object, extract_numpy: bool = False) -> Any | ArrayLike:
410417
array([1, 2, 3])
411418
"""
412419
if isinstance(obj, (ABCIndex, ABCSeries)):
420+
if isinstance(obj, ABCRangeIndex):
421+
if extract_range:
422+
return obj._values
423+
return obj
424+
413425
obj = obj.array
414426

415427
if extract_numpy and isinstance(obj, ABCPandasArray):

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5373,8 +5373,8 @@ def reset_index(
53735373
@overload
53745374
def reset_index(
53755375
self,
5376-
*,
53775376
level: Hashable | Sequence[Hashable] | None,
5377+
*,
53785378
inplace: Literal[True],
53795379
col_level: Hashable = ...,
53805380
col_fill: Hashable = ...,

pandas/core/groupby/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1140,7 +1140,7 @@ def _numba_prep(self, func, data):
11401140
)
11411141
labels, _, n_groups = self.grouper.group_info
11421142
sorted_index = get_group_index_sorter(labels, n_groups)
1143-
sorted_labels = labels.take(sorted_index)
1143+
sorted_labels = algorithms.take_nd(labels, sorted_index, allow_fill=False)
11441144

11451145
sorted_data = data.take(sorted_index, axis=self.axis).to_numpy()
11461146

pandas/core/groupby/ops.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@
6767
maybe_fill,
6868
)
6969

70+
from pandas.core import algorithms
7071
from pandas.core.arrays import ExtensionArray
7172
from pandas.core.base import SelectionMixin
7273
import pandas.core.common as com
@@ -765,7 +766,7 @@ def _aggregate_series_fast(self, obj: Series, func: F):
765766
# avoids object / Series creation overhead
766767
indexer = get_group_index_sorter(group_index, ngroups)
767768
obj = obj.take(indexer)
768-
group_index = group_index.take(indexer)
769+
group_index = algorithms.take_nd(group_index, indexer, allow_fill=False)
769770
grouper = libreduction.SeriesGrouper(obj, func, group_index, ngroups)
770771
result, counts = grouper.get_result()
771772
return result, counts
@@ -996,7 +997,7 @@ def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0):
996997
@cache_readonly
997998
def slabels(self) -> np.ndarray: # np.ndarray[np.intp]
998999
# Sorted labels
999-
return self.labels.take(self._sort_idx)
1000+
return algorithms.take_nd(self.labels, self._sort_idx, allow_fill=False)
10001001

10011002
@cache_readonly
10021003
def _sort_idx(self) -> np.ndarray: # np.ndarray[np.intp]

0 commit comments

Comments
 (0)