Skip to content

Commit d3b5992

Browse files
committed
PERF: period factorization
1 parent d98e982 commit d3b5992

File tree

2 files changed

+24
-10
lines changed

2 files changed

+24
-10
lines changed

asv_bench/benchmarks/groupby.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -547,6 +547,18 @@ def setup(self):
547547
def time_groupby_sum(self):
548548
self.df.groupby(['a'])['b'].sum()
549549

550+
class groupby_period(object):
551+
# GH 14338
552+
goal_time = 0.2
553+
554+
def setup(self):
555+
N = 10000
556+
self.pi = pd.period_range('1900-01-01', freq='D', periods=N)
557+
self.df = pd.DataFrame(np.random.randn(N, 2))
558+
559+
def time_groupby_sum(self):
560+
self.df.groupby(self.pi).sum()
561+
550562

551563
#----------------------------------------------------------------------
552564
# Series.value_counts

pandas/core/algorithms.py

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
is_categorical_dtype,
1515
is_extension_type,
1616
is_datetimetz,
17+
is_period,
1718
is_period_dtype,
1819
is_period_arraylike,
1920
is_float_dtype,
@@ -285,15 +286,19 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
285286
note: an array of Periods will ignore sort as it returns an always sorted
286287
PeriodIndex
287288
"""
288-
from pandas import Index, Series, DatetimeIndex
289+
from pandas import Index, Series, DatetimeIndex, PeriodIndex
289290

290-
vals = np.asarray(values)
291291

292-
# localize to UTC
293-
is_datetimetz_type = is_datetimetz(values)
294-
if is_datetimetz_type:
292+
if is_datetimetz(values):
295293
values = DatetimeIndex(values)
296-
vals = values.asi8
294+
295+
if is_period_dtype(values):
296+
values = PeriodIndex(values)
297+
# period array interface goes to object so intercept
298+
vals = values.view(np.int64)
299+
else:
300+
vals = np.asarray(values)
301+
297302

298303
is_datetime = is_datetime64_dtype(vals)
299304
is_timedelta = is_timedelta64_dtype(vals)
@@ -311,10 +316,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
311316
uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,
312317
assume_unique=True)
313318

314-
if is_datetimetz_type:
315-
# reset tz
316-
uniques = values._shallow_copy(uniques)
317-
elif is_datetime:
319+
if is_datetime:
318320
uniques = uniques.astype('M8[ns]')
319321
elif is_timedelta:
320322
uniques = uniques.astype('m8[ns]')

0 commit comments

Comments
 (0)