Skip to content

Commit 6ba030c

Browse files
committed
refactor; add asv
1 parent c9c3d7e commit 6ba030c

File tree

2 files changed

+34
-15
lines changed

2 files changed

+34
-15
lines changed

asv_bench/benchmarks/groupby.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,12 +554,25 @@ class groupby_period(object):
554554

555555
def setup(self):
556556
N = 10000
557-
self.pi = pd.period_range('1900-01-01', freq='D', periods=N)
557+
self.grouper = self.make_grouper()
558558
self.df = pd.DataFrame(np.random.randn(N, 2))
559559

560+
def make_grouper(self):
561+
return pd.period_range('1900-01-01', freq='D', periods=N)
562+
560563
def time_groupby_sum(self):
561-
self.df.groupby(self.pi).sum()
564+
self.df.groupby(self.grouper).sum()
565+
566+
567+
class groupby_datetime(groupby_period):
568+
def make_grouper(self):
569+
return pd.date_range('1900-01-01', freq='D', periods=N)
570+
562571

572+
class groupby_datetimetz(groupby_period):
573+
def make_grouper(self):
574+
return pd.date_range('1900-01-01', freq='D', periods=N,
575+
tz='US/Central')
563576

564577
#----------------------------------------------------------------------
565578
# Series.value_counts

pandas/core/algorithms.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -287,18 +287,25 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
287287
"""
288288
from pandas import Index, Series, DatetimeIndex, PeriodIndex
289289

290-
if is_datetimetz(values):
291-
values = DatetimeIndex(values)
292-
293-
if is_period_dtype(values):
294-
values = PeriodIndex(values)
295-
# period array interface goes to object so intercept
296-
vals = values.view(np.int64)
290+
# handling two possibilities here
291+
# - for a numpy datetimelike simply view as i8 then cast back
292+
# - for an extension datetimelike view as i8 then
293+
# reconstruct from boxed values to transfer metadata
294+
dtype = None
295+
if needs_i8_conversion(values):
296+
if is_period_dtype(values):
297+
values = PeriodIndex(values)
298+
vals = values.asi8
299+
elif is_datetimetz(values):
300+
values = DatetimeIndex(values)
301+
vals = values.asi8
302+
else:
303+
# numpy dtype
304+
dtype = values.dtype
305+
vals = values.view(np.int64)
297306
else:
298307
vals = np.asarray(values)
299308

300-
is_datetime = is_datetime64_dtype(vals)
301-
is_timedelta = is_timedelta64_dtype(vals)
302309
(hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)
303310

304311
table = hash_klass(size_hint or len(vals))
@@ -313,10 +320,9 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
313320
uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,
314321
assume_unique=True)
315322

316-
if is_datetime:
317-
uniques = uniques.astype('M8[ns]')
318-
elif is_timedelta:
319-
uniques = uniques.astype('m8[ns]')
323+
if dtype is not None:
324+
uniques = uniques.astype(dtype)
325+
320326
if isinstance(values, Index):
321327
uniques = values._shallow_copy(uniques, name=None)
322328
elif isinstance(values, Series):

0 commit comments

Comments
 (0)