Skip to content

Commit da5e2c1

Browse files
committed
ENH: array of Periods to PeriodIndex handling, close #1215
1 parent 2044601 commit da5e2c1

File tree

5 files changed

+69
-28
lines changed

5 files changed

+69
-28
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ pandas 0.8.1
4848
- Always apply passed functions in ``resample``, even if upsampling (#1596)
4949
- Avoid unnecessary copies in DataFrame constructor with explicit dtype (#1572)
5050
- Cleaner DatetimeIndex string representation with 1 or 2 elements (#1611)
51+
- Improve performance of array-of-Period to PeriodIndex conversion; convert such arrays
52+
to PeriodIndex inside Index (#1215)
5153

5254
**Bug fixes**
5355

pandas/core/index.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ class InvalidIndexError(Exception):
3838

3939
_o_dtype = np.dtype(object)
4040

41+
42+
def _shouldbe_timestamp(obj):
    """
    Report whether any of the ``lib`` datetime-like array checks accepts
    ``obj``.

    The checks run in order (datetime, datetime64, timestamp) and stop at
    the first one that returns a truthy value; that value is returned.
    """
    found = lib.is_datetime_array(obj)
    if not found:
        found = lib.is_datetime64_array(obj)
    if not found:
        found = lib.is_timestamp_array(obj)
    return found
45+
46+
4147
class Index(np.ndarray):
4248
"""
4349
Immutable ndarray implementing an ordered, sliceable set. The basic object
@@ -100,12 +106,14 @@ def __new__(cls, data, dtype=None, copy=False, name=None):
100106
subarr = com._asarray_tuplesafe(data, dtype=object)
101107

102108
if dtype is None:
103-
if (lib.is_datetime_array(subarr)
104-
or lib.is_datetime64_array(subarr)
105-
or lib.is_timestamp_array(subarr)):
109+
if _shouldbe_timestamp(subarr):
106110
from pandas.tseries.index import DatetimeIndex
107111
return DatetimeIndex(subarr, copy=copy, name=name)
108112

113+
if lib.is_period_array(subarr):
114+
from pandas.tseries.period import PeriodIndex
115+
return PeriodIndex(subarr, name=name)
116+
109117
if lib.is_integer_array(subarr):
110118
return Int64Index(subarr.astype('i8'), name=name)
111119

pandas/src/inference.pyx

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,33 @@ def is_time_array(ndarray[object] values):
256256
return False
257257
return True
258258

259+
def is_period_array(ndarray[object] values):
    """
    Return True only when ``values`` is non-empty and every element is a
    ``pandas.Period`` instance.

    An empty array returns False.
    """
    # Py_ssize_t is the native index type for buffer access and matches
    # extract_ordinals; a C int cannot represent very large array lengths.
    cdef Py_ssize_t i, n = len(values)
    # Imported at call time rather than at module level -- presumably to
    # avoid a circular import with pandas; confirm before hoisting it.
    from pandas import Period

    if n == 0:
        return False
    for i in range(n):
        if not isinstance(values[i], Period):
            return False
    return True
269+
270+
def extract_ordinals(ndarray[object] values, freq):
    """
    Collect the ``ordinal`` attribute of every element of ``values`` into a
    newly allocated int64 array and return it.

    Raises
    ------
    ValueError
        At the first element whose ``freq`` attribute differs from ``freq``
        (message: "<element> is wrong freq").
    """
    cdef Py_ssize_t pos
    cdef Py_ssize_t count = len(values)
    cdef ndarray[int64_t] result = np.empty(count, dtype=np.int64)
    cdef object item

    for pos in range(count):
        item = values[pos]
        # The ordinal is stored before the frequency is validated.
        result[pos] = item.ordinal
        if item.freq != freq:
            raise ValueError("%s is wrong freq" % item)

    return result
283+
284+
285+
259286
def maybe_convert_numeric(ndarray[object] values, set na_values,
260287
convert_empty=True):
261288
'''

pandas/tseries/period.py

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -404,20 +404,12 @@ def _get_date_and_freq(value, freq):
404404
return dt, freq
405405

406406

407-
def _period_unbox(key, check=None):
408-
'''
409-
Period-like => int64
410-
'''
411-
if not isinstance(key, Period):
412-
key = Period(key, freq=check)
413-
elif check is not None:
414-
if key.freq != check:
415-
raise ValueError("%s is wrong freq" % key)
416-
return np.int64(key.ordinal)
417-
418-
def _period_unbox_array(arr, check=None):
419-
unboxer = np.frompyfunc(lambda x: _period_unbox(x, check=check), 1, 1)
420-
return unboxer(arr)
407+
def _get_ordinals(data, freq):
    """
    Convert an object array of Period-like values into their ordinals.

    Parameters
    ----------
    data : object ndarray
        Elements are Period instances and/or values accepted by
        ``Period(x, freq=freq)``.
    freq : frequency the resulting ordinals are expressed in

    Returns
    -------
    Array of ordinals, one per element of ``data``.

    Raises
    ------
    ValueError
        If an element is a Period whose ``freq`` differs from ``freq``.
    """
    # Take the fast path only when *every* element is a Period. Looking at
    # data[0] alone raised IndexError on empty input and sent mixed arrays
    # down a path that could not handle them. is_period_array returns False
    # for an empty array, so that case falls through to map_infer.
    if lib.is_period_array(data):
        return lib.extract_ordinals(data, freq)

    def _to_ordinal(x):
        # Apply the same wrong-freq check as lib.extract_ordinals to any
        # Period found in a mixed array instead of silently re-wrapping it.
        if isinstance(x, Period):
            if x.freq != freq:
                raise ValueError("%s is wrong freq" % x)
            return x.ordinal
        return Period(x, freq=freq).ordinal

    return lib.map_infer(data, _to_ordinal)
421413

422414
def dt64arr_to_periodarr(data, freq):
423415
if data.dtype != np.dtype('M8[ns]'):
@@ -575,18 +567,22 @@ def _from_arraylike(cls, data, freq):
575567
data = list(data)
576568

577569
try:
578-
data = np.array(data, dtype='i8')
570+
data = com._ensure_int64(data)
571+
if freq is None:
572+
raise ValueError('freq not specified')
573+
data = np.array([Period(x, freq=freq).ordinal for x in data],
574+
dtype=np.int64)
579575
except (TypeError, ValueError):
580-
data = np.array(data, dtype='O')
576+
data = com._ensure_object(data)
581577

582-
if freq is None and len(data) > 0:
583-
freq = getattr(data[0], 'freq', None)
578+
if freq is None and len(data) > 0:
579+
freq = getattr(data[0], 'freq', None)
584580

585-
if freq is None:
586-
raise ValueError(('freq not specified and cannot be inferred '
587-
'from first element'))
581+
if freq is None:
582+
raise ValueError('freq not specified and cannot be '
583+
'inferred from first element')
588584

589-
data = _period_unbox_array(data, check=freq)
585+
data = _get_ordinals(data, freq)
590586
else:
591587
if isinstance(data, PeriodIndex):
592588
if freq is None or freq == data.freq:
@@ -610,10 +606,10 @@ def _from_arraylike(cls, data, freq):
610606
pass
611607
else:
612608
try:
613-
data = data.astype('i8')
609+
data = com._ensure_int64(data)
614610
except (TypeError, ValueError):
615-
data = data.astype('O')
616-
data = _period_unbox_array(data, check=freq)
611+
data = com._ensure_object(data)
612+
data = _get_ordinals(data, freq)
617613

618614
return data, freq
619615

pandas/tseries/tests/test_period.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import pandas.tseries.period as pmod
2020

2121
import pandas.core.datetools as datetools
22+
import pandas as pd
2223
import numpy as np
2324
randn = np.random.randn
2425

@@ -1761,6 +1762,13 @@ def test_map(self):
17611762
exp = [x.ordinal for x in index]
17621763
self.assert_(np.array_equal(result, exp))
17631764

1765+
def test_convert_array_of_periods(self):
    # A plain list of Period objects handed to Index should come back as
    # a PeriodIndex.
    periods = list(period_range('1/1/2000', periods=20, freq='D'))

    converted = pd.Index(periods)
    self.assert_(isinstance(converted, PeriodIndex))
1771+
17641772
def _permute(obj):
17651773
return obj.take(np.random.permutation(len(obj)))
17661774

0 commit comments

Comments
 (0)