Update from original. #1

Merged 8 commits on Apr 15, 2020

1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
@@ -467,6 +467,7 @@ Indexing
 - Bug in :meth:`DatetimeIndex.get_loc` raising ``KeyError`` with converted-integer key instead of the user-passed key (:issue:`31425`)
 - Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`)
 - Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`)
+- Bug in :meth:`DataFrame.at` when either columns or index is non-unique (:issue:`33041`)
 - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on an object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
 - Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
 - Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`)
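To make the new GH#33041 entry concrete, here is a minimal sketch of the fixed behavior on a frame with duplicate column labels (illustrative data; assumes the fixed pandas, where `.at` agrees with label-based `.loc`):

```python
import pandas as pd

# Duplicate "x" columns: before the fix, .at mishandled non-unique axes.
df = pd.DataFrame([[1, 2], [3, 4]], index=["r0", "r1"], columns=["x", "x"])

# With the fix, .at falls back to .loc and returns both "x" values:
print(df.at["r0", "x"].tolist())                 # [1, 2]
assert (df.at["r0", "x"] == df.loc["r0", "x"]).all()
```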
108 changes: 39 additions & 69 deletions pandas/_libs/tslibs/period.pyx
@@ -806,24 +806,22 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil:
         return unix_date_to_week(unix_date, freq - FR_WK)
 
 
-cdef void get_date_info(int64_t ordinal, int freq,
-                        npy_datetimestruct *dts) nogil:
+cdef void get_date_info(int64_t ordinal, int freq, npy_datetimestruct *dts) nogil:
     cdef:
-        int64_t unix_date
-        double abstime
+        int64_t unix_date, nanos
+        npy_datetimestruct dts2
 
     unix_date = get_unix_date(ordinal, freq)
-    abstime = get_abs_time(freq, unix_date, ordinal)
-
-    while abstime < 0:
-        abstime += 86400
-        unix_date -= 1
+    nanos = get_time_nanos(freq, unix_date, ordinal)
 
-    while abstime >= 86400:
-        abstime -= 86400
-        unix_date += 1
+    pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts)
 
-    date_info_from_days_and_time(dts, unix_date, abstime)
+    dt64_to_dtstruct(nanos, &dts2)
+    dts.hour = dts2.hour
+    dts.min = dts2.min
+    dts.sec = dts2.sec
+    dts.us = dts2.us
+    dts.ps = dts2.ps
 
 
 cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil:
@@ -855,74 +853,50 @@ cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil:


 @cython.cdivision
-cdef void date_info_from_days_and_time(npy_datetimestruct *dts,
-                                       int64_t unix_date,
-                                       double abstime) nogil:
+cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil:
     """
-    Set the instance's value using the given date and time.
+    Find the number of nanoseconds after midnight on the given unix_date
+    that the ordinal represents in the given frequency.
 
     Parameters
     ----------
-    dts : npy_datetimestruct*
+    freq : int
     unix_date : int64_t
-        days elapsed since datetime(1970, 1, 1)
-    abstime : double
-        seconds elapsed since beginning of day described by unix_date
+    ordinal : int64_t
 
-    Notes
-    -----
-    Updates dts inplace
+    Returns
+    -------
+    int64_t
     """
     cdef:
-        int inttime
-        int hour, minute
-        double second, subsecond_fraction
-
-    # Bounds check
-    # The calling function is responsible for ensuring that
-    # abstime >= 0.0 and abstime <= 86400
-
-    # Calculate the date
-    pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts)
+        int64_t sub, factor
 
-    # Calculate the time
-    inttime = <int>abstime
-    hour = inttime / 3600
-    minute = (inttime % 3600) / 60
-    second = abstime - <double>(hour * 3600 + minute * 60)
+    freq = get_freq_group(freq)
 
-    dts.hour = hour
-    dts.min = minute
-    dts.sec = <int>second
+    if freq <= FR_DAY:
+        return 0
 
-    subsecond_fraction = second - dts.sec
-    dts.us = int((subsecond_fraction) * 1e6)
-    dts.ps = int(((subsecond_fraction) * 1e6 - dts.us) * 1e6)
+    elif freq == FR_NS:
+        factor = 1
 
+    elif freq == FR_US:
+        factor = 10**3
 
-@cython.cdivision
-cdef double get_abs_time(int freq, int64_t unix_date, int64_t ordinal) nogil:
-    cdef:
-        int freq_index, day_index, base_index
-        int64_t per_day, start_ord
-        double unit, result
+    elif freq == FR_MS:
+        factor = 10**6
 
-    if freq <= FR_DAY:
-        return 0
+    elif freq == FR_SEC:
+        factor = 10**9
 
-    freq_index = freq // 1000
-    day_index = FR_DAY // 1000
-    base_index = FR_SEC // 1000
+    elif freq == FR_MIN:
+        factor = 10**9 * 60
 
-    per_day = get_daytime_conversion_factor(day_index, freq_index)
-    unit = get_daytime_conversion_factor(freq_index, base_index)
+    else:
+        # We must have freq == FR_HR
+        factor = 10**9 * 3600
 
-    if base_index < freq_index:
-        unit = 1 / unit
-
-    start_ord = unix_date * per_day
-    result = <double>(unit * (ordinal - start_ord))
-    return result
+    sub = ordinal - unix_date * 24 * 3600 * 10**9 / factor
+    return sub * factor
 
 
 cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year):
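The rewritten helper computes the intra-day offset entirely in integer nanoseconds rather than float seconds, which removes rounding error. A pure-Python mirror of the arithmetic (the function name here is illustrative; the real code is Cython and selects `factor` from the frequency group):

```python
def time_nanos(factor: int, unix_date: int, ordinal: int) -> int:
    """Nanoseconds after midnight of unix_date represented by ordinal.

    factor is nanoseconds per unit of the frequency,
    e.g. 10**9 * 60 for minute frequency.
    """
    units_per_day = 24 * 3600 * 10**9 // factor  # whole units in one day
    sub = ordinal - unix_date * units_per_day    # units since midnight
    return sub * factor

# Minute ordinal 1445 = one full day (1440 minutes) + 5 minutes,
# so on unix_date=1 it is 5 minutes = 300 seconds after midnight:
assert time_nanos(10**9 * 60, 1, 1445) == 300 * 10**9
```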
@@ -1176,11 +1150,7 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1:
     if ordinal == NPY_NAT:
         return NPY_NAT
 
-    if freq == 11000:
-        # Microsecond, avoid get_date_info to prevent floating point errors
-        pandas_datetime_to_datetimestruct(ordinal, NPY_FR_us, &dts)
-    else:
-        get_date_info(ordinal, freq, &dts)
+    get_date_info(ordinal, freq, &dts)
 
     check_dts_bounds(&dts)
     return dtstruct_to_dt64(&dts)
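Because the conversion above is now exact integer arithmetic, the microsecond special case (which bypassed `get_date_info` to dodge float error) is no longer needed. A quick sketch of the round trip that motivated the old workaround (behavior as of the fixed version is assumed to be exact):

```python
import pandas as pd

# Microsecond-frequency period: the integer path preserves the full
# sub-second value through the Period -> datetime64 conversion.
p = pd.Period("2100-06-15 12:34:56.789123", freq="U")
assert p.to_timestamp() == pd.Timestamp("2100-06-15 12:34:56.789123")
```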
21 changes: 9 additions & 12 deletions pandas/core/frame.py
@@ -2586,7 +2586,7 @@ def _ixs(self, i: int, axis: int = 0):
             label = self.columns[i]
 
             values = self._mgr.iget(i)
-            result = self._box_col_values(values, label)
+            result = self._box_col_values(values, i)
 
             # this is a cached value, mark it so
             result._set_as_cached(label, self)
@@ -2691,7 +2691,7 @@ def _getitem_bool_array(self, key):
     def _getitem_multilevel(self, key):
         # self.columns is a MultiIndex
         loc = self.columns.get_loc(key)
-        if isinstance(loc, (slice, Series, np.ndarray, Index)):
+        if isinstance(loc, (slice, np.ndarray)):
             new_columns = self.columns[loc]
             result_columns = maybe_droplevels(new_columns, key)
             if self._is_mixed_type:
@@ -2724,7 +2724,8 @@ def _getitem_multilevel(self, key):
             result._set_is_copy(self)
             return result
         else:
-            return self._get_item_cache(key)
+            # loc is neither a slice nor ndarray, so must be an int
+            return self._ixs(loc, axis=1)
 
     def _get_value(self, index, col, takeable: bool = False):
         """
@@ -2915,19 +2916,15 @@ def _ensure_valid_index(self, value):
                 value.index.copy(), axis=1, fill_value=np.nan
             )
 
-    def _box_item_values(self, key, values):
-        items = self.columns[self.columns.get_loc(key)]
-        if values.ndim == 2:
-            return self._constructor(values.T, columns=items, index=self.index)
-        else:
-            return self._box_col_values(values, items)
-
-    def _box_col_values(self, values, items):
+    def _box_col_values(self, values, loc: int) -> Series:
         """
         Provide boxed values for a column.
         """
+        # Lookup in columns so that if e.g. a str datetime was passed
+        # we attach the Timestamp object as the name.
+        name = self.columns[loc]
         klass = self._constructor_sliced
-        return klass(values, index=self.index, name=items, fastpath=True)
+        return klass(values, index=self.index, name=name, fastpath=True)
 
     # ----------------------------------------------------------------------
     # Unsorted
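`_box_col_values` now receives an integer position instead of a label, so a duplicate label can no longer cause the wrong column (or a 2-D block) to be boxed. A sketch of the user-visible consequence (illustrative data):

```python
import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=["x", "x"])

# Positional access keeps the two "x" columns distinct ...
assert df.iloc[:, 0].tolist() == [1, 3]
assert df.iloc[:, 1].tolist() == [2, 4]

# ... while label access on the duplicate still returns both as a frame.
assert df["x"].shape == (2, 2)
```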
12 changes: 7 additions & 5 deletions pandas/core/generic.py
@@ -3558,18 +3558,20 @@ def _get_item_cache(self, item):
         cache = self._item_cache
         res = cache.get(item)
         if res is None:
-            values = self._mgr.get(item)
-            res = self._box_item_values(item, values)
+            # All places that call _get_item_cache have unique columns,
+            # pending resolution of GH#33047
+
+            loc = self.columns.get_loc(item)
+            values = self._mgr.iget(loc)
+            res = self._box_col_values(values, loc)
+
             cache[item] = res
             res._set_as_cached(item, self)
 
             # for a chain
             res._is_copy = self._is_copy
         return res
 
-    def _box_item_values(self, key, values):
-        raise AbstractMethodError(self)
-
     def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries:
         """
         Construct a slice of this container.
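The item cache now resolves the label to a position up front (`get_loc`), fetches by position (`iget`), and boxes by position; per the GH#33047 comment this assumes the callers have unique columns, where `get_loc` returns a plain integer. A sketch of that path using public API:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# Unique columns: get_loc yields a bare position, not a slice or mask.
loc = df.columns.get_loc("a")
print(loc)           # 0

# df["a"] is served through this positional path and cached by label,
# so repeated access reuses the boxed Series.
col = df["a"]
print(col.name)      # a
```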
8 changes: 7 additions & 1 deletion pandas/core/indexes/category.py
@@ -25,6 +25,7 @@
 from pandas.core.algorithms import take_1d
 from pandas.core.arrays.categorical import Categorical, contains, recode_for_categories
 import pandas.core.common as com
+from pandas.core.construction import extract_array
 import pandas.core.indexes.base as ibase
 from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
 from pandas.core.indexes.extension import ExtensionIndex, inherit_names
@@ -198,8 +199,13 @@ def __new__(
             data = []
 
         assert isinstance(dtype, CategoricalDtype), dtype
-        if not isinstance(data, Categorical) or data.dtype != dtype:
+        data = extract_array(data, extract_numpy=True)
+
+        if not isinstance(data, Categorical):
             data = Categorical(data, dtype=dtype)
+        elif isinstance(dtype, CategoricalDtype) and dtype != data.dtype:
+            # we want to silently ignore dtype='category'
+            data = data._set_dtype(dtype)
 
         data = data.copy() if copy else data
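The reworked constructor first unwraps array-likes via `extract_array`, then distinguishes two cases: build a new `Categorical`, or recode an existing one, silently ignoring a bare `dtype='category'`. An illustrative sketch of the resulting behavior:

```python
import pandas as pd

cat = pd.Categorical(["a", "b", "a"], categories=["a", "b"])

# dtype="category" carries no categories, so the existing ones are kept:
idx = pd.CategoricalIndex(cat, dtype="category")
print(idx.categories.tolist())    # ['a', 'b']

# An explicit CategoricalDtype with different categories recodes:
new_dtype = pd.CategoricalDtype(categories=["b", "a"])
idx2 = pd.CategoricalIndex(cat, dtype=new_dtype)
print(idx2.categories.tolist())   # ['b', 'a']
```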
32 changes: 25 additions & 7 deletions pandas/core/indexing.py
@@ -2045,6 +2045,7 @@ def __setitem__(self, key, value):
             key = _tuplify(self.ndim, key)
         if len(key) != self.ndim:
             raise ValueError("Not enough indexers for scalar access (setting)!")
+
         key = list(self._convert_key(key, is_setter=True))
         self.obj._set_value(*key, value=value, takeable=self._takeable)
@@ -2064,15 +2065,32 @@ def _convert_key(self, key, is_setter: bool = False):

         return key
 
+    @property
+    def _axes_are_unique(self) -> bool:
+        # Only relevant for self.ndim == 2
+        assert self.ndim == 2
+        return self.obj.index.is_unique and self.obj.columns.is_unique
+
     def __getitem__(self, key):
-        if self.ndim != 1 or not is_scalar(key):
-            # FIXME: is_scalar check is a kludge
-            return super().__getitem__(key)
-
-        # Like Index.get_value, but we do not allow positional fallback
-        obj = self.obj
-        loc = obj.index.get_loc(key)
-        return obj.index._get_values_for_loc(obj, loc, key)
+        if self.ndim == 2 and not self._axes_are_unique:
+            # GH#33041 fall back to .loc
+            if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
+                raise ValueError("Invalid call for scalar access (getting)!")
+            return self.obj.loc[key]
+
+        return super().__getitem__(key)
+
+    def __setitem__(self, key, value):
+        if self.ndim == 2 and not self._axes_are_unique:
+            # GH#33041 fall back to .loc
+            if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
+                raise ValueError("Invalid call for scalar access (setting)!")
+
+            self.obj.loc[key] = value
+            return
+
+        return super().__setitem__(key, value)
 
 
 @doc(IndexingMixin.iat)
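Putting the indexing change together: when either axis is non-unique, `.at` now routes both get and set through `.loc` while still requiring one scalar key per axis. A sketch of the new behavior (illustrative data; pandas 1.1 semantics assumed):

```python
import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3]}, index=["a", "b", "a"])

# Non-unique index: the getter falls back to .loc and returns both rows.
print(df.at["a", "A"].tolist())   # [1, 3]

# The setter routes through .loc as well:
df.at["a", "A"] = 0
print(df["A"].tolist())           # [0, 2, 0]

# Keys must still be scalar per axis:
try:
    df.at[["a", "b"], "A"]
except ValueError as err:
    print(err)                    # Invalid call for scalar access (getting)!
```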