Skip to content

Commit 74eec6c

Browse files
authored
Merge pull request #1 from pandas-dev/master
Update from original.
2 parents d106b81 + 3d4f9dc commit 74eec6c

File tree

18 files changed

+251
-307
lines changed

18 files changed

+251
-307
lines changed

doc/source/whatsnew/v1.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,7 @@ Indexing
467467
- Bug in :meth:`DatetimeIndex.get_loc` raising ``KeyError`` with converted-integer key instead of the user-passed key (:issue:`31425`)
468468
- Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`)
469469
- Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`)
470+
- Bug in :meth:`DataFrame.at` when either columns or index is non-unique (:issue:`33041`)
470471
- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on an object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
471472
- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
472473
- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`)

pandas/_libs/tslibs/period.pyx

Lines changed: 39 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -806,24 +806,22 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil:
806806
return unix_date_to_week(unix_date, freq - FR_WK)
807807

808808

809-
cdef void get_date_info(int64_t ordinal, int freq,
810-
npy_datetimestruct *dts) nogil:
809+
cdef void get_date_info(int64_t ordinal, int freq, npy_datetimestruct *dts) nogil:
811810
cdef:
812-
int64_t unix_date
813-
double abstime
811+
int64_t unix_date, nanos
812+
npy_datetimestruct dts2
814813

815814
unix_date = get_unix_date(ordinal, freq)
816-
abstime = get_abs_time(freq, unix_date, ordinal)
817-
818-
while abstime < 0:
819-
abstime += 86400
820-
unix_date -= 1
815+
nanos = get_time_nanos(freq, unix_date, ordinal)
821816

822-
while abstime >= 86400:
823-
abstime -= 86400
824-
unix_date += 1
817+
pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts)
825818

826-
date_info_from_days_and_time(dts, unix_date, abstime)
819+
dt64_to_dtstruct(nanos, &dts2)
820+
dts.hour = dts2.hour
821+
dts.min = dts2.min
822+
dts.sec = dts2.sec
823+
dts.us = dts2.us
824+
dts.ps = dts2.ps
827825

828826

829827
cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil:
@@ -855,74 +853,50 @@ cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil:
855853

856854

857855
@cython.cdivision
858-
cdef void date_info_from_days_and_time(npy_datetimestruct *dts,
859-
int64_t unix_date,
860-
double abstime) nogil:
856+
cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil:
861857
"""
862-
Set the instance's value using the given date and time.
858+
Find the number of nanoseconds after midnight on the given unix_date
859+
that the ordinal represents in the given frequency.
863860
864861
Parameters
865862
----------
866-
dts : npy_datetimestruct*
863+
freq : int
867864
unix_date : int64_t
868-
days elapsed since datetime(1970, 1, 1)
869-
abstime : double
870-
seconds elapsed since beginning of day described by unix_date
865+
ordinal : int64_t
871866
872-
Notes
873-
-----
874-
Updates dts inplace
867+
Returns
868+
-------
869+
int64_t
875870
"""
876871
cdef:
877-
int inttime
878-
int hour, minute
879-
double second, subsecond_fraction
880-
881-
# Bounds check
882-
# The calling function is responsible for ensuring that
883-
# abstime >= 0.0 and abstime <= 86400
884-
885-
# Calculate the date
886-
pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts)
872+
int64_t sub, factor
887873

888-
# Calculate the time
889-
inttime = <int>abstime
890-
hour = inttime / 3600
891-
minute = (inttime % 3600) / 60
892-
second = abstime - <double>(hour * 3600 + minute * 60)
874+
freq = get_freq_group(freq)
893875

894-
dts.hour = hour
895-
dts.min = minute
896-
dts.sec = <int>second
897-
898-
subsecond_fraction = second - dts.sec
899-
dts.us = int((subsecond_fraction) * 1e6)
900-
dts.ps = int(((subsecond_fraction) * 1e6 - dts.us) * 1e6)
876+
if freq <= FR_DAY:
877+
return 0
901878

879+
elif freq == FR_NS:
880+
factor = 1
902881

903-
@cython.cdivision
904-
cdef double get_abs_time(int freq, int64_t unix_date, int64_t ordinal) nogil:
905-
cdef:
906-
int freq_index, day_index, base_index
907-
int64_t per_day, start_ord
908-
double unit, result
882+
elif freq == FR_US:
883+
factor = 10**3
909884

910-
if freq <= FR_DAY:
911-
return 0
885+
elif freq == FR_MS:
886+
factor = 10**6
912887

913-
freq_index = freq // 1000
914-
day_index = FR_DAY // 1000
915-
base_index = FR_SEC // 1000
888+
elif freq == FR_SEC:
889+
factor = 10 **9
916890

917-
per_day = get_daytime_conversion_factor(day_index, freq_index)
918-
unit = get_daytime_conversion_factor(freq_index, base_index)
891+
elif freq == FR_MIN:
892+
factor = 10**9 * 60
919893

920-
if base_index < freq_index:
921-
unit = 1 / unit
894+
else:
895+
# We must have freq == FR_HR
896+
factor = 10**9 * 3600
922897

923-
start_ord = unix_date * per_day
924-
result = <double>(unit * (ordinal - start_ord))
925-
return result
898+
sub = ordinal - unix_date * 24 * 3600 * 10**9 / factor
899+
return sub * factor
926900

927901

928902
cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year):
@@ -1176,11 +1150,7 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1:
11761150
if ordinal == NPY_NAT:
11771151
return NPY_NAT
11781152

1179-
if freq == 11000:
1180-
# Microsecond, avoid get_date_info to prevent floating point errors
1181-
pandas_datetime_to_datetimestruct(ordinal, NPY_FR_us, &dts)
1182-
else:
1183-
get_date_info(ordinal, freq, &dts)
1153+
get_date_info(ordinal, freq, &dts)
11841154

11851155
check_dts_bounds(&dts)
11861156
return dtstruct_to_dt64(&dts)

pandas/core/frame.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2586,7 +2586,7 @@ def _ixs(self, i: int, axis: int = 0):
25862586
label = self.columns[i]
25872587

25882588
values = self._mgr.iget(i)
2589-
result = self._box_col_values(values, label)
2589+
result = self._box_col_values(values, i)
25902590

25912591
# this is a cached value, mark it so
25922592
result._set_as_cached(label, self)
@@ -2691,7 +2691,7 @@ def _getitem_bool_array(self, key):
26912691
def _getitem_multilevel(self, key):
26922692
# self.columns is a MultiIndex
26932693
loc = self.columns.get_loc(key)
2694-
if isinstance(loc, (slice, Series, np.ndarray, Index)):
2694+
if isinstance(loc, (slice, np.ndarray)):
26952695
new_columns = self.columns[loc]
26962696
result_columns = maybe_droplevels(new_columns, key)
26972697
if self._is_mixed_type:
@@ -2724,7 +2724,8 @@ def _getitem_multilevel(self, key):
27242724
result._set_is_copy(self)
27252725
return result
27262726
else:
2727-
return self._get_item_cache(key)
2727+
# loc is neither a slice nor ndarray, so must be an int
2728+
return self._ixs(loc, axis=1)
27282729

27292730
def _get_value(self, index, col, takeable: bool = False):
27302731
"""
@@ -2915,19 +2916,15 @@ def _ensure_valid_index(self, value):
29152916
value.index.copy(), axis=1, fill_value=np.nan
29162917
)
29172918

2918-
def _box_item_values(self, key, values):
2919-
items = self.columns[self.columns.get_loc(key)]
2920-
if values.ndim == 2:
2921-
return self._constructor(values.T, columns=items, index=self.index)
2922-
else:
2923-
return self._box_col_values(values, items)
2924-
2925-
def _box_col_values(self, values, items):
2919+
def _box_col_values(self, values, loc: int) -> Series:
29262920
"""
29272921
Provide boxed values for a column.
29282922
"""
2923+
# Lookup in columns so that if e.g. a str datetime was passed
2924+
# we attach the Timestamp object as the name.
2925+
name = self.columns[loc]
29292926
klass = self._constructor_sliced
2930-
return klass(values, index=self.index, name=items, fastpath=True)
2927+
return klass(values, index=self.index, name=name, fastpath=True)
29312928

29322929
# ----------------------------------------------------------------------
29332930
# Unsorted

pandas/core/generic.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3558,18 +3558,20 @@ def _get_item_cache(self, item):
35583558
cache = self._item_cache
35593559
res = cache.get(item)
35603560
if res is None:
3561-
values = self._mgr.get(item)
3562-
res = self._box_item_values(item, values)
3561+
# All places that call _get_item_cache have unique columns,
3562+
# pending resolution of GH#33047
3563+
3564+
loc = self.columns.get_loc(item)
3565+
values = self._mgr.iget(loc)
3566+
res = self._box_col_values(values, loc)
3567+
35633568
cache[item] = res
35643569
res._set_as_cached(item, self)
35653570

35663571
# for a chain
35673572
res._is_copy = self._is_copy
35683573
return res
35693574

3570-
def _box_item_values(self, key, values):
3571-
raise AbstractMethodError(self)
3572-
35733575
def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries:
35743576
"""
35753577
Construct a slice of this container.

pandas/core/indexes/category.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from pandas.core.algorithms import take_1d
2626
from pandas.core.arrays.categorical import Categorical, contains, recode_for_categories
2727
import pandas.core.common as com
28+
from pandas.core.construction import extract_array
2829
import pandas.core.indexes.base as ibase
2930
from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
3031
from pandas.core.indexes.extension import ExtensionIndex, inherit_names
@@ -198,8 +199,13 @@ def __new__(
198199
data = []
199200

200201
assert isinstance(dtype, CategoricalDtype), dtype
201-
if not isinstance(data, Categorical) or data.dtype != dtype:
202+
data = extract_array(data, extract_numpy=True)
203+
204+
if not isinstance(data, Categorical):
202205
data = Categorical(data, dtype=dtype)
206+
elif isinstance(dtype, CategoricalDtype) and dtype != data.dtype:
207+
# we want to silently ignore dtype='category'
208+
data = data._set_dtype(dtype)
203209

204210
data = data.copy() if copy else data
205211

pandas/core/indexing.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2045,6 +2045,7 @@ def __setitem__(self, key, value):
20452045
key = _tuplify(self.ndim, key)
20462046
if len(key) != self.ndim:
20472047
raise ValueError("Not enough indexers for scalar access (setting)!")
2048+
20482049
key = list(self._convert_key(key, is_setter=True))
20492050
self.obj._set_value(*key, value=value, takeable=self._takeable)
20502051

@@ -2064,15 +2065,32 @@ def _convert_key(self, key, is_setter: bool = False):
20642065

20652066
return key
20662067

2068+
@property
2069+
def _axes_are_unique(self) -> bool:
2070+
# Only relevant for self.ndim == 2
2071+
assert self.ndim == 2
2072+
return self.obj.index.is_unique and self.obj.columns.is_unique
2073+
20672074
def __getitem__(self, key):
2068-
if self.ndim != 1 or not is_scalar(key):
2069-
# FIXME: is_scalar check is a kludge
2070-
return super().__getitem__(key)
20712075

2072-
# Like Index.get_value, but we do not allow positional fallback
2073-
obj = self.obj
2074-
loc = obj.index.get_loc(key)
2075-
return obj.index._get_values_for_loc(obj, loc, key)
2076+
if self.ndim == 2 and not self._axes_are_unique:
2077+
# GH#33041 fall back to .loc
2078+
if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
2079+
raise ValueError("Invalid call for scalar access (getting)!")
2080+
return self.obj.loc[key]
2081+
2082+
return super().__getitem__(key)
2083+
2084+
def __setitem__(self, key, value):
2085+
if self.ndim == 2 and not self._axes_are_unique:
2086+
# GH#33041 fall back to .loc
2087+
if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
2088+
raise ValueError("Invalid call for scalar access (setting)!")
2089+
2090+
self.obj.loc[key] = value
2091+
return
2092+
2093+
return super().__setitem__(key, value)
20762094

20772095

20782096
@doc(IndexingMixin.iat)

0 commit comments

Comments
 (0)