Skip to content

Commit 8cbfc36

Browse files
committed
Merge remote-tracking branch 'upstream/master' into fu1+sort
2 parents ce92f7b + 0cd5c5c commit 8cbfc36

File tree

29 files changed

+778
-226
lines changed

29 files changed

+778
-226
lines changed

doc/source/whatsnew/v0.23.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -896,6 +896,7 @@ Timezones
896896
- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`)
897897
- Bug when iterating over :class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`)
898898
- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`)
899+
- Bug in :func:`melt` that converted tz-aware dtypes to tz-naive (:issue:`15785`)
899900

900901
Offsets
901902
^^^^^^^

pandas/_libs/tslibs/period.pyx

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,11 +1241,55 @@ cdef class _Period(object):
12411241

12421242
@property
12431243
def hour(self):
1244+
"""
1245+
Get the hour of the day component of the Period.
1246+
1247+
Returns
1248+
-------
1249+
int
1250+
The hour as an integer, between 0 and 23.
1251+
1252+
See Also
1253+
--------
1254+
Period.second : Get the second component of the Period.
1255+
Period.minute : Get the minute component of the Period.
1256+
1257+
Examples
1258+
--------
1259+
>>> p = pd.Period("2018-03-11 13:03:12.050000")
1260+
>>> p.hour
1261+
13
1262+
1263+
Period longer than a day
1264+
1265+
>>> p = pd.Period("2018-03-11", freq="M")
1266+
>>> p.hour
1267+
0
1268+
"""
12441269
base, mult = get_freq_code(self.freq)
12451270
return phour(self.ordinal, base)
12461271

12471272
@property
12481273
def minute(self):
1274+
"""
1275+
Get the minute of the hour component of the Period.
1276+
1277+
Returns
1278+
-------
1279+
int
1280+
The minute as an integer, between 0 and 59.
1281+
1282+
See Also
1283+
--------
1284+
Period.hour : Get the hour component of the Period.
1285+
Period.second : Get the second component of the Period.
1286+
1287+
Examples
1288+
--------
1289+
>>> p = pd.Period("2018-03-11 13:03:12.050000")
1290+
>>> p.minute
1291+
3
1292+
"""
12491293
base, mult = get_freq_code(self.freq)
12501294
return pminute(self.ordinal, base)
12511295

@@ -1265,6 +1309,37 @@ cdef class _Period(object):
12651309

12661310
@property
12671311
def dayofweek(self):
1312+
"""
1313+
Return the day of the week.
1314+
1315+
This attribute returns the day of the week on which the particular
1316+
date for the given period occurs, depending on the frequency, with
1317+
Monday=0, Sunday=6.
1318+
1319+
Returns
1320+
-------
1321+
int
1322+
Range from 0 to 6 (included).
1323+
1324+
See Also
1325+
--------
1326+
Period.dayofyear : Return the day of the year.
1327+
Period.daysinmonth : Return the number of days in that month.
1328+
1329+
Examples
1330+
--------
1331+
>>> period1 = pd.Period('2012-1-1 19:00', freq='H')
1332+
>>> period1
1333+
Period('2012-01-01 19:00', 'H')
1334+
>>> period1.dayofweek
1335+
6
1336+
1337+
>>> period2 = pd.Period('2013-1-9 11:00', freq='H')
1338+
>>> period2
1339+
Period('2013-01-09 11:00', 'H')
1340+
>>> period2.dayofweek
1341+
2
1342+
"""
12681343
base, mult = get_freq_code(self.freq)
12691344
return pweekday(self.ordinal, base)
12701345

@@ -1274,6 +1349,36 @@ cdef class _Period(object):
12741349

12751350
@property
12761351
def dayofyear(self):
1352+
"""
1353+
Return the day of the year.
1354+
1355+
This attribute returns the day of the year on which the particular
1356+
date occurs. The return value ranges from 1 to 365 for regular
1357+
years and 1 to 366 for leap years.
1358+
1359+
Returns
1360+
-------
1361+
int
1362+
The day of year.
1363+
1364+
See Also
1365+
--------
1366+
Period.day : Return the day of the month.
1367+
Period.dayofweek : Return the day of the week.
1368+
PeriodIndex.dayofyear : Return the day of year of all indexes.
1369+
1370+
Examples
1371+
--------
1372+
>>> period = pd.Period("2015-10-23", freq='H')
1373+
>>> period.dayofyear
1374+
296
1375+
>>> period = pd.Period("2012-12-31", freq='D')
1376+
>>> period.dayofyear
1377+
366
1378+
>>> period = pd.Period("2013-01-01", freq='D')
1379+
>>> period.dayofyear
1380+
1
1381+
"""
12771382
base, mult = get_freq_code(self.freq)
12781383
return pday_of_year(self.ordinal, base)
12791384

pandas/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,3 +89,11 @@ def compression_no_zip(request):
8989
def datetime_tz_utc():
9090
from datetime import timezone
9191
return timezone.utc
92+
93+
94+
@pytest.fixture(params=['inner', 'outer', 'left', 'right'])
95+
def join_type(request):
96+
"""
97+
Fixture for trying all types of join operations
98+
"""
99+
return request.param

pandas/core/algorithms.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
maybe_promote, construct_1d_object_array_from_listlike)
1111
from pandas.core.dtypes.generic import (
1212
ABCSeries, ABCIndex,
13-
ABCIndexClass, ABCCategorical)
13+
ABCIndexClass)
1414
from pandas.core.dtypes.common import (
15+
is_array_like,
1516
is_unsigned_integer_dtype, is_signed_integer_dtype,
1617
is_integer_dtype, is_complex_dtype,
1718
is_object_dtype,
@@ -168,8 +169,7 @@ def _ensure_arraylike(values):
168169
"""
169170
ensure that we are arraylike if not already
170171
"""
171-
if not isinstance(values, (np.ndarray, ABCCategorical,
172-
ABCIndexClass, ABCSeries)):
172+
if not is_array_like(values):
173173
inferred = lib.infer_dtype(values)
174174
if inferred in ['mixed', 'string', 'unicode']:
175175
if isinstance(values, tuple):
@@ -353,11 +353,8 @@ def unique(values):
353353

354354
values = _ensure_arraylike(values)
355355

356-
# categorical is a fast-path
357-
# this will coerce Categorical, CategoricalIndex,
358-
# and category dtypes Series to same return of Category
359-
if is_categorical_dtype(values):
360-
values = getattr(values, '.values', values)
356+
if is_extension_array_dtype(values):
357+
# Dispatch to extension dtype's unique.
361358
return values.unique()
362359

363360
original = values

pandas/core/arrays/base.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,18 @@ def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
283283
result = result[::-1]
284284
return result
285285

286+
def unique(self):
287+
"""Compute the ExtensionArray of unique values.
288+
289+
Returns
290+
-------
291+
uniques : ExtensionArray
292+
"""
293+
from pandas import unique
294+
295+
uniques = unique(self.astype(object))
296+
return self._constructor_from_sequence(uniques)
297+
286298
# ------------------------------------------------------------------------
287299
# Indexing methods
288300
# ------------------------------------------------------------------------

pandas/core/frame.py

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4456,44 +4456,55 @@ def last_valid_index(self):
44564456

44574457
def pivot(self, index=None, columns=None, values=None):
44584458
"""
4459+
Return reshaped DataFrame organized by given index / column values.
4460+
44594461
Reshape data (produce a "pivot" table) based on column values. Uses
4460-
unique values from index / columns to form axes of the resulting
4461-
DataFrame.
4462+
unique values from specified `index` / `columns` to form axes of the
4463+
resulting DataFrame. This function does not support data aggregation;
4464+
multiple values will result in a MultiIndex in the columns. See the
4465+
:ref:`User Guide <reshaping>` for more on reshaping.
44624466
44634467
Parameters
44644468
----------
44654469
index : string or object, optional
4466-
Column name to use to make new frame's index. If None, uses
4470+
Column to use to make new frame's index. If None, uses
44674471
existing index.
44684472
columns : string or object
4469-
Column name to use to make new frame's columns
4473+
Column to use to make new frame's columns.
44704474
values : string or object, optional
4471-
Column name to use for populating new frame's values. If not
4475+
Column to use for populating new frame's values. If not
44724476
specified, all remaining columns will be used and the result will
4473-
have hierarchically indexed columns
4477+
have hierarchically indexed columns.
44744478
44754479
Returns
44764480
-------
4477-
pivoted : DataFrame
4481+
DataFrame
4482+
Returns reshaped DataFrame.
44784483
4479-
See also
4484+
Raises
4485+
------
4486+
ValueError
4487+
When there are any `index`, `columns` combinations with multiple
4488+
values. Use `DataFrame.pivot_table` when you need to aggregate.
4489+
4490+
See Also
44804491
--------
44814492
DataFrame.pivot_table : generalization of pivot that can handle
4482-
duplicate values for one index/column pair
4493+
duplicate values for one index/column pair.
44834494
DataFrame.unstack : pivot based on the index values instead of a
4484-
column
4495+
column.
44854496
44864497
Notes
44874498
-----
44884499
For finer-tuned control, see hierarchical indexing documentation along
4489-
with the related stack/unstack methods
4500+
with the related stack/unstack methods.
44904501
44914502
Examples
44924503
--------
4493-
4494-
>>> df = pd.DataFrame({'foo': ['one','one','one','two','two','two'],
4495-
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
4496-
'baz': [1, 2, 3, 4, 5, 6]})
4504+
>>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two',
4505+
... 'two'],
4506+
... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
4507+
... 'baz': [1, 2, 3, 4, 5, 6]})
44974508
>>> df
44984509
foo bar baz
44994510
0 one A 1
@@ -4504,16 +4515,36 @@ def pivot(self, index=None, columns=None, values=None):
45044515
5 two C 6
45054516
45064517
>>> df.pivot(index='foo', columns='bar', values='baz')
4507-
A B C
4518+
bar A B C
4519+
foo
45084520
one 1 2 3
45094521
two 4 5 6
45104522
45114523
>>> df.pivot(index='foo', columns='bar')['baz']
4512-
A B C
4524+
bar A B C
4525+
foo
45134526
one 1 2 3
45144527
two 4 5 6
45154528
4529+
A ValueError is raised if there are any duplicates.
4530+
4531+
>>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'],
4532+
... "bar": ['A', 'A', 'B', 'C'],
4533+
... "baz": [1, 2, 3, 4]})
4534+
>>> df
4535+
foo bar baz
4536+
0 one A 1
4537+
1 one A 2
4538+
2 two B 3
4539+
3 two C 4
4540+
4541+
Notice that the first two rows are the same for our `index`
4542+
and `columns` arguments.
45164543
4544+
>>> df.pivot(index='foo', columns='bar', values='baz')
4545+
Traceback (most recent call last):
4546+
...
4547+
ValueError: Index contains duplicate entries, cannot reshape
45174548
"""
45184549
from pandas.core.reshape.reshape import pivot
45194550
return pivot(self, index=index, columns=columns, values=values)

0 commit comments

Comments
 (0)