14
14
from hypothesis import given
15
15
from hypothesis .strategies import composite , dates , integers , sampled_from
16
16
17
- from pandas import (notna , DataFrame , Series , MultiIndex , date_range ,
18
- Timestamp , compat )
19
- import pandas as pd
17
+ from pandas import compat
18
+ from pandas import (DataFrame , Series , Index , MultiIndex , DatetimeIndex ,
19
+ Timestamp , Timedelta , Period , date_range , to_datetime ,
20
+ to_timedelta , NaT , concat , notna )
21
+
20
22
from pandas .core .dtypes .dtypes import CategoricalDtype
21
23
from pandas .core .apply import frame_apply
22
24
from pandas .util .testing import (assert_series_equal ,
@@ -72,7 +74,7 @@ def test_apply_mixed_datetimelike(self):
72
74
# mixed datetimelike
73
75
# GH 7778
74
76
df = DataFrame ({'A' : date_range ('20130101' , periods = 3 ),
75
- 'B' : pd . to_timedelta (np .arange (3 ), unit = 's' )})
77
+ 'B' : to_timedelta (np .arange (3 ), unit = 's' )})
76
78
result = df .apply (lambda x : x , axis = 1 )
77
79
assert_frame_equal (result , df )
78
80
@@ -106,14 +108,14 @@ def test_apply_with_reduce_empty(self, empty_frame):
106
108
assert_frame_equal (result , empty_frame )
107
109
result = empty_frame .apply (x .append , axis = 1 , result_type = 'reduce' )
108
110
assert_series_equal (result , Series (
109
- [], index = pd . Index ([], dtype = object )))
111
+ [], index = Index ([], dtype = object )))
110
112
111
113
empty_with_cols = DataFrame (columns = ['a' , 'b' , 'c' ])
112
114
result = empty_with_cols .apply (x .append , axis = 1 , result_type = 'expand' )
113
115
assert_frame_equal (result , empty_with_cols )
114
116
result = empty_with_cols .apply (x .append , axis = 1 , result_type = 'reduce' )
115
117
assert_series_equal (result , Series (
116
- [], index = pd . Index ([], dtype = object )))
118
+ [], index = Index ([], dtype = object )))
117
119
118
120
# Ensure that x.append hasn't been called
119
121
assert x == []
@@ -241,7 +243,7 @@ def test_apply_mixed_dtype_corner(self):
241
243
result = df [:0 ].apply (np .mean , axis = 1 )
242
244
# the result here is actually kind of ambiguous, should it be a Series
243
245
# or a DataFrame?
244
- expected = Series (np .nan , index = pd . Index ([], dtype = 'int64' ))
246
+ expected = Series (np .nan , index = Index ([], dtype = 'int64' ))
245
247
assert_series_equal (result , expected )
246
248
247
249
df = DataFrame ({'A' : ['foo' ],
@@ -371,10 +373,10 @@ def transform2(row):
371
373
def test_apply_bug (self ):
372
374
373
375
# GH 6125
374
- positions = pd . DataFrame ([[1 , 'ABC0' , 50 ], [1 , 'YUM0' , 20 ],
375
- [1 , 'DEF0' , 20 ], [2 , 'ABC1' , 50 ],
376
- [2 , 'YUM1' , 20 ], [2 , 'DEF1' , 20 ]],
377
- columns = ['a' , 'market' , 'position' ])
376
+ positions = DataFrame ([[1 , 'ABC0' , 50 ], [1 , 'YUM0' , 20 ],
377
+ [1 , 'DEF0' , 20 ], [2 , 'ABC1' , 50 ],
378
+ [2 , 'YUM1' , 20 ], [2 , 'DEF1' , 20 ]],
379
+ columns = ['a' , 'market' , 'position' ])
378
380
379
381
def f (r ):
380
382
return r ['market' ]
@@ -493,47 +495,47 @@ def test_applymap(self, float_frame):
493
495
494
496
# datetime/timedelta
495
497
df ['datetime' ] = Timestamp ('20130101' )
496
- df ['timedelta' ] = pd . Timedelta ('1 min' )
498
+ df ['timedelta' ] = Timedelta ('1 min' )
497
499
result = df .applymap (str )
498
500
for f in ['datetime' , 'timedelta' ]:
499
501
assert result .loc [0 , f ] == str (df .loc [0 , f ])
500
502
501
503
# see gh-8222
502
- empty_frames = [pd . DataFrame (),
503
- pd . DataFrame (columns = list ('ABC' )),
504
- pd . DataFrame (index = list ('ABC' )),
505
- pd . DataFrame ({'A' : [], 'B' : [], 'C' : []})]
504
+ empty_frames = [DataFrame (),
505
+ DataFrame (columns = list ('ABC' )),
506
+ DataFrame (index = list ('ABC' )),
507
+ DataFrame ({'A' : [], 'B' : [], 'C' : []})]
506
508
for frame in empty_frames :
507
509
for func in [round , lambda x : x ]:
508
510
result = frame .applymap (func )
509
511
tm .assert_frame_equal (result , frame )
510
512
511
513
def test_applymap_box_timestamps (self ):
512
514
# #2689, #2627
513
- ser = pd . Series (date_range ('1/1/2000' , periods = 10 ))
515
+ ser = Series (date_range ('1/1/2000' , periods = 10 ))
514
516
515
517
def func (x ):
516
518
return (x .hour , x .day , x .month )
517
519
518
520
# it works!
519
- pd . DataFrame (ser ).applymap (func )
521
+ DataFrame (ser ).applymap (func )
520
522
521
523
def test_applymap_box (self ):
522
524
# ufunc will not be boxed. Same test cases as the test_map_box
523
- df = pd . DataFrame ({'a' : [pd . Timestamp ('2011-01-01' ),
524
- pd . Timestamp ('2011-01-02' )],
525
- 'b' : [pd . Timestamp ('2011-01-01' , tz = 'US/Eastern' ),
526
- pd . Timestamp ('2011-01-02' , tz = 'US/Eastern' )],
527
- 'c' : [pd . Timedelta ('1 days' ),
528
- pd . Timedelta ('2 days' )],
529
- 'd' : [pd . Period ('2011-01-01' , freq = 'M' ),
530
- pd . Period ('2011-01-02' , freq = 'M' )]})
525
+ df = DataFrame ({'a' : [Timestamp ('2011-01-01' ),
526
+ Timestamp ('2011-01-02' )],
527
+ 'b' : [Timestamp ('2011-01-01' , tz = 'US/Eastern' ),
528
+ Timestamp ('2011-01-02' , tz = 'US/Eastern' )],
529
+ 'c' : [Timedelta ('1 days' ),
530
+ Timedelta ('2 days' )],
531
+ 'd' : [Period ('2011-01-01' , freq = 'M' ),
532
+ Period ('2011-01-02' , freq = 'M' )]})
531
533
532
534
res = df .applymap (lambda x : '{0}' .format (x .__class__ .__name__ ))
533
- exp = pd . DataFrame ({'a' : ['Timestamp' , 'Timestamp' ],
534
- 'b' : ['Timestamp' , 'Timestamp' ],
535
- 'c' : ['Timedelta' , 'Timedelta' ],
536
- 'd' : ['Period' , 'Period' ]})
535
+ exp = DataFrame ({'a' : ['Timestamp' , 'Timestamp' ],
536
+ 'b' : ['Timestamp' , 'Timestamp' ],
537
+ 'c' : ['Timedelta' , 'Timedelta' ],
538
+ 'd' : ['Period' , 'Period' ]})
537
539
tm .assert_frame_equal (res , exp )
538
540
539
541
def test_frame_apply_dont_convert_datetime64 (self ):
@@ -547,14 +549,14 @@ def test_frame_apply_dont_convert_datetime64(self):
547
549
548
550
def test_apply_non_numpy_dtype (self ):
549
551
# See gh-12244
550
- df = DataFrame ({'dt' : pd . date_range (
552
+ df = DataFrame ({'dt' : date_range (
551
553
"2015-01-01" , periods = 3 , tz = 'Europe/Brussels' )})
552
554
result = df .apply (lambda x : x )
553
555
assert_frame_equal (result , df )
554
556
555
- result = df .apply (lambda x : x + pd . Timedelta ('1day' ))
556
- expected = DataFrame ({'dt' : pd . date_range (
557
- "2015-01-02" , periods = 3 , tz = 'Europe/Brussels' )})
557
+ result = df .apply (lambda x : x + Timedelta ('1day' ))
558
+ expected = DataFrame ({'dt' : date_range ("2015-01-02" , periods = 3 ,
559
+ tz = 'Europe/Brussels' )})
558
560
assert_frame_equal (result , expected )
559
561
560
562
df = DataFrame ({'dt' : ['a' , 'b' , 'c' , 'a' ]}, dtype = 'category' )
@@ -563,8 +565,8 @@ def test_apply_non_numpy_dtype(self):
563
565
564
566
def test_apply_dup_names_multi_agg (self ):
565
567
# GH 21063
566
- df = pd . DataFrame ([[0 , 1 ], [2 , 3 ]], columns = ['a' , 'a' ])
567
- expected = pd . DataFrame ([[0 , 1 ]], columns = ['a' , 'a' ], index = ['min' ])
568
+ df = DataFrame ([[0 , 1 ], [2 , 3 ]], columns = ['a' , 'a' ])
569
+ expected = DataFrame ([[0 , 1 ]], columns = ['a' , 'a' ], index = ['min' ])
568
570
result = df .agg (['min' ])
569
571
570
572
tm .assert_frame_equal (result , expected )
@@ -578,7 +580,7 @@ class TestInferOutputShape(object):
578
580
def test_infer_row_shape (self ):
579
581
# gh-17437
580
582
# if row shape is changing, infer it
581
- df = pd . DataFrame (np .random .rand (10 , 2 ))
583
+ df = DataFrame (np .random .rand (10 , 2 ))
582
584
result = df .apply (np .fft .fft , axis = 0 )
583
585
assert result .shape == (10 , 2 )
584
586
@@ -593,8 +595,8 @@ def test_with_dictlike_columns(self):
593
595
expected = Series ([{'s' : 3 } for t in df .itertuples ()])
594
596
assert_series_equal (result , expected )
595
597
596
- df ['tm' ] = [pd . Timestamp ('2017-05-01 00:00:00' ),
597
- pd . Timestamp ('2017-05-02 00:00:00' )]
598
+ df ['tm' ] = [Timestamp ('2017-05-01 00:00:00' ),
599
+ Timestamp ('2017-05-02 00:00:00' )]
598
600
result = df .apply (lambda x : {'s' : x ['a' ] + x ['b' ]},
599
601
axis = 1 )
600
602
assert_series_equal (result , expected )
@@ -608,9 +610,9 @@ def test_with_dictlike_columns(self):
608
610
df = DataFrame ()
609
611
df ["author" ] = ["X" , "Y" , "Z" ]
610
612
df ["publisher" ] = ["BBC" , "NBC" , "N24" ]
611
- df ["date" ] = pd . to_datetime (['17-10-2010 07:15:30' ,
612
- '13-05-2011 08:20:35' ,
613
- '15-01-2013 09:09:09' ])
613
+ df ["date" ] = to_datetime (['17-10-2010 07:15:30' ,
614
+ '13-05-2011 08:20:35' ,
615
+ '15-01-2013 09:09:09' ])
614
616
result = df .apply (lambda x : {}, axis = 1 )
615
617
expected = Series ([{}, {}, {}])
616
618
assert_series_equal (result , expected )
@@ -623,8 +625,8 @@ def test_with_dictlike_columns_with_infer(self):
623
625
expected = DataFrame ({'s' : [3 , 3 ]})
624
626
assert_frame_equal (result , expected )
625
627
626
- df ['tm' ] = [pd . Timestamp ('2017-05-01 00:00:00' ),
627
- pd . Timestamp ('2017-05-02 00:00:00' )]
628
+ df ['tm' ] = [Timestamp ('2017-05-01 00:00:00' ),
629
+ Timestamp ('2017-05-02 00:00:00' )]
628
630
result = df .apply (lambda x : {'s' : x ['a' ] + x ['b' ]},
629
631
axis = 1 , result_type = 'expand' )
630
632
assert_frame_equal (result , expected )
@@ -659,8 +661,8 @@ def test_infer_output_shape_columns(self):
659
661
660
662
df = DataFrame ({'number' : [1. , 2. ],
661
663
'string' : ['foo' , 'bar' ],
662
- 'datetime' : [pd . Timestamp ('2017-11-29 03:30:00' ),
663
- pd . Timestamp ('2017-11-29 03:45:00' )]})
664
+ 'datetime' : [Timestamp ('2017-11-29 03:30:00' ),
665
+ Timestamp ('2017-11-29 03:45:00' )]})
664
666
result = df .apply (lambda row : (row .number , row .string ), axis = 1 )
665
667
expected = Series ([(t .number , t .string ) for t in df .itertuples ()])
666
668
assert_series_equal (result , expected )
@@ -692,13 +694,13 @@ def test_infer_output_shape_listlike_columns(self):
692
694
assert_series_equal (result , expected )
693
695
694
696
# gh-17892
695
- df = pd . DataFrame ({'a' : [pd . Timestamp ('2010-02-01' ),
696
- pd . Timestamp ('2010-02-04' ),
697
- pd . Timestamp ('2010-02-05' ),
698
- pd . Timestamp ('2010-02-06' )],
699
- 'b' : [9 , 5 , 4 , 3 ],
700
- 'c' : [5 , 3 , 4 , 2 ],
701
- 'd' : [1 , 2 , 3 , 4 ]})
697
+ df = DataFrame ({'a' : [Timestamp ('2010-02-01' ),
698
+ Timestamp ('2010-02-04' ),
699
+ Timestamp ('2010-02-05' ),
700
+ Timestamp ('2010-02-06' )],
701
+ 'b' : [9 , 5 , 4 , 3 ],
702
+ 'c' : [5 , 3 , 4 , 2 ],
703
+ 'd' : [1 , 2 , 3 , 4 ]})
702
704
703
705
def fun (x ):
704
706
return (1 , 2 )
@@ -815,11 +817,11 @@ def zip_frames(frames, axis=1):
815
817
if axis == 1 :
816
818
columns = frames [0 ].columns
817
819
zipped = [f .loc [:, c ] for c in columns for f in frames ]
818
- return pd . concat (zipped , axis = 1 )
820
+ return concat (zipped , axis = 1 )
819
821
else :
820
822
index = frames [0 ].index
821
823
zipped = [f .loc [i , :] for i in index for f in frames ]
822
- return pd . DataFrame (zipped )
824
+ return DataFrame (zipped )
823
825
824
826
825
827
class TestDataFrameAggregate ():
@@ -847,10 +849,10 @@ def test_agg_transform(self, axis, float_frame):
847
849
result = float_frame .apply ([np .sqrt ], axis = axis )
848
850
expected = f_sqrt .copy ()
849
851
if axis in {0 , 'index' }:
850
- expected .columns = pd . MultiIndex .from_product (
852
+ expected .columns = MultiIndex .from_product (
851
853
[float_frame .columns , ['sqrt' ]])
852
854
else :
853
- expected .index = pd . MultiIndex .from_product (
855
+ expected .index = MultiIndex .from_product (
854
856
[float_frame .index , ['sqrt' ]])
855
857
assert_frame_equal (result , expected )
856
858
@@ -863,10 +865,10 @@ def test_agg_transform(self, axis, float_frame):
863
865
result = float_frame .apply ([np .abs , np .sqrt ], axis = axis )
864
866
expected = zip_frames ([f_abs , f_sqrt ], axis = other_axis )
865
867
if axis in {0 , 'index' }:
866
- expected .columns = pd . MultiIndex .from_product (
868
+ expected .columns = MultiIndex .from_product (
867
869
[float_frame .columns , ['absolute' , 'sqrt' ]])
868
870
else :
869
- expected .index = pd . MultiIndex .from_product (
871
+ expected .index = MultiIndex .from_product (
870
872
[float_frame .index , ['absolute' , 'sqrt' ]])
871
873
assert_frame_equal (result , expected )
872
874
@@ -889,7 +891,7 @@ def f():
889
891
float_frame .transform (['max' , 'sqrt' ], axis = axis )
890
892
pytest .raises (ValueError , f )
891
893
892
- df = pd . DataFrame ({'A' : range (5 ), 'B' : 5 })
894
+ df = DataFrame ({'A' : range (5 ), 'B' : 5 })
893
895
894
896
def f ():
895
897
with np .errstate (all = 'ignore' ):
@@ -900,14 +902,14 @@ def f():
900
902
])
901
903
def test_transform_method_name (self , method ):
902
904
# https://github.com/pandas-dev/pandas/issues/19760
903
- df = pd . DataFrame ({"A" : [- 1 , 2 ]})
905
+ df = DataFrame ({"A" : [- 1 , 2 ]})
904
906
result = df .transform (method )
905
907
expected = operator .methodcaller (method )(df )
906
908
tm .assert_frame_equal (result , expected )
907
909
908
910
def test_demo (self ):
909
911
# demonstration tests
910
- df = pd . DataFrame ({'A' : range (5 ), 'B' : 5 })
912
+ df = DataFrame ({'A' : range (5 ), 'B' : 5 })
911
913
912
914
result = df .agg (['min' , 'max' ])
913
915
expected = DataFrame ({'A' : [0 , 4 ], 'B' : [5 , 5 ]},
@@ -924,14 +926,14 @@ def test_demo(self):
924
926
925
927
def test_agg_multiple_mixed_no_warning (self ):
926
928
# https://github.com/pandas-dev/pandas/issues/20909
927
- mdf = pd . DataFrame ({'A' : [1 , 2 , 3 ],
928
- 'B' : [1. , 2. , 3. ],
929
- 'C' : ['foo' , 'bar' , 'baz' ],
930
- 'D' : pd . date_range ('20130101' , periods = 3 )})
931
- expected = pd . DataFrame ({"A" : [1 , 6 ], 'B' : [1.0 , 6.0 ],
932
- "C" : ['bar' , 'foobarbaz' ],
933
- "D" : [pd . Timestamp ('2013-01-01' ), pd . NaT ]},
934
- index = ['min' , 'sum' ])
929
+ mdf = DataFrame ({'A' : [1 , 2 , 3 ],
930
+ 'B' : [1. , 2. , 3. ],
931
+ 'C' : ['foo' , 'bar' , 'baz' ],
932
+ 'D' : date_range ('20130101' , periods = 3 )})
933
+ expected = DataFrame ({'A' : [1 , 6 ], 'B' : [1.0 , 6.0 ],
934
+ 'C' : ['bar' , 'foobarbaz' ],
935
+ 'D' : [Timestamp ('2013-01-01' ), NaT ]},
936
+ index = ['min' , 'sum' ])
935
937
# sorted index
936
938
with tm .assert_produces_warning (None ):
937
939
result = mdf .agg (['min' , 'sum' ])
@@ -949,7 +951,7 @@ def test_agg_multiple_mixed_no_warning(self):
949
951
950
952
def test_agg_dict_nested_renaming_depr (self ):
951
953
952
- df = pd . DataFrame ({'A' : range (5 ), 'B' : 5 })
954
+ df = DataFrame ({'A' : range (5 ), 'B' : 5 })
953
955
954
956
# nested renaming
955
957
with tm .assert_produces_warning (FutureWarning , check_stacklevel = False ):
@@ -961,10 +963,10 @@ def test_agg_reduce(self, axis, float_frame):
961
963
name1 , name2 = float_frame .axes [other_axis ].unique ()[:2 ].sort_values ()
962
964
963
965
# all reducers
964
- expected = pd . concat ([float_frame .mean (axis = axis ),
965
- float_frame .max (axis = axis ),
966
- float_frame .sum (axis = axis ),
967
- ], axis = 1 )
966
+ expected = concat ([float_frame .mean (axis = axis ),
967
+ float_frame .max (axis = axis ),
968
+ float_frame .sum (axis = axis ),
969
+ ], axis = 1 )
968
970
expected .columns = ['mean' , 'max' , 'sum' ]
969
971
expected = expected .T if axis in {0 , 'index' } else expected
970
972
@@ -1010,15 +1012,15 @@ def test_nuiscance_columns(self):
1010
1012
df = DataFrame ({'A' : [1 , 2 , 3 ],
1011
1013
'B' : [1. , 2. , 3. ],
1012
1014
'C' : ['foo' , 'bar' , 'baz' ],
1013
- 'D' : pd . date_range ('20130101' , periods = 3 )})
1015
+ 'D' : date_range ('20130101' , periods = 3 )})
1014
1016
1015
1017
result = df .agg ('min' )
1016
- expected = Series ([1 , 1. , 'bar' , pd . Timestamp ('20130101' )],
1018
+ expected = Series ([1 , 1. , 'bar' , Timestamp ('20130101' )],
1017
1019
index = df .columns )
1018
1020
assert_series_equal (result , expected )
1019
1021
1020
1022
result = df .agg (['min' ])
1021
- expected = DataFrame ([[1 , 1. , 'bar' , pd . Timestamp ('20130101' )]],
1023
+ expected = DataFrame ([[1 , 1. , 'bar' , Timestamp ('20130101' )]],
1022
1024
index = ['min' ], columns = df .columns )
1023
1025
assert_frame_equal (result , expected )
1024
1026
@@ -1058,9 +1060,9 @@ def test_non_callable_aggregates(self):
1058
1060
result2 = df .agg ({'A' : ['count' , 'size' ],
1059
1061
'B' : ['count' , 'size' ],
1060
1062
'C' : ['count' , 'size' ]})
1061
- expected = pd . DataFrame ({'A' : {'count' : 2 , 'size' : 3 },
1062
- 'B' : {'count' : 2 , 'size' : 3 },
1063
- 'C' : {'count' : 2 , 'size' : 3 }})
1063
+ expected = DataFrame ({'A' : {'count' : 2 , 'size' : 3 },
1064
+ 'B' : {'count' : 2 , 'size' : 3 },
1065
+ 'C' : {'count' : 2 , 'size' : 3 }})
1064
1066
1065
1067
assert_frame_equal (result1 , result2 , check_like = True )
1066
1068
assert_frame_equal (result2 , expected , check_like = True )
@@ -1152,7 +1154,7 @@ def indices(draw, max_length=5):
1152
1154
periods = draw (integers (0 , max_length ))
1153
1155
freq = draw (sampled_from (list ("BDHTS" )))
1154
1156
dr = date_range (date , periods = periods , freq = freq )
1155
- return pd . DatetimeIndex (list (dr ))
1157
+ return DatetimeIndex (list (dr ))
1156
1158
1157
1159
@given (index = indices (5 ), num_columns = integers (0 , 5 ))
1158
1160
def test_frequency_is_original (self , index , num_columns ):
0 commit comments