12
12
import numpy .random as nrandom
13
13
import pandas
14
14
from pandas .testing import assert_frame_equal
15
- from pandas .io .json import json_normalize
15
+ try :
16
+ from pandas import json_normalize
17
+ except ImportError :
18
+ from pandas .io .json import json_normalize
16
19
from .dataframe_split import sklearn_train_test_split , sklearn_train_test_split_streaming
17
20
from .dataframe_io_helpers import enumerate_json_items , JsonIterator2Stream
18
21
@@ -609,6 +612,22 @@ def reservoir_iterate(sdf, indices, chunksize):
609
612
return StreamingDataFrame (
610
613
lambda : reservoir_iterate (sdf = self , indices = indices , chunksize = 1000 ))
611
614
615
def drop(self, labels=None, *, axis=0, index=None, columns=None, level=None,
         inplace=False, errors='raise') -> 'StreamingDataFrame':
    """
    Applies :epkg:`pandas:DataFrame:drop` on every chunk.
    This function returns a @see cl StreamingDataFrame.
    The drop is lazy: it is applied chunk by chunk every time
    the returned container is iterated.

    :param labels: see :epkg:`pandas:DataFrame:drop`
    :param axis: see :epkg:`pandas:DataFrame:drop`,
        ``axis=0`` is only accepted when the columns to remove
        are given through *columns* alone
    :param index: see :epkg:`pandas:DataFrame:drop`
    :param columns: see :epkg:`pandas:DataFrame:drop`
    :param level: see :epkg:`pandas:DataFrame:drop`
    :param inplace: must be False, chunks cannot be modified inplace
    :param errors: see :epkg:`pandas:DataFrame:drop`
    :return: @see cl StreamingDataFrame
    :raises NotImplementedError: if *axis* is 0 and the call is not
        a pure *columns* request, or if *inplace* is True
    """
    # pandas ignores *axis* when only *columns* is specified, so
    # ``drop(columns=[...])`` with the default axis is a column drop
    # and must not be rejected by the axis check.
    columns_only = (
        labels is None and index is None and columns is not None)
    if axis == 0 and not columns_only:
        raise NotImplementedError(f"drop is not implemented for axis={axis}.")
    if inplace:
        raise NotImplementedError(f"drop is not implemented for inplace={inplace}.")

    def drop_chunk(df):
        # inplace is forced to False so that every chunk is returned.
        return df.drop(
            labels, axis=axis, index=index, columns=columns,
            level=level, inplace=False, errors=errors)

    return StreamingDataFrame(
        lambda: map(drop_chunk, self), **self.get_kwargs())
630
+
612
631
def apply (self , * args , ** kwargs ) -> 'StreamingDataFrame' :
613
632
"""
614
633
Applies :epkg:`pandas:DataFrame:apply`.
@@ -1078,8 +1097,7 @@ def iterate_na(self, **kwargs):
1078
1097
return StreamingDataFrame (
1079
1098
lambda : iterate_na (self , ** kwargs ), ** self .get_kwargs ())
1080
1099
1081
- def describe (self , percentiles = None , include = None , exclude = None ,
1082
- datetime_is_numeric = False ):
1100
+ def describe (self , percentiles = None , include = None , exclude = None ):
1083
1101
"""
1084
1102
Calls :epkg:`pandas:DataFrame:describe` on every piece
1085
1103
of the datasets. *percentiles* are not really accurate
@@ -1088,16 +1106,19 @@ def describe(self, percentiles=None, include=None, exclude=None,
1088
1106
:param percentiles: see :epkg:`pandas:DataFrame:describe`
1089
1107
:param include: see :epkg:`pandas:DataFrame:describe`
1090
1108
:param exclude: see :epkg:`pandas:DataFrame:describe`
1091
- :param datetime_is_numeric: see :epkg:`pandas:DataFrame:describe`
1092
1109
:return: :epkg:`pandas:DataFrame:describe`
1110
+
1111
+ .. versionchanged:: 0.3.219
1112
+
1113
+ Parameter *datetime_is_numeric* was removed
1114
+ (see :epkg:`pandas:DataFrame:describe`).
1093
1115
"""
1094
1116
merged = None
1095
1117
stack = []
1096
1118
notper = ['count' , 'mean' , 'std' ]
1097
1119
for df in self :
1098
1120
desc = df .describe (
1099
- percentiles = percentiles , include = include , exclude = exclude ,
1100
- datetime_is_numeric = datetime_is_numeric )
1121
+ percentiles = percentiles , include = include , exclude = exclude )
1101
1122
count = desc .loc ['count' , :]
1102
1123
rows = [name for name in desc .index if name not in notper ]
1103
1124
stack .append (desc .loc [rows , :])
@@ -1120,8 +1141,7 @@ def describe(self, percentiles=None, include=None, exclude=None,
1120
1141
merged .loc ['std' , :] / merged .loc ['count' , :] -
1121
1142
merged .loc ['mean' , :] ** 2 ) ** 0.5
1122
1143
values = pandas .concat (stack )
1123
- summary = values .describe (percentiles = percentiles ,
1124
- datetime_is_numeric = datetime_is_numeric )
1144
+ summary = values .describe (percentiles = percentiles )
1125
1145
merged = merged .loc [notper , :]
1126
1146
rows = [name for name in summary .index if name not in notper ]
1127
1147
summary = summary .loc [rows , :]
0 commit comments