@@ -797,6 +797,149 @@ def test_set_categories_inplace(self):
797
797
cat .set_categories (['a' , 'b' , 'c' , 'd' ], inplace = True )
798
798
tm .assert_index_equal (cat .categories , pd .Index (['a' , 'b' , 'c' , 'd' ]))
799
799
800
@pytest.mark.parametrize(
    "input1, input2, cat_array",
    [
        (
            np.array([1, 2, 3, 3], dtype=np.dtype('int_')),
            np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('int_')),
            np.array([1, 2, 3, 4, 5], dtype=np.dtype('int_'))
        ),
        (
            np.array([1, 2, 3, 3], dtype=np.dtype('uint')),
            np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('uint')),
            np.array([1, 2, 3, 4, 5], dtype=np.dtype('uint'))
        ),
        (
            # 'float64' rather than the 'float_' alias: the alias was
            # removed in NumPy 2.0, while 'float64' names the same dtype
            # on every NumPy version.
            np.array([1, 2, 3, 3], dtype=np.dtype('float64')),
            np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('float64')),
            np.array([1, 2, 3, 4, 5], dtype=np.dtype('float64'))
        ),
        (
            # 'U' (unicode string) rather than the 'unicode_' alias, which
            # was also removed in NumPy 2.0.
            np.array(
                [1, 2, 3, 3], dtype=np.dtype('U')
            ),
            np.array(
                [1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('U')
            ),
            np.array(
                [1, 2, 3, 4, 5], dtype=np.dtype('U')
            )
        ),
        (
            np.array(
                [
                    '2017-01-01 10:00:00', '2017-02-01 10:00:00',
                    '2017-03-01 10:00:00', '2017-03-01 10:00:00'
                ],
                dtype='datetime64'
            ),
            np.array(
                [
                    '2017-01-01 10:00:00', '2017-02-01 10:00:00',
                    '2017-03-01 10:00:00', '2017-05-01 10:00:00',
                    '2017-03-01 10:00:00', '2017-02-01 10:00:00',
                    '2017-04-01 10:00:00'
                ],
                dtype='datetime64'
            ),
            np.array(
                [
                    '2017-01-01 10:00:00', '2017-02-01 10:00:00',
                    '2017-03-01 10:00:00', '2017-04-01 10:00:00',
                    '2017-05-01 10:00:00'
                ],
                dtype='datetime64'
            )
        ),
        (
            pd.to_timedelta(['1 days', '2 days', '3 days', '3 days'],
                            unit="D"),
            pd.to_timedelta(['1 days', '2 days', '3 days', '5 days',
                             '3 days', '2 days', '4 days'], unit="D"),
            pd.timedelta_range("1 days", periods=5, freq="D")
        )
    ]
)
@pytest.mark.parametrize("is_ordered", [True, False])
def test_drop_duplicates_non_bool(self, input1, input2,
                                  cat_array, is_ordered):
    """Check duplicated()/drop_duplicates() on non-bool categoricals.

    Two categorical Series are built, one from ``input1`` (4 values whose
    last repeats the third) and one from ``input2`` (7 values with two
    repeated entries), both using ``cat_array`` as categories and
    ``is_ordered`` as the ``ordered`` flag.  For each Series and each
    ``keep`` mode (default, ``'last'``, ``False``) the test asserts that:

    * ``duplicated`` returns the expected boolean mask,
    * ``drop_duplicates`` returns the rows where the mask is False,
    * ``drop_duplicates(inplace=True)`` on a copy yields the same rows.
    """
    # Each entry: (raw values, [(keyword arguments, expected mask), ...]).
    # The first check of each group passes no ``keep`` so the default
    # argument path is exercised exactly as a bare call would.
    scenarios = [
        # Test case 1
        (input1, [
            ({}, [False, False, False, True]),
            ({'keep': 'last'}, [False, False, True, False]),
            ({'keep': False}, [False, False, True, True]),
        ]),
        # Test case 2
        (input2, [
            ({},
             [False, False, False, False, True, True, False]),
            ({'keep': 'last'},
             [False, True, True, False, False, False, False]),
            ({'keep': False},
             [False, True, True, False, True, True, False]),
        ]),
    ]

    for values, checks in scenarios:
        ser = Series(Categorical(values, categories=cat_array,
                                 ordered=is_ordered))
        for kwargs, mask in checks:
            expected = Series(mask)
            tm.assert_series_equal(ser.duplicated(**kwargs), expected)
            tm.assert_series_equal(ser.drop_duplicates(**kwargs),
                                   ser[~expected])
            # The in-place variant must agree with the returning variant.
            sc = ser.copy()
            sc.drop_duplicates(inplace=True, **kwargs)
            tm.assert_series_equal(sc, ser[~expected])
+
917
@pytest.mark.parametrize("is_ordered", [True, False])
def test_drop_duplicates_bool(self, is_ordered):
    """Check duplicated()/drop_duplicates() on a boolean categorical.

    The Series holds ``[True, False, True, False]`` with categories
    ``[True, False]``.  For each ``keep`` mode (default, ``'last'``,
    ``False``) the test asserts the mask returned by ``duplicated`` and
    that both the returning and the in-place ``drop_duplicates`` keep
    exactly the rows where that mask is False.
    """
    tc = Series(Categorical([True, False, True, False],
                            categories=[True, False], ordered=is_ordered))

    # (keyword arguments, expected ``duplicated`` mask); the empty dict
    # keeps the first check a bare call with the default ``keep``.
    checks = [
        ({}, [False, False, True, True]),
        ({'keep': 'last'}, [True, True, False, False]),
        ({'keep': False}, [True, True, True, True]),
    ]

    for kwargs, mask in checks:
        expected = Series(mask)
        remaining = tc[~expected]
        tm.assert_series_equal(tc.duplicated(**kwargs), expected)
        tm.assert_series_equal(tc.drop_duplicates(**kwargs), remaining)
        sc = tc.copy()
        sc.drop_duplicates(inplace=True, **kwargs)
        tm.assert_series_equal(sc, tc[~expected])
+
800
943
def test_describe (self ):
801
944
# string type
802
945
desc = self .factor .describe ()
0 commit comments