@@ -1771,6 +1771,139 @@ def to_numpy(
1771
1771
1772
1772
return result
1773
1773
1774
+ def _to_dict_helper (self , orient , into_c , into ):
1775
+ """Helper function to do main work to convert frame into dict based on
1776
+ `orient` and `into`
1777
+
1778
+ As part of GH46470 also takes care in when to use maybe_box_native as this
1779
+ function can perform badly and is not necessary for non object cols
1780
+ """
1781
+ object_dtype_cols = {
1782
+ col for col , dtype in self .dtypes .items () if is_object_dtype (dtype )
1783
+ }
1784
+ are_all_object_dtype_cols = len (object_dtype_cols ) == len (self .dtypes )
1785
+ if orient == "dict" :
1786
+ return into_c ((k , v .to_dict (into )) for k , v in self .items ())
1787
+ elif orient == "list" :
1788
+ return into_c (
1789
+ (
1790
+ k ,
1791
+ list (map (maybe_box_native , v .tolist ()))
1792
+ if k in object_dtype_cols
1793
+ else v .tolist (),
1794
+ )
1795
+ for k , v in self .items ()
1796
+ )
1797
+ elif orient == "split" :
1798
+ if are_all_object_dtype_cols :
1799
+ data = [
1800
+ list (map (maybe_box_native , t ))
1801
+ for t in self .itertuples (index = False , name = None )
1802
+ ]
1803
+ elif object_dtype_cols :
1804
+ is_object_dtype_by_index = [
1805
+ col in object_dtype_cols for col in self .columns
1806
+ ]
1807
+ data = [
1808
+ [
1809
+ maybe_box_native (v ) if is_object_dtype_by_index [i ] else v
1810
+ for i , v in enumerate (t )
1811
+ ]
1812
+ for t in self .itertuples (index = False , name = None )
1813
+ ]
1814
+ else :
1815
+ data = [list (t ) for t in self .itertuples (index = False , name = None )]
1816
+ return into_c (
1817
+ (
1818
+ ("index" , self .index .tolist ()),
1819
+ ("columns" , self .columns .tolist ()),
1820
+ ("data" , data ),
1821
+ )
1822
+ )
1823
+ elif orient == "series" :
1824
+ return into_c ((k , v ) for k , v in self .items ())
1825
+ elif orient == "records" :
1826
+ columns = self .columns .tolist ()
1827
+ if object_dtype_cols :
1828
+ is_object_dtype_by_index = [col in object_dtype_cols for col in columns ]
1829
+ return [
1830
+ into_c (
1831
+ zip (
1832
+ columns ,
1833
+ [
1834
+ maybe_box_native (v )
1835
+ if is_object_dtype_by_index [i ]
1836
+ else v
1837
+ for i , v in enumerate (t )
1838
+ ],
1839
+ )
1840
+ )
1841
+ for t in self .itertuples (index = False , name = None )
1842
+ ]
1843
+ else :
1844
+ return [
1845
+ into_c (zip (columns , t ))
1846
+ for t in self .itertuples (index = False , name = None )
1847
+ ]
1848
+ elif orient == "index" :
1849
+ if not self .index .is_unique :
1850
+ raise ValueError ("DataFrame index must be unique for orient='index'." )
1851
+ columns = self .columns .tolist ()
1852
+ if object_dtype_cols :
1853
+ is_object_dtype_by_index = [
1854
+ col in object_dtype_cols for col in self .columns
1855
+ ]
1856
+ return into_c (
1857
+ (
1858
+ t [0 ],
1859
+ {
1860
+ columns [i ]: maybe_box_native (v )
1861
+ if is_object_dtype_by_index [i ]
1862
+ else v
1863
+ for i , v in enumerate (t [1 :])
1864
+ },
1865
+ )
1866
+ for t in self .itertuples (name = None )
1867
+ )
1868
+ else :
1869
+ return into_c (
1870
+ (
1871
+ t [0 ],
1872
+ {columns [i ]: v for i , v in enumerate (t [1 :])},
1873
+ )
1874
+ for t in self .itertuples (name = None )
1875
+ )
1876
+ elif orient == "tight" :
1877
+ if are_all_object_dtype_cols :
1878
+ data = [
1879
+ list (map (maybe_box_native , t ))
1880
+ for t in self .itertuples (index = False , name = None )
1881
+ ]
1882
+ elif object_dtype_cols :
1883
+ is_object_dtype_by_index = [
1884
+ col in object_dtype_cols for col in self .columns
1885
+ ]
1886
+ data = [
1887
+ [
1888
+ maybe_box_native (v ) if is_object_dtype_by_index [i ] else v
1889
+ for i , v in enumerate (t )
1890
+ ]
1891
+ for t in self .itertuples (index = False , name = None )
1892
+ ]
1893
+ else :
1894
+ data = [list (t ) for t in self .itertuples (index = False , name = None )]
1895
+ return into_c (
1896
+ (
1897
+ ("index" , self .index .tolist ()),
1898
+ ("columns" , self .columns .tolist ()),
1899
+ ("data" , data ),
1900
+ ("index_names" , list (self .index .names )),
1901
+ ("column_names" , list (self .columns .names )),
1902
+ )
1903
+ )
1904
+ else :
1905
+ raise ValueError (f"orient '{ orient } ' not understood" )
1906
+
1774
1907
def to_dict (self , orient : str = "dict" , into = dict ):
1775
1908
"""
1776
1909
Convert the DataFrame to a dictionary.
0 commit comments