@@ -1811,6 +1811,28 @@ def to_numpy(
1811
1811
1812
1812
return result
1813
1813
1814
+ def _create_data_for_split_and_tight_to_dict (
1815
+ self , are_all_object_dtype_cols : bool , object_dtype_indices : list [int ]
1816
+ ) -> list :
1817
+ """
1818
+ Simple helper method to create the main output data for
1819
+ ``to_dict(orient="split")`` and ``to_dict(orient="tight")``.
1820
+ """
1821
+ if are_all_object_dtype_cols :
1822
+ data = [
1823
+ list (map (maybe_box_native , t ))
1824
+ for t in self .itertuples (index = False , name = None )
1825
+ ]
1826
+ else :
1827
+ data = [list (t ) for t in self .itertuples (index = False , name = None )]
1828
+ if object_dtype_indices :
1829
+ # If we have object_dtype_cols, apply maybe_box_native after list
1830
+ # comprehension for perf
1831
+ for row in data :
1832
+ for i in object_dtype_indices :
1833
+ row [i ] = maybe_box_native (row [i ])
1834
+ return data
1835
+
1814
1836
@overload
1815
1837
def to_dict (
1816
1838
self ,
@@ -1950,30 +1972,50 @@ def to_dict(
1950
1972
"'index=False' is only valid when 'orient' is 'split' or 'tight'"
1951
1973
)
1952
1974
1975
+ if orient == "series" :
1976
+ # GH46470 Return quickly if orient series to avoid creating dtype objects
1977
+ return into_c ((k , v ) for k , v in self .items ())
1978
+
1979
+ object_dtype_indices = [
1980
+ i
1981
+ for i , col_dtype in enumerate (self .dtypes .values )
1982
+ if is_object_dtype (col_dtype )
1983
+ ]
1984
+ are_all_object_dtype_cols = len (object_dtype_indices ) == len (self .dtypes )
1985
+
1953
1986
if orient == "dict" :
1954
1987
return into_c ((k , v .to_dict (into )) for k , v in self .items ())
1955
1988
1956
1989
elif orient == "list" :
1990
+ object_dtype_indices_as_set = set (object_dtype_indices )
1957
1991
return into_c (
1958
- (k , list (map (maybe_box_native , v .tolist ()))) for k , v in self .items ()
1992
+ (
1993
+ k ,
1994
+ list (map (maybe_box_native , v .tolist ()))
1995
+ if i in object_dtype_indices_as_set
1996
+ else v .tolist (),
1997
+ )
1998
+ for i , (k , v ) in enumerate (self .items ())
1959
1999
)
1960
2000
1961
2001
elif orient == "split" :
2002
+ data = self ._create_data_for_split_and_tight_to_dict (
2003
+ are_all_object_dtype_cols , object_dtype_indices
2004
+ )
2005
+
1962
2006
return into_c (
1963
2007
((("index" , self .index .tolist ()),) if index else ())
1964
2008
+ (
1965
2009
("columns" , self .columns .tolist ()),
1966
- (
1967
- "data" ,
1968
- [
1969
- list (map (maybe_box_native , t ))
1970
- for t in self .itertuples (index = False , name = None )
1971
- ],
1972
- ),
2010
+ ("data" , data ),
1973
2011
)
1974
2012
)
1975
2013
1976
2014
elif orient == "tight" :
2015
+ data = self ._create_data_for_split_and_tight_to_dict (
2016
+ are_all_object_dtype_cols , object_dtype_indices
2017
+ )
2018
+
1977
2019
return into_c (
1978
2020
((("index" , self .index .tolist ()),) if index else ())
1979
2021
+ (
@@ -1990,26 +2032,65 @@ def to_dict(
1990
2032
+ (("column_names" , list (self .columns .names )),)
1991
2033
)
1992
2034
1993
- elif orient == "series" :
1994
- return into_c ((k , v ) for k , v in self .items ())
1995
-
1996
2035
elif orient == "records" :
1997
2036
columns = self .columns .tolist ()
1998
- rows = (
1999
- dict (zip (columns , row ))
2000
- for row in self .itertuples (index = False , name = None )
2001
- )
2002
- return [
2003
- into_c ((k , maybe_box_native (v )) for k , v in row .items ()) for row in rows
2004
- ]
2037
+ if are_all_object_dtype_cols :
2038
+ rows = (
2039
+ dict (zip (columns , row ))
2040
+ for row in self .itertuples (index = False , name = None )
2041
+ )
2042
+ return [
2043
+ into_c ((k , maybe_box_native (v )) for k , v in row .items ())
2044
+ for row in rows
2045
+ ]
2046
+ else :
2047
+ data = [
2048
+ into_c (zip (columns , t ))
2049
+ for t in self .itertuples (index = False , name = None )
2050
+ ]
2051
+ if object_dtype_indices :
2052
+ object_dtype_indices_as_set = set (object_dtype_indices )
2053
+ object_dtype_cols = {
2054
+ col
2055
+ for i , col in enumerate (self .columns )
2056
+ if i in object_dtype_indices_as_set
2057
+ }
2058
+ for row in data :
2059
+ for col in object_dtype_cols :
2060
+ row [col ] = maybe_box_native (row [col ])
2061
+ return data
2005
2062
2006
2063
elif orient == "index" :
2007
2064
if not self .index .is_unique :
2008
2065
raise ValueError ("DataFrame index must be unique for orient='index'." )
2009
- return into_c (
2010
- (t [0 ], dict (zip (self .columns , map (maybe_box_native , t [1 :]))))
2011
- for t in self .itertuples (name = None )
2012
- )
2066
+ columns = self .columns .tolist ()
2067
+ if are_all_object_dtype_cols :
2068
+ return into_c (
2069
+ (t [0 ], dict (zip (self .columns , map (maybe_box_native , t [1 :]))))
2070
+ for t in self .itertuples (name = None )
2071
+ )
2072
+ elif object_dtype_indices :
2073
+ object_dtype_indices_as_set = set (object_dtype_indices )
2074
+ is_object_dtype_by_index = [
2075
+ i in object_dtype_indices_as_set for i in range (len (self .columns ))
2076
+ ]
2077
+ return into_c (
2078
+ (
2079
+ t [0 ],
2080
+ {
2081
+ columns [i ]: maybe_box_native (v )
2082
+ if is_object_dtype_by_index [i ]
2083
+ else v
2084
+ for i , v in enumerate (t [1 :])
2085
+ },
2086
+ )
2087
+ for t in self .itertuples (name = None )
2088
+ )
2089
+ else :
2090
+ return into_c (
2091
+ (t [0 ], dict (zip (self .columns , t [1 :])))
2092
+ for t in self .itertuples (name = None )
2093
+ )
2013
2094
2014
2095
else :
2015
2096
raise ValueError (f"orient '{ orient } ' not understood" )
0 commit comments