Skip to content

Commit 2f993c2

Browse files
authored
REF: Move to_dict implementation from frame.py to core.methods.to_dict (#50253)
1 parent 22491dc commit 2f993c2

File tree

2 files changed

+204
-140
lines changed

2 files changed

+204
-140
lines changed

pandas/core/frame.py

+2-140
Original file line numberDiff line numberDiff line change
@@ -1958,147 +1958,9 @@ def to_dict(
19581958
[defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
19591959
defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]
19601960
"""
1961-
if not self.columns.is_unique:
1962-
warnings.warn(
1963-
"DataFrame columns are not unique, some columns will be omitted.",
1964-
UserWarning,
1965-
stacklevel=find_stack_level(),
1966-
)
1967-
# GH16122
1968-
into_c = com.standardize_mapping(into)
1969-
1970-
# error: Incompatible types in assignment (expression has type "str",
1971-
# variable has type "Literal['dict', 'list', 'series', 'split', 'tight',
1972-
# 'records', 'index']")
1973-
orient = orient.lower() # type: ignore[assignment]
1974-
1975-
if not index and orient not in ["split", "tight"]:
1976-
raise ValueError(
1977-
"'index=False' is only valid when 'orient' is 'split' or 'tight'"
1978-
)
1979-
1980-
if orient == "series":
1981-
# GH46470 Return quickly if orient series to avoid creating dtype objects
1982-
return into_c((k, v) for k, v in self.items())
1983-
1984-
object_dtype_indices = [
1985-
i
1986-
for i, col_dtype in enumerate(self.dtypes.values)
1987-
if is_object_dtype(col_dtype)
1988-
]
1989-
are_all_object_dtype_cols = len(object_dtype_indices) == len(self.dtypes)
1990-
1991-
if orient == "dict":
1992-
return into_c((k, v.to_dict(into)) for k, v in self.items())
1993-
1994-
elif orient == "list":
1995-
object_dtype_indices_as_set = set(object_dtype_indices)
1996-
return into_c(
1997-
(
1998-
k,
1999-
list(map(maybe_box_native, v.tolist()))
2000-
if i in object_dtype_indices_as_set
2001-
else v.tolist(),
2002-
)
2003-
for i, (k, v) in enumerate(self.items())
2004-
)
2005-
2006-
elif orient == "split":
2007-
data = self._create_data_for_split_and_tight_to_dict(
2008-
are_all_object_dtype_cols, object_dtype_indices
2009-
)
2010-
2011-
return into_c(
2012-
((("index", self.index.tolist()),) if index else ())
2013-
+ (
2014-
("columns", self.columns.tolist()),
2015-
("data", data),
2016-
)
2017-
)
2018-
2019-
elif orient == "tight":
2020-
data = self._create_data_for_split_and_tight_to_dict(
2021-
are_all_object_dtype_cols, object_dtype_indices
2022-
)
2023-
2024-
return into_c(
2025-
((("index", self.index.tolist()),) if index else ())
2026-
+ (
2027-
("columns", self.columns.tolist()),
2028-
(
2029-
"data",
2030-
[
2031-
list(map(maybe_box_native, t))
2032-
for t in self.itertuples(index=False, name=None)
2033-
],
2034-
),
2035-
)
2036-
+ ((("index_names", list(self.index.names)),) if index else ())
2037-
+ (("column_names", list(self.columns.names)),)
2038-
)
1961+
from pandas.core.methods.to_dict import to_dict
20391962

2040-
elif orient == "records":
2041-
columns = self.columns.tolist()
2042-
if are_all_object_dtype_cols:
2043-
rows = (
2044-
dict(zip(columns, row))
2045-
for row in self.itertuples(index=False, name=None)
2046-
)
2047-
return [
2048-
into_c((k, maybe_box_native(v)) for k, v in row.items())
2049-
for row in rows
2050-
]
2051-
else:
2052-
data = [
2053-
into_c(zip(columns, t))
2054-
for t in self.itertuples(index=False, name=None)
2055-
]
2056-
if object_dtype_indices:
2057-
object_dtype_indices_as_set = set(object_dtype_indices)
2058-
object_dtype_cols = {
2059-
col
2060-
for i, col in enumerate(self.columns)
2061-
if i in object_dtype_indices_as_set
2062-
}
2063-
for row in data:
2064-
for col in object_dtype_cols:
2065-
row[col] = maybe_box_native(row[col])
2066-
return data
2067-
2068-
elif orient == "index":
2069-
if not self.index.is_unique:
2070-
raise ValueError("DataFrame index must be unique for orient='index'.")
2071-
columns = self.columns.tolist()
2072-
if are_all_object_dtype_cols:
2073-
return into_c(
2074-
(t[0], dict(zip(self.columns, map(maybe_box_native, t[1:]))))
2075-
for t in self.itertuples(name=None)
2076-
)
2077-
elif object_dtype_indices:
2078-
object_dtype_indices_as_set = set(object_dtype_indices)
2079-
is_object_dtype_by_index = [
2080-
i in object_dtype_indices_as_set for i in range(len(self.columns))
2081-
]
2082-
return into_c(
2083-
(
2084-
t[0],
2085-
{
2086-
columns[i]: maybe_box_native(v)
2087-
if is_object_dtype_by_index[i]
2088-
else v
2089-
for i, v in enumerate(t[1:])
2090-
},
2091-
)
2092-
for t in self.itertuples(name=None)
2093-
)
2094-
else:
2095-
return into_c(
2096-
(t[0], dict(zip(self.columns, t[1:])))
2097-
for t in self.itertuples(name=None)
2098-
)
2099-
2100-
else:
2101-
raise ValueError(f"orient '{orient}' not understood")
1963+
return to_dict(self, orient, into, index)
21021964

21031965
def to_gbq(
21041966
self,

pandas/core/methods/to_dict.py

+202
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
from __future__ import annotations
2+
3+
from typing import Literal
4+
import warnings
5+
6+
from pandas.util._exceptions import find_stack_level
7+
8+
from pandas.core.dtypes.cast import maybe_box_native
9+
from pandas.core.dtypes.common import is_object_dtype
10+
11+
from pandas import DataFrame
12+
from pandas.core import common as com
13+
14+
15+
def to_dict(
    df: DataFrame,
    orient: Literal[
        "dict", "list", "series", "split", "tight", "records", "index"
    ] = "dict",
    into: type[dict] = dict,
    index: bool = True,
) -> dict | list[dict]:
    """
    Convert the DataFrame to a dictionary.

    The type of the key-value pairs can be customized with the parameters
    (see below).

    Parameters
    ----------
    df : DataFrame
        The frame to convert.
    orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}
        Determines the type of the values of the dictionary. The value is
        lower-cased before it is matched, so it is case-insensitive.

        - 'dict' (default) : dict like {column -> {index -> value}}
        - 'list' : dict like {column -> [values]}
        - 'series' : dict like {column -> Series(values)}
        - 'split' : dict like
          {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
        - 'tight' : dict like
          {'index' -> [index], 'columns' -> [columns], 'data' -> [values],
          'index_names' -> [index.names], 'column_names' -> [column.names]}
        - 'records' : list like
          [{column -> value}, ... , {column -> value}]
        - 'index' : dict like {index -> {column -> value}}

        .. versionadded:: 1.4.0
            'tight' as an allowed value for the ``orient`` argument

    into : class, default dict
        The collections.abc.Mapping subclass used for all Mappings
        in the return value.  Can be the actual class or an empty
        instance of the mapping type you want.  If you want a
        collections.defaultdict, you must pass it initialized.

    index : bool, default True
        Whether to include the index item (and index_names item if `orient`
        is 'tight') in the returned dictionary. Can only be ``False``
        when `orient` is 'split' or 'tight'.

        .. versionadded:: 1.6.0

    Returns
    -------
    dict, list or collections.abc.Mapping
        Return a collections.abc.Mapping object representing the DataFrame.
        The resulting transformation depends on the `orient` parameter.

    Raises
    ------
    ValueError
        If ``index`` is False while `orient` is neither 'split' nor 'tight',
        if `orient` is 'index' and the DataFrame index is not unique, or if
        `orient` is not one of the recognised values.

    Warns
    -----
    UserWarning
        If the DataFrame columns are not unique.
    """
    if not df.columns.is_unique:
        warnings.warn(
            "DataFrame columns are not unique, some columns will be omitted.",
            UserWarning,
            stacklevel=find_stack_level(),
        )
    # GH16122
    into_c = com.standardize_mapping(into)

    #  error: Incompatible types in assignment (expression has type "str",
    # variable has type "Literal['dict', 'list', 'series', 'split', 'tight',
    # 'records', 'index']")
    orient = orient.lower()  # type: ignore[assignment]

    if not index and orient not in ["split", "tight"]:
        raise ValueError(
            "'index=False' is only valid when 'orient' is 'split' or 'tight'"
        )

    if orient == "series":
        # GH46470 Return quickly if orient series to avoid creating dtype objects
        return into_c((k, v) for k, v in df.items())

    # Positions of the object-dtype columns; only values coming from those
    # columns are passed through maybe_box_native below.
    object_dtype_indices = [
        i for i, col_dtype in enumerate(df.dtypes.values) if is_object_dtype(col_dtype)
    ]
    are_all_object_dtype_cols = len(object_dtype_indices) == len(df.dtypes)

    if orient == "dict":
        # ``into`` is passed by keyword: Series.to_dict accepts it as a
        # keyword on every pandas version, whereas the positional form is
        # not accepted by newer releases.
        return into_c((k, v.to_dict(into=into)) for k, v in df.items())

    elif orient == "list":
        object_dtype_indices_as_set = set(object_dtype_indices)
        return into_c(
            (
                k,
                list(map(maybe_box_native, v.tolist()))
                if i in object_dtype_indices_as_set
                else v.tolist(),
            )
            for i, (k, v) in enumerate(df.items())
        )

    elif orient == "split":
        data = df._create_data_for_split_and_tight_to_dict(
            are_all_object_dtype_cols, object_dtype_indices
        )

        return into_c(
            ((("index", df.index.tolist()),) if index else ())
            + (
                ("columns", df.columns.tolist()),
                ("data", data),
            )
        )

    elif orient == "tight":
        # The rows are boxed directly from itertuples here, so no helper
        # call is needed (a previously computed ``data`` local was unused).
        return into_c(
            ((("index", df.index.tolist()),) if index else ())
            + (
                ("columns", df.columns.tolist()),
                (
                    "data",
                    [
                        list(map(maybe_box_native, t))
                        for t in df.itertuples(index=False, name=None)
                    ],
                ),
            )
            + ((("index_names", list(df.index.names)),) if index else ())
            + (("column_names", list(df.columns.names)),)
        )

    elif orient == "records":
        columns = df.columns.tolist()
        if are_all_object_dtype_cols:
            rows = (
                dict(zip(columns, row)) for row in df.itertuples(index=False, name=None)
            )
            return [
                into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
            ]
        else:
            data = [
                into_c(zip(columns, t)) for t in df.itertuples(index=False, name=None)
            ]
            if object_dtype_indices:
                object_dtype_indices_as_set = set(object_dtype_indices)
                object_dtype_cols = {
                    col
                    for i, col in enumerate(df.columns)
                    if i in object_dtype_indices_as_set
                }
                # Box only the object-dtype columns, in place, per record.
                for row in data:
                    for col in object_dtype_cols:
                        row[col] = maybe_box_native(row[col])
            return data

    elif orient == "index":
        if not df.index.is_unique:
            raise ValueError("DataFrame index must be unique for orient='index'.")
        # Materialise the column labels once instead of re-iterating
        # ``df.columns`` for every row.
        columns = df.columns.tolist()
        if are_all_object_dtype_cols:
            return into_c(
                (t[0], dict(zip(columns, map(maybe_box_native, t[1:]))))
                for t in df.itertuples(name=None)
            )
        elif object_dtype_indices:
            object_dtype_indices_as_set = set(object_dtype_indices)
            is_object_dtype_by_index = [
                i in object_dtype_indices_as_set for i in range(len(df.columns))
            ]
            return into_c(
                (
                    t[0],
                    {
                        columns[i]: maybe_box_native(v)
                        if is_object_dtype_by_index[i]
                        else v
                        for i, v in enumerate(t[1:])
                    },
                )
                for t in df.itertuples(name=None)
            )
        else:
            return into_c(
                (t[0], dict(zip(columns, t[1:]))) for t in df.itertuples(name=None)
            )

    else:
        raise ValueError(f"orient '{orient}' not understood")

0 commit comments

Comments
 (0)