24
24
from pandas .core import common as com
25
25
26
26
if TYPE_CHECKING :
27
+ from collections .abc import Generator
28
+
27
29
from pandas ._typing import MutableMappingT
28
30
29
31
from pandas import DataFrame
30
32
31
33
34
def create_data_for_split(
    df: DataFrame, are_all_object_dtype_cols: bool, object_dtype_indices: list[int]
) -> Generator[list, None, None]:
    """
    Yield the rows of ``df`` as lists, for use as the main output data of
    ``to_dict(orient="split")``.

    Parameters
    ----------
    df : DataFrame
        Frame whose rows are emitted; the index is not included.
    are_all_object_dtype_cols : bool
        If True, every value of every row is passed through
        ``maybe_box_native``.
    object_dtype_indices : list of int
        Column positions whose values are passed through
        ``maybe_box_native`` when ``are_all_object_dtype_cols`` is False.
        May be empty, in which case rows are emitted without boxing.

    Yields
    ------
    list
        One list per row of ``df``, in row order.
    """
    rows = df.itertuples(index=False, name=None)

    if are_all_object_dtype_cols:
        # Every column needs boxing: map over the whole row in one pass.
        for tup in rows:
            yield list(map(maybe_box_native, tup))
    elif object_dtype_indices:
        # Only some columns need boxing: apply maybe_box_native to just
        # those positions after building the row, for perf.
        for tup in rows:
            data = list(tup)
            for i in object_dtype_indices:
                data[i] = maybe_box_native(data[i])
            yield data
    else:
        # Nothing to box; the emptiness check is loop-invariant, so it is
        # decided once here rather than once per row.
        for tup in rows:
            yield list(tup)
53
+
54
+
32
55
@overload
33
56
def to_dict (
34
57
df : DataFrame ,
@@ -152,35 +175,38 @@ def to_dict(
152
175
# GH46470 Return quickly if orient series to avoid creating dtype objects
153
176
return into_c ((k , v ) for k , v in df .items ())
154
177
178
+ if orient == "dict" :
179
+ return into_c ((k , v .to_dict (into = into )) for k , v in df .items ())
180
+
155
181
box_native_indices = [
156
182
i
157
183
for i , col_dtype in enumerate (df .dtypes .values )
158
184
if col_dtype == np .dtype (object ) or isinstance (col_dtype , ExtensionDtype )
159
185
]
160
- box_na_values = [
161
- lib .no_default if not isinstance (col_dtype , BaseMaskedDtype ) else libmissing .NA
162
- for i , col_dtype in enumerate (df .dtypes .values )
163
- ]
164
- are_all_object_dtype_cols = len (box_native_indices ) == len (df .dtypes )
165
186
166
- if orient == "dict" :
167
- return into_c ((k , v .to_dict (into = into )) for k , v in df .items ())
187
+ are_all_object_dtype_cols = len (box_native_indices ) == len (df .dtypes )
168
188
169
- elif orient == "list" :
189
+ if orient == "list" :
170
190
object_dtype_indices_as_set : set [int ] = set (box_native_indices )
191
+ box_na_values = (
192
+ lib .no_default
193
+ if not isinstance (col_dtype , BaseMaskedDtype )
194
+ else libmissing .NA
195
+ for col_dtype in df .dtypes .values
196
+ )
171
197
return into_c (
172
198
(
173
199
k ,
174
- list (map (maybe_box_native , v .to_numpy (na_value = box_na_values [ i ] )))
200
+ list (map (maybe_box_native , v .to_numpy (na_value = box_na_value )))
175
201
if i in object_dtype_indices_as_set
176
202
else list (map (maybe_box_native , v .to_numpy ())),
177
203
)
178
- for i , (k , v ) in enumerate (df .items ())
204
+ for i , (box_na_value , ( k , v )) in enumerate (zip ( box_na_values , df .items () ))
179
205
)
180
206
181
207
elif orient == "split" :
182
- data = df . _create_data_for_split_and_tight_to_dict (
183
- are_all_object_dtype_cols , box_native_indices
208
+ data = list (
209
+ create_data_for_split ( df , are_all_object_dtype_cols , box_native_indices )
184
210
)
185
211
186
212
return into_c (
@@ -192,10 +218,6 @@ def to_dict(
192
218
)
193
219
194
220
elif orient == "tight" :
195
- data = df ._create_data_for_split_and_tight_to_dict (
196
- are_all_object_dtype_cols , box_native_indices
197
- )
198
-
199
221
return into_c (
200
222
((("index" , df .index .tolist ()),) if index else ())
201
223
+ (
@@ -215,11 +237,9 @@ def to_dict(
215
237
elif orient == "records" :
216
238
columns = df .columns .tolist ()
217
239
if are_all_object_dtype_cols :
218
- rows = (
219
- dict (zip (columns , row )) for row in df .itertuples (index = False , name = None )
220
- )
221
240
return [
222
- into_c ((k , maybe_box_native (v )) for k , v in row .items ()) for row in rows
241
+ into_c (zip (columns , map (maybe_box_native , row )))
242
+ for row in df .itertuples (index = False , name = None )
223
243
]
224
244
else :
225
245
data = [
@@ -235,7 +255,7 @@ def to_dict(
235
255
for row in data :
236
256
for col in object_dtype_cols :
237
257
row [col ] = maybe_box_native (row [col ])
238
- return data
258
+ return data # type: ignore[return-value]
239
259
240
260
elif orient == "index" :
241
261
if not df .index .is_unique :
@@ -248,24 +268,21 @@ def to_dict(
248
268
)
249
269
elif box_native_indices :
250
270
object_dtype_indices_as_set = set (box_native_indices )
251
- is_object_dtype_by_index = [
252
- i in object_dtype_indices_as_set for i in range (len (df .columns ))
253
- ]
254
271
return into_c (
255
272
(
256
273
t [0 ],
257
274
{
258
- columns [ i ] : maybe_box_native (v )
259
- if is_object_dtype_by_index [ i ]
275
+ column : maybe_box_native (v )
276
+ if i in object_dtype_indices_as_set
260
277
else v
261
- for i , v in enumerate (t [1 :])
278
+ for i , ( column , v ) in enumerate (zip ( columns , t [1 :]) )
262
279
},
263
280
)
264
281
for t in df .itertuples (name = None )
265
282
)
266
283
else :
267
284
return into_c (
268
- (t [0 ], dict (zip (df . columns , t [1 :]))) for t in df .itertuples (name = None )
285
+ (t [0 ], dict (zip (columns , t [1 :]))) for t in df .itertuples (name = None )
269
286
)
270
287
271
288
else :
0 commit comments