@@ -64,6 +64,7 @@ def arrays_to_mgr(
     columns,
     dtype: Optional[DtypeObj] = None,
     verify_integrity: bool = True,
+    copy: bool = False,
 ):
     """
     Segregate Series based on type and coerce into matrices.
@@ -80,7 +81,7 @@ def arrays_to_mgr(
         index = ensure_index(index)
 
         # don't force copy because getting jammed in an ndarray anyway
-        arrays = _homogenize(arrays, index, dtype)
+        arrays = _homogenize(arrays, index, dtype, copy=copy)
 
         columns = ensure_index(columns)
     else:
@@ -234,7 +235,9 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
     return create_block_manager_from_blocks(block_values, [columns, index])
 
 
-def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
+def init_dict(
+    data: Dict, index, columns, dtype: Optional[DtypeObj] = None, copy: bool = False
+):
     """
     Segregate Series based on type and coerce into matrices.
     Needs to handle a lot of exceptional cases.
@@ -272,6 +275,7 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
         keys = list(data.keys())
         columns = data_names = Index(keys)
         arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
+        # breakpoint()
         # GH#24096 need copy to be deep for datetime64tz case
         # TODO: See if we can avoid these copies
         arrays = [
@@ -280,7 +284,7 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
         arrays = [
             arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
         ]
-    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
+    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, copy=copy)
 
 
 # ---------------------------------------------------------------------
@@ -326,14 +330,16 @@ def convert(v):
     return values
 
 
-def _homogenize(data, index, dtype: Optional[DtypeObj]):
+def _homogenize(data, index, dtype: Optional[DtypeObj], copy: bool = False):
     oindex = None
     homogenized = []
 
     for val in data:
         if isinstance(val, ABCSeries):
             if dtype is not None:
-                val = val.astype(dtype)
+                val = val.astype(dtype, copy=copy)
+            elif copy:
+                val = val.copy()
             if val.index is not index:
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
@@ -349,7 +355,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]):
                 val = dict(val)
                 val = lib.fast_multiget(val, oindex._values, default=np.nan)
             val = sanitize_array(
-                val, index, dtype=dtype, copy=False, raise_cast_failure=False
+                val, index, dtype=dtype, copy=copy, raise_cast_failure=False
             )
 
         homogenized.append(val)
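The diff only threads a `copy` keyword through the internal helpers (`arrays_to_mgr`, `init_dict`, `_homogenize`); it does not show how the flag is wired up from the public constructor. Below is a minimal sketch, not part of this change, that assumes `DataFrame.__init__` forwards its existing `copy` argument down to `init_dict` for dict-of-Series input, and uses `numpy.shares_memory` to observe whether the source data was duplicated.

# Sketch only; assumes DataFrame(dict_of_series, copy=...) reaches init_dict.
import numpy as np
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0])

# With copy=True, the constructed column should not share memory with `s`.
df_copy = pd.DataFrame({"a": s}, copy=True)
print(np.shares_memory(df_copy["a"].to_numpy(), s.to_numpy()))  # expected: False

# With copy=False, no defensive copy is forced, but the values may still be
# copied while being consolidated into blocks ("getting jammed in an ndarray
# anyway"), so memory sharing is possible rather than guaranteed.
df_nocopy = pd.DataFrame({"a": s}, copy=False)
print(np.shares_memory(df_nocopy["a"].to_numpy(), s.to_numpy()))

In `_homogenize`, the same intent shows up as `val.astype(dtype, copy=copy)` when a target dtype is given (avoiding a cast copy when the dtype already matches and `copy=False`), and an explicit `val.copy()` only when `copy=True` and no cast is requested.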