|
31 | 31 | is_list_like,
|
32 | 32 | is_named_tuple,
|
33 | 33 | is_object_dtype,
|
| 34 | + is_scalar, |
34 | 35 | )
|
35 | 36 | from pandas.core.dtypes.dtypes import ExtensionDtype
|
36 | 37 | from pandas.core.dtypes.generic import (
|
37 | 38 | ABCDataFrame,
|
38 | 39 | ABCSeries,
|
39 | 40 | )
|
| 41 | +from pandas.core.dtypes.missing import isna |
40 | 42 |
|
41 | 43 | from pandas.core import (
|
42 | 44 | algorithms,
|
@@ -354,45 +356,48 @@ def dict_to_mgr(
|
354 | 356 |
|
355 | 357 | Used in DataFrame.__init__
|
356 | 358 | """
|
357 |
| - arrays: Sequence[Any] | Series |
| 359 | + arrays: Sequence[Any] |
358 | 360 |
|
359 | 361 | if columns is not None:
|
360 |
| - from pandas.core.series import Series |
| 362 | + columns = ensure_index(columns) |
| 363 | + arrays = [np.nan] * len(columns) |
| 364 | + midxs = set() |
| 365 | + data_keys = ensure_index(data.keys()) # type: ignore[arg-type] |
| 366 | + data_values = list(data.values()) |
| 367 | + |
| 368 | + for i, column in enumerate(columns): |
| 369 | + try: |
| 370 | + idx = data_keys.get_loc(column) |
| 371 | + except KeyError: |
| 372 | + midxs.add(i) |
| 373 | + continue |
| 374 | + array = data_values[idx] |
| 375 | + arrays[i] = array |
| 376 | + if is_scalar(array) and isna(array): |
| 377 | + midxs.add(i) |
361 | 378 |
|
362 |
| - arrays = Series(data, index=columns, dtype=object) |
363 |
| - missing = arrays.isna() |
364 | 379 | if index is None:
|
365 | 380 | # GH10856
|
366 | 381 | # raise ValueError if only scalars in dict
|
367 |
| - index = _extract_index(arrays[~missing]) |
| 382 | + if midxs: |
| 383 | + index = _extract_index( |
| 384 | + [array for i, array in enumerate(arrays) if i not in midxs] |
| 385 | + ) |
| 386 | + else: |
| 387 | + index = _extract_index(arrays) |
368 | 388 | else:
|
369 | 389 | index = ensure_index(index)
|
370 | 390 |
|
371 | 391 | # no obvious "empty" int column
|
372 |
| - if missing.any() and not is_integer_dtype(dtype): |
373 |
| - nan_dtype: DtypeObj |
374 |
| - |
375 |
| - if dtype is not None: |
376 |
| - # calling sanitize_array ensures we don't mix-and-match |
377 |
| - # NA dtypes |
378 |
| - midxs = missing.values.nonzero()[0] |
379 |
| - for i in midxs: |
380 |
| - arr = sanitize_array(arrays.iat[i], index, dtype=dtype) |
381 |
| - arrays.iat[i] = arr |
382 |
| - else: |
383 |
| - # GH#1783 |
384 |
| - nan_dtype = np.dtype("object") |
385 |
| - val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) |
386 |
| - nmissing = missing.sum() |
387 |
| - if copy: |
388 |
| - rhs = [val] * nmissing |
389 |
| - else: |
390 |
| - # GH#45369 |
391 |
| - rhs = [val.copy() for _ in range(nmissing)] |
392 |
| - arrays.loc[missing] = rhs |
393 |
| - |
394 |
| - arrays = list(arrays) |
395 |
| - columns = ensure_index(columns) |
| 392 | + if midxs and not is_integer_dtype(dtype): |
| 393 | + # GH#1783 |
| 394 | + for i in midxs: |
| 395 | + arr = construct_1d_arraylike_from_scalar( |
| 396 | + arrays[i], |
| 397 | + len(index), |
| 398 | + dtype if dtype is not None else np.dtype("object"), |
| 399 | + ) |
| 400 | + arrays[i] = arr |
396 | 401 |
|
397 | 402 | else:
|
398 | 403 | keys = list(data.keys())
|
|
0 commit comments