From ac881f9485654f72afbcde28a852935a485fee0b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 2 Jun 2021 07:32:00 -0700 Subject: [PATCH] REF: de-duplicate nested-dict handling in DataFrame construction --- pandas/_libs/lib.pyi | 2 -- pandas/_libs/lib.pyx | 22 -------------------- pandas/core/dtypes/cast.py | 16 -------------- pandas/core/internals/construction.py | 19 +++++------------ pandas/tests/dtypes/cast/test_dict_compat.py | 14 ------------- 5 files changed, 5 insertions(+), 68 deletions(-) delete mode 100644 pandas/tests/dtypes/cast/test_dict_compat.py diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 06620c2ad0dca..c1caf474b2020 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -52,8 +52,6 @@ def is_float_array(values: np.ndarray, skipna: bool = False): ... def is_integer_array(values: np.ndarray, skipna: bool = False): ... def is_bool_array(values: np.ndarray, skipna: bool = False): ... -def fast_multiget(mapping: dict, keys: np.ndarray, default=np.nan) -> np.ndarray: ... - def fast_unique_multiple_list_gen(gen: Generator, sort: bool = True) -> list: ... def fast_unique_multiple_list(lists: list, sort: bool = True) -> list: ... def fast_unique_multiple(arrays: list, sort: bool = True) -> list: ... diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4d184ee13e3db..352f50df01dc9 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2841,25 +2841,3 @@ def to_object_array_tuples(rows: object) -> np.ndarray: result[i, j] = row[j] return result - - -@cython.wraparound(False) -@cython.boundscheck(False) -def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> np.ndarray: - cdef: - Py_ssize_t i, n = len(keys) - object val - ndarray[object] output = np.empty(n, dtype='O') - - if n == 0: - # kludge, for Series - return np.empty(0, dtype='f8') - - for i in range(n): - val = keys[i] - if val in mapping: - output[i] = mapping[val] - else: - output[i] = default - - return maybe_convert_objects(output) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5c7211a5d1852..161572f3f1ac3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -780,22 +780,6 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, return dtype, val -def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]: - """ - Convert datetimelike-keyed dicts to a Timestamp-keyed dict. - - Parameters - ---------- - d: dict-like object - - Returns - ------- - dict - - """ - return {maybe_box_datetimelike(key): value for key, value in d.items()} - - def infer_dtype_from_array( arr, pandas_dtype: bool = False ) -> tuple[DtypeObj, ArrayLike]: diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 46eb138dc74d1..270eddf2bd3a5 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -27,7 +27,6 @@ from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, construct_1d_ndarray_preserving_na, - dict_compat, maybe_cast_to_datetime, maybe_convert_platform, maybe_infer_to_datetimelike, @@ -61,6 +60,7 @@ TimedeltaArray, ) from pandas.core.construction import ( + create_series_with_explicit_dtype, ensure_wrapped_if_datetimelike, extract_array, range_to_ndarray, @@ -68,9 +68,7 @@ ) from pandas.core.indexes import base as ibase from pandas.core.indexes.api import ( - DatetimeIndex, Index, - TimedeltaIndex, ensure_index, get_objs_combined_axis, union_indexes, @@ -566,7 +564,6 @@ def convert(v): def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]: - oindex = None homogenized = [] for val in data: @@ -581,16 +578,10 @@ def _homogenize(data, index: Index, dtype: DtypeObj | None) -> list[ArrayLike]: val = val._values else: if isinstance(val, dict): - if oindex is None: - oindex = index.astype("O") - - if isinstance(index, (DatetimeIndex, TimedeltaIndex)): - # see test_constructor_dict_datetime64_index - val = dict_compat(val) - else: - # see test_constructor_subclass_dict - val = dict(val) - val = lib.fast_multiget(val, oindex._values, default=np.nan) + # see test_constructor_subclass_dict + # test_constructor_dict_datetime64_index + val = create_series_with_explicit_dtype(val, index=index)._values + val = sanitize_array( val, index, dtype=dtype, copy=False, raise_cast_failure=False ) diff --git a/pandas/tests/dtypes/cast/test_dict_compat.py b/pandas/tests/dtypes/cast/test_dict_compat.py deleted file mode 100644 index 13dc82d779f95..0000000000000 --- a/pandas/tests/dtypes/cast/test_dict_compat.py +++ /dev/null @@ -1,14 +0,0 @@ -import numpy as np - -from pandas.core.dtypes.cast import dict_compat - -from pandas import Timestamp - - -def test_dict_compat(): - data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2} - data_unchanged = {1: 2, 3: 4, 5: 6} - expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2} - assert dict_compat(data_datetime64) == expected - assert dict_compat(expected) == expected - assert dict_compat(data_unchanged) == data_unchanged