diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 6147d6d9c1658..77caaa87d9b98 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2526,3 +2526,26 @@ def fast_multiget(dict mapping, ndarray keys, default=np.nan):
             output[i] = default
 
     return maybe_convert_objects(output)
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def from_nested_dict(dict data) -> dict:
+    """
+    Swap the two levels of a nested mapping.
+
+    Turns ``{index: {column: value}}`` into ``{column: {index: value}}``.
+    Each value of ``data`` only needs an ``items()`` method.
+    """
+    cdef:
+        object index, column, value, dict_or_series
+        dict new_data = {}
+
+    for index, dict_or_series in data.items():
+        for column, value in dict_or_series.items():
+            if column in new_data:
+                new_data[column][index] = value
+            else:
+                new_data[column] = {index: value}
+
+    return new_data
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d19f1a263f71a..3add94c891a1e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1265,8 +1265,14 @@ def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFra
         if orient == "index":
             if len(data) > 0:
                 # TODO speed up Series case
-                if isinstance(list(data.values())[0], (Series, dict)):
-                    data = _from_nested_dict(data)
+                first_val = next(iter(data.values()), None)
+                if isinstance(first_val, (Series, dict)):
+                    # lib.from_nested_dict takes a builtin dict; anything else
+                    # (`collections.defaultdict` for example) is converted to
+                    # a regular dict first so Cython will not raise.
+                    if type(data) is not dict:
+                        data = dict(data)
+                    data = lib.from_nested_dict(data)
                 else:
                     data, index = list(data.values()), list(data.keys())
         elif orient == "columns":
@@ -8817,12 +8823,3 @@ def isin(self, values) -> "DataFrame":
 
 ops.add_flex_arithmetic_methods(DataFrame)
 ops.add_special_arithmetic_methods(DataFrame)
-
-
-def _from_nested_dict(data):
-    # TODO: this should be seriously cythonized
-    new_data = collections.defaultdict(dict)
-    for index, s in data.items():
-        for col, v in s.items():
-            new_data[col][index] = v
-    return new_data