diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index ab242ece98181..510918e1e1698 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -1149,6 +1149,7 @@ Reshaping - Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed ``DataFrame`` is sorted on all levels with the initial level sorted last (:issue:`26053`) - Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) - Bug in :func:`DataFrame.pivot_table` with a :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) +- Bug in :meth:`DataFrame.from_dict` where the key order of an ``OrderedDict`` was ignored when ``orient='index'`` (:issue:`8425`) - Bug in :meth:`DataFrame.transpose` where transposing a DataFrame with a timezone-aware datetime column would incorrectly raise ``ValueError`` (:issue:`26825`) - Bug in :func:`pivot_table` when pivoting a timezone aware column as the ``values`` would remove timezone information (:issue:`14948`) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 4d64be34e624f..b4752039cf5b1 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -9,6 +9,7 @@ from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime +import pandas.compat as compat from pandas.compat import raise_with_traceback from pandas.core.dtypes.cast import ( @@ -338,6 +339,7 @@ def extract_index(data): have_raw_arrays = False have_series = False have_dicts = False + have_ordered = False for val in data: if isinstance(val, ABCSeries): @@ -345,6 +347,8 @@ def extract_index(data): indexes.append(val.index) elif isinstance(val, dict): have_dicts = True + if isinstance(val, OrderedDict): + have_ordered = True indexes.append(list(val.keys())) elif is_list_like(val) and getattr(val, "ndim", 1) == 1: have_raw_arrays = True @@ -353,8 +357,10 @@ def 
extract_index(data): if not indexes and not raw_lengths: raise ValueError("If using all scalar values, you must pass" " an index") - if have_series or have_dicts: + if have_series: index = _union_indexes(indexes) + elif have_dicts: + index = _union_indexes(indexes, sort=not (compat.PY36 or have_ordered)) if have_raw_arrays: lengths = list(set(raw_lengths)) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a16ca7045cfdd..2708b94d6ec0c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -517,7 +517,8 @@ def test_constructor_subclass_dict(self, float_frame): dct.update(v.to_dict()) data[k] = dct frame = DataFrame(data) - tm.assert_frame_equal(float_frame.sort_index(), frame) + expected = frame.reindex(index=float_frame.index) + tm.assert_frame_equal(float_frame, expected) def test_constructor_dict_block(self): expected = np.array([[4.0, 3.0, 2.0, 1.0]]) @@ -1203,7 +1204,7 @@ def test_constructor_list_of_series(self): sdict = OrderedDict(zip(["x", "Unnamed 0"], data)) expected = DataFrame.from_dict(sdict, orient="index") - tm.assert_frame_equal(result.sort_index(), expected) + tm.assert_frame_equal(result, expected) # none named data = [ @@ -1342,7 +1343,7 @@ def test_constructor_list_of_namedtuples(self): def test_constructor_orient(self, float_string_frame): data_dict = float_string_frame.T._series recons = DataFrame.from_dict(data_dict, orient="index") - expected = float_string_frame.sort_index() + expected = float_string_frame.reindex(index=recons.index) tm.assert_frame_equal(recons, expected) # dict of sequence @@ -1351,6 +1352,19 @@ def test_constructor_orient(self, float_string_frame): xp = DataFrame.from_dict(a).T.reindex(list(a.keys())) tm.assert_frame_equal(rs, xp) + def test_constructor_from_ordered_dict(self): + # GH8425 + a = OrderedDict( + [ + ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])), + ("two", OrderedDict([("col_a", "foo2"), ("col_b", 
"bar2")])), + ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])), + ] + ) + expected = DataFrame.from_dict(a, orient="columns").T + result = DataFrame.from_dict(a, orient="index") + tm.assert_frame_equal(result, expected) + def test_from_dict_columns_parameter(self): # GH 18529 # Test new columns parameter for from_dict that was added to make