From 3929945d4a24525b3b0330d18abf6f10ed0f9066 Mon Sep 17 00:00:00 2001 From: mazayo <45595210+mazayo@users.noreply.github.com> Date: Sat, 15 Jun 2019 19:01:35 +0900 Subject: [PATCH 1/6] BUG: from_dict ignored order of OrderedDict (#8425) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/internals/construction.py | 4 +++- pandas/tests/frame/test_constructors.py | 16 +++++++++++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index fd47ca14dc788..82ccacb374ea0 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -633,6 +633,7 @@ Indexing - Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`). - Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`) - Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`) +- Bug in which :meth:`DataFrame.from_dict` ignored order of OrderedDict when orient='index' (:issue:`8425`). Missing diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 863b9f7fb16d7..1ba972c4313f6 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -301,8 +301,10 @@ def extract_index(data): raise ValueError('If using all scalar values, you must pass' ' an index') - if have_series or have_dicts: + if have_series: index = _union_indexes(indexes) + elif have_dicts: + index = _union_indexes(indexes, sort=False) if have_raw_arrays: lengths = list(set(raw_lengths)) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 68017786eb6a6..efa7d66a917f6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -483,7 +483,7 @@ def test_constructor_subclass_dict(self): dct.update(v.to_dict()) data[k] = dct frame = DataFrame(data) - tm.assert_frame_equal(self.frame.sort_index(), frame) + tm.assert_frame_equal(self.frame, frame) def test_constructor_dict_block(self): expected = np.array([[4., 3., 2., 1.]]) @@ -1153,7 +1153,7 @@ def test_constructor_list_of_series(self): sdict = OrderedDict(zip(['x', 'Unnamed 0'], data)) expected = DataFrame.from_dict(sdict, orient='index') - tm.assert_frame_equal(result.sort_index(), expected) + tm.assert_frame_equal(result, expected) # none named data = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]), @@ -1288,7 +1288,7 @@ def test_constructor_list_of_namedtuples(self): def test_constructor_orient(self): data_dict = self.mixed_frame.T._series recons = DataFrame.from_dict(data_dict, orient='index') - expected = self.mixed_frame.sort_index() + expected = self.mixed_frame.reindex(recons.index) tm.assert_frame_equal(recons, expected) # dict of sequence @@ -1298,6 +1298,16 @@ def test_constructor_orient(self): xp = DataFrame.from_dict(a).T.reindex(list(a.keys())) tm.assert_frame_equal(rs, xp) + def test_constructor_from_ordered_dict(self): + # GH8425 + a = OrderedDict([ + ('one', OrderedDict([('col_a', 'foo1'), ('col_b', 'bar1')])), + ('two', OrderedDict([('col_a', 'foo2'), ('col_b', 'bar2')])), + ('three', OrderedDict([('col_a', 'foo3'), ('col_b', 'bar3')]))]) + expected = DataFrame.from_dict(a, orient='columns').T + result = DataFrame.from_dict(a, orient='index') + tm.assert_frame_equal(result, expected) + def test_from_dict_columns_parameter(self): # GH 18529 # Test new columns parameter for from_dict that was added to make From ae19e6c850f13daacdf018a1b06b282ef72d3f67 Mon Sep 17 00:00:00 2001 From: mazayo <45595210+mazayo@users.noreply.github.com> Date: Sat, 22 Jun 2019 06:53:49 +0900 Subject: [PATCH 2/6] Merged upstream/master --- doc/source/whatsnew/v0.25.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 239da8b0fe8a7..f3f0878764119 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -670,8 +670,6 @@ Indexing - Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`). - Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`) - Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`) -- Bug in which :meth:`DataFrame.from_dict` ignored order of OrderedDict when orient='index' (:issue:`8425`). - Missing ^^^^^^^ @@ -768,6 +766,7 @@ Reshaping - Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed ``DataFrame`` is sorted on all levels with the initial level sorted last (:issue:`26053`) - Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) - Bug in :func:`DataFrame.pivot_table` with a :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) +- Bug in which :meth:`DataFrame.from_dict` ignored order of ``OrderedDict`` when ``orient='index'`` (:issue:`8425`). Sparse ^^^^^^ From de4406bf59f1a8e87c35e5d7c8dcedb9806cb9b4 Mon Sep 17 00:00:00 2001 From: mazayo <45595210+mazayo@users.noreply.github.com> Date: Thu, 4 Jul 2019 06:54:05 +0900 Subject: [PATCH 3/6] Changed for compatibility. Python=3.5 needs sort. --- pandas/core/internals/construction.py | 7 ++++++- pandas/tests/frame/test_constructors.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 8cb6c04d9dc3a..0c5d0352e661c 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -10,6 +10,7 @@ from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime from pandas.compat import raise_with_traceback +import pandas.compat as compat from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, construct_1d_ndarray_preserving_na, @@ -284,6 +285,7 @@ def extract_index(data): have_raw_arrays = False have_series = False have_dicts = False + have_ordered = False for val in data: if isinstance(val, ABCSeries): @@ -291,6 +293,8 @@ def extract_index(data): indexes.append(val.index) elif isinstance(val, dict): have_dicts = True + if isinstance(val, OrderedDict): + have_ordered = True indexes.append(list(val.keys())) elif is_list_like(val) and getattr(val, 'ndim', 1) == 1: have_raw_arrays = True @@ -303,7 +307,8 @@ def extract_index(data): if have_series: index = _union_indexes(indexes) elif have_dicts: - index = _union_indexes(indexes, sort=False) + index = _union_indexes(indexes, + sort=not (compat.PY36 or have_ordered)) if have_raw_arrays: lengths = list(set(raw_lengths)) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 663e83184a8d7..98e572d2f229e 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -484,7 +484,7 @@ def test_constructor_subclass_dict(self): dct.update(v.to_dict()) data[k] = dct frame = DataFrame(data) - tm.assert_frame_equal(self.frame, frame) + tm.assert_frame_equal(self.frame, frame.reindex(self.frame.index)) def test_constructor_dict_block(self): expected = np.array([[4., 3., 2., 1.]]) From 50c5467d3d91f8806884c542bf8da7a99bf4723e Mon Sep 17 00:00:00 2001 From: mazayo <45595210+mazayo@users.noreply.github.com> Date: Fri, 5 Jul 2019 07:13:22 +0900 Subject: [PATCH 4/6] Fixed linting error --- pandas/core/internals/construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 97ec178ff58c6..5d7081e34b09c 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -9,8 +9,8 @@ from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime -from pandas.compat import raise_with_traceback import pandas.compat as compat +from pandas.compat import raise_with_traceback from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, From 7bc716b4135f14edc2ae015a94f683b53fadb7fb Mon Sep 17 00:00:00 2001 From: mazayo <45595210+mazayo@users.noreply.github.com> Date: Sat, 6 Jul 2019 02:30:17 +0900 Subject: [PATCH 5/6] Resolved the error found in black --- pandas/core/internals/construction.py | 3 +-- pandas/tests/frame/test_constructors.py | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 5d7081e34b09c..b4752039cf5b1 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -360,8 +360,7 @@ def extract_index(data): if have_series: index = _union_indexes(indexes) elif have_dicts: - index = _union_indexes(indexes, - sort=not (compat.PY36 or have_ordered)) + index = _union_indexes(indexes, sort=not (compat.PY36 or have_ordered)) if have_raw_arrays: lengths = list(set(raw_lengths)) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index eb4c87eddfe7f..c1ab93799a74e 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1201,8 +1201,8 @@ def test_constructor_list_of_series(self): ] result = DataFrame(data2) - sdict = OrderedDict(zip(['x', 'Unnamed 0'], data)) - expected = DataFrame.from_dict(sdict, orient='index') + sdict = OrderedDict(zip(["x", "Unnamed 0"], data)) + expected = DataFrame.from_dict(sdict, orient="index") tm.assert_frame_equal(result, expected) # none named @@ -1353,12 +1353,15 @@ def test_constructor_orient(self, float_string_frame): def test_constructor_from_ordered_dict(self): # GH8425 - a = OrderedDict([ - ('one', OrderedDict([('col_a', 'foo1'), ('col_b', 'bar1')])), - ('two', OrderedDict([('col_a', 'foo2'), ('col_b', 'bar2')])), - ('three', OrderedDict([('col_a', 'foo3'), ('col_b', 'bar3')]))]) - expected = DataFrame.from_dict(a, orient='columns').T - result = DataFrame.from_dict(a, orient='index') + a = OrderedDict( + [ + ("one", OrderedDict([("col_a", "foo1"), ("col_b", "bar1")])), + ("two", OrderedDict([("col_a", "foo2"), ("col_b", "bar2")])), + ("three", OrderedDict([("col_a", "foo3"), ("col_b", "bar3")])), + ] + ) + expected = DataFrame.from_dict(a, orient="columns").T + result = DataFrame.from_dict(a, orient="index") tm.assert_frame_equal(result, expected) def test_from_dict_columns_parameter(self): From 74966cb7552549064d0151dcfdd2024fc47e652a Mon Sep 17 00:00:00 2001 From: mazayo <45595210+mazayo@users.noreply.github.com> Date: Mon, 8 Jul 2019 05:36:25 +0900 Subject: [PATCH 6/6] Fixed according to the review comments --- pandas/tests/frame/test_constructors.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c1ab93799a74e..2708b94d6ec0c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -517,7 +517,8 @@ def test_constructor_subclass_dict(self, float_frame): dct.update(v.to_dict()) data[k] = dct frame = DataFrame(data) - tm.assert_frame_equal(float_frame, frame.reindex(float_frame.index)) + expected = frame.reindex(index=float_frame.index) + tm.assert_frame_equal(float_frame, expected) def test_constructor_dict_block(self): expected = np.array([[4.0, 3.0, 2.0, 1.0]]) @@ -1342,8 +1343,8 @@ def test_constructor_list_of_namedtuples(self): def test_constructor_orient(self, float_string_frame): data_dict = float_string_frame.T._series recons = DataFrame.from_dict(data_dict, orient="index") - expected = float_string_frame - tm.assert_frame_equal(recons, expected.reindex(recons.index)) + expected = float_string_frame.reindex(index=recons.index) + tm.assert_frame_equal(recons, expected) # dict of sequence a = {"hi": [32, 3, 3], "there": [3, 5, 3]}