diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 284943cf49070..cb3042bb3b79b 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -241,6 +241,7 @@ Reshaping - Bug in :func:`pandas.merge` adds a string of ``None`` if ``None`` is assigned in suffixes instead of remain the column name as-is (:issue:`24782`). - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`) - :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`) +- Bug in :func:`concat` where the order of an ``OrderedDict`` (and of a ``dict`` in Python 3.6+) was not respected when passed as the ``objs`` argument (:issue:`21510`) Sparse diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 683c21f7bd47a..c5f9e52e07ecf 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -822,7 +822,7 @@ def _aggregate_multiple_funcs(self, arg, _level): columns.append(com.get_callable_name(f)) arg = lzip(columns, arg) - results = {} + results = collections.OrderedDict() for name, func in arg: obj = self if name in results: diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 6cc355fb62f23..4ad05f2b52ec5 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -253,7 +253,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, if isinstance(objs, dict): if keys is None: - keys = sorted(objs) + keys = com.dict_keys_to_ordered_list(objs) objs = [objs[k] for k in keys] else: objs = list(objs) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c062fb90ca43b..f80a7300334e4 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1690,9 +1690,9 @@ def test_groupby_agg_ohlc_non_first(): [1, 1, 1, 1, 1], [1, 1, 1, 1, 1] ], columns=pd.MultiIndex.from_tuples(( - ('foo', 'ohlc', 
'open'), ('foo', 'ohlc', 'high'), - ('foo', 'ohlc', 'low'), ('foo', 'ohlc', 'close'), - ('foo', 'sum', 'foo'))), index=pd.date_range( + ('foo', 'sum', 'foo'), ('foo', 'ohlc', 'open'), + ('foo', 'ohlc', 'high'), ('foo', 'ohlc', 'low'), + ('foo', 'ohlc', 'close'))), index=pd.date_range( '2018-01-01', periods=2, freq='D')) result = df.groupby(pd.Grouper(freq='D')).agg(['sum', 'ohlc']) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 9dbc14c23f3f4..ccd50998e39b1 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,4 +1,4 @@ -from collections import deque +from collections import OrderedDict, deque import datetime as dt from datetime import datetime from decimal import Decimal @@ -18,6 +18,7 @@ from pandas import ( Categorical, DataFrame, DatetimeIndex, Index, MultiIndex, Series, Timestamp, concat, date_range, isna, read_csv) +import pandas.core.common as com from pandas.tests.extension.decimal import to_decimal from pandas.util import testing as tm from pandas.util.testing import assert_frame_equal, makeCustomDataframe as mkdf @@ -1162,7 +1163,7 @@ def test_concat_dict(self): 'baz': DataFrame(np.random.randn(4, 3)), 'qux': DataFrame(np.random.randn(4, 3))} - sorted_keys = sorted(frames) + sorted_keys = com.dict_keys_to_ordered_list(frames) result = concat(frames) expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys) @@ -2370,6 +2371,14 @@ def test_concat_different_extension_dtypes_upcasts(self): ], dtype=object) tm.assert_series_equal(result, expected) + def test_concat_odered_dict(self): + # GH 21510 + expected = pd.concat([pd.Series(range(3)), pd.Series(range(4))], + keys=['First', 'Another']) + result = pd.concat(OrderedDict([('First', pd.Series(range(3))), + ('Another', pd.Series(range(4)))])) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame]) @pytest.mark.parametrize('dt', np.sctypes['float'])