Skip to content

Commit 0eeef4d

Browse files
author
tp
committed
initialization from dicts for py>=3.6 maintains insertion order
1 parent feedf66 commit 0eeef4d

File tree

5 files changed

+81
-8
lines changed

5 files changed

+81
-8
lines changed

doc/source/whatsnew/v0.23.0.txt

+54-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
v0.23.0
44
-------
55

6-
This is a major release from 0.21.1 and includes a number of API changes,
6+
This is a major release from 0.22.0 and includes a number of API changes,
77
deprecations, new features, enhancements, and performance improvements along
88
with a large number of bug fixes. We recommend that all users upgrade to this
99
version.
@@ -240,7 +240,7 @@ The :func:`DataFrame.assign` now accepts dependent keyword arguments for python
240240
using ``.assign()`` to update an existing column. Previously, callables
241241
referring to other variables being updated would get the "old" values
242242

243-
Previous Behaviour:
243+
Previous behaviour:
244244

245245
.. code-block:: ipython
246246

@@ -253,7 +253,7 @@ The :func:`DataFrame.assign` now accepts dependent keyword arguments for python
253253
1 3 -2
254254
2 4 -3
255255

256-
New Behaviour:
256+
New behaviour:
257257

258258
.. ipython:: python
259259

@@ -320,6 +320,57 @@ If installed, we now require:
320320
| openpyxl | 2.4.0 | |
321321
+-----------------+-----------------+----------+
322322

323+
.. _whatsnew_0230.api_breaking.dict_insertion_order:
324+
325+
Creating dataframes and series from dicts preserves dict insertion order for Python 3.6+
326+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
327+
328+
Until Python 3.6, dicts in Python had no formally defined ordering. Python
329+
versions 3.6 and later have changed the ordering definition of dicts, so dicts
330+
in these newer versions are ordered by insertion order
331+
(see also `PEP 468 <https://www.python.org/dev/peps/pep-0468/>`_).
332+
From version 0.23, pandas will use insertion order when creating series or
333+
data frames from dicts (:issue:`19018`).
334+
335+
Previous behaviour (and current behaviour if on Python < 3.6):
336+
337+
.. code-block:: ipython
338+
339+
In [1]: pd.Series({'Income': 2000,
340+
... 'Expenses': -1500,
341+
... 'Taxes': -200,
342+
... 'Net result': 300})
343+
Expenses -1500
344+
Income 2000
345+
Net result 300
346+
Taxes -200
347+
dtype: int64
348+
349+
Note the series above is ordered alphabetically by the index values.
350+
351+
New behaviour (for Python >= 3.6):
352+
353+
.. ipython:: python
354+
355+
pd.Series({'Income': 2000,
356+
'Expenses': -1500,
357+
'Taxes': -200,
358+
'Net result': 300})
359+
360+
Notice that the series is now ordered by insertion order. This new behaviour is
361+
used for all relevant pandas types (``Series``, ``DataFrame``, ``SparseSeries``
362+
and ``SparseDataFrame``).
363+
364+
If you wish to retain the old behaviour while using Python >= 3.6, you can use
365+
``sort_index``:
366+
367+
.. ipython:: python
368+
369+
pd.Series({'Income': 2000,
370+
'Expenses': -1500,
371+
'Taxes': -200,
372+
'Net result': 300}).sort_index()
373+
323374
.. _whatsnew_0230.api_breaking.deprecate_panel:
324375

325376
Deprecate Panel

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ def _init_dict(self, data, index, columns, dtype=None):
460460

461461
else:
462462
keys = list(data.keys())
463-
if not isinstance(data, OrderedDict):
463+
if not PY36 and not isinstance(data, OrderedDict):
464464
keys = com._try_sort(keys)
465465
columns = data_names = Index(keys)
466466
arrays = [data[k] for k in keys]

pandas/core/series.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
from pandas import compat
5555
from pandas.io.formats.terminal import get_terminal_size
5656
from pandas.compat import (
57-
zip, u, OrderedDict, StringIO, range, get_range_parameters)
57+
zip, u, OrderedDict, StringIO, range, get_range_parameters, PY36)
5858
from pandas.compat.numpy import function as nv
5959

6060
import pandas.core.ops as ops
@@ -286,7 +286,7 @@ def _init_dict(self, data, index=None, dtype=None):
286286
# Now we just make sure the order is respected, if any
287287
if index is not None:
288288
s = s.reindex(index, copy=False)
289-
elif not isinstance(data, OrderedDict):
289+
elif not PY36 and not isinstance(data, OrderedDict):
290290
try:
291291
s = s.sort_index()
292292
except TypeError:

pandas/tests/frame/test_constructors.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from pandas.core.dtypes.common import is_integer_dtype
1717
from pandas.compat import (lmap, long, zip, range, lrange, lzip,
18-
OrderedDict, is_platform_little_endian)
18+
OrderedDict, is_platform_little_endian, PY36)
1919
from pandas import compat
2020
from pandas import (DataFrame, Index, Series, isna,
2121
MultiIndex, Timedelta, Timestamp,
@@ -241,6 +241,17 @@ def test_constructor_dict(self):
241241
# Corner cases
242242
assert len(DataFrame({})) == 0
243243

244+
# GH19018
245+
# initialization ordering: by insertion order if python>= 3.6, else
246+
# sorted by key
247+
d = {'b': self.ts2, 'a': self.ts1}
248+
frame = DataFrame(data=d)
249+
if PY36:
250+
expected = DataFrame(data=d, columns=list('ba'))
251+
else:
252+
expected = DataFrame(data=d, columns=list('ab'))
253+
tm.assert_frame_equal(frame, expected)
254+
244255
# mix dict and array, wrong size - no spec for which error should raise
245256
# first
246257
with pytest.raises(ValueError):

pandas/tests/series/test_constructors.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from pandas._libs import lib
2323
from pandas._libs.tslib import iNaT
2424

25-
from pandas.compat import lrange, range, zip, long
25+
from pandas.compat import lrange, range, zip, long, PY36
2626
from pandas.util.testing import assert_series_equal
2727
import pandas.util.testing as tm
2828

@@ -775,6 +775,17 @@ def test_constructor_dict(self):
775775
expected = Series([1, 2, nan, 0], index=['b', 'c', 'd', 'a'])
776776
assert_series_equal(result, expected)
777777

778+
# GH19018
779+
# initialization ordering: by insertion order if python>= 3.6, else
780+
# sorted by key
781+
d = {'b': 1, 'a': 0, 'c': 2}
782+
result = Series(d)
783+
if PY36:
784+
expected = Series([1, 0, 2], index=list('bac'))
785+
else:
786+
expected = Series([0, 1, 2], index=list('abc'))
787+
tm.assert_series_equal(result, expected)
788+
778789
pidx = tm.makePeriodIndex(100)
779790
d = {pidx[0]: 0, pidx[1]: 1}
780791
result = Series(d, index=pidx)

0 commit comments

Comments
 (0)