Skip to content

Commit ce8ccba

Browse files
toobaz authored and jowens committed
BUG: make order of index from pd.concat deterministic (pandas-dev#17364)
closes pandas-dev#17344
1 parent 5a6f2ac commit ce8ccba

File tree

4 files changed

+29
-8
lines changed

4 files changed

+29
-8
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ Reshaping
407407
- Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`)
408408
- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`).
409409
- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`)
410+
- Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`)
410411

411412
Numeric
412413
^^^^^^^

pandas/core/common.py

+14
Original file line numberDiff line numberDiff line change
@@ -629,3 +629,17 @@ def _random_state(state=None):
629629
else:
630630
raise ValueError("random_state must be an integer, a numpy "
631631
"RandomState, or None")
632+
633+
634+
def _get_distinct_objs(objs):
635+
"""
636+
Return a list with distinct elements of "objs" (different ids).
637+
Preserves order.
638+
"""
639+
ids = set()
640+
res = []
641+
for obj in objs:
642+
if not id(obj) in ids:
643+
ids.add(id(obj))
644+
res.append(obj)
645+
return res

pandas/core/indexes/api.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@
2323
'PeriodIndex', 'DatetimeIndex',
2424
'_new_Index', 'NaT',
2525
'_ensure_index', '_get_na_value', '_get_combined_index',
26-
'_get_objs_combined_axis',
27-
'_get_distinct_indexes', '_union_indexes',
26+
'_get_objs_combined_axis', '_union_indexes',
2827
'_get_consensus_names',
2928
'_all_indexes_same']
3029

@@ -41,7 +40,7 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0):
4140

4241
def _get_combined_index(indexes, intersect=False):
4342
# TODO: handle index names!
44-
indexes = _get_distinct_indexes(indexes)
43+
indexes = com._get_distinct_objs(indexes)
4544
if len(indexes) == 0:
4645
return Index([])
4746
if len(indexes) == 1:
@@ -55,10 +54,6 @@ def _get_combined_index(indexes, intersect=False):
5554
return _ensure_index(union)
5655

5756

58-
def _get_distinct_indexes(indexes):
59-
return list(dict((id(x), x) for x in indexes).values())
60-
61-
6257
def _union_indexes(indexes):
6358
if len(indexes) == 0:
6459
raise AssertionError('Must have at least 1 Index to union')

pandas/tests/reshape/test_concat.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from numpy.random import randn
66

77
from datetime import datetime
8-
from pandas.compat import StringIO, iteritems
8+
from pandas.compat import StringIO, iteritems, PY2
99
import pandas as pd
1010
from pandas import (DataFrame, concat,
1111
read_csv, isna, Series, date_range,
@@ -1944,6 +1944,17 @@ def test_concat_categoricalindex(self):
19441944
index=exp_idx)
19451945
tm.assert_frame_equal(result, exp)
19461946

1947+
def test_concat_order(self):
    # GH 17344
    # The first frame lists its columns in one order; the other hundred
    # frames list the same labels in the reverse order. The labels mix
    # str, int and None.
    first = pd.DataFrame(index=range(3), columns=['a', 1, None])
    others = [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
              for _ in range(100)]
    frames = [first] + others

    result = pd.concat(frames).columns

    # Compare against the first frame's columns, sorted when running
    # under PY2.
    expected = first.columns.sort_values() if PY2 else first.columns
    tm.assert_index_equal(result, expected)
1957+
19471958

19481959
@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
19491960
@pytest.mark.parametrize('dt', np.sctypes['float'])

0 commit comments

Comments
 (0)