Skip to content

Commit f1fb4c5

Browse files
committed
BUG: make order of index from pd.concat deterministic
closes pandas-dev#17344
1 parent 36dadd7 commit f1fb4c5

File tree

4 files changed

+25
-7
lines changed

4 files changed

+25
-7
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,7 @@ Reshaping
405405
- Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`)
406406
- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`).
407407
- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`)
408+
- Bug in :func:`concat` where the order of the combined index along the common (non-concatenation) dimension was not deterministic (:issue:`17344`)
408409

409410
Numeric
410411
^^^^^^^

pandas/core/common.py

+14
Original file line numberDiff line numberDiff line change
@@ -629,3 +629,17 @@ def _random_state(state=None):
629629
else:
630630
raise ValueError("random_state must be an integer, a numpy "
631631
"RandomState, or None")
632+
633+
634+
def _get_distinct_objs(objs):
    """
    Return a list with distinct elements of "objs" (different ids).
    Preserves order.

    Distinctness is decided by object identity (``id``), not by equality:
    two separate objects that compare equal are both kept, while repeated
    references to the same object are collapsed to its first occurrence.

    Parameters
    ----------
    objs : iterable
        Objects to de-duplicate.

    Returns
    -------
    list
        The identity-distinct objects, in order of first appearance.
    """
    seen_ids = set()
    res = []
    for obj in objs:
        # Compute the identity once and reuse it for both the membership
        # test and the insertion.
        obj_id = id(obj)
        if obj_id not in seen_ids:
            seen_ids.add(obj_id)
            res.append(obj)
    return res

pandas/core/indexes/api.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@
2323
'PeriodIndex', 'DatetimeIndex',
2424
'_new_Index', 'NaT',
2525
'_ensure_index', '_get_na_value', '_get_combined_index',
26-
'_get_objs_combined_axis',
27-
'_get_distinct_indexes', '_union_indexes',
26+
'_get_objs_combined_axis', '_union_indexes',
2827
'_get_consensus_names',
2928
'_all_indexes_same']
3029

@@ -41,7 +40,7 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0):
4140

4241
def _get_combined_index(indexes, intersect=False):
4342
# TODO: handle index names!
44-
indexes = _get_distinct_indexes(indexes)
43+
indexes = com._get_distinct_objs(indexes)
4544
if len(indexes) == 0:
4645
return Index([])
4746
if len(indexes) == 1:
@@ -55,10 +54,6 @@ def _get_combined_index(indexes, intersect=False):
5554
return _ensure_index(union)
5655

5756

58-
def _get_distinct_indexes(indexes):
59-
return list(dict((id(x), x) for x in indexes).values())
60-
61-
6257
def _union_indexes(indexes):
6358
if len(indexes) == 0:
6459
raise AssertionError('Must have at least 1 Index to union')

pandas/tests/reshape/test_concat.py

+8
Original file line numberDiff line numberDiff line change
@@ -1944,6 +1944,14 @@ def test_concat_categoricalindex(self):
19441944
index=exp_idx)
19451945
tm.assert_frame_equal(result, exp)
19461946

1947+
def test_concat_order(self):
1948+
# GH 17344
1949+
dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])]
1950+
dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
1951+
for i in range(100)]
1952+
result = pd.concat(dfs).columns
1953+
expected = dfs[0].columns
1954+
tm.assert_index_equal(result, expected)
19471955

19481956
@pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel])
19491957
@pytest.mark.parametrize('dt', np.sctypes['float'])

0 commit comments

Comments
 (0)