Skip to content

Commit c21be05

Browse files
authored
BUG: fix union_indexes not supporting sort=False for Index subclasses (#35098)
1 parent c15f080 commit c21be05

File tree

7 files changed

+64
-18
lines changed

7 files changed

+64
-18
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1117,6 +1117,7 @@ Reshaping
11171117
- Fixed bug in :func:`melt` where melting MultiIndex columns with ``col_level`` > 0 would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`)
11181118
- Bug in :meth:`Series.where` with an empty Series and empty ``cond`` having non-bool dtype (:issue:`34592`)
11191119
- Fixed regression where :meth:`DataFrame.apply` would raise ``ValueError`` for elements with ``S`` dtype (:issue:`34529`)
1120+
- Bug in :meth:`DataFrame.append` leading to sorting columns even when ``sort=False`` is specified (:issue:`35092`)
11201121

11211122
Sparse
11221123
^^^^^^

pandas/core/indexes/api.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,13 @@ def conv(i):
214214
return result.union_many(indexes[1:])
215215
else:
216216
for other in indexes[1:]:
217-
result = result.union(other)
217+
# GH 35092. Index.union expects sort=None instead of sort=True
218+
# to signify that sort=True isn't fully implemented and
219+
# legacy implementation sometimes might not sort (see GH 24959)
220+
# In this case we currently sort in _get_combined_index
221+
if sort:
222+
sort = None
223+
result = result.union(other, sort=sort)
218224
return result
219225
elif kind == "array":
220226
index = indexes[0]

pandas/tests/frame/test_constructors.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -2542,11 +2542,13 @@ def test_construct_with_two_categoricalindex_series(self):
25422542
index=pd.CategoricalIndex(["f", "female", "m", "male", "unknown"]),
25432543
)
25442544
result = DataFrame([s1, s2])
2545+
# GH 35092. Extra s2 columns are now appended to s1 columns
2546+
# in original order
25452547
expected = DataFrame(
25462548
np.array(
2547-
[[np.nan, 39.0, np.nan, 6.0, 4.0], [2.0, 152.0, 2.0, 242.0, 150.0]]
2549+
[[39.0, 6.0, 4.0, np.nan, np.nan], [152.0, 242.0, 150.0, 2.0, 2.0]]
25482550
),
2549-
columns=["f", "female", "m", "male", "unknown"],
2551+
columns=["female", "male", "unknown", "f", "m"],
25502552
)
25512553
tm.assert_frame_equal(result, expected)
25522554

pandas/tests/indexes/test_common.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313
from pandas.core.dtypes.common import is_period_dtype, needs_i8_conversion
1414

1515
import pandas as pd
16-
from pandas import CategoricalIndex, MultiIndex, RangeIndex
16+
from pandas import CategoricalIndex, Index, MultiIndex, RangeIndex
1717
import pandas._testing as tm
18+
from pandas.core.indexes.api import union_indexes
1819

1920

2021
class TestCommon:
@@ -395,3 +396,18 @@ def test_astype_preserves_name(self, index, dtype, copy):
395396
assert result.names == index.names
396397
else:
397398
assert result.name == index.name
399+
400+
401+
@pytest.mark.parametrize("arr", [[0, 1, 4, 3]])
402+
@pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"])
403+
def test_union_index_no_sort(arr, sort, dtype):
404+
# GH 35092. Check that we don't sort with sort=False
405+
ind1 = Index(arr[:2], dtype=dtype)
406+
ind2 = Index(arr[2:], dtype=dtype)
407+
408+
# sort is None indicates that we sort the combined index
409+
if sort is None:
410+
arr.sort()
411+
expected = Index(arr, dtype=dtype)
412+
result = union_indexes([ind1, ind2], sort=sort)
413+
tm.assert_index_equal(result, expected)

pandas/tests/reshape/test_concat.py

+14
Original file line numberDiff line numberDiff line change
@@ -2857,3 +2857,17 @@ def test_concat_frame_axis0_extension_dtypes():
28572857
result = pd.concat([df2, df1], ignore_index=True)
28582858
expected = pd.DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64")
28592859
tm.assert_frame_equal(result, expected)
2860+
2861+
2862+
@pytest.mark.parametrize("sort", [True, False])
2863+
def test_append_sort(sort):
2864+
# GH 35092. Check that DataFrame.append respects the sort argument.
2865+
df1 = pd.DataFrame(data={0: [1, 2], 1: [3, 4]})
2866+
df2 = pd.DataFrame(data={3: [1, 2], 2: [3, 4]})
2867+
cols = list(df1.columns) + list(df2.columns)
2868+
if sort:
2869+
cols.sort()
2870+
2871+
result = df1.append(df2, sort=sort).columns
2872+
expected = type(result)(cols)
2873+
tm.assert_index_equal(result, expected)

pandas/tests/reshape/test_melt.py

+13-13
Original file line numberDiff line numberDiff line change
@@ -691,11 +691,11 @@ def test_unbalanced(self):
691691
)
692692
df["id"] = df.index
693693
exp_data = {
694-
"X": ["X1", "X1", "X2", "X2"],
695-
"A": [1.0, 3.0, 2.0, 4.0],
696-
"B": [5.0, np.nan, 6.0, np.nan],
697-
"id": [0, 0, 1, 1],
698-
"year": [2010, 2011, 2010, 2011],
694+
"X": ["X1", "X2", "X1", "X2"],
695+
"A": [1.0, 2.0, 3.0, 4.0],
696+
"B": [5.0, 6.0, np.nan, np.nan],
697+
"id": [0, 1, 0, 1],
698+
"year": [2010, 2010, 2011, 2011],
699699
}
700700
expected = pd.DataFrame(exp_data)
701701
expected = expected.set_index(["id", "year"])[["X", "A", "B"]]
@@ -938,10 +938,10 @@ def test_nonnumeric_suffix(self):
938938
)
939939
expected = pd.DataFrame(
940940
{
941-
"A": ["X1", "X1", "X2", "X2"],
942-
"colname": ["placebo", "test", "placebo", "test"],
943-
"result": [5.0, np.nan, 6.0, np.nan],
944-
"treatment": [1.0, 3.0, 2.0, 4.0],
941+
"A": ["X1", "X2", "X1", "X2"],
942+
"colname": ["placebo", "placebo", "test", "test"],
943+
"result": [5.0, 6.0, np.nan, np.nan],
944+
"treatment": [1.0, 2.0, 3.0, 4.0],
945945
}
946946
)
947947
expected = expected.set_index(["A", "colname"])
@@ -985,10 +985,10 @@ def test_float_suffix(self):
985985
)
986986
expected = pd.DataFrame(
987987
{
988-
"A": ["X1", "X1", "X1", "X1", "X2", "X2", "X2", "X2"],
989-
"colname": [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
990-
"result": [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
991-
"treatment": [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0],
988+
"A": ["X1", "X2", "X1", "X2", "X1", "X2", "X1", "X2"],
989+
"colname": [1.2, 1.2, 1.0, 1.0, 1.1, 1.1, 2.1, 2.1],
990+
"result": [5.0, 6.0, 0.0, 9.0, np.nan, np.nan, np.nan, np.nan],
991+
"treatment": [np.nan, np.nan, np.nan, np.nan, 1.0, 2.0, 3.0, 4.0],
992992
}
993993
)
994994
expected = expected.set_index(["A", "colname"])

pandas/tests/test_strings.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -636,8 +636,15 @@ def test_str_cat_align_mixed_inputs(self, join):
636636
# mixed list of indexed/unindexed
637637
u = np.array(["A", "B", "C", "D"])
638638
expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"])
639+
639640
# joint index of rhs [t, u]; u will be forced to have the index of s
640-
rhs_idx = t.index & s.index if join == "inner" else t.index | s.index
641+
# GH 35092. If right join, maintain order of t.index
642+
if join == "inner":
643+
rhs_idx = t.index & s.index
644+
elif join == "right":
645+
rhs_idx = t.index.union(s.index, sort=False)
646+
else:
647+
rhs_idx = t.index | s.index
641648

642649
expected = expected_outer.loc[s.index.join(rhs_idx, how=join)]
643650
result = s.str.cat([t, u], join=join, na_rep="-")

0 commit comments

Comments
 (0)