pandas-dev · jreback · Jul 9, 2020 · Jul 2, 2020 · Jul 2, 2020 · Jul 2, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -1112,6 +1112,7 @@ Reshaping
 - Fixed bug in :func:`melt` where melting MultiIndex columns with ``col_level`` > 0 would raise a ``KeyError`` on ``id_vars`` (:issue:`34129`)
 - Bug in :meth:`Series.where` with an empty Series and empty ``cond`` having non-bool dtype (:issue:`34592`)
 - Fixed regression where :meth:`DataFrame.apply` would raise ``ValueError`` for elements with ``S`` dtype (:issue:`34529`)
+- Bug in :meth:`DataFrame.append` leading to sorting columns even when ``sort=False`` is specified (:issue:`35092`)
 
 Sparse
 ^^^^^^

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
@@ -214,7 +214,13 @@ def conv(i):
             return result.union_many(indexes[1:])
         else:
             for other in indexes[1:]:
-                result = result.union(other)
+                # GH 35092. Index.union takes sort=None rather than sort=True,
+                # signalling that sort=True isn't fully implemented and that the
+                # legacy implementation might sometimes not sort (see GH 24959).
+                # In that case we currently sort in _get_combined_index.
+                if sort:
+                    sort = None
+                result = result.union(other, sort=sort)
             return result
     elif kind == "array":
         index = indexes[0]

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -2542,11 +2542,13 @@ def test_construct_with_two_categoricalindex_series(self):
             index=pd.CategoricalIndex(["f", "female", "m", "male", "unknown"]),
         )
         result = DataFrame([s1, s2])
+        # GH 35092. Extra s2 columns are now appended to s1 columns
+        # in original order
         expected = DataFrame(
             np.array(
-                [[np.nan, 39.0, np.nan, 6.0, 4.0], [2.0, 152.0, 2.0, 242.0, 150.0]]
+                [[39.0, 6.0, 4.0, np.nan, np.nan], [152.0, 242.0, 150.0, 2.0, 2.0]]
             ),
-            columns=["f", "female", "m", "male", "unknown"],
+            columns=["female", "male", "unknown", "f", "m"],
         )
         tm.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
@@ -13,8 +13,9 @@
 from pandas.core.dtypes.common import is_period_dtype, needs_i8_conversion
 
 import pandas as pd
-from pandas import CategoricalIndex, MultiIndex, RangeIndex
+from pandas import CategoricalIndex, Index, MultiIndex, RangeIndex
 import pandas._testing as tm
+from pandas.core.indexes.api import union_indexes
 
 
 class TestCommon:
@@ -395,3 +396,18 @@ def test_astype_preserves_name(self, index, dtype, copy):
             assert result.names == index.names
         else:
             assert result.name == index.name
+
+
+@pytest.mark.parametrize("arr", [[0, 1, 4, 3]])
+@pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"])
+def test_union_index_no_sort(arr, sort, dtype):
+    # GH 35092. Check that we don't sort with sort=False
+    ind1 = Index(arr[:2], dtype=dtype)
+    ind2 = Index(arr[2:], dtype=dtype)
+
+    # sort=None means the combined index is sorted. Build a sorted copy:
+    # ``arr`` is shared across parametrized runs, so sorting it in place
+    # would leave later sort=False runs with already-sorted input.
+    expected = Index(sorted(arr) if sort is None else arr, dtype=dtype)
+    result = union_indexes([ind1, ind2], sort=sort)
+    tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
@@ -2857,3 +2857,17 @@ def test_concat_frame_axis0_extension_dtypes():
     result = pd.concat([df2, df1], ignore_index=True)
     expected = pd.DataFrame({"a": [4, 5, 6, 1, 2, 3]}, dtype="Int64")
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("sort", [True, False])
+def test_append_sort(sort):
+    # GH 35092. DataFrame.append must honour ``sort``: columns stay in
+    # insertion order for sort=False and are sorted for sort=True.
+    left = pd.DataFrame(data={0: [1, 2], 1: [3, 4]})
+    right = pd.DataFrame(data={3: [1, 2], 2: [3, 4]})
+    appended_cols = left.append(right, sort=sort).columns
+
+    combined = [*left.columns, *right.columns]
+    ordered = sorted(combined) if sort else combined
+    expected = type(appended_cols)(ordered)
+    tm.assert_index_equal(appended_cols, expected)
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
@@ -691,11 +691,11 @@ def test_unbalanced(self):
         )
         df["id"] = df.index
         exp_data = {
-            "X": ["X1", "X1", "X2", "X2"],
-            "A": [1.0, 3.0, 2.0, 4.0],
-            "B": [5.0, np.nan, 6.0, np.nan],
-            "id": [0, 0, 1, 1],
-            "year": [2010, 2011, 2010, 2011],
+            "X": ["X1", "X2", "X1", "X2"],
+            "A": [1.0, 2.0, 3.0, 4.0],
+            "B": [5.0, 6.0, np.nan, np.nan],
+            "id": [0, 1, 0, 1],
+            "year": [2010, 2010, 2011, 2011],
         }
         expected = pd.DataFrame(exp_data)
         expected = expected.set_index(["id", "year"])[["X", "A", "B"]]
@@ -938,10 +938,10 @@ def test_nonnumeric_suffix(self):
         )
         expected = pd.DataFrame(
             {
-                "A": ["X1", "X1", "X2", "X2"],
-                "colname": ["placebo", "test", "placebo", "test"],
-                "result": [5.0, np.nan, 6.0, np.nan],
-                "treatment": [1.0, 3.0, 2.0, 4.0],
+                "A": ["X1", "X2", "X1", "X2"],
+                "colname": ["placebo", "placebo", "test", "test"],
+                "result": [5.0, 6.0, np.nan, np.nan],
+                "treatment": [1.0, 2.0, 3.0, 4.0],
             }
         )
         expected = expected.set_index(["A", "colname"])
@@ -985,10 +985,10 @@ def test_float_suffix(self):
         )
         expected = pd.DataFrame(
             {
-                "A": ["X1", "X1", "X1", "X1", "X2", "X2", "X2", "X2"],
-                "colname": [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
-                "result": [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
-                "treatment": [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0],
+                "A": ["X1", "X2", "X1", "X2", "X1", "X2", "X1", "X2"],
+                "colname": [1.2, 1.2, 1.0, 1.0, 1.1, 1.1, 2.1, 2.1],
+                "result": [5.0, 6.0, 0.0, 9.0, np.nan, np.nan, np.nan, np.nan],
+                "treatment": [np.nan, np.nan, np.nan, np.nan, 1.0, 2.0, 3.0, 4.0],
             }
         )
         expected = expected.set_index(["A", "colname"])

diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
@@ -636,8 +636,15 @@ def test_str_cat_align_mixed_inputs(self, join):
         # mixed list of indexed/unindexed
         u = np.array(["A", "B", "C", "D"])
         expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"])
+
         # joint index of rhs [t, u]; u will be forced to have the index of s
-        rhs_idx = t.index & s.index if join == "inner" else t.index | s.index
+        # GH 35092. If right join, maintain order of t.index
+        if join == "inner":
+            rhs_idx = t.index & s.index
+        elif join == "right":
+            rhs_idx = t.index.union(s.index, sort=False)
+        else:
+            rhs_idx = t.index | s.index
 
         expected = expected_outer.loc[s.index.join(rhs_idx, how=join)]
         result = s.str.cat([t, u], join=join, na_rep="-")