From 913723bb59d4912d86854675c5424963628f0b85 Mon Sep 17 00:00:00 2001
From: Bryce Guinta <bryce.paul.guinta@gmail.com>
Date: Wed, 4 Apr 2018 20:51:53 -0600
Subject: [PATCH 01/34] Stop concat from attempting to sort mismatched columns
 by default

Preserve column order upon concatenation to obey
least astonishment principle.

Allow old behavior to be enabled by adding a boolean switch, sort, to
concat and DataFrame.append, which is disabled by default.

Close #4588
---
 doc/source/whatsnew/v0.23.0.txt     |  1 +
 pandas/_libs/lib.pyx                | 11 ++++-----
 pandas/core/frame.py                |  8 +++++--
 pandas/core/indexes/api.py          | 13 ++++++-----
 pandas/core/reshape/concat.py       | 13 +++++++----
 pandas/tests/reshape/test_concat.py | 35 +++++++++++++++++++++++------
 6 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index a808b83119a40..c261891aa8897 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -1160,6 +1160,7 @@ Reshaping
 - Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`)
 - Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`)
 - Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`)
+- Stop :func:`concat` and ``Dataframe.append`` from sorting columns by default. Use ``sort=True`` to retain old behavior (:issue:`4588`)
 
 Other
 ^^^^^
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 30521760327b4..ae9d240afcb93 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -157,7 +157,7 @@ def fast_unique_multiple(list arrays):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def fast_unique_multiple_list(list lists):
+def fast_unique_multiple_list(list lists, bint sort=True):
     cdef:
         list buf
         Py_ssize_t k = len(lists)
@@ -174,10 +174,11 @@ def fast_unique_multiple_list(list lists):
             if val not in table:
                 table[val] = stub
                 uniques.append(val)
-    try:
-        uniques.sort()
-    except Exception:
-        pass
+    if sort:
+        try:
+            uniques.sort()
+        except Exception:
+            pass
 
     return uniques
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f33ef9597f456..f82305ac3913a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5982,7 +5982,8 @@ def infer(x):
     # ----------------------------------------------------------------------
     # Merging / joining methods
 
-    def append(self, other, ignore_index=False, verify_integrity=False):
+    def append(self, other, ignore_index=False,
+               verify_integrity=False, sort=False):
         """
         Append rows of `other` to the end of this frame, returning a new
         object. Columns not in this frame are added as new columns.
@@ -5995,6 +5996,8 @@ def append(self, other, ignore_index=False, verify_integrity=False):
             If True, do not use the index labels.
         verify_integrity : boolean, default False
             If True, raise ValueError on creating index with duplicates.
+        sort: boolean, default False
+            Sort columns if given object doesn't have the same columns
 
         Returns
         -------
@@ -6103,7 +6106,8 @@ def append(self, other, ignore_index=False, verify_integrity=False):
         else:
             to_concat = [self, other]
         return concat(to_concat, ignore_index=ignore_index,
-                      verify_integrity=verify_integrity)
+                      verify_integrity=verify_integrity,
+                      sort=sort)
 
     def join(self, other, on=None, how='left', lsuffix='', rsuffix='',
              sort=False):
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 2e5ec8b554ce7..75232e3db7e55 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -31,17 +31,17 @@
            '_all_indexes_same']
 
 
-def _get_objs_combined_axis(objs, intersect=False, axis=0):
+def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True):
     # Extract combined index: return intersection or union (depending on the
     # value of "intersect") of indexes on given axis, or None if all objects
     # lack indexes (e.g. they are numpy arrays)
     obs_idxes = [obj._get_axis(axis) for obj in objs
                  if hasattr(obj, '_get_axis')]
     if obs_idxes:
-        return _get_combined_index(obs_idxes, intersect=intersect)
+        return _get_combined_index(obs_idxes, intersect=intersect, sort=sort)
 
 
-def _get_combined_index(indexes, intersect=False):
+def _get_combined_index(indexes, intersect=False, sort=True):
     # TODO: handle index names!
     indexes = com._get_distinct_objs(indexes)
     if len(indexes) == 0:
@@ -53,11 +53,11 @@ def _get_combined_index(indexes, intersect=False):
         for other in indexes[1:]:
             index = index.intersection(other)
         return index
-    union = _union_indexes(indexes)
+    union = _union_indexes(indexes, sort=sort)
     return _ensure_index(union)
 
 
-def _union_indexes(indexes):
+def _union_indexes(indexes, sort=True):
     if len(indexes) == 0:
         raise AssertionError('Must have at least 1 Index to union')
     if len(indexes) == 1:
@@ -74,7 +74,8 @@ def conv(i):
                 i = i.tolist()
             return i
 
-        return Index(lib.fast_unique_multiple_list([conv(i) for i in inds]))
+        return Index(
+            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort))
 
     if kind == 'special':
         result = indexes[0]
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 20f4384a3d698..3630edbcbf58f 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -20,7 +20,7 @@
 
 def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
            keys=None, levels=None, names=None, verify_integrity=False,
-           copy=True):
+           sort=False, copy=True):
     """
     Concatenate pandas objects along a particular axis with optional set logic
     along the other axes.
@@ -60,6 +60,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
     verify_integrity : boolean, default False
         Check whether the new concatenated axis contains duplicates. This can
         be very expensive relative to the actual data concatenation
+    sort : boolean, default False
+        Sort columns if all passed object columns are not the same
     copy : boolean, default True
         If False, do not copy data unnecessarily
 
@@ -209,7 +211,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
                        ignore_index=ignore_index, join=join,
                        keys=keys, levels=levels, names=names,
                        verify_integrity=verify_integrity,
-                       copy=copy)
+                       copy=copy, sort=sort)
     return op.get_result()
 
 
@@ -220,7 +222,8 @@ class _Concatenator(object):
 
     def __init__(self, objs, axis=0, join='outer', join_axes=None,
                  keys=None, levels=None, names=None,
-                 ignore_index=False, verify_integrity=False, copy=True):
+                 ignore_index=False, verify_integrity=False, copy=True,
+                 sort=False):
         if isinstance(objs, (NDFrame, compat.string_types)):
             raise TypeError('first argument must be an iterable of pandas '
                             'objects, you passed an object of type '
@@ -355,6 +358,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
         self.keys = keys
         self.names = names or getattr(keys, 'names', None)
         self.levels = levels
+        self.sort = sort
 
         self.ignore_index = ignore_index
         self.verify_integrity = verify_integrity
@@ -447,7 +451,8 @@ def _get_comb_axis(self, i):
         data_axis = self.objs[0]._get_block_manager_axis(i)
         try:
             return _get_objs_combined_axis(self.objs, axis=data_axis,
-                                           intersect=self.intersect)
+                                           intersect=self.intersect,
+                                           sort=self.sort)
         except IndexError:
             types = [type(x).__name__ for x in self.objs]
             raise TypeError("Cannot concatenate list of {types}"
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 437b4179c580a..48260d90746c3 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -5,7 +5,7 @@
 from numpy.random import randn
 
 from datetime import datetime
-from pandas.compat import StringIO, iteritems, PY2
+from pandas.compat import StringIO, iteritems
 import pandas as pd
 from pandas import (DataFrame, concat,
                     read_csv, isna, Series, date_range,
@@ -852,8 +852,9 @@ def test_append_dtype_coerce(self):
                                    dt.datetime(2013, 1, 2, 0, 0),
                                    dt.datetime(2013, 1, 3, 0, 0),
                                    dt.datetime(2013, 1, 4, 0, 0)],
-                                  name='start_time')], axis=1)
-        result = df1.append(df2, ignore_index=True)
+                                  name='start_time')],
+                          axis=1, sort=True)
+        result = df1.append(df2, ignore_index=True, sort=True)
         assert_frame_equal(result, expected)
 
     def test_append_missing_column_proper_upcast(self):
@@ -1011,7 +1012,8 @@ def test_concat_ignore_index(self):
         frame1.index = Index(["x", "y", "z"])
         frame2.index = Index(["x", "y", "q"])
 
-        v1 = concat([frame1, frame2], axis=1, ignore_index=True)
+        v1 = concat([frame1, frame2], axis=1,
+                    ignore_index=True, sort=True)
 
         nan = np.nan
         expected = DataFrame([[nan, nan, nan, 4.3],
@@ -1463,7 +1465,7 @@ def test_concat_series_axis1(self):
         # must reindex, #2603
         s = Series(randn(3), index=['c', 'a', 'b'], name='A')
         s2 = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')
-        result = concat([s, s2], axis=1)
+        result = concat([s, s2], axis=1, sort=True)
         expected = DataFrame({'A': s, 'B': s2})
         assert_frame_equal(result, expected)
 
@@ -2070,8 +2072,6 @@ def test_concat_order(self):
                 for i in range(100)]
         result = pd.concat(dfs).columns
         expected = dfs[0].columns
-        if PY2:
-            expected = expected.sort_values()
         tm.assert_index_equal(result, expected)
 
     def test_concat_datetime_timezone(self):
@@ -2155,3 +2155,24 @@ def test_concat_empty_and_non_empty_series_regression():
     expected = s1
     result = pd.concat([s1, s2])
     tm.assert_series_equal(result, expected)
+
+
+def test_concat_preserve_column_order_differing_columns():
+    # GH 4588 regression test
+    # for new columns in concat
+    dfa = pd.DataFrame(columns=['C', 'A'], data=[[1, 2]])
+    dfb = pd.DataFrame(columns=['C', 'Z'], data=[[5, 6]])
+    result = pd.concat([dfa, dfb])
+    assert result.columns.tolist() == ['C', 'A', 'Z']
+
+
+def test_concat_preserve_column_order_uneven_data():
+    # GH 4588 regression test
+    # add to column, concat with uneven data
+    df = pd.DataFrame()
+    df['b'] = [1, 2, 3]
+    df['c'] = [1, 2, 3]
+    df['a'] = [1, 2, 3]
+    df2 = pd.DataFrame({'a': [4, 5]})
+    df3 = pd.concat([df, df2])
+    assert df3.columns.tolist() == ['b', 'c', 'a']

From 02b2db93a5171b63615a450a7819a1c0ddc7f94e Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 26 Apr 2018 09:13:23 -0500
Subject: [PATCH 02/34] Updates

API: Updated the default to be compatible and warn.

DOC: updated the whatsnew and concat docstring.
---
 doc/source/whatsnew/v0.23.0.txt     | 26 ++++++++++++++++-
 pandas/core/indexes/api.py          | 20 ++++++++++++++
 pandas/core/reshape/concat.py       | 13 +++++++--
 pandas/tests/reshape/test_concat.py | 43 ++++++++++++++++++++++++++---
 4 files changed, 94 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 0bbe8102a4bbd..ab5a174b9a3bb 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -639,6 +639,31 @@ Returning a ``Series`` allows one to control the exact return structure and colu
 
     df.apply(lambda x: Series([1, 2, 3], index=['D', 'E', 'F']), axis=1)
 
+Concatenation will no longer sort
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned.
+The current behavior is the same as the previous (sorting), but now a warning is issued.
+
+.. ipython:: python
+   :okwarning:
+
+   df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
+   df2 = pd.DataFrame({"a": [4, 5]})
+
+   pd.concat([df1, df2])
+
+To keep the previous behavior (sorting) and silence the warning, pass ``sort=True``
+
+.. ipython:: python
+
+   pd.concat([df1, df2], sort=True)
+
+To accept the future behavior (no sorting), pass ``sort=False``
+
+.. ipython:: python
+
+   pd.concat([df1, df2], sort=False)
 
 .. _whatsnew_0230.api_breaking.build_changes:
 
@@ -1237,7 +1262,6 @@ Reshaping
 - Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`)
 - Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`)
 - Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`)
-- Stop :func:`concat` and ``Dataframe.append`` from sorting columns by default. Use ``sort=True`` to retain old behavior (:issue:`4588`)
 
 Other
 ^^^^^
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 75232e3db7e55..32cf5c47bbd6b 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -1,3 +1,6 @@
+import textwrap
+import warnings
+
 from pandas.core.indexes.base import (Index,
                                       _new_Index,
                                       _ensure_index,
@@ -17,6 +20,16 @@
 from pandas._libs import lib
 from pandas._libs.tslib import NaT
 
+_sort_msg = textwrap.dedent("""\
+Sorting because non-concatenation axis is not aligned. A future version
+of pandas will change to not sort by default.
+
+To accept the future behavior, pass 'sort=False'.
+
+To retain the current behavior and silence the warning, pass 'sort=True'.
+""")
+
+
 # TODO: there are many places that rely on these private methods existing in
 # pandas.core.index
 __all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index',
@@ -90,6 +103,12 @@ def conv(i):
         index = indexes[0]
         for other in indexes[1:]:
             if not index.equals(other):
+
+                if sort is None:
+                    # TODO: remove once pd.concat sort default changes
+                    warnings.warn(_sort_msg, FutureWarning, stacklevel=8)
+                    sort = True
+
                 return _unique_indices(indexes)
 
         name = _get_consensus_names(indexes)[0]
@@ -97,6 +116,7 @@ def conv(i):
             index = index._shallow_copy(name=name)
         return index
     else:
+        # XXX: here too?
         return _unique_indices(indexes)
 
 
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 531d1715cdf27..4879e32d8348b 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -20,7 +20,7 @@
 
 def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
            keys=None, levels=None, names=None, verify_integrity=False,
-           sort=False, copy=True):
+           sort=None, copy=True):
     """
     Concatenate pandas objects along a particular axis with optional set logic
     along the other axes.
@@ -60,8 +60,15 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
     verify_integrity : boolean, default False
         Check whether the new concatenated axis contains duplicates. This can
         be very expensive relative to the actual data concatenation
-    sort : boolean, default False
-        Sort columns if all passed object columns are not the same
+    sort : boolean, default None
+        Sort non-concatenation axis if it is not already aligned. The current
+        default of sorting is deprecated and will change to not-sorting in a
+        future version of pandas. Explicitly pass ``sort=True`` to silence
+        the warning and sort. Explicitly pass ``sort=False`` to silence the
+        warning and not sort.
+
+        .. versionadded:: 0.23.0
+
     copy : boolean, default True
         If False, do not copy data unnecessarily
 
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 8051f39284d6f..c4f7a3454c7f7 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -2251,13 +2251,43 @@ def test_concat_empty_and_non_empty_series_regression():
     tm.assert_series_equal(result, expected)
 
 
+def test_concat_sort_columns():
+    # GH-4588
+    df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
+    df2 = pd.DataFrame({"a": [3, 4]})
+
+    expected = pd.DataFrame({"a": [1, 2, 3, 4],
+                             "b": [1, 2, None, None]},
+                            columns=['a', 'b'])
+    with tm.assert_produces_warning(FutureWarning):
+        result = pd.concat([df1, df2], ignore_index=True)
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_concat_sorts_index():
+    df1 = pd.DataFrame({"a": [1, 2, 3]}, index=['c', 'a', 'b'])
+    df2 = pd.DataFrame({"b": [1, 2]}, index=['a', 'b'])
+
+    with tm.assert_produces_warning(FutureWarning):
+        result = pd.concat([df1, df2], axis=1)
+
+    expected = pd.DataFrame({"a": [2, 3, 1], "b": [1, 2, None]},
+                            index=['a', 'b', 'c'],
+                            columns=['a', 'b'])
+    tm.assert_frame_equal(result, expected)
+
+
 def test_concat_preserve_column_order_differing_columns():
     # GH 4588 regression test
     # for new columns in concat
     dfa = pd.DataFrame(columns=['C', 'A'], data=[[1, 2]])
     dfb = pd.DataFrame(columns=['C', 'Z'], data=[[5, 6]])
-    result = pd.concat([dfa, dfb])
-    assert result.columns.tolist() == ['C', 'A', 'Z']
+    result = pd.concat([dfa, dfb], ignore_index=True)
+
+    expected = pd.DataFrame({"A": [2, None], "C": [1, 5],
+                             "Z": [None, 6]}, columns=["A", "C", "Z"])
+    tm.assert_frame_equal(result, expected)
 
 
 def test_concat_preserve_column_order_uneven_data():
@@ -2268,5 +2298,10 @@ def test_concat_preserve_column_order_uneven_data():
     df['c'] = [1, 2, 3]
     df['a'] = [1, 2, 3]
     df2 = pd.DataFrame({'a': [4, 5]})
-    df3 = pd.concat([df, df2])
-    assert df3.columns.tolist() == ['b', 'c', 'a']
+    result = pd.concat([df, df2])
+    expected = pd.DataFrame({
+        'a': [1, 2, 3, 4, 5],
+        'b': [1, 2, 3, None, None],
+        'c': [1, 2, 3, None, None]
+    }, index=[0, 1, 2, 0, 1])
+    tm.assert_frame_equal(result, expected)

From a497763acc6effdda3f4083742e72a90be9be193 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 26 Apr 2018 15:46:06 -0500
Subject: [PATCH 03/34] Test fallout

---
 pandas/core/base.py                        |  2 +-
 pandas/core/groupby/groupby.py             |  3 +-
 pandas/core/indexes/api.py                 |  3 +-
 pandas/tests/indexing/test_iloc.py         |  3 +-
 pandas/tests/indexing/test_partial.py      |  5 +--
 pandas/tests/reshape/merge/test_merge.py   |  2 +-
 pandas/tests/reshape/test_concat.py        | 11 ++++---
 pandas/tests/sparse/test_combine_concat.py | 38 +++++++++++++++-------
 8 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 9ca1c8bea4db7..2f25a9ce41369 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -507,7 +507,7 @@ def is_any_frame():
                            for r in compat.itervalues(result))
 
             if isinstance(result, list):
-                return concat(result, keys=keys, axis=1), True
+                return concat(result, keys=keys, axis=1, sort=True), True
 
             elif is_any_frame():
                 # we have a dict of DataFrames
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 8c20d62117e25..00ea96890dd27 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1098,7 +1098,8 @@ def reset_identity(values):
                 group_names = self.grouper.names
 
                 result = concat(values, axis=self.axis, keys=group_keys,
-                                levels=group_levels, names=group_names)
+                                levels=group_levels, names=group_names,
+                                sort=True)
             else:
 
                 # GH5610, returns a MI, with the first level being a
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 32cf5c47bbd6b..b919c8ab9a23f 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -115,8 +115,7 @@ def conv(i):
         if name != index.name:
             index = index._shallow_copy(name=name)
         return index
-    else:
-        # XXX: here too?
+    else:  # kind='list'
         return _unique_indices(indexes)
 
 
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index f1178d44dbfe0..bfc74db73b813 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -629,7 +629,8 @@ def test_iloc_non_unique_indexing(self):
             new_list.append(s * 3)
 
         expected = DataFrame(new_list)
-        expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])])
+        expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])],
+                          sort=True)
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
             result = df2.loc[idx]
         tm.assert_frame_equal(result, expected, check_index_type=False)
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index f95f493c66043..3c7a7f070805d 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -156,8 +156,9 @@ def f():
         df_orig = DataFrame(np.random.randn(8, 4), index=dates,
                             columns=['A', 'B', 'C', 'D'])
 
-        expected = pd.concat([df_orig, DataFrame(
-            {'A': 7}, index=[dates[-1] + 1])])
+        expected = pd.concat([df_orig,
+                              DataFrame({'A': 7}, index=[dates[-1] + 1])],
+                             sort=True)
         df = df_orig.copy()
         df.loc[dates[-1] + 1, 'A'] = 7
         tm.assert_frame_equal(df, expected)
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index dbf7c7f100b0e..4f68514e8fcaf 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -1286,7 +1286,7 @@ def test_join_multi_levels(self):
                 index=MultiIndex.from_tuples(
                     [(4, np.nan)],
                     names=['household_id', 'asset_id'])))
-        ], axis=0).reindex(columns=expected.columns))
+        ], axis=0, sort=True).reindex(columns=expected.columns))
         assert_frame_equal(result, expected)
 
         # invalid cases
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index c4f7a3454c7f7..ef21181452bc2 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1318,7 +1318,7 @@ def test_with_mixed_tuples(self):
         df2 = DataFrame({u'B': 'foo', (u'B', 1): 'bar'}, index=range(2))
 
         # it works
-        concat([df1, df2])
+        concat([df1, df2], sort=True)
 
     def test_handle_empty_objects(self):
         df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))
@@ -1328,7 +1328,7 @@ def test_handle_empty_objects(self):
         empty = df[5:5]
 
         frames = [baz, empty, empty, df[5:]]
-        concatted = concat(frames, axis=0)
+        concatted = concat(frames, axis=0, sort=True)
 
         expected = df.reindex(columns=['a', 'b', 'c', 'd', 'foo'])
         expected['foo'] = expected['foo'].astype('O')
@@ -2056,7 +2056,7 @@ def test_categorical_concat(self):
         cat_values = ["one", "one", "two", "one", "two", "two", "one"]
         df2['h'] = Series(Categorical(cat_values))
 
-        res = pd.concat((df1, df2), axis=0, ignore_index=True)
+        res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=True)
         exp = DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
                          'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan,
                                np.nan, np.nan, np.nan, np.nan],
@@ -2165,6 +2165,7 @@ def test_concat_order(self):
         dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
                 for i in range(100)]
         result = pd.concat(dfs).columns
+
         expected = dfs[0].columns
         tm.assert_index_equal(result, expected)
 
@@ -2283,7 +2284,7 @@ def test_concat_preserve_column_order_differing_columns():
     # for new columns in concat
     dfa = pd.DataFrame(columns=['C', 'A'], data=[[1, 2]])
     dfb = pd.DataFrame(columns=['C', 'Z'], data=[[5, 6]])
-    result = pd.concat([dfa, dfb], ignore_index=True)
+    result = pd.concat([dfa, dfb], ignore_index=True, sort=True)
 
     expected = pd.DataFrame({"A": [2, None], "C": [1, 5],
                              "Z": [None, 6]}, columns=["A", "C", "Z"])
@@ -2298,7 +2299,7 @@ def test_concat_preserve_column_order_uneven_data():
     df['c'] = [1, 2, 3]
     df['a'] = [1, 2, 3]
     df2 = pd.DataFrame({'a': [4, 5]})
-    result = pd.concat([df, df2])
+    result = pd.concat([df, df2], sort=True)
     expected = pd.DataFrame({
         'a': [1, 2, 3, 4, 5],
         'b': [1, 2, 3, None, None],
diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py
index 70fd1da529d46..9e392457edbc3 100644
--- a/pandas/tests/sparse/test_combine_concat.py
+++ b/pandas/tests/sparse/test_combine_concat.py
@@ -202,17 +202,29 @@ def test_concat_different_fill_value(self):
         exp._default_fill_value = np.nan
         tm.assert_sp_frame_equal(res, exp)
 
+    def test_concat_different_columns_sort_warns(self):
+        sparse = self.dense1.to_sparse()
+        sparse3 = self.dense3.to_sparse()
+
+        with tm.assert_produces_warning(FutureWarning):
+            res = pd.concat([sparse, sparse3])
+        with tm.assert_produces_warning(FutureWarning):
+            exp = pd.concat([self.dense1, self.dense3])
+
+        exp = exp.to_sparse()
+        tm.assert_sp_frame_equal(res, exp)
+
     def test_concat_different_columns(self):
         # fill_value = np.nan
         sparse = self.dense1.to_sparse()
         sparse3 = self.dense3.to_sparse()
 
-        res = pd.concat([sparse, sparse3])
-        exp = pd.concat([self.dense1, self.dense3]).to_sparse()
+        res = pd.concat([sparse, sparse3], sort=True)
+        exp = pd.concat([self.dense1, self.dense3], sort=True).to_sparse()
         tm.assert_sp_frame_equal(res, exp)
 
-        res = pd.concat([sparse3, sparse])
-        exp = pd.concat([self.dense3, self.dense1]).to_sparse()
+        res = pd.concat([sparse3, sparse], sort=True)
+        exp = pd.concat([self.dense3, self.dense1], sort=True).to_sparse()
         exp._default_fill_value = np.nan
         tm.assert_sp_frame_equal(res, exp)
 
@@ -220,13 +232,15 @@ def test_concat_different_columns(self):
         sparse = self.dense1.to_sparse(fill_value=0)
         sparse3 = self.dense3.to_sparse(fill_value=0)
 
-        res = pd.concat([sparse, sparse3])
-        exp = pd.concat([self.dense1, self.dense3]).to_sparse(fill_value=0)
+        res = pd.concat([sparse, sparse3], sort=True)
+        exp = (pd.concat([self.dense1, self.dense3], sort=True)
+                 .to_sparse(fill_value=0))
         exp._default_fill_value = np.nan
         tm.assert_sp_frame_equal(res, exp)
 
-        res = pd.concat([sparse3, sparse])
-        exp = pd.concat([self.dense3, self.dense1]).to_sparse(fill_value=0)
+        res = pd.concat([sparse3, sparse], sort=True)
+        exp = (pd.concat([self.dense3, self.dense1], sort=True)
+                 .to_sparse(fill_value=0))
         exp._default_fill_value = np.nan
         tm.assert_sp_frame_equal(res, exp)
 
@@ -234,13 +248,13 @@ def test_concat_different_columns(self):
         sparse = self.dense1.to_sparse()
         sparse3 = self.dense3.to_sparse(fill_value=0)
         # each columns keeps its fill_value, thus compare in dense
-        res = pd.concat([sparse, sparse3])
-        exp = pd.concat([self.dense1, self.dense3])
+        res = pd.concat([sparse, sparse3], sort=True)
+        exp = pd.concat([self.dense1, self.dense3], sort=True)
         assert isinstance(res, pd.SparseDataFrame)
         tm.assert_frame_equal(res.to_dense(), exp)
 
-        res = pd.concat([sparse3, sparse])
-        exp = pd.concat([self.dense3, self.dense1])
+        res = pd.concat([sparse3, sparse], sort=True)
+        exp = pd.concat([self.dense3, self.dense1], sort=True)
         assert isinstance(res, pd.SparseDataFrame)
         tm.assert_frame_equal(res.to_dense(), exp)
 

From 954a1b696703f30537b17b58f80ce38316be9a9c Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 26 Apr 2018 16:14:05 -0500
Subject: [PATCH 04/34] Updated append

---
 doc/source/whatsnew/v0.23.0.txt     |  5 ++-
 pandas/core/frame.py                | 10 ++++--
 pandas/tests/reshape/test_concat.py | 48 ++++++++++++++++++++++-------
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index ab5a174b9a3bb..cf7ef24a32ed5 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -643,7 +643,7 @@ Concatenation will no longer sort
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned.
-The current behavior is the same as the previous (sorting), but now a warning is issued.
+The current behavior is the same as the previous (sorting), but now a warning is issued (:issue:`4588`).
 
 .. ipython:: python
    :okwarning:
@@ -665,6 +665,9 @@ To accept the future behavior (no sorting), pass ``sort=False``
 
    pd.concat([df1, df2], sort=False)
 
+Note that this change also applies to :meth:`DataFrame.append`, which has also received a ``sort`` keyword for controlling this behavior.
+
+
 .. _whatsnew_0230.api_breaking.build_changes:
 
 Build Changes
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 36fca8d77bf38..ee1ca5e832f09 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6039,7 +6039,7 @@ def infer(x):
     # Merging / joining methods
 
     def append(self, other, ignore_index=False,
-               verify_integrity=False, sort=False):
+               verify_integrity=False, sort=None):
         """
         Append rows of `other` to the end of this frame, returning a new
         object. Columns not in this frame are added as new columns.
@@ -6052,8 +6052,12 @@ def append(self, other, ignore_index=False,
             If True, do not use the index labels.
         verify_integrity : boolean, default False
             If True, raise ValueError on creating index with duplicates.
-        sort: boolean, default False
-            Sort columns if given object doesn't have the same columns
+        sort : boolean, default None
+            Sort columns if the columns of `self` and `other` are not aligned.
+            The default sorting is deprecated and will change to not-sorting
+            in a future version of pandas. Explicitly pass ``sort=True`` to
+            silence the warning and sort. Explicitly pass ``sort=False`` to
+            silence the warning and not sort.
 
         Returns
         -------
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index ef21181452bc2..494d340d7f880 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -727,10 +727,10 @@ def test_append(self):
         tm.assert_almost_equal(appended['A'], self.frame['A'])
 
         del end_frame['A']
-        partial_appended = begin_frame.append(end_frame)
+        partial_appended = begin_frame.append(end_frame, sort=True)
         assert 'A' in partial_appended
 
-        partial_appended = end_frame.append(begin_frame)
+        partial_appended = end_frame.append(begin_frame, sort=True)
         assert 'A' in partial_appended
 
         # mixed type handling
@@ -738,8 +738,9 @@ def test_append(self):
         tm.assert_frame_equal(appended, self.mixed_frame)
 
         # what to test here
-        mixed_appended = self.mixed_frame[:5].append(self.frame[5:])
-        mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:])
+        mixed_appended = self.mixed_frame[:5].append(self.frame[5:], sort=True)
+        mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:],
+                                                sort=True)
 
         # all equal except 'foo' column
         tm.assert_frame_equal(
@@ -772,7 +773,7 @@ def test_append(self):
     def test_append_length0_frame(self):
         df = DataFrame(columns=['A', 'B', 'C'])
         df3 = DataFrame(index=[0, 1], columns=['A', 'B'])
-        df5 = df.append(df3)
+        df5 = df.append(df3, sort=True)
 
         expected = DataFrame(index=[0, 1], columns=['A', 'B', 'C'])
         assert_frame_equal(df5, expected)
@@ -793,6 +794,31 @@ def test_append_records(self):
         expected = DataFrame(np.concatenate((arr1, arr2)))
         assert_frame_equal(result, expected)
 
+    def test_append_sorts(self):
+        df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
+        df2 = pd.DataFrame({"a": [1, 2], 'c': [3, 4]}, index=[2, 3])
+        # default, changing in the future
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            # from append we have an extra function call. Not worth hacking
+            # around to get the right stackleve.
+            result = df1.append(df2)
+
+        expected = pd.DataFrame({"b": [1, 2, None, None],
+                                 "a": [1, 2, 1, 2],
+                                 "c": [None, None, 3, 4]},
+                                columns=['a', 'b', 'c'])
+        tm.assert_frame_equal(result, expected)
+
+        # sort=True, the previous behavior
+        result = df1.append(df2, sort=True)
+        tm.assert_frame_equal(result, expected)
+
+        # sort=False, the future behvior.
+        result = df1.append(df2, sort=False)
+        expected = expected[['b', 'a', 'c']]
+        tm.assert_frame_equal(result, expected)
+
     def test_append_different_columns(self):
         df = DataFrame({'bools': np.random.randn(10) > 0,
                         'ints': np.random.randint(0, 10, 10),
@@ -802,7 +828,7 @@ def test_append_different_columns(self):
         a = df[:5].loc[:, ['bools', 'ints', 'floats']]
         b = df[5:].loc[:, ['strings', 'ints', 'floats']]
 
-        appended = a.append(b)
+        appended = a.append(b, sort=True)
         assert isna(appended['strings'][0:4]).all()
         assert isna(appended['bools'][5:]).all()
 
@@ -815,7 +841,7 @@ def test_append_many(self):
 
         chunks[-1] = chunks[-1].copy()
         chunks[-1]['foo'] = 'bar'
-        result = chunks[0].append(chunks[1:])
+        result = chunks[0].append(chunks[1:], sort=True)
         tm.assert_frame_equal(result.loc[:, self.frame.columns], self.frame)
         assert (result['foo'][15:] == 'bar').all()
         assert result['foo'][:15].isna().all()
@@ -956,7 +982,7 @@ def test_append_missing_column_proper_upcast(self):
         df2 = DataFrame({'B': np.array([True, False, True, False],
                                        dtype=bool)})
 
-        appended = df1.append(df2, ignore_index=True)
+        appended = df1.append(df2, ignore_index=True, sort=True)
         assert appended['A'].dtype == 'f8'
         assert appended['B'].dtype == 'O'
 
@@ -1052,7 +1078,7 @@ def test_concat_dataframe_keys_bug(self):
             'value': Series([7, 8], index=Index(['a', 'b'], name='id'))})
 
         # it works
-        result = concat([t1, t2], axis=1, keys=['t1', 't2'])
+        result = concat([t1, t2], axis=1, keys=['t1', 't2'], sort=True)
         assert list(result.columns) == [('t1', 'value'), ('t2', 'value')]
 
     def test_concat_series_partial_columns_names(self):
@@ -1505,7 +1531,7 @@ def df():
             panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2)
 
             # it works!
-            concat([panel1, panel3], axis=1, verify_integrity=True)
+            concat([panel1, panel3], axis=1, verify_integrity=True, sort=True)
 
     def test_concat_series(self):
 
@@ -2164,7 +2190,7 @@ def test_concat_order(self):
         dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])]
         dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
                 for i in range(100)]
-        result = pd.concat(dfs).columns
+        result = pd.concat(dfs, sort=True).columns
 
         expected = dfs[0].columns
         tm.assert_index_equal(result, expected)

From 2a203774bf9044657dcd76fd989e3fe784d0c028 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 26 Apr 2018 16:15:44 -0500
Subject: [PATCH 05/34] Add versionadded to DataFrame.append sort docs

---
 pandas/core/frame.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index ee1ca5e832f09..9f673733bf7e4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6059,6 +6059,8 @@ def append(self, other, ignore_index=False,
             silence the warning and sort. Explicitly pass ``sort=False`` to
             silence the warning and not sort.
 
+            .. versionadded:: 0.23.0
+
         Returns
         -------
         appended : DataFrame

From 35570c4f8276b10d42b8c275b6f44c135bec86e5 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 27 Apr 2018 06:49:21 -0500
Subject: [PATCH 06/34] Squash more test warnings

---
 pandas/tests/frame/test_combine_concat.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py
index e82faaeef2986..15ca65395e4fc 100644
--- a/pandas/tests/frame/test_combine_concat.py
+++ b/pandas/tests/frame/test_combine_concat.py
@@ -96,7 +96,7 @@ def test_append_series_dict(self):
 
         result = df.append(series[::-1][:3], ignore_index=True)
         expected = df.append(DataFrame({0: series[::-1][:3]}).T,
-                             ignore_index=True)
+                             ignore_index=True, sort=True)
         assert_frame_equal(result, expected.loc[:, result.columns])
 
         # can append when name set
@@ -119,8 +119,8 @@ def test_append_list_of_series_dicts(self):
         # different columns
         dicts = [{'foo': 1, 'bar': 2, 'baz': 3, 'peekaboo': 4},
                  {'foo': 5, 'bar': 6, 'baz': 7, 'peekaboo': 8}]
-        result = df.append(dicts, ignore_index=True)
-        expected = df.append(DataFrame(dicts), ignore_index=True)
+        result = df.append(dicts, ignore_index=True, sort=True)
+        expected = df.append(DataFrame(dicts), ignore_index=True, sort=True)
         assert_frame_equal(result, expected)
 
     def test_append_empty_dataframe(self):

From 983d0c1db55ee033214875324523d86177f315be Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 27 Apr 2018 07:22:04 -0500
Subject: [PATCH 07/34] py2 compat

---
 pandas/tests/reshape/test_concat.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 494d340d7f880..ae7a2c2c5f5fd 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -7,7 +7,7 @@
 from numpy.random import randn
 
 from datetime import datetime
-from pandas.compat import StringIO, iteritems
+from pandas.compat import StringIO, iteritems, PY2
 import pandas as pd
 from pandas import (DataFrame, concat,
                     read_csv, isna, Series, date_range,
@@ -2190,9 +2190,15 @@ def test_concat_order(self):
         dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])]
         dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
                 for i in range(100)]
+
         result = pd.concat(dfs, sort=True).columns
 
-        expected = dfs[0].columns
+        if PY2:
+            # Different sort order between incomparable objects between
+            # python 2 and python3 via Index.union.
+            expected = dfs[1].columns
+        else:
+            expected = dfs[0].columns
         tm.assert_index_equal(result, expected)
 
     def test_concat_datetime_timezone(self):

From 4960e3f57a11581e10a5cf7304d11713110b0de9 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 27 Apr 2018 10:02:15 -0500
Subject: [PATCH 08/34] Document that join='inner' is not affected

---
 pandas/core/reshape/concat.py       | 14 +++++++++-----
 pandas/tests/reshape/test_concat.py | 17 +++++++++++++++++
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 4879e32d8348b..b36e9b8d900fd 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -61,11 +61,15 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
         Check whether the new concatenated axis contains duplicates. This can
         be very expensive relative to the actual data concatenation
     sort : boolean, default None
-        Sort non-concatenation axis if it is not already aligned. The current
-        default of sorting is deprecated and will change to not-sorting in a
-        future version of pandas. Explicitly pass ``sort=True`` to silence
-        the warning and sort. Explicitly pass ``sort=False`` to silence the
-        warning and not sort.
+        Sort non-concatenation axis if it is not already aligned when `join`
+        is 'outer'. The current default of sorting is deprecated and will
+        change to not-sorting in a future version of pandas.
+
+        Explicitly pass ``sort=True`` to silence the warning and sort.
+        Explicitly pass ``sort=False`` to silence the warning and not sort.
+
+        This has no effect when ``join='inner'``, which already preserves
+        the order of the non-concatenation axis.
 
         .. versionadded:: 0.23.0
 
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index ae7a2c2c5f5fd..f74d652ae012c 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -2311,6 +2311,23 @@ def test_concat_sorts_index():
     tm.assert_frame_equal(result, expected)
 
 
+def test_concat_inner_sort_unaffected():
+    # https://github.com/pandas-dev/pandas/pull/20613
+    df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]},
+                       columns=['b', 'a', 'c'])
+    df2 = pd.DataFrame({"a": [1, 2], 'b': [3, 4]}, index=[3, 4])
+    with tm.assert_produces_warning(None):
+        r0 = pd.concat([df1, df2], join='inner', ignore_index=True)
+    r1 = pd.concat([df1, df2], join='inner', sort=True, ignore_index=True)
+    r2 = pd.concat([df1, df2], join='inner', sort=True, ignore_index=True)
+
+    expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]},
+                            columns=['b', 'a'])
+    tm.assert_frame_equal(r0, expected)
+    tm.assert_frame_equal(r1, expected)
+    tm.assert_frame_equal(r2, expected)
+
+
 def test_concat_preserve_column_order_differing_columns():
     # GH 4588 regression test
     # for new columns in concat

From 8bbbdd52a9d223215eef408c75f2526ce828e19b Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 27 Apr 2018 10:08:31 -0500
Subject: [PATCH 09/34] Docs

---
 doc/source/merging.rst | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/doc/source/merging.rst b/doc/source/merging.rst
index 74b21c21252ec..de37dc6dab59e 100644
--- a/doc/source/merging.rst
+++ b/doc/source/merging.rst
@@ -153,10 +153,10 @@ Set logic on the other axes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 When gluing together multiple DataFrames, you have a choice of how to handle
-the other axes (other than the one being concatenated). This can be done in 
+the other axes (other than the one being concatenated). This can be done in
 the following three ways:
 
-- Take the (sorted) union of them all, ``join='outer'``. This is the default
+- Take the union of them all, ``join='outer'``. This is the default
   option as it results in zero information loss.
 - Take the intersection, ``join='inner'``.
 - Use a specific index, as passed to the ``join_axes`` argument.
@@ -167,10 +167,10 @@ behavior:
 .. ipython:: python
 
    df4 = pd.DataFrame({'B': ['B2', 'B3', 'B6', 'B7'],
-                    'D': ['D2', 'D3', 'D6', 'D7'],
-                    'F': ['F2', 'F3', 'F6', 'F7']},
-                   index=[2, 3, 6, 7])
-   result = pd.concat([df1, df4], axis=1)
+                       'D': ['D2', 'D3', 'D6', 'D7'],
+                       'F': ['F2', 'F3', 'F6', 'F7']},
+                      index=[2, 3, 6, 7])
+   result = pd.concat([df1, df4], axis=1, sort=False)
 
 
 .. ipython:: python
@@ -181,8 +181,14 @@ behavior:
           labels=['df1', 'df4'], vertical=False);
    plt.close('all');
 
-Note that the row indexes have been unioned and sorted. Here is the same thing
-with ``join='inner'``:
+.. versionchanged:: 0.23.0
+
+   The default behavior with ``join='outer'`` is to sort the other axis
+   (columns in this case). In a future version of pandas, the default will
+   be to not sort. We specified ``sort=False`` to opt in to the new
+   behavior now.
+
+Here is the same thing with ``join='inner'``:
 
 .. ipython:: python
 

From dcfa6d0a84be399bc22b1ec6fe68900f6b3a5588 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 28 Apr 2018 13:32:04 -0500
Subject: [PATCH 10/34] Sort for intersection

---
 doc/source/merging.rst              |  4 +++-
 doc/source/whatsnew/v0.23.0.txt     |  2 ++
 pandas/core/indexes/api.py          |  7 +++++--
 pandas/tests/reshape/test_concat.py | 24 ++++++++++++++++++++++++
 4 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/doc/source/merging.rst b/doc/source/merging.rst
index de37dc6dab59e..1161656731f88 100644
--- a/doc/source/merging.rst
+++ b/doc/source/merging.rst
@@ -181,7 +181,9 @@ behavior:
           labels=['df1', 'df4'], vertical=False);
    plt.close('all');
 
-.. versionchanged:: 0.23.0
+.. warning::
+
+   .. versionchanged:: 0.23.0
 
    The default behavior with ``join='outer'`` is to sort the other axis
    (columns in this case). In a future version of pandas, the default will
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index cf7ef24a32ed5..bbbf2172efea4 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -639,6 +639,8 @@ Returning a ``Series`` allows one to control the exact return structure and colu
 
     df.apply(lambda x: Series([1, 2, 3], index=['D', 'E', 'F']), axis=1)
 
+.. _whatsnew_0230.api_breaking.concat:
+
 Concatenation will no longer sort
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index b919c8ab9a23f..f345c21b2f2f0 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -60,7 +60,10 @@ def _get_combined_index(indexes, intersect=False, sort=True):
     if len(indexes) == 0:
         return Index([])
     if len(indexes) == 1:
-        return indexes[0]
+        index = indexes[0]
+        if sort:
+            index = index.sort_values()
+        return index
     if intersect:
         index = indexes[0]
         for other in indexes[1:]:
@@ -115,7 +118,7 @@ def conv(i):
         if name != index.name:
             index = index._shallow_copy(name=name)
         return index
-    else:  # kind='list
+    else:  # kind='list'
         return _unique_indices(indexes)
 
 
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index f74d652ae012c..fc91647321a05 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -2355,3 +2355,27 @@ def test_concat_preserve_column_order_uneven_data():
         'c': [1, 2, 3, None, None]
     }, index=[0, 1, 2, 0, 1])
     tm.assert_frame_equal(result, expected)
+
+
+def test_concat_aligned_sort():
+    # GH-4588
+    df = pd.DataFrame({"b": [1, 2], "a": [3, 4]}, columns=['b', 'a'])
+    result = pd.concat([df, df], sort=True, ignore_index=True)
+    expected = pd.DataFrame({'b': [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
+                            columns=['a', 'b'])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_concat_aligned_sort_raises():
+    # GH-4588
+    df = pd.DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, 'a'])
+
+    if PY2:
+        expected = pd.DataFrame({1: [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
+                                columns=[1, 'a'])
+        result = pd.concat([df, df], ignore_index=True)
+        tm.assert_frame_equal(result, expected)
+    else:
+        msg = "'<' not supported between instances"
+        with tm.assert_raises_regex(TypeError, msg):
+            pd.concat([df, df], sort=True)

From 2eaeb1eb5ee6ea143f3215ecec7cb7ddac1b8a80 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 28 Apr 2018 14:46:11 -0500
Subject: [PATCH 11/34] More tests

---
 pandas/core/indexes/api.py               |  22 ++--
 pandas/tests/reshape/merge/test_merge.py |   2 +-
 pandas/tests/reshape/test_concat.py      | 126 +++++++++++++----------
 3 files changed, 88 insertions(+), 62 deletions(-)

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index f345c21b2f2f0..07ddbcc6fec18 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -58,19 +58,23 @@ def _get_combined_index(indexes, intersect=False, sort=True):
     # TODO: handle index names!
     indexes = com._get_distinct_objs(indexes)
     if len(indexes) == 0:
-        return Index([])
-    if len(indexes) == 1:
+        index = Index([])
+    elif len(indexes) == 1:
         index = indexes[0]
-        if sort:
-            index = index.sort_values()
-        return index
-    if intersect:
+    elif intersect:
         index = indexes[0]
         for other in indexes[1:]:
             index = index.intersection(other)
-        return index
-    union = _union_indexes(indexes, sort=sort)
-    return _ensure_index(union)
+    else:
+        index = _union_indexes(indexes, sort=sort)
+        index = _ensure_index(index)
+
+    if sort and not index.is_monotonic_increasing:
+        try:
+            index = index.sort_values()
+        except TypeError:
+            pass
+    return index
 
 
 def _union_indexes(indexes, sort=True):
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 4f68514e8fcaf..f3827ac251cf0 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -826,7 +826,7 @@ def test_validation(self):
 
         # Dups on left
         left_w_dups = left.append(pd.DataFrame({'a': ['a'], 'c': ['cow']},
-                                               index=[3]))
+                                               index=[3]), sort=True)
         merge(left_w_dups, right, left_index=True, right_index=True,
               validate='many_to_one')
 
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index fc91647321a05..67471bd2d2c6d 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -21,6 +21,12 @@
 import pytest
 
 
+@pytest.fixture(params=[True, False])
+def sort(request):
+    """Boolean sort keyword for concat and DataFrame.append."""
+    return request.param
+
+
 class ConcatenateBase(object):
 
     def setup_method(self, method):
@@ -716,7 +722,7 @@ def test_concat_categorical_empty(self):
 
 class TestAppend(ConcatenateBase):
 
-    def test_append(self):
+    def test_append(self, sort):
         begin_index = self.frame.index[:5]
         end_index = self.frame.index[5:]
 
@@ -727,10 +733,10 @@ def test_append(self):
         tm.assert_almost_equal(appended['A'], self.frame['A'])
 
         del end_frame['A']
-        partial_appended = begin_frame.append(end_frame, sort=True)
+        partial_appended = begin_frame.append(end_frame, sort=sort)
         assert 'A' in partial_appended
 
-        partial_appended = end_frame.append(begin_frame, sort=True)
+        partial_appended = end_frame.append(begin_frame, sort=sort)
         assert 'A' in partial_appended
 
         # mixed type handling
@@ -738,9 +744,9 @@ def test_append(self):
         tm.assert_frame_equal(appended, self.mixed_frame)
 
         # what to test here
-        mixed_appended = self.mixed_frame[:5].append(self.frame[5:], sort=True)
+        mixed_appended = self.mixed_frame[:5].append(self.frame[5:], sort=sort)
         mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:],
-                                                sort=True)
+                                                sort=sort)
 
         # all equal except 'foo' column
         tm.assert_frame_equal(
@@ -770,10 +776,10 @@ def test_append(self):
         result = df.append(row)
         tm.assert_frame_equal(result, expected)
 
-    def test_append_length0_frame(self):
+    def test_append_length0_frame(self, sort):
         df = DataFrame(columns=['A', 'B', 'C'])
         df3 = DataFrame(index=[0, 1], columns=['A', 'B'])
-        df5 = df.append(df3, sort=True)
+        df5 = df.append(df3, sort=sort)
 
         expected = DataFrame(index=[0, 1], columns=['A', 'B', 'C'])
         assert_frame_equal(df5, expected)
@@ -819,7 +825,7 @@ def test_append_sorts(self):
         expected = expected[['b', 'a', 'c']]
         tm.assert_frame_equal(result, expected)
 
-    def test_append_different_columns(self):
+    def test_append_different_columns(self, sort):
         df = DataFrame({'bools': np.random.randn(10) > 0,
                         'ints': np.random.randint(0, 10, 10),
                         'floats': np.random.randn(10),
@@ -828,11 +834,11 @@ def test_append_different_columns(self):
         a = df[:5].loc[:, ['bools', 'ints', 'floats']]
         b = df[5:].loc[:, ['strings', 'ints', 'floats']]
 
-        appended = a.append(b, sort=True)
+        appended = a.append(b, sort=sort)
         assert isna(appended['strings'][0:4]).all()
         assert isna(appended['bools'][5:]).all()
 
-    def test_append_many(self):
+    def test_append_many(self, sort):
         chunks = [self.frame[:5], self.frame[5:10],
                   self.frame[10:15], self.frame[15:]]
 
@@ -841,7 +847,7 @@ def test_append_many(self):
 
         chunks[-1] = chunks[-1].copy()
         chunks[-1]['foo'] = 'bar'
-        result = chunks[0].append(chunks[1:], sort=True)
+        result = chunks[0].append(chunks[1:], sort=sort)
         tm.assert_frame_equal(result.loc[:, self.frame.columns], self.frame)
         assert (result['foo'][15:] == 'bar').all()
         assert result['foo'][:15].isna().all()
@@ -949,7 +955,7 @@ def test_append_different_columns_types_raises(
         with pytest.raises(TypeError):
             df.append(ser)
 
-    def test_append_dtype_coerce(self):
+    def test_append_dtype_coerce(self, sort):
 
         # GH 4993
         # appending with datetime will incorrectly convert datetime64
@@ -973,16 +979,21 @@ def test_append_dtype_coerce(self):
                                    dt.datetime(2013, 1, 3, 0, 0),
                                    dt.datetime(2013, 1, 4, 0, 0)],
                                   name='start_time')],
-                          axis=1, sort=True)
-        result = df1.append(df2, ignore_index=True, sort=True)
+                          axis=1, sort=sort)
+        result = df1.append(df2, ignore_index=True, sort=sort)
+        if sort:
+            expected = expected[['end_time', 'start_time']]
+        else:
+            expected = expected[['start_time', 'end_time']]
+
         assert_frame_equal(result, expected)
 
-    def test_append_missing_column_proper_upcast(self):
+    def test_append_missing_column_proper_upcast(self, sort):
         df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8')})
         df2 = DataFrame({'B': np.array([True, False, True, False],
                                        dtype=bool)})
 
-        appended = df1.append(df2, ignore_index=True, sort=True)
+        appended = df1.append(df2, ignore_index=True, sort=sort)
         assert appended['A'].dtype == 'f8'
         assert appended['B'].dtype == 'O'
 
@@ -1070,7 +1081,7 @@ def test_concat_keys_specific_levels(self):
                               Index(level, name='group_key'))
         assert result.columns.names[0] == 'group_key'
 
-    def test_concat_dataframe_keys_bug(self):
+    def test_concat_dataframe_keys_bug(self, sort):
         t1 = DataFrame({
             'value': Series([1, 2, 3], index=Index(['a', 'b', 'c'],
                                                    name='id'))})
@@ -1078,7 +1089,7 @@ def test_concat_dataframe_keys_bug(self):
             'value': Series([7, 8], index=Index(['a', 'b'], name='id'))})
 
         # it works
-        result = concat([t1, t2], axis=1, keys=['t1', 't2'], sort=True)
+        result = concat([t1, t2], axis=1, keys=['t1', 't2'], sort=sort)
         assert list(result.columns) == [('t1', 'value'), ('t2', 'value')]
 
     def test_concat_series_partial_columns_names(self):
@@ -1124,7 +1135,7 @@ def test_concat_dict(self):
         expected = concat([frames[k] for k in keys], keys=keys)
         tm.assert_frame_equal(result, expected)
 
-    def test_concat_ignore_index(self):
+    def test_concat_ignore_index(self, sort):
         frame1 = DataFrame({"test1": ["a", "b", "c"],
                             "test2": [1, 2, 3],
                             "test3": [4.5, 3.2, 1.2]})
@@ -1133,7 +1144,7 @@ def test_concat_ignore_index(self):
         frame2.index = Index(["x", "y", "q"])
 
         v1 = concat([frame1, frame2], axis=1,
-                    ignore_index=True, sort=True)
+                    ignore_index=True, sort=sort)
 
         nan = np.nan
         expected = DataFrame([[nan, nan, nan, 4.3],
@@ -1141,6 +1152,8 @@ def test_concat_ignore_index(self):
                               ['b', 2, 3.2, 2.2],
                               ['c', 3, 1.2, nan]],
                              index=Index(["q", "x", "y", "z"]))
+        if not sort:
+            expected = expected.loc[['x', 'y', 'z', 'q']]
 
         tm.assert_frame_equal(v1, expected)
 
@@ -1337,16 +1350,16 @@ def test_dups_index(self):
         result = df.append(df)
         assert_frame_equal(result, expected)
 
-    def test_with_mixed_tuples(self):
+    def test_with_mixed_tuples(self, sort):
         # 10697
         # columns have mixed tuples, so handle properly
         df1 = DataFrame({u'A': 'foo', (u'B', 1): 'bar'}, index=range(2))
         df2 = DataFrame({u'B': 'foo', (u'B', 1): 'bar'}, index=range(2))
 
         # it works
-        concat([df1, df2], sort=True)
+        concat([df1, df2], sort=sort)
 
-    def test_handle_empty_objects(self):
+    def test_handle_empty_objects(self, sort):
         df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))
 
         baz = df[:5].copy()
@@ -1354,7 +1367,7 @@ def test_handle_empty_objects(self):
         empty = df[5:5]
 
         frames = [baz, empty, empty, df[5:]]
-        concatted = concat(frames, axis=0, sort=True)
+        concatted = concat(frames, axis=0, sort=sort)
 
         expected = df.reindex(columns=['a', 'b', 'c', 'd', 'foo'])
         expected['foo'] = expected['foo'].astype('O')
@@ -1506,7 +1519,7 @@ def test_panel_concat_other_axes(self):
             expected.loc['ItemC', :, :2] = 'baz'
             tm.assert_panel_equal(result, expected)
 
-    def test_panel_concat_buglet(self):
+    def test_panel_concat_buglet(self, sort):
         with catch_warnings(record=True):
             # #2257
             def make_panel():
@@ -1531,7 +1544,7 @@ def df():
             panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2)
 
             # it works!
-            concat([panel1, panel3], axis=1, verify_integrity=True, sort=True)
+            concat([panel1, panel3], axis=1, verify_integrity=True, sort=sort)
 
     def test_concat_series(self):
 
@@ -1556,7 +1569,7 @@ def test_concat_series(self):
         expected.index = exp_index
         tm.assert_series_equal(result, expected)
 
-    def test_concat_series_axis1(self):
+    def test_concat_series_axis1(self, sort=sort):
         ts = tm.makeTimeSeries()
 
         pieces = [ts[:-2], ts[2:], ts[2:-2]]
@@ -1585,7 +1598,7 @@ def test_concat_series_axis1(self):
         # must reindex, #2603
         s = Series(randn(3), index=['c', 'a', 'b'], name='A')
         s2 = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')
-        result = concat([s, s2], axis=1, sort=True)
+        result = concat([s, s2], axis=1, sort=sort)
         expected = DataFrame({'A': s, 'B': s2})
         assert_frame_equal(result, expected)
 
@@ -2071,7 +2084,7 @@ def test_categorical_concat_dtypes(self):
         expected = Series([True, False, False], index=index)
         tm.assert_series_equal(result, expected)
 
-    def test_categorical_concat(self):
+    def test_categorical_concat(self, sort):
         # See GH 10177
         df1 = DataFrame(np.arange(18, dtype='int64').reshape(6, 3),
                         columns=["a", "b", "c"])
@@ -2082,7 +2095,7 @@ def test_categorical_concat(self):
         cat_values = ["one", "one", "two", "one", "two", "two", "one"]
         df2['h'] = Series(Categorical(cat_values))
 
-        res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=True)
+        res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort)
         exp = DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
                          'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan,
                                np.nan, np.nan, np.nan, np.nan],
@@ -2311,21 +2324,28 @@ def test_concat_sorts_index():
     tm.assert_frame_equal(result, expected)
 
 
-def test_concat_inner_sort_unaffected():
+@pytest.mark.parametrize('sort', [None, False, True])
+def test_concat_inner_sort(sort):
     # https://github.com/pandas-dev/pandas/pull/20613
     df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]},
                        columns=['b', 'a', 'c'])
     df2 = pd.DataFrame({"a": [1, 2], 'b': [3, 4]}, index=[3, 4])
-    with tm.assert_produces_warning(None):
-        r0 = pd.concat([df1, df2], join='inner', ignore_index=True)
-    r1 = pd.concat([df1, df2], join='inner', sort=True, ignore_index=True)
-    r2 = pd.concat([df1, df2], join='inner', sort=True, ignore_index=True)
+
+    if sort is None:
+        with tm.assert_produces_warning(None):
+            # unset sort should *not* warn for inner join
+            # since that never sorted
+            result = pd.concat([df1, df2], sort=sort, join='inner',
+                               ignore_index=True)
+    else:
+        result = pd.concat([df1, df2], sort=sort, join='inner',
+                           ignore_index=True)
 
     expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]},
                             columns=['b', 'a'])
-    tm.assert_frame_equal(r0, expected)
-    tm.assert_frame_equal(r1, expected)
-    tm.assert_frame_equal(r2, expected)
+    if sort:
+        expected = expected[['a', 'b']]
+    tm.assert_frame_equal(result, expected)
 
 
 def test_concat_preserve_column_order_differing_columns():
@@ -2359,23 +2379,25 @@ def test_concat_preserve_column_order_uneven_data():
 
 def test_concat_aligned_sort():
     # GH-4588
-    df = pd.DataFrame({"b": [1, 2], "a": [3, 4]}, columns=['b', 'a'])
+    df = pd.DataFrame({"c": [1, 2], "b": [3, 4], 'a': [5, 6]},
+                      columns=['c', 'b', 'a'])
     result = pd.concat([df, df], sort=True, ignore_index=True)
-    expected = pd.DataFrame({'b': [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
-                            columns=['a', 'b'])
+    expected = pd.DataFrame({'a': [5, 6, 5, 6], 'b': [3, 4, 3, 4],
+                             'c': [1, 2, 1, 2]},
+                            columns=['a', 'b', 'c'])
+    tm.assert_frame_equal(result, expected)
+
+    result = pd.concat([df, df[['c', 'b']]], join='inner', sort=True,
+                       ignore_index=True)
+    expected = expected[['b', 'c']]
     tm.assert_frame_equal(result, expected)
 
 
-def test_concat_aligned_sort_raises():
+def test_concat_aligned_sort_does_not_raise():
     # GH-4588
+    # We catch TypeErrors from sorting internally and do not re-raise.
     df = pd.DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, 'a'])
-
-    if PY2:
-        expected = pd.DataFrame({1: [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
-                                columns=[1, 'a'])
-        result = pd.concat([df, df], ignore_index=True)
-        tm.assert_frame_equal(result, expected)
-    else:
-        msg = "'<' not supported between instances"
-        with tm.assert_raises_regex(TypeError, msg):
-            pd.concat([df, df], sort=True)
+    expected = pd.DataFrame({1: [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
+                            columns=[1, 'a'])
+    result = pd.concat([df, df], ignore_index=True, sort=True)
+    tm.assert_frame_equal(result, expected)

From bc7dd48249dc0d75053c0cc678d5f91c3aa1f4c7 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sun, 29 Apr 2018 06:29:26 -0500
Subject: [PATCH 12/34] Test fixup.

Sparse as well
---
 pandas/core/groupby/groupby.py          |  2 +-
 pandas/tests/sparse/frame/test_frame.py | 23 ++++++++++++++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 00ea96890dd27..4132d8e69704a 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1099,7 +1099,7 @@ def reset_identity(values):
 
                 result = concat(values, axis=self.axis, keys=group_keys,
                                 levels=group_levels, names=group_names,
-                                sort=True)
+                                sort=False)
             else:
 
                 # GH5610, returns a MI, with the first level being a
diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
index 540933cb90be2..8a8ed520d45d2 100644
--- a/pandas/tests/sparse/frame/test_frame.py
+++ b/pandas/tests/sparse/frame/test_frame.py
@@ -629,10 +629,31 @@ def test_append(self):
 
         a = self.frame.iloc[:5, :3]
         b = self.frame.iloc[5:]
-        appended = a.append(b)
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            # Stacklevel is set for pd.concat, not append
+            appended = a.append(b)
         tm.assert_sp_frame_equal(appended.iloc[:, :3], self.frame.iloc[:, :3],
                                  exact_indices=False)
 
+        a = a[['B', 'C', 'A']].head(2)
+        b = b.head(2)
+
+        expected = pd.SparseDataFrame({
+            "B": [0., 1, None, 3],
+            "C": [0., 1, 5, 6],
+            "A": [None, None, 2, 3],
+            "D": [None, None, 5, None],
+        }, index=a.index | b.index)
+        with tm.assert_produces_warning(None):
+            appended = a.append(b, sort=False)
+
+        tm.assert_frame_equal(appended, expected)
+
+        with tm.assert_produces_warning(None):
+            appended = a.append(b, sort=True)
+
+        tm.assert_sp_frame_equal(appended, expected[['A', 'B', 'C', 'D']])
+
     def test_astype(self):
         sparse = pd.SparseDataFrame({'A': SparseArray([1, 2, 3, 4],
                                                       dtype=np.int64),

From b3f95dd399d6f0acc4baa3b81d922959e2401f3b Mon Sep 17 00:00:00 2001
From: Bryce Guinta <bryce.paul.guinta@gmail.com>
Date: Wed, 4 Apr 2018 20:51:53 -0600
Subject: [PATCH 13/34] Stop concat from attempting to sort mismatched columns
 by default

Preserve column order upon concatenation to obey
least astonishment principle.

Allow old behavior to be enabled by adding a boolean switch to
concat and DataFrame.append, sort, which is by default disabled.

Close #4588
---
 doc/source/whatsnew/v0.23.0.txt     |  1 +
 pandas/_libs/lib.pyx                | 11 ++++-----
 pandas/core/frame.py                |  8 +++++--
 pandas/core/indexes/api.py          | 13 ++++++-----
 pandas/core/reshape/concat.py       | 13 +++++++----
 pandas/tests/reshape/test_concat.py | 35 +++++++++++++++++++++++------
 6 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index c128058858c17..dd557bbfa45a2 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -1237,6 +1237,7 @@ Reshaping
 - Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`)
 - Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`)
 - Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`)
+- Stop :func:`concat` and ``Dataframe.append`` from sorting columns by default. Use ``sort=True`` to retain old behavior (:issue:`4588`)
 
 Other
 ^^^^^
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 30521760327b4..ae9d240afcb93 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -157,7 +157,7 @@ def fast_unique_multiple(list arrays):
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def fast_unique_multiple_list(list lists):
+def fast_unique_multiple_list(list lists, bint sort=True):
     cdef:
         list buf
         Py_ssize_t k = len(lists)
@@ -174,10 +174,11 @@ def fast_unique_multiple_list(list lists):
             if val not in table:
                 table[val] = stub
                 uniques.append(val)
-    try:
-        uniques.sort()
-    except Exception:
-        pass
+    if sort:
+        try:
+            uniques.sort()
+        except Exception:
+            pass
 
     return uniques
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 82d5a0286b117..5f884d5426d47 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6038,7 +6038,8 @@ def infer(x):
     # ----------------------------------------------------------------------
     # Merging / joining methods
 
-    def append(self, other, ignore_index=False, verify_integrity=False):
+    def append(self, other, ignore_index=False,
+               verify_integrity=False, sort=False):
         """
         Append rows of `other` to the end of this frame, returning a new
         object. Columns not in this frame are added as new columns.
@@ -6051,6 +6052,8 @@ def append(self, other, ignore_index=False, verify_integrity=False):
             If True, do not use the index labels.
         verify_integrity : boolean, default False
             If True, raise ValueError on creating index with duplicates.
+        sort : boolean, default False
+            Sort columns if given object doesn't have the same columns
 
         Returns
         -------
@@ -6162,7 +6165,8 @@ def append(self, other, ignore_index=False, verify_integrity=False):
         else:
             to_concat = [self, other]
         return concat(to_concat, ignore_index=ignore_index,
-                      verify_integrity=verify_integrity)
+                      verify_integrity=verify_integrity,
+                      sort=sort)
 
     def join(self, other, on=None, how='left', lsuffix='', rsuffix='',
              sort=False):
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 2e5ec8b554ce7..75232e3db7e55 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -31,17 +31,17 @@
            '_all_indexes_same']
 
 
-def _get_objs_combined_axis(objs, intersect=False, axis=0):
+def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True):
     # Extract combined index: return intersection or union (depending on the
     # value of "intersect") of indexes on given axis, or None if all objects
     # lack indexes (e.g. they are numpy arrays)
     obs_idxes = [obj._get_axis(axis) for obj in objs
                  if hasattr(obj, '_get_axis')]
     if obs_idxes:
-        return _get_combined_index(obs_idxes, intersect=intersect)
+        return _get_combined_index(obs_idxes, intersect=intersect, sort=sort)
 
 
-def _get_combined_index(indexes, intersect=False):
+def _get_combined_index(indexes, intersect=False, sort=True):
     # TODO: handle index names!
     indexes = com._get_distinct_objs(indexes)
     if len(indexes) == 0:
@@ -53,11 +53,11 @@ def _get_combined_index(indexes, intersect=False):
         for other in indexes[1:]:
             index = index.intersection(other)
         return index
-    union = _union_indexes(indexes)
+    union = _union_indexes(indexes, sort=sort)
     return _ensure_index(union)
 
 
-def _union_indexes(indexes):
+def _union_indexes(indexes, sort=True):
     if len(indexes) == 0:
         raise AssertionError('Must have at least 1 Index to union')
     if len(indexes) == 1:
@@ -74,7 +74,8 @@ def conv(i):
                 i = i.tolist()
             return i
 
-        return Index(lib.fast_unique_multiple_list([conv(i) for i in inds]))
+        return Index(
+            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort))
 
     if kind == 'special':
         result = indexes[0]
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 6e564975f34cd..531d1715cdf27 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -20,7 +20,7 @@
 
 def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
            keys=None, levels=None, names=None, verify_integrity=False,
-           copy=True):
+           sort=False, copy=True):
     """
     Concatenate pandas objects along a particular axis with optional set logic
     along the other axes.
@@ -60,6 +60,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
     verify_integrity : boolean, default False
         Check whether the new concatenated axis contains duplicates. This can
         be very expensive relative to the actual data concatenation
+    sort : boolean, default False
+        Sort columns if all passed object columns are not the same
     copy : boolean, default True
         If False, do not copy data unnecessarily
 
@@ -209,7 +211,7 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
                        ignore_index=ignore_index, join=join,
                        keys=keys, levels=levels, names=names,
                        verify_integrity=verify_integrity,
-                       copy=copy)
+                       copy=copy, sort=sort)
     return op.get_result()
 
 
@@ -220,7 +222,8 @@ class _Concatenator(object):
 
     def __init__(self, objs, axis=0, join='outer', join_axes=None,
                  keys=None, levels=None, names=None,
-                 ignore_index=False, verify_integrity=False, copy=True):
+                 ignore_index=False, verify_integrity=False, copy=True,
+                 sort=False):
         if isinstance(objs, (NDFrame, compat.string_types)):
             raise TypeError('first argument must be an iterable of pandas '
                             'objects, you passed an object of type '
@@ -355,6 +358,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
         self.keys = keys
         self.names = names or getattr(keys, 'names', None)
         self.levels = levels
+        self.sort = sort
 
         self.ignore_index = ignore_index
         self.verify_integrity = verify_integrity
@@ -447,7 +451,8 @@ def _get_comb_axis(self, i):
         data_axis = self.objs[0]._get_block_manager_axis(i)
         try:
             return _get_objs_combined_axis(self.objs, axis=data_axis,
-                                           intersect=self.intersect)
+                                           intersect=self.intersect,
+                                           sort=self.sort)
         except IndexError:
             types = [type(x).__name__ for x in self.objs]
             raise TypeError("Cannot concatenate list of {types}"
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 640d09f3587fb..8051f39284d6f 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -7,7 +7,7 @@
 from numpy.random import randn
 
 from datetime import datetime
-from pandas.compat import StringIO, iteritems, PY2
+from pandas.compat import StringIO, iteritems
 import pandas as pd
 from pandas import (DataFrame, concat,
                     read_csv, isna, Series, date_range,
@@ -946,8 +946,9 @@ def test_append_dtype_coerce(self):
                                    dt.datetime(2013, 1, 2, 0, 0),
                                    dt.datetime(2013, 1, 3, 0, 0),
                                    dt.datetime(2013, 1, 4, 0, 0)],
-                                  name='start_time')], axis=1)
-        result = df1.append(df2, ignore_index=True)
+                                  name='start_time')],
+                          axis=1, sort=True)
+        result = df1.append(df2, ignore_index=True, sort=True)
         assert_frame_equal(result, expected)
 
     def test_append_missing_column_proper_upcast(self):
@@ -1105,7 +1106,8 @@ def test_concat_ignore_index(self):
         frame1.index = Index(["x", "y", "z"])
         frame2.index = Index(["x", "y", "q"])
 
-        v1 = concat([frame1, frame2], axis=1, ignore_index=True)
+        v1 = concat([frame1, frame2], axis=1,
+                    ignore_index=True, sort=True)
 
         nan = np.nan
         expected = DataFrame([[nan, nan, nan, 4.3],
@@ -1557,7 +1559,7 @@ def test_concat_series_axis1(self):
         # must reindex, #2603
         s = Series(randn(3), index=['c', 'a', 'b'], name='A')
         s2 = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')
-        result = concat([s, s2], axis=1)
+        result = concat([s, s2], axis=1, sort=True)
         expected = DataFrame({'A': s, 'B': s2})
         assert_frame_equal(result, expected)
 
@@ -2164,8 +2166,6 @@ def test_concat_order(self):
                 for i in range(100)]
         result = pd.concat(dfs).columns
         expected = dfs[0].columns
-        if PY2:
-            expected = expected.sort_values()
         tm.assert_index_equal(result, expected)
 
     def test_concat_datetime_timezone(self):
@@ -2249,3 +2249,24 @@ def test_concat_empty_and_non_empty_series_regression():
     expected = s1
     result = pd.concat([s1, s2])
     tm.assert_series_equal(result, expected)
+
+
+def test_concat_preserve_column_order_differing_columns():
+    # GH 4588 regression test
+    # for new columns in concat
+    dfa = pd.DataFrame(columns=['C', 'A'], data=[[1, 2]])
+    dfb = pd.DataFrame(columns=['C', 'Z'], data=[[5, 6]])
+    result = pd.concat([dfa, dfb])
+    assert result.columns.tolist() == ['C', 'A', 'Z']
+
+
+def test_concat_preserve_column_order_uneven_data():
+    # GH 4588 regression test
+    # add to column, concat with uneven data
+    df = pd.DataFrame()
+    df['b'] = [1, 2, 3]
+    df['c'] = [1, 2, 3]
+    df['a'] = [1, 2, 3]
+    df2 = pd.DataFrame({'a': [4, 5]})
+    df3 = pd.concat([df, df2])
+    assert df3.columns.tolist() == ['b', 'c', 'a']

From f37d7ef9d6b9a6ba44fd7b1f059ad40e75b4ce4f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 26 Apr 2018 09:13:23 -0500
Subject: [PATCH 14/34] Updates

API: Updated the default to be compatible and warn.

DOC: updated the whatsnew and concat docstring.
---
 doc/source/whatsnew/v0.23.0.txt     | 26 ++++++++++++++++-
 pandas/core/indexes/api.py          | 20 ++++++++++++++
 pandas/core/reshape/concat.py       | 13 +++++++--
 pandas/tests/reshape/test_concat.py | 43 ++++++++++++++++++++++++++---
 4 files changed, 94 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index dd557bbfa45a2..cfb237691c57e 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -639,6 +639,31 @@ Returning a ``Series`` allows one to control the exact return structure and colu
 
     df.apply(lambda x: Series([1, 2, 3], index=['D', 'E', 'F']), axis=1)
 
+Concatenation will no longer sort
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned.
+The current behavior is the same as the previous (sorting), but now a warning is issued.
+
+.. ipython:: python
+   :okwarning:
+
+   df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
+   df2 = pd.DataFrame({"a": [4, 5]})
+
+   pd.concat([df1, df2])
+
+To keep the previous behavior (sorting) and silence the warning, pass ``sort=True``
+
+.. ipython:: python
+
+   pd.concat([df1, df2], sort=True)
+
+To accept the future behavior (no sorting), pass ``sort=False``
+
+.. ipython:: python
+
+   pd.concat([df1, df2], sort=False)
 
 .. _whatsnew_0230.api_breaking.build_changes:
 
@@ -1237,7 +1262,6 @@ Reshaping
 - Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`)
 - Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`)
 - Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`)
-- Stop :func:`concat` and ``Dataframe.append`` from sorting columns by default. Use ``sort=True`` to retain old behavior (:issue:`4588`)
 
 Other
 ^^^^^
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 75232e3db7e55..32cf5c47bbd6b 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -1,3 +1,6 @@
+import textwrap
+import warnings
+
 from pandas.core.indexes.base import (Index,
                                       _new_Index,
                                       _ensure_index,
@@ -17,6 +20,16 @@
 from pandas._libs import lib
 from pandas._libs.tslib import NaT
 
+_sort_msg = textwrap.dedent("""\
+Sorting because non-concatenation axis is not aligned. A future version
+of pandas will change to not sort by default.
+
+To accept the future behavior, pass 'sort=False'.
+
+To retain the current behavior and silence the warning, pass 'sort=True'.
+""")
+
+
 # TODO: there are many places that rely on these private methods existing in
 # pandas.core.index
 __all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index',
@@ -90,6 +103,12 @@ def conv(i):
         index = indexes[0]
         for other in indexes[1:]:
             if not index.equals(other):
+
+                if sort is None:
+                    # TODO: remove once pd.concat sort default changes
+                    warnings.warn(_sort_msg, FutureWarning, stacklevel=8)
+                    sort = True
+
                 return _unique_indices(indexes)
 
         name = _get_consensus_names(indexes)[0]
@@ -97,6 +116,7 @@ def conv(i):
             index = index._shallow_copy(name=name)
         return index
     else:
+        # XXX: here too?
         return _unique_indices(indexes)
 
 
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 531d1715cdf27..4879e32d8348b 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -20,7 +20,7 @@
 
 def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
            keys=None, levels=None, names=None, verify_integrity=False,
-           sort=False, copy=True):
+           sort=None, copy=True):
     """
     Concatenate pandas objects along a particular axis with optional set logic
     along the other axes.
@@ -60,8 +60,15 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
     verify_integrity : boolean, default False
         Check whether the new concatenated axis contains duplicates. This can
         be very expensive relative to the actual data concatenation
-    sort : boolean, default False
-        Sort columns if all passed object columns are not the same
+    sort : boolean, default None
+        Sort non-concatenation axis if it is not already aligned. The current
+        default of sorting is deprecated and will change to not-sorting in a
+        future version of pandas. Explicitly pass ``sort=True`` to silence
+        the warning and sort. Explicitly pass ``sort=False`` to silence the
+        warning and not sort.
+
+        .. versionadded:: 0.23.0
+
     copy : boolean, default True
         If False, do not copy data unnecessarily
 
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 8051f39284d6f..c4f7a3454c7f7 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -2251,13 +2251,43 @@ def test_concat_empty_and_non_empty_series_regression():
     tm.assert_series_equal(result, expected)
 
 
+def test_concat_sort_columns():
+    # GH-4588
+    df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
+    df2 = pd.DataFrame({"a": [3, 4]})
+
+    expected = pd.DataFrame({"a": [1, 2, 3, 4],
+                             "b": [1, 2, None, None]},
+                            columns=['a', 'b'])
+    with tm.assert_produces_warning(FutureWarning):
+        result = pd.concat([df1, df2], ignore_index=True)
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_concat_sorts_index():
+    df1 = pd.DataFrame({"a": [1, 2, 3]}, index=['c', 'a', 'b'])
+    df2 = pd.DataFrame({"b": [1, 2]}, index=['a', 'b'])
+
+    with tm.assert_produces_warning(FutureWarning):
+        result = pd.concat([df1, df2], axis=1)
+
+    expected = pd.DataFrame({"a": [2, 3, 1], "b": [1, 2, None]},
+                            index=['a', 'b', 'c'],
+                            columns=['a', 'b'])
+    tm.assert_frame_equal(result, expected)
+
+
 def test_concat_preserve_column_order_differing_columns():
     # GH 4588 regression test
     # for new columns in concat
     dfa = pd.DataFrame(columns=['C', 'A'], data=[[1, 2]])
     dfb = pd.DataFrame(columns=['C', 'Z'], data=[[5, 6]])
-    result = pd.concat([dfa, dfb])
-    assert result.columns.tolist() == ['C', 'A', 'Z']
+    result = pd.concat([dfa, dfb], ignore_index=True)
+
+    expected = pd.DataFrame({"A": [2, None], "C": [1, 5],
+                             "Z": [None, 6]}, columns=["A", "C", "Z"])
+    tm.assert_frame_equal(result, expected)
 
 
 def test_concat_preserve_column_order_uneven_data():
@@ -2268,5 +2298,10 @@ def test_concat_preserve_column_order_uneven_data():
     df['c'] = [1, 2, 3]
     df['a'] = [1, 2, 3]
     df2 = pd.DataFrame({'a': [4, 5]})
-    df3 = pd.concat([df, df2])
-    assert df3.columns.tolist() == ['b', 'c', 'a']
+    result = pd.concat([df, df2])
+    expected = pd.DataFrame({
+        'a': [1, 2, 3, 4, 5],
+        'b': [1, 2, 3, None, None],
+        'c': [1, 2, 3, None, None]
+    }, index=[0, 1, 2, 0, 1])
+    tm.assert_frame_equal(result, expected)

From e467f91f97f9b9f5ba7f18d2eeda5b771e58cfe9 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 26 Apr 2018 15:46:06 -0500
Subject: [PATCH 15/34] Test fallout

---
 pandas/core/base.py                        |  2 +-
 pandas/core/groupby/groupby.py             |  3 +-
 pandas/core/indexes/api.py                 |  3 +-
 pandas/tests/indexing/test_iloc.py         |  3 +-
 pandas/tests/indexing/test_partial.py      |  5 +--
 pandas/tests/reshape/merge/test_merge.py   |  2 +-
 pandas/tests/reshape/test_concat.py        | 11 ++++---
 pandas/tests/sparse/test_combine_concat.py | 38 +++++++++++++++-------
 8 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/pandas/core/base.py b/pandas/core/base.py
index 9ca1c8bea4db7..2f25a9ce41369 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -507,7 +507,7 @@ def is_any_frame():
                            for r in compat.itervalues(result))
 
             if isinstance(result, list):
-                return concat(result, keys=keys, axis=1), True
+                return concat(result, keys=keys, axis=1, sort=True), True
 
             elif is_any_frame():
                 # we have a dict of DataFrames
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 8c20d62117e25..00ea96890dd27 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1098,7 +1098,8 @@ def reset_identity(values):
                 group_names = self.grouper.names
 
                 result = concat(values, axis=self.axis, keys=group_keys,
-                                levels=group_levels, names=group_names)
+                                levels=group_levels, names=group_names,
+                                sort=True)
             else:
 
                 # GH5610, returns a MI, with the first level being a
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 32cf5c47bbd6b..b919c8ab9a23f 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -115,8 +115,7 @@ def conv(i):
         if name != index.name:
             index = index._shallow_copy(name=name)
         return index
-    else:
-        # XXX: here too?
+    else:  # kind='list'
         return _unique_indices(indexes)
 
 
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index f1178d44dbfe0..bfc74db73b813 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -629,7 +629,8 @@ def test_iloc_non_unique_indexing(self):
             new_list.append(s * 3)
 
         expected = DataFrame(new_list)
-        expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])])
+        expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])],
+                          sort=True)
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
             result = df2.loc[idx]
         tm.assert_frame_equal(result, expected, check_index_type=False)
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index f95f493c66043..3c7a7f070805d 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -156,8 +156,9 @@ def f():
         df_orig = DataFrame(np.random.randn(8, 4), index=dates,
                             columns=['A', 'B', 'C', 'D'])
 
-        expected = pd.concat([df_orig, DataFrame(
-            {'A': 7}, index=[dates[-1] + 1])])
+        expected = pd.concat([df_orig,
+                              DataFrame({'A': 7}, index=[dates[-1] + 1])],
+                             sort=True)
         df = df_orig.copy()
         df.loc[dates[-1] + 1, 'A'] = 7
         tm.assert_frame_equal(df, expected)
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index dbf7c7f100b0e..4f68514e8fcaf 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -1286,7 +1286,7 @@ def test_join_multi_levels(self):
                 index=MultiIndex.from_tuples(
                     [(4, np.nan)],
                     names=['household_id', 'asset_id'])))
-        ], axis=0).reindex(columns=expected.columns))
+        ], axis=0, sort=True).reindex(columns=expected.columns))
         assert_frame_equal(result, expected)
 
         # invalid cases
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index c4f7a3454c7f7..ef21181452bc2 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1318,7 +1318,7 @@ def test_with_mixed_tuples(self):
         df2 = DataFrame({u'B': 'foo', (u'B', 1): 'bar'}, index=range(2))
 
         # it works
-        concat([df1, df2])
+        concat([df1, df2], sort=True)
 
     def test_handle_empty_objects(self):
         df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))
@@ -1328,7 +1328,7 @@ def test_handle_empty_objects(self):
         empty = df[5:5]
 
         frames = [baz, empty, empty, df[5:]]
-        concatted = concat(frames, axis=0)
+        concatted = concat(frames, axis=0, sort=True)
 
         expected = df.reindex(columns=['a', 'b', 'c', 'd', 'foo'])
         expected['foo'] = expected['foo'].astype('O')
@@ -2056,7 +2056,7 @@ def test_categorical_concat(self):
         cat_values = ["one", "one", "two", "one", "two", "two", "one"]
         df2['h'] = Series(Categorical(cat_values))
 
-        res = pd.concat((df1, df2), axis=0, ignore_index=True)
+        res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=True)
         exp = DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
                          'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan,
                                np.nan, np.nan, np.nan, np.nan],
@@ -2165,6 +2165,7 @@ def test_concat_order(self):
         dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
                 for i in range(100)]
         result = pd.concat(dfs).columns
+
         expected = dfs[0].columns
         tm.assert_index_equal(result, expected)
 
@@ -2283,7 +2284,7 @@ def test_concat_preserve_column_order_differing_columns():
     # for new columns in concat
     dfa = pd.DataFrame(columns=['C', 'A'], data=[[1, 2]])
     dfb = pd.DataFrame(columns=['C', 'Z'], data=[[5, 6]])
-    result = pd.concat([dfa, dfb], ignore_index=True)
+    result = pd.concat([dfa, dfb], ignore_index=True, sort=True)
 
     expected = pd.DataFrame({"A": [2, None], "C": [1, 5],
                              "Z": [None, 6]}, columns=["A", "C", "Z"])
@@ -2298,7 +2299,7 @@ def test_concat_preserve_column_order_uneven_data():
     df['c'] = [1, 2, 3]
     df['a'] = [1, 2, 3]
     df2 = pd.DataFrame({'a': [4, 5]})
-    result = pd.concat([df, df2])
+    result = pd.concat([df, df2], sort=True)
     expected = pd.DataFrame({
         'a': [1, 2, 3, 4, 5],
         'b': [1, 2, 3, None, None],
diff --git a/pandas/tests/sparse/test_combine_concat.py b/pandas/tests/sparse/test_combine_concat.py
index 70fd1da529d46..9e392457edbc3 100644
--- a/pandas/tests/sparse/test_combine_concat.py
+++ b/pandas/tests/sparse/test_combine_concat.py
@@ -202,17 +202,29 @@ def test_concat_different_fill_value(self):
         exp._default_fill_value = np.nan
         tm.assert_sp_frame_equal(res, exp)
 
+    def test_concat_different_columns_sort_warns(self):
+        sparse = self.dense1.to_sparse()
+        sparse3 = self.dense3.to_sparse()
+
+        with tm.assert_produces_warning(FutureWarning):
+            res = pd.concat([sparse, sparse3])
+        with tm.assert_produces_warning(FutureWarning):
+            exp = pd.concat([self.dense1, self.dense3])
+
+        exp = exp.to_sparse()
+        tm.assert_sp_frame_equal(res, exp)
+
     def test_concat_different_columns(self):
         # fill_value = np.nan
         sparse = self.dense1.to_sparse()
         sparse3 = self.dense3.to_sparse()
 
-        res = pd.concat([sparse, sparse3])
-        exp = pd.concat([self.dense1, self.dense3]).to_sparse()
+        res = pd.concat([sparse, sparse3], sort=True)
+        exp = pd.concat([self.dense1, self.dense3], sort=True).to_sparse()
         tm.assert_sp_frame_equal(res, exp)
 
-        res = pd.concat([sparse3, sparse])
-        exp = pd.concat([self.dense3, self.dense1]).to_sparse()
+        res = pd.concat([sparse3, sparse], sort=True)
+        exp = pd.concat([self.dense3, self.dense1], sort=True).to_sparse()
         exp._default_fill_value = np.nan
         tm.assert_sp_frame_equal(res, exp)
 
@@ -220,13 +232,15 @@ def test_concat_different_columns(self):
         sparse = self.dense1.to_sparse(fill_value=0)
         sparse3 = self.dense3.to_sparse(fill_value=0)
 
-        res = pd.concat([sparse, sparse3])
-        exp = pd.concat([self.dense1, self.dense3]).to_sparse(fill_value=0)
+        res = pd.concat([sparse, sparse3], sort=True)
+        exp = (pd.concat([self.dense1, self.dense3], sort=True)
+                 .to_sparse(fill_value=0))
         exp._default_fill_value = np.nan
         tm.assert_sp_frame_equal(res, exp)
 
-        res = pd.concat([sparse3, sparse])
-        exp = pd.concat([self.dense3, self.dense1]).to_sparse(fill_value=0)
+        res = pd.concat([sparse3, sparse], sort=True)
+        exp = (pd.concat([self.dense3, self.dense1], sort=True)
+                 .to_sparse(fill_value=0))
         exp._default_fill_value = np.nan
         tm.assert_sp_frame_equal(res, exp)
 
@@ -234,13 +248,13 @@ def test_concat_different_columns(self):
         sparse = self.dense1.to_sparse()
         sparse3 = self.dense3.to_sparse(fill_value=0)
         # each columns keeps its fill_value, thus compare in dense
-        res = pd.concat([sparse, sparse3])
-        exp = pd.concat([self.dense1, self.dense3])
+        res = pd.concat([sparse, sparse3], sort=True)
+        exp = pd.concat([self.dense1, self.dense3], sort=True)
         assert isinstance(res, pd.SparseDataFrame)
         tm.assert_frame_equal(res.to_dense(), exp)
 
-        res = pd.concat([sparse3, sparse])
-        exp = pd.concat([self.dense3, self.dense1])
+        res = pd.concat([sparse3, sparse], sort=True)
+        exp = pd.concat([self.dense3, self.dense1], sort=True)
         assert isinstance(res, pd.SparseDataFrame)
         tm.assert_frame_equal(res.to_dense(), exp)
 

From 058fae5d76108f69dd578c74c9778daaf3603078 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 26 Apr 2018 16:14:05 -0500
Subject: [PATCH 16/34] Updated append

---
 doc/source/whatsnew/v0.23.0.txt     |  5 ++-
 pandas/core/frame.py                | 10 ++++--
 pandas/tests/reshape/test_concat.py | 48 ++++++++++++++++++++++-------
 3 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index cfb237691c57e..241fb46df7f25 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -643,7 +643,7 @@ Concatenation will no longer sort
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned.
-The current behavior is the same as the previous (sorting), but now a warning is issued.
+The current behavior is the same as the previous (sorting), but now a warning is issued (:issue:`4588`).
 
 .. ipython:: python
    :okwarning:
@@ -665,6 +665,9 @@ To accept the future behavior (no sorting), pass ``sort=False``
 
    pd.concat([df1, df2], sort=False)
 
+Note that this change also applies to :meth:`DataFrame.append`, which has also received a ``sort`` keyword for controlling this behavior.
+
+
 .. _whatsnew_0230.api_breaking.build_changes:
 
 Build Changes
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5f884d5426d47..db312dc67c986 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6039,7 +6039,7 @@ def infer(x):
     # Merging / joining methods
 
     def append(self, other, ignore_index=False,
-               verify_integrity=False, sort=False):
+               verify_integrity=False, sort=None):
         """
         Append rows of `other` to the end of this frame, returning a new
         object. Columns not in this frame are added as new columns.
@@ -6052,8 +6052,12 @@ def append(self, other, ignore_index=False,
             If True, do not use the index labels.
         verify_integrity : boolean, default False
             If True, raise ValueError on creating index with duplicates.
-        sort: boolean, default False
-            Sort columns if given object doesn't have the same columns
+        sort : boolean, default None
+            Sort columns if the columns of `self` and `other` are not aligned.
+            The default sorting is deprecated and will change to not-sorting
+            in a future version of pandas. Explicitly pass ``sort=True`` to
+            silence the warning and sort. Explicitly pass ``sort=False`` to
+            silence the warning and not sort.
 
         Returns
         -------
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index ef21181452bc2..494d340d7f880 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -727,10 +727,10 @@ def test_append(self):
         tm.assert_almost_equal(appended['A'], self.frame['A'])
 
         del end_frame['A']
-        partial_appended = begin_frame.append(end_frame)
+        partial_appended = begin_frame.append(end_frame, sort=True)
         assert 'A' in partial_appended
 
-        partial_appended = end_frame.append(begin_frame)
+        partial_appended = end_frame.append(begin_frame, sort=True)
         assert 'A' in partial_appended
 
         # mixed type handling
@@ -738,8 +738,9 @@ def test_append(self):
         tm.assert_frame_equal(appended, self.mixed_frame)
 
         # what to test here
-        mixed_appended = self.mixed_frame[:5].append(self.frame[5:])
-        mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:])
+        mixed_appended = self.mixed_frame[:5].append(self.frame[5:], sort=True)
+        mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:],
+                                                sort=True)
 
         # all equal except 'foo' column
         tm.assert_frame_equal(
@@ -772,7 +773,7 @@ def test_append(self):
     def test_append_length0_frame(self):
         df = DataFrame(columns=['A', 'B', 'C'])
         df3 = DataFrame(index=[0, 1], columns=['A', 'B'])
-        df5 = df.append(df3)
+        df5 = df.append(df3, sort=True)
 
         expected = DataFrame(index=[0, 1], columns=['A', 'B', 'C'])
         assert_frame_equal(df5, expected)
@@ -793,6 +794,31 @@ def test_append_records(self):
         expected = DataFrame(np.concatenate((arr1, arr2)))
         assert_frame_equal(result, expected)
 
+    def test_append_sorts(self):
+        df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
+        df2 = pd.DataFrame({"a": [1, 2], 'c': [3, 4]}, index=[2, 3])
+        # default, changing in the future
+
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            # from append we have an extra function call. Not worth hacking
+            # around to get the right stacklevel.
+            result = df1.append(df2)
+
+        expected = pd.DataFrame({"b": [1, 2, None, None],
+                                 "a": [1, 2, 1, 2],
+                                 "c": [None, None, 3, 4]},
+                                columns=['a', 'b', 'c'])
+        tm.assert_frame_equal(result, expected)
+
+        # sort=True, the previous behavior
+        result = df1.append(df2, sort=True)
+        tm.assert_frame_equal(result, expected)
+
+        # sort=False, the future behavior.
+        result = df1.append(df2, sort=False)
+        expected = expected[['b', 'a', 'c']]
+        tm.assert_frame_equal(result, expected)
+
     def test_append_different_columns(self):
         df = DataFrame({'bools': np.random.randn(10) > 0,
                         'ints': np.random.randint(0, 10, 10),
@@ -802,7 +828,7 @@ def test_append_different_columns(self):
         a = df[:5].loc[:, ['bools', 'ints', 'floats']]
         b = df[5:].loc[:, ['strings', 'ints', 'floats']]
 
-        appended = a.append(b)
+        appended = a.append(b, sort=True)
         assert isna(appended['strings'][0:4]).all()
         assert isna(appended['bools'][5:]).all()
 
@@ -815,7 +841,7 @@ def test_append_many(self):
 
         chunks[-1] = chunks[-1].copy()
         chunks[-1]['foo'] = 'bar'
-        result = chunks[0].append(chunks[1:])
+        result = chunks[0].append(chunks[1:], sort=True)
         tm.assert_frame_equal(result.loc[:, self.frame.columns], self.frame)
         assert (result['foo'][15:] == 'bar').all()
         assert result['foo'][:15].isna().all()
@@ -956,7 +982,7 @@ def test_append_missing_column_proper_upcast(self):
         df2 = DataFrame({'B': np.array([True, False, True, False],
                                        dtype=bool)})
 
-        appended = df1.append(df2, ignore_index=True)
+        appended = df1.append(df2, ignore_index=True, sort=True)
         assert appended['A'].dtype == 'f8'
         assert appended['B'].dtype == 'O'
 
@@ -1052,7 +1078,7 @@ def test_concat_dataframe_keys_bug(self):
             'value': Series([7, 8], index=Index(['a', 'b'], name='id'))})
 
         # it works
-        result = concat([t1, t2], axis=1, keys=['t1', 't2'])
+        result = concat([t1, t2], axis=1, keys=['t1', 't2'], sort=True)
         assert list(result.columns) == [('t1', 'value'), ('t2', 'value')]
 
     def test_concat_series_partial_columns_names(self):
@@ -1505,7 +1531,7 @@ def df():
             panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2)
 
             # it works!
-            concat([panel1, panel3], axis=1, verify_integrity=True)
+            concat([panel1, panel3], axis=1, verify_integrity=True, sort=True)
 
     def test_concat_series(self):
 
@@ -2164,7 +2190,7 @@ def test_concat_order(self):
         dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])]
         dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
                 for i in range(100)]
-        result = pd.concat(dfs).columns
+        result = pd.concat(dfs, sort=True).columns
 
         expected = dfs[0].columns
         tm.assert_index_equal(result, expected)

From 04e51518c31d62da9ca7c95ce53388e1edec06e0 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 26 Apr 2018 16:15:44 -0500
Subject: [PATCH 17/34] versionadded

---
 pandas/core/frame.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index db312dc67c986..9c7a1e123dbc5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6059,6 +6059,8 @@ def append(self, other, ignore_index=False,
             silence the warning and sort. Explicitly pass ``sort=False`` to
             silence the warning and not sort.
 
+            .. versionadded:: 0.23.0
+
         Returns
         -------
         appended : DataFrame

From c864679d58ebcf02bed3fdf37c8b33e7945148ba Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 27 Apr 2018 06:49:21 -0500
Subject: [PATCH 18/34] Squash more test warnings

---
 pandas/tests/frame/test_combine_concat.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py
index e82faaeef2986..15ca65395e4fc 100644
--- a/pandas/tests/frame/test_combine_concat.py
+++ b/pandas/tests/frame/test_combine_concat.py
@@ -96,7 +96,7 @@ def test_append_series_dict(self):
 
         result = df.append(series[::-1][:3], ignore_index=True)
         expected = df.append(DataFrame({0: series[::-1][:3]}).T,
-                             ignore_index=True)
+                             ignore_index=True, sort=True)
         assert_frame_equal(result, expected.loc[:, result.columns])
 
         # can append when name set
@@ -119,8 +119,8 @@ def test_append_list_of_series_dicts(self):
         # different columns
         dicts = [{'foo': 1, 'bar': 2, 'baz': 3, 'peekaboo': 4},
                  {'foo': 5, 'bar': 6, 'baz': 7, 'peekaboo': 8}]
-        result = df.append(dicts, ignore_index=True)
-        expected = df.append(DataFrame(dicts), ignore_index=True)
+        result = df.append(dicts, ignore_index=True, sort=True)
+        expected = df.append(DataFrame(dicts), ignore_index=True, sort=True)
         assert_frame_equal(result, expected)
 
     def test_append_empty_dataframe(self):

From 7e975c9649cf8401889f0fdbbba5d447217aa2a5 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 27 Apr 2018 07:22:04 -0500
Subject: [PATCH 19/34] py2 compat

---
 pandas/tests/reshape/test_concat.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 494d340d7f880..ae7a2c2c5f5fd 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -7,7 +7,7 @@
 from numpy.random import randn
 
 from datetime import datetime
-from pandas.compat import StringIO, iteritems
+from pandas.compat import StringIO, iteritems, PY2
 import pandas as pd
 from pandas import (DataFrame, concat,
                     read_csv, isna, Series, date_range,
@@ -2190,9 +2190,15 @@ def test_concat_order(self):
         dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])]
         dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a'])
                 for i in range(100)]
+
         result = pd.concat(dfs, sort=True).columns
 
-        expected = dfs[0].columns
+        if PY2:
+            # Index.union orders mutually incomparable objects differently
+            # on Python 2 and Python 3.
+            expected = dfs[1].columns
+        else:
+            expected = dfs[0].columns
         tm.assert_index_equal(result, expected)
 
     def test_concat_datetime_timezone(self):

From a8ba4307bb051ebbd9e6caa8251236dd9af3be85 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 27 Apr 2018 10:02:15 -0500
Subject: [PATCH 20/34] Document outer is not affected

---
 pandas/core/reshape/concat.py       | 14 +++++++++-----
 pandas/tests/reshape/test_concat.py | 17 +++++++++++++++++
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 4879e32d8348b..b36e9b8d900fd 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -61,11 +61,15 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
         Check whether the new concatenated axis contains duplicates. This can
         be very expensive relative to the actual data concatenation
     sort : boolean, default None
-        Sort non-concatenation axis if it is not already aligned. The current
-        default of sorting is deprecated and will change to not-sorting in a
-        future version of pandas. Explicitly pass ``sort=True`` to silence
-        the warning and sort. Explicitly pass ``sort=False`` to silence the
-        warning and not sort.
+        Sort non-concatenation axis if it is not already aligned when `join`
+        is 'outer'. The current default of sorting is deprecated and will
+        change to not-sorting in a future version of pandas.
+
+        Explicitly pass ``sort=True`` to silence the warning and sort.
+        Explicitly pass ``sort=False`` to silence the warning and not sort.
+
+        This has no effect when ``join='inner'``, which already preserves
+        the order of the non-concatenation axis.
 
         .. versionadded:: 0.23.0
 
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index ae7a2c2c5f5fd..f74d652ae012c 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -2311,6 +2311,23 @@ def test_concat_sorts_index():
     tm.assert_frame_equal(result, expected)
 
 
+def test_concat_inner_sort_unaffected():
+    # https://github.com/pandas-dev/pandas/pull/20613
+    df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]},
+                       columns=['b', 'a', 'c'])
+    df2 = pd.DataFrame({"a": [1, 2], 'b': [3, 4]}, index=[3, 4])
+    with tm.assert_produces_warning(None):
+        r0 = pd.concat([df1, df2], join='inner', ignore_index=True)
+    r1 = pd.concat([df1, df2], join='inner', sort=True, ignore_index=True)
+    r2 = pd.concat([df1, df2], join='inner', sort=True, ignore_index=True)
+
+    expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]},
+                            columns=['b', 'a'])
+    tm.assert_frame_equal(r0, expected)
+    tm.assert_frame_equal(r1, expected)
+    tm.assert_frame_equal(r2, expected)
+
+
 def test_concat_preserve_column_order_differing_columns():
     # GH 4588 regression test
     # for new columns in concat

From 62b1e7bd124a59349dfee80458d4956b78fe2359 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 27 Apr 2018 10:08:31 -0500
Subject: [PATCH 21/34] Docs

---
 doc/source/merging.rst | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/doc/source/merging.rst b/doc/source/merging.rst
index 74b21c21252ec..de37dc6dab59e 100644
--- a/doc/source/merging.rst
+++ b/doc/source/merging.rst
@@ -153,10 +153,10 @@ Set logic on the other axes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 When gluing together multiple DataFrames, you have a choice of how to handle
-the other axes (other than the one being concatenated). This can be done in 
+the other axes (other than the one being concatenated). This can be done in
 the following three ways:
 
-- Take the (sorted) union of them all, ``join='outer'``. This is the default
+- Take the union of them all, ``join='outer'``. This is the default
   option as it results in zero information loss.
 - Take the intersection, ``join='inner'``.
 - Use a specific index, as passed to the ``join_axes`` argument.
@@ -167,10 +167,10 @@ behavior:
 .. ipython:: python
 
    df4 = pd.DataFrame({'B': ['B2', 'B3', 'B6', 'B7'],
-                    'D': ['D2', 'D3', 'D6', 'D7'],
-                    'F': ['F2', 'F3', 'F6', 'F7']},
-                   index=[2, 3, 6, 7])
-   result = pd.concat([df1, df4], axis=1)
+                       'D': ['D2', 'D3', 'D6', 'D7'],
+                       'F': ['F2', 'F3', 'F6', 'F7']},
+                      index=[2, 3, 6, 7])
+   result = pd.concat([df1, df4], axis=1, sort=False)
 
 
 .. ipython:: python
@@ -181,8 +181,14 @@ behavior:
           labels=['df1', 'df4'], vertical=False);
    plt.close('all');
 
-Note that the row indexes have been unioned and sorted. Here is the same thing
-with ``join='inner'``:
+.. versionchanged:: 0.23.0
+
+   The default behavior with ``join='outer'`` is to sort the other axis
+   (columns in this case). In a future version of pandas, the default will
+   be to not sort. We specified ``sort=False`` to opt in to the new
+   behavior now.
+
+Here is the same thing with ``join='inner'``:
 
 .. ipython:: python
 

From 0ace673e72da77d4ffceeca6f96eb5a0e0b3d7ec Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 28 Apr 2018 13:32:04 -0500
Subject: [PATCH 22/34] Sort for intersection

---
 doc/source/merging.rst              |  4 +++-
 doc/source/whatsnew/v0.23.0.txt     |  2 ++
 pandas/core/indexes/api.py          |  7 +++++--
 pandas/tests/reshape/test_concat.py | 24 ++++++++++++++++++++++++
 4 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/doc/source/merging.rst b/doc/source/merging.rst
index de37dc6dab59e..1161656731f88 100644
--- a/doc/source/merging.rst
+++ b/doc/source/merging.rst
@@ -181,7 +181,9 @@ behavior:
           labels=['df1', 'df4'], vertical=False);
    plt.close('all');
 
-.. versionchanged:: 0.23.0
+.. warning::
+
+   .. versionchanged:: 0.23.0
 
    The default behavior with ``join='outer'`` is to sort the other axis
    (columns in this case). In a future version of pandas, the default will
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 241fb46df7f25..6c2b64f402f0a 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -639,6 +639,8 @@ Returning a ``Series`` allows one to control the exact return structure and colu
 
     df.apply(lambda x: Series([1, 2, 3], index=['D', 'E', 'F']), axis=1)
 
+.. _whatsnew_0230.api_breaking.concat:
+
 Concatenation will no longer sort
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index b919c8ab9a23f..f345c21b2f2f0 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -60,7 +60,10 @@ def _get_combined_index(indexes, intersect=False, sort=True):
     if len(indexes) == 0:
         return Index([])
     if len(indexes) == 1:
-        return indexes[0]
+        index = indexes[0]
+        if sort:
+            index = index.sort_values()
+        return index
     if intersect:
         index = indexes[0]
         for other in indexes[1:]:
@@ -115,7 +118,7 @@ def conv(i):
         if name != index.name:
             index = index._shallow_copy(name=name)
         return index
-    else:  # kind='list
+    else:  # kind='list'
         return _unique_indices(indexes)
 
 
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index f74d652ae012c..fc91647321a05 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -2355,3 +2355,27 @@ def test_concat_preserve_column_order_uneven_data():
         'c': [1, 2, 3, None, None]
     }, index=[0, 1, 2, 0, 1])
     tm.assert_frame_equal(result, expected)
+
+
+def test_concat_aligned_sort():
+    # GH-4588
+    df = pd.DataFrame({"b": [1, 2], "a": [3, 4]}, columns=['b', 'a'])
+    result = pd.concat([df, df], sort=True, ignore_index=True)
+    expected = pd.DataFrame({'b': [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
+                            columns=['a', 'b'])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_concat_aligned_sort_raises():
+    # GH-4588
+    df = pd.DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, 'a'])
+
+    if PY2:
+        expected = pd.DataFrame({1: [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
+                                columns=[1, 'a'])
+        result = pd.concat([df, df], ignore_index=True)
+        tm.assert_frame_equal(result, expected)
+    else:
+        msg = "'<' not supported between instances"
+        with tm.assert_raises_regex(TypeError, msg):
+            pd.concat([df, df], sort=True)

From d5cafdf95cb43fadaf59a2e0fd21494b0a30ae41 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 28 Apr 2018 14:46:11 -0500
Subject: [PATCH 23/34] More tests

---
 pandas/core/indexes/api.py               |  22 ++--
 pandas/tests/reshape/merge/test_merge.py |   2 +-
 pandas/tests/reshape/test_concat.py      | 126 +++++++++++++----------
 3 files changed, 88 insertions(+), 62 deletions(-)

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index f345c21b2f2f0..07ddbcc6fec18 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -58,19 +58,23 @@ def _get_combined_index(indexes, intersect=False, sort=True):
     # TODO: handle index names!
     indexes = com._get_distinct_objs(indexes)
     if len(indexes) == 0:
-        return Index([])
-    if len(indexes) == 1:
+        index = Index([])
+    elif len(indexes) == 1:
         index = indexes[0]
-        if sort:
-            index = index.sort_values()
-        return index
-    if intersect:
+    elif intersect:
         index = indexes[0]
         for other in indexes[1:]:
             index = index.intersection(other)
-        return index
-    union = _union_indexes(indexes, sort=sort)
-    return _ensure_index(union)
+    else:
+        index = _union_indexes(indexes, sort=sort)
+        index = _ensure_index(index)
+
+    if sort and not index.is_monotonic_increasing:
+        try:
+            index = index.sort_values()
+        except TypeError:
+            pass
+    return index
 
 
 def _union_indexes(indexes, sort=True):
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 4f68514e8fcaf..f3827ac251cf0 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -826,7 +826,7 @@ def test_validation(self):
 
         # Dups on left
         left_w_dups = left.append(pd.DataFrame({'a': ['a'], 'c': ['cow']},
-                                               index=[3]))
+                                               index=[3]), sort=True)
         merge(left_w_dups, right, left_index=True, right_index=True,
               validate='many_to_one')
 
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index fc91647321a05..67471bd2d2c6d 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -21,6 +21,12 @@
 import pytest
 
 
+@pytest.fixture(params=[True, False])
+def sort(request):
+    """Boolean sort keyword for concat and DataFrame.append."""
+    return request.param
+
+
 class ConcatenateBase(object):
 
     def setup_method(self, method):
@@ -716,7 +722,7 @@ def test_concat_categorical_empty(self):
 
 class TestAppend(ConcatenateBase):
 
-    def test_append(self):
+    def test_append(self, sort):
         begin_index = self.frame.index[:5]
         end_index = self.frame.index[5:]
 
@@ -727,10 +733,10 @@ def test_append(self):
         tm.assert_almost_equal(appended['A'], self.frame['A'])
 
         del end_frame['A']
-        partial_appended = begin_frame.append(end_frame, sort=True)
+        partial_appended = begin_frame.append(end_frame, sort=sort)
         assert 'A' in partial_appended
 
-        partial_appended = end_frame.append(begin_frame, sort=True)
+        partial_appended = end_frame.append(begin_frame, sort=sort)
         assert 'A' in partial_appended
 
         # mixed type handling
@@ -738,9 +744,9 @@ def test_append(self):
         tm.assert_frame_equal(appended, self.mixed_frame)
 
         # what to test here
-        mixed_appended = self.mixed_frame[:5].append(self.frame[5:], sort=True)
+        mixed_appended = self.mixed_frame[:5].append(self.frame[5:], sort=sort)
         mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:],
-                                                sort=True)
+                                                sort=sort)
 
         # all equal except 'foo' column
         tm.assert_frame_equal(
@@ -770,10 +776,10 @@ def test_append(self):
         result = df.append(row)
         tm.assert_frame_equal(result, expected)
 
-    def test_append_length0_frame(self):
+    def test_append_length0_frame(self, sort):
         df = DataFrame(columns=['A', 'B', 'C'])
         df3 = DataFrame(index=[0, 1], columns=['A', 'B'])
-        df5 = df.append(df3, sort=True)
+        df5 = df.append(df3, sort=sort)
 
         expected = DataFrame(index=[0, 1], columns=['A', 'B', 'C'])
         assert_frame_equal(df5, expected)
@@ -819,7 +825,7 @@ def test_append_sorts(self):
         expected = expected[['b', 'a', 'c']]
         tm.assert_frame_equal(result, expected)
 
-    def test_append_different_columns(self):
+    def test_append_different_columns(self, sort):
         df = DataFrame({'bools': np.random.randn(10) > 0,
                         'ints': np.random.randint(0, 10, 10),
                         'floats': np.random.randn(10),
@@ -828,11 +834,11 @@ def test_append_different_columns(self):
         a = df[:5].loc[:, ['bools', 'ints', 'floats']]
         b = df[5:].loc[:, ['strings', 'ints', 'floats']]
 
-        appended = a.append(b, sort=True)
+        appended = a.append(b, sort=sort)
         assert isna(appended['strings'][0:4]).all()
         assert isna(appended['bools'][5:]).all()
 
-    def test_append_many(self):
+    def test_append_many(self, sort):
         chunks = [self.frame[:5], self.frame[5:10],
                   self.frame[10:15], self.frame[15:]]
 
@@ -841,7 +847,7 @@ def test_append_many(self):
 
         chunks[-1] = chunks[-1].copy()
         chunks[-1]['foo'] = 'bar'
-        result = chunks[0].append(chunks[1:], sort=True)
+        result = chunks[0].append(chunks[1:], sort=sort)
         tm.assert_frame_equal(result.loc[:, self.frame.columns], self.frame)
         assert (result['foo'][15:] == 'bar').all()
         assert result['foo'][:15].isna().all()
@@ -949,7 +955,7 @@ def test_append_different_columns_types_raises(
         with pytest.raises(TypeError):
             df.append(ser)
 
-    def test_append_dtype_coerce(self):
+    def test_append_dtype_coerce(self, sort):
 
         # GH 4993
         # appending with datetime will incorrectly convert datetime64
@@ -973,16 +979,21 @@ def test_append_dtype_coerce(self):
                                    dt.datetime(2013, 1, 3, 0, 0),
                                    dt.datetime(2013, 1, 4, 0, 0)],
                                   name='start_time')],
-                          axis=1, sort=True)
-        result = df1.append(df2, ignore_index=True, sort=True)
+                          axis=1, sort=sort)
+        result = df1.append(df2, ignore_index=True, sort=sort)
+        if sort:
+            expected = expected[['end_time', 'start_time']]
+        else:
+            expected = expected[['start_time', 'end_time']]
+
         assert_frame_equal(result, expected)
 
-    def test_append_missing_column_proper_upcast(self):
+    def test_append_missing_column_proper_upcast(self, sort):
         df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8')})
         df2 = DataFrame({'B': np.array([True, False, True, False],
                                        dtype=bool)})
 
-        appended = df1.append(df2, ignore_index=True, sort=True)
+        appended = df1.append(df2, ignore_index=True, sort=sort)
         assert appended['A'].dtype == 'f8'
         assert appended['B'].dtype == 'O'
 
@@ -1070,7 +1081,7 @@ def test_concat_keys_specific_levels(self):
                               Index(level, name='group_key'))
         assert result.columns.names[0] == 'group_key'
 
-    def test_concat_dataframe_keys_bug(self):
+    def test_concat_dataframe_keys_bug(self, sort):
         t1 = DataFrame({
             'value': Series([1, 2, 3], index=Index(['a', 'b', 'c'],
                                                    name='id'))})
@@ -1078,7 +1089,7 @@ def test_concat_dataframe_keys_bug(self):
             'value': Series([7, 8], index=Index(['a', 'b'], name='id'))})
 
         # it works
-        result = concat([t1, t2], axis=1, keys=['t1', 't2'], sort=True)
+        result = concat([t1, t2], axis=1, keys=['t1', 't2'], sort=sort)
         assert list(result.columns) == [('t1', 'value'), ('t2', 'value')]
 
     def test_concat_series_partial_columns_names(self):
@@ -1124,7 +1135,7 @@ def test_concat_dict(self):
         expected = concat([frames[k] for k in keys], keys=keys)
         tm.assert_frame_equal(result, expected)
 
-    def test_concat_ignore_index(self):
+    def test_concat_ignore_index(self, sort):
         frame1 = DataFrame({"test1": ["a", "b", "c"],
                             "test2": [1, 2, 3],
                             "test3": [4.5, 3.2, 1.2]})
@@ -1133,7 +1144,7 @@ def test_concat_ignore_index(self):
         frame2.index = Index(["x", "y", "q"])
 
         v1 = concat([frame1, frame2], axis=1,
-                    ignore_index=True, sort=True)
+                    ignore_index=True, sort=sort)
 
         nan = np.nan
         expected = DataFrame([[nan, nan, nan, 4.3],
@@ -1141,6 +1152,8 @@ def test_concat_ignore_index(self):
                               ['b', 2, 3.2, 2.2],
                               ['c', 3, 1.2, nan]],
                              index=Index(["q", "x", "y", "z"]))
+        if not sort:
+            expected = expected.loc[['x', 'y', 'z', 'q']]
 
         tm.assert_frame_equal(v1, expected)
 
@@ -1337,16 +1350,16 @@ def test_dups_index(self):
         result = df.append(df)
         assert_frame_equal(result, expected)
 
-    def test_with_mixed_tuples(self):
+    def test_with_mixed_tuples(self, sort):
         # 10697
         # columns have mixed tuples, so handle properly
         df1 = DataFrame({u'A': 'foo', (u'B', 1): 'bar'}, index=range(2))
         df2 = DataFrame({u'B': 'foo', (u'B', 1): 'bar'}, index=range(2))
 
         # it works
-        concat([df1, df2], sort=True)
+        concat([df1, df2], sort=sort)
 
-    def test_handle_empty_objects(self):
+    def test_handle_empty_objects(self, sort):
         df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))
 
         baz = df[:5].copy()
@@ -1354,7 +1367,7 @@ def test_handle_empty_objects(self):
         empty = df[5:5]
 
         frames = [baz, empty, empty, df[5:]]
-        concatted = concat(frames, axis=0, sort=True)
+        concatted = concat(frames, axis=0, sort=sort)
 
         expected = df.reindex(columns=['a', 'b', 'c', 'd', 'foo'])
         expected['foo'] = expected['foo'].astype('O')
@@ -1506,7 +1519,7 @@ def test_panel_concat_other_axes(self):
             expected.loc['ItemC', :, :2] = 'baz'
             tm.assert_panel_equal(result, expected)
 
-    def test_panel_concat_buglet(self):
+    def test_panel_concat_buglet(self, sort):
         with catch_warnings(record=True):
             # #2257
             def make_panel():
@@ -1531,7 +1544,7 @@ def df():
             panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2)
 
             # it works!
-            concat([panel1, panel3], axis=1, verify_integrity=True, sort=True)
+            concat([panel1, panel3], axis=1, verify_integrity=True, sort=sort)
 
     def test_concat_series(self):
 
@@ -1556,7 +1569,7 @@ def test_concat_series(self):
         expected.index = exp_index
         tm.assert_series_equal(result, expected)
 
-    def test_concat_series_axis1(self):
+    def test_concat_series_axis1(self, sort=sort):
         ts = tm.makeTimeSeries()
 
         pieces = [ts[:-2], ts[2:], ts[2:-2]]
@@ -1585,7 +1598,7 @@ def test_concat_series_axis1(self):
         # must reindex, #2603
         s = Series(randn(3), index=['c', 'a', 'b'], name='A')
         s2 = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')
-        result = concat([s, s2], axis=1, sort=True)
+        result = concat([s, s2], axis=1, sort=sort)
         expected = DataFrame({'A': s, 'B': s2})
         assert_frame_equal(result, expected)
 
@@ -2071,7 +2084,7 @@ def test_categorical_concat_dtypes(self):
         expected = Series([True, False, False], index=index)
         tm.assert_series_equal(result, expected)
 
-    def test_categorical_concat(self):
+    def test_categorical_concat(self, sort):
         # See GH 10177
         df1 = DataFrame(np.arange(18, dtype='int64').reshape(6, 3),
                         columns=["a", "b", "c"])
@@ -2082,7 +2095,7 @@ def test_categorical_concat(self):
         cat_values = ["one", "one", "two", "one", "two", "two", "one"]
         df2['h'] = Series(Categorical(cat_values))
 
-        res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=True)
+        res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort)
         exp = DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
                          'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan,
                                np.nan, np.nan, np.nan, np.nan],
@@ -2311,21 +2324,28 @@ def test_concat_sorts_index():
     tm.assert_frame_equal(result, expected)
 
 
-def test_concat_inner_sort_unaffected():
+@pytest.mark.parametrize('sort', [None, False, True])
+def test_concat_inner_sort(sort):
     # https://github.com/pandas-dev/pandas/pull/20613
     df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]},
                        columns=['b', 'a', 'c'])
     df2 = pd.DataFrame({"a": [1, 2], 'b': [3, 4]}, index=[3, 4])
-    with tm.assert_produces_warning(None):
-        r0 = pd.concat([df1, df2], join='inner', ignore_index=True)
-    r1 = pd.concat([df1, df2], join='inner', sort=True, ignore_index=True)
-    r2 = pd.concat([df1, df2], join='inner', sort=True, ignore_index=True)
+
+    if sort is None:
+        with tm.assert_produces_warning(None):
+            # unset sort should *not* warn for inner join
+            # since that never sorted
+            result = pd.concat([df1, df2], sort=sort, join='inner',
+                               ignore_index=True)
+    else:
+        result = pd.concat([df1, df2], sort=sort, join='inner',
+                           ignore_index=True)
 
     expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]},
                             columns=['b', 'a'])
-    tm.assert_frame_equal(r0, expected)
-    tm.assert_frame_equal(r1, expected)
-    tm.assert_frame_equal(r2, expected)
+    if sort:
+        expected = expected[['a', 'b']]
+    tm.assert_frame_equal(result, expected)
 
 
 def test_concat_preserve_column_order_differing_columns():
@@ -2359,23 +2379,25 @@ def test_concat_preserve_column_order_uneven_data():
 
 def test_concat_aligned_sort():
     # GH-4588
-    df = pd.DataFrame({"b": [1, 2], "a": [3, 4]}, columns=['b', 'a'])
+    df = pd.DataFrame({"c": [1, 2], "b": [3, 4], 'a': [5, 6]},
+                      columns=['c', 'b', 'a'])
     result = pd.concat([df, df], sort=True, ignore_index=True)
-    expected = pd.DataFrame({'b': [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
-                            columns=['a', 'b'])
+    expected = pd.DataFrame({'a': [5, 6, 5, 6], 'b': [3, 4, 3, 4],
+                             'c': [1, 2, 1, 2]},
+                            columns=['a', 'b', 'c'])
+    tm.assert_frame_equal(result, expected)
+
+    result = pd.concat([df, df[['c', 'b']]], join='inner', sort=True,
+                       ignore_index=True)
+    expected = expected[['b', 'c']]
     tm.assert_frame_equal(result, expected)
 
 
-def test_concat_aligned_sort_raises():
+def test_concat_aligned_sort_does_not_raise():
     # GH-4588
+    # We catch TypeErrors from sorting internally and do not re-raise.
     df = pd.DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, 'a'])
-
-    if PY2:
-        expected = pd.DataFrame({1: [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
-                                columns=[1, 'a'])
-        result = pd.concat([df, df], ignore_index=True)
-        tm.assert_frame_equal(result, expected)
-    else:
-        msg = "'<' not supported between instances"
-        with tm.assert_raises_regex(TypeError, msg):
-            pd.concat([df, df], sort=True)
+    expected = pd.DataFrame({1: [1, 2, 1, 2], 'a': [3, 4, 3, 4]},
+                            columns=[1, 'a'])
+    result = pd.concat([df, df], ignore_index=True, sort=True)
+    tm.assert_frame_equal(result, expected)

From ce8ff05da9c267f70d47f1911fb661c8c6e9de71 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sun, 29 Apr 2018 06:29:26 -0500
Subject: [PATCH 24/34] Test fixup.

Sparse as well
---
 pandas/core/groupby/groupby.py          |  2 +-
 pandas/tests/sparse/frame/test_frame.py | 23 ++++++++++++++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 00ea96890dd27..4132d8e69704a 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1099,7 +1099,7 @@ def reset_identity(values):
 
                 result = concat(values, axis=self.axis, keys=group_keys,
                                 levels=group_levels, names=group_names,
-                                sort=True)
+                                sort=False)
             else:
 
                 # GH5610, returns a MI, with the first level being a
diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
index 540933cb90be2..8a8ed520d45d2 100644
--- a/pandas/tests/sparse/frame/test_frame.py
+++ b/pandas/tests/sparse/frame/test_frame.py
@@ -629,10 +629,31 @@ def test_append(self):
 
         a = self.frame.iloc[:5, :3]
         b = self.frame.iloc[5:]
-        appended = a.append(b)
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            # Stacklevel is set for pd.concat, not append
+            appended = a.append(b)
         tm.assert_sp_frame_equal(appended.iloc[:, :3], self.frame.iloc[:, :3],
                                  exact_indices=False)
 
+        a = a[['B', 'C', 'A']].head(2)
+        b = b.head(2)
+
+        expected = pd.SparseDataFrame({
+            "B": [0., 1, None, 3],
+            "C": [0., 1, 5, 6],
+            "A": [None, None, 2, 3],
+            "D": [None, None, 5, None],
+        }, index=a.index | b.index)
+        with tm.assert_produces_warning(None):
+            appended = a.append(b, sort=False)
+
+        tm.assert_frame_equal(appended, expected)
+
+        with tm.assert_produces_warning(None):
+            appended = a.append(b, sort=True)
+
+        tm.assert_sp_frame_equal(appended, expected[['A', 'B', 'C', 'D']])
+
     def test_astype(self):
         sparse = pd.SparseDataFrame({'A': SparseArray([1, 2, 3, 4],
                                                       dtype=np.int64),

From ce756d4824909d481eb431e6afa404dfb20b15fc Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sun, 29 Apr 2018 14:53:37 -0500
Subject: [PATCH 25/34] ugh

---
 pandas/core/indexes/api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 07ddbcc6fec18..e5ab5e144086f 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -44,7 +44,7 @@
            '_all_indexes_same']
 
 
-def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True):
+def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=False):
     # Extract combined index: return intersection or union (depending on the
     # value of "intersect") of indexes on given axis, or None if all objects
     # lack indexes (e.g. they are numpy arrays)
@@ -54,7 +54,7 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True):
         return _get_combined_index(obs_idxes, intersect=intersect, sort=sort)
 
 
-def _get_combined_index(indexes, intersect=False, sort=True):
+def _get_combined_index(indexes, intersect=False, sort=False):
     # TODO: handle index names!
     indexes = com._get_distinct_objs(indexes)
     if len(indexes) == 0:

From 362e84d851c7f24368cf5cac469615c627d9b644 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 30 Apr 2018 08:00:51 -0500
Subject: [PATCH 26/34] quoting

---
 doc/source/whatsnew/v0.23.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 6c2b64f402f0a..2591ab774e6cb 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -667,7 +667,7 @@ To accept the future behavior (no sorting), pass ``sort=False``
 
    pd.concat([df1, df2], sort=False)
 
-Note that this change also applies to :meth:`DataFrame.append`, which has also received a `sort` keyword for controlling this behavior.
+Note that this change also applies to :meth:`DataFrame.append`, which has also received a ``sort`` keyword for controlling this behavior.
 
 
 .. _whatsnew_0230.api_breaking.build_changes:

From 0210d3322ce9a2a2920bdda3c11a38b8e0d412fb Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 30 Apr 2018 08:03:08 -0500
Subject: [PATCH 27/34] Clarify

---
 doc/source/whatsnew/v0.23.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 2591ab774e6cb..94702f5e97264 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -645,7 +645,7 @@ Concatenation will no longer sort
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned.
-The current behavior is the same as the previous (sorting), but now a warning is issued (:issue:`4588`).
+The current behavior is the same as the previous (sorting), but now a warning is issued when ``sort`` is not specified and the non-concatenation axis is not aligned (:issue:`4588`).
 
 .. ipython:: python
    :okwarning:

From 06772b407f4f1215e60ff271d3158db64c2a0685 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 30 Apr 2018 08:15:13 -0500
Subject: [PATCH 28/34] Removed unnecessary check

---
 pandas/core/indexes/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index e5ab5e144086f..394181f5377d4 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -69,7 +69,7 @@ def _get_combined_index(indexes, intersect=False, sort=False):
         index = _union_indexes(indexes, sort=sort)
         index = _ensure_index(index)
 
-    if sort and not index.is_monotonic_increasing:
+    if sort:
         try:
             index = index.sort_values()
         except TypeError:

From e47cbb957b8d29b962df65fe1dde2de22be9c6d4 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 30 Apr 2018 09:15:13 -0500
Subject: [PATCH 29/34] Prune tests

---
 pandas/tests/reshape/test_concat.py | 130 ++++++++++++++--------------
 1 file changed, 65 insertions(+), 65 deletions(-)

diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 67471bd2d2c6d..57af67422d65f 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -27,6 +27,16 @@ def sort(request):
     return request.param
 
 
+@pytest.fixture(params=[True, False, None])
+def sort_with_none(request):
+    """Boolean sort keyword for concat and DataFrame.append.
+
+    Includes the default of None
+    """
+    # TODO: Replace with sort once keyword changes.
+    return request.param
+
+
 class ConcatenateBase(object):
 
     def setup_method(self, method):
@@ -800,29 +810,30 @@ def test_append_records(self):
         expected = DataFrame(np.concatenate((arr1, arr2)))
         assert_frame_equal(result, expected)
 
-    def test_append_sorts(self):
+    # rewrite sort fixture, since we also want to test default of None
+    def test_append_sorts(self, sort_with_none):
         df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
         df2 = pd.DataFrame({"a": [1, 2], 'c': [3, 4]}, index=[2, 3])
-        # default, changing in the future
 
-        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            # from append we have an extra function call. Not worth hacking
-            # around to get the right stackleve.
-            result = df1.append(df2)
+        if sort_with_none is None:
+            # only warn if not explicitly specified
+            # don't check stacklevel since it's set for concat, and append
+            # adds an extra stack frame.
+            ctx = tm.assert_produces_warning(FutureWarning,
+                                             check_stacklevel=False)
+        else:
+            ctx = tm.assert_produces_warning(None)
+
+        with ctx:
+            result = df1.append(df2, sort=sort_with_none)
 
+        # for None / True
         expected = pd.DataFrame({"b": [1, 2, None, None],
                                  "a": [1, 2, 1, 2],
                                  "c": [None, None, 3, 4]},
                                 columns=['a', 'b', 'c'])
-        tm.assert_frame_equal(result, expected)
-
-        # sort=True, the previous behavior
-        result = df1.append(df2, sort=True)
-        tm.assert_frame_equal(result, expected)
-
-        # sort=False, the future behvior.
-        result = df1.append(df2, sort=False)
-        expected = expected[['b', 'a', 'c']]
+        if sort_with_none is False:
+            expected = expected[['b', 'a', 'c']]
         tm.assert_frame_equal(result, expected)
 
     def test_append_different_columns(self, sort):
@@ -2297,86 +2308,75 @@ def test_concat_empty_and_non_empty_series_regression():
     tm.assert_series_equal(result, expected)
 
 
-def test_concat_sort_columns():
+def test_concat_sorts_columns(sort_with_none):
     # GH-4588
     df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a'])
-    df2 = pd.DataFrame({"a": [3, 4]})
+    df2 = pd.DataFrame({"a": [3, 4], "c": [5, 6]})
 
+    # for sort=True/None
     expected = pd.DataFrame({"a": [1, 2, 3, 4],
-                             "b": [1, 2, None, None]},
-                            columns=['a', 'b'])
-    with tm.assert_produces_warning(FutureWarning):
-        result = pd.concat([df1, df2], ignore_index=True)
+                             "b": [1, 2, None, None],
+                             "c": [None, None, 5, 6]},
+                            columns=['a', 'b', 'c'])
+
+    if sort_with_none is False:
+        expected = expected[['b', 'a', 'c']]
 
+    if sort_with_none is None:
+        # only warn if not explicitly specified
+        ctx = tm.assert_produces_warning(FutureWarning)
+    else:
+        ctx = tm.assert_produces_warning(None)
+
+    # default
+    with ctx:
+        result = pd.concat([df1, df2], ignore_index=True, sort=sort_with_none)
     tm.assert_frame_equal(result, expected)
 
 
-def test_concat_sorts_index():
+def test_concat_sorts_index(sort_with_none):
     df1 = pd.DataFrame({"a": [1, 2, 3]}, index=['c', 'a', 'b'])
     df2 = pd.DataFrame({"b": [1, 2]}, index=['a', 'b'])
 
-    with tm.assert_produces_warning(FutureWarning):
-        result = pd.concat([df1, df2], axis=1)
-
+    # For True/None
     expected = pd.DataFrame({"a": [2, 3, 1], "b": [1, 2, None]},
                             index=['a', 'b', 'c'],
                             columns=['a', 'b'])
+    if sort_with_none is False:
+        expected = expected.loc[['c', 'a', 'b']]
+
+    if sort_with_none is None:
+        # only warn if not explicitly specified
+        ctx = tm.assert_produces_warning(FutureWarning)
+    else:
+        ctx = tm.assert_produces_warning(None)
+
+    # Warn and sort by default
+    with ctx:
+        result = pd.concat([df1, df2], axis=1, sort=sort_with_none)
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.parametrize('sort', [None, False, True])
-def test_concat_inner_sort(sort):
+def test_concat_inner_sort(sort_with_none):
     # https://github.com/pandas-dev/pandas/pull/20613
     df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]},
                        columns=['b', 'a', 'c'])
     df2 = pd.DataFrame({"a": [1, 2], 'b': [3, 4]}, index=[3, 4])
 
-    if sort is None:
-        with tm.assert_produces_warning(None):
-            # unset sort should *not* warn for inner join
-            # since that never sorted
-            result = pd.concat([df1, df2], sort=sort, join='inner',
-                               ignore_index=True)
-    else:
-        result = pd.concat([df1, df2], sort=sort, join='inner',
+    with tm.assert_produces_warning(None):
+        # unset sort should *not* warn for inner join
+        # since that never sorted
+        result = pd.concat([df1, df2], sort=sort_with_none,
+                           join='inner',
                            ignore_index=True)
 
     expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]},
                             columns=['b', 'a'])
-    if sort:
+    if sort_with_none is True:
         expected = expected[['a', 'b']]
     tm.assert_frame_equal(result, expected)
 
 
-def test_concat_preserve_column_order_differing_columns():
-    # GH 4588 regression test
-    # for new columns in concat
-    dfa = pd.DataFrame(columns=['C', 'A'], data=[[1, 2]])
-    dfb = pd.DataFrame(columns=['C', 'Z'], data=[[5, 6]])
-    result = pd.concat([dfa, dfb], ignore_index=True, sort=True)
-
-    expected = pd.DataFrame({"A": [2, None], "C": [1, 5],
-                             "Z": [None, 6]}, columns=["A", "C", "Z"])
-    tm.assert_frame_equal(result, expected)
-
-
-def test_concat_preserve_column_order_uneven_data():
-    # GH 4588 regression test
-    # add to column, concat with uneven data
-    df = pd.DataFrame()
-    df['b'] = [1, 2, 3]
-    df['c'] = [1, 2, 3]
-    df['a'] = [1, 2, 3]
-    df2 = pd.DataFrame({'a': [4, 5]})
-    result = pd.concat([df, df2], sort=True)
-    expected = pd.DataFrame({
-        'a': [1, 2, 3, 4, 5],
-        'b': [1, 2, 3, None, None],
-        'c': [1, 2, 3, None, None]
-    }, index=[0, 1, 2, 0, 1])
-    tm.assert_frame_equal(result, expected)
-
-
 def test_concat_aligned_sort():
     # GH-4588
     df = pd.DataFrame({"c": [1, 2], "b": [3, 4], 'a': [5, 6]},

From 0182c98eb50f1871c83e968e998dc95cf0be981f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 30 Apr 2018 12:42:27 -0500
Subject: [PATCH 30/34] Default sort

---
 pandas/core/indexes/api.py | 2 +-
 pandas/core/panel.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py
index 394181f5377d4..f9501cd2f9ddf 100644
--- a/pandas/core/indexes/api.py
+++ b/pandas/core/indexes/api.py
@@ -44,7 +44,7 @@
            '_all_indexes_same']
 
 
-def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=False):
+def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True):
     # Extract combined index: return intersection or union (depending on the
     # value of "intersect") of indexes on given axis, or None if all objects
     # lack indexes (e.g. they are numpy arrays)
diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index e08d0a7368ccb..ffda5e095a38a 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -1500,7 +1500,7 @@ def _extract_axis(self, data, axis=0, intersect=False):
 
         if have_frames:
             index = _get_objs_combined_axis(data.values(), axis=axis,
-                                            intersect=intersect)
+                                            intersect=intersect, sort=True)
 
         if have_raw_arrays:
             lengths = list(set(raw_lengths))

From 7e589989bd7c6e8c8bf2beb9ce995fd2c8f4d5f6 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 30 Apr 2018 13:14:24 -0500
Subject: [PATCH 31/34] Make both tests happy

---
 pandas/core/panel.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/panel.py b/pandas/core/panel.py
index ffda5e095a38a..16e64192fdb20 100644
--- a/pandas/core/panel.py
+++ b/pandas/core/panel.py
@@ -1499,8 +1499,11 @@ def _extract_axis(self, data, axis=0, intersect=False):
                 raw_lengths.append(v.shape[axis])
 
         if have_frames:
+            # we want the "old" behavior here, of sorting only when
+            # 1. we're doing a union (intersect=False), and
+            # 2. the indices are not aligned.
             index = _get_objs_combined_axis(data.values(), axis=axis,
-                                            intersect=intersect, sort=True)
+                                            intersect=intersect, sort=None)
 
         if have_raw_arrays:
             lengths = list(set(raw_lengths))

From 5b58e7534deee22cea3f1aee4e33bf565e2281d4 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 30 Apr 2018 14:22:03 -0500
Subject: [PATCH 32/34] Explicit columns

---
 pandas/tests/sparse/frame/test_frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
index 8a8ed520d45d2..9cc615e15564f 100644
--- a/pandas/tests/sparse/frame/test_frame.py
+++ b/pandas/tests/sparse/frame/test_frame.py
@@ -643,7 +643,7 @@ def test_append(self):
             "C": [0., 1, 5, 6],
             "A": [None, None, 2, 3],
             "D": [None, None, 5, None],
-        }, index=a.index | b.index)
+        }, index=a.index | b.index, columns=['B', 'C', 'A', 'D'])
         with tm.assert_produces_warning(None):
             appended = a.append(b, sort=False)
 

From 074d03c091dfde8022bc8541e69a61d4555a3cb8 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 30 Apr 2018 16:28:12 -0500
Subject: [PATCH 33/34] List of series

---
 pandas/core/frame.py                    |  2 +-
 pandas/tests/frame/test_constructors.py | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1a162cf66e3ec..d475d8b944575 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -7491,7 +7491,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None):
     from pandas.core.index import _get_objs_combined_axis
 
     if columns is None:
-        columns = _get_objs_combined_axis(data)
+        columns = _get_objs_combined_axis(data, sort=False)
 
     indexer_cache = {}
 
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 47b7d60e3b6e8..6dd38187f7277 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -1071,6 +1071,17 @@ def test_constructor_list_of_series(self):
         expected = DataFrame.from_dict(sdict, orient='index')
         tm.assert_frame_equal(result, expected)
 
+    def test_constructor_list_of_series_aligned_index(self):
+        series = [pd.Series(i, index=['b', 'a', 'c'], name=str(i))
+                  for i in range(3)]
+        result = pd.DataFrame(series)
+        expected = pd.DataFrame({'b': [0, 1, 2],
+                                 'a': [0, 1, 2],
+                                 'c': [0, 1, 2]},
+                                columns=['b', 'a', 'c'],
+                                index=['0', '1', '2'])
+        tm.assert_frame_equal(result, expected)
+
     def test_constructor_list_of_derived_dicts(self):
         class CustomDict(dict):
             pass

From 5e1b0241358c10893939d135e91ab16cfa48cadd Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 30 Apr 2018 19:19:46 -0500
Subject: [PATCH 34/34] test, fix pivot

---
 pandas/core/reshape/pivot.py       |  3 ++-
 pandas/tests/reshape/test_pivot.py | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 74a9b59d3194a..96f8a53b4d253 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -437,7 +437,8 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
     rownames = _get_names(index, rownames, prefix='row')
     colnames = _get_names(columns, colnames, prefix='col')
 
-    common_idx = _get_objs_combined_axis(index + columns, intersect=True)
+    common_idx = _get_objs_combined_axis(index + columns, intersect=True,
+                                         sort=False)
 
     data = {}
     data.update(zip(rownames, index))
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 1004b40bfb4c1..db287a719ae1e 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1724,3 +1724,15 @@ def test_crosstab_tuple_name(self, names):
 
         result = pd.crosstab(s1, s2)
         tm.assert_frame_equal(result, expected)
+
+    def test_crosstab_unsorted_order(self):
+        df = pd.DataFrame({"b": [3, 1, 2], 'a': [5, 4, 6]},
+                          index=['C', 'A', 'B'])
+        result = pd.crosstab(df.index, [df.b, df.a])
+        e_idx = pd.Index(['A', 'B', 'C'], name='row_0')
+        e_columns = pd.MultiIndex.from_tuples([(1, 4), (2, 6), (3, 5)],
+                                              names=['b', 'a'])
+        expected = pd.DataFrame([[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                                index=e_idx,
+                                columns=e_columns)
+        tm.assert_frame_equal(result, expected)