diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index 43d44ff30c64a..6e63e672bb968 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -70,9 +70,8 @@ some configurable handling of "what to do with the other axes": :: - pd.concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, - keys=None, levels=None, names=None, verify_integrity=False, - copy=True) + pd.concat(objs, axis=0, join='outer', ignore_index=False, keys=None, + levels=None, names=None, verify_integrity=False, copy=True) * ``objs`` : a sequence or mapping of Series or DataFrame objects. If a dict is passed, the sorted keys will be used as the `keys` argument, unless @@ -87,8 +86,6 @@ some configurable handling of "what to do with the other axes": n - 1. This is useful if you are concatenating objects where the concatenation axis does not have meaningful indexing information. Note the index values on the other axes are still respected in the join. -* ``join_axes`` : list of Index objects. Specific indexes to use for the other - n - 1 axes instead of performing inner/outer set logic. * ``keys`` : sequence, default None. Construct hierarchical index using the passed keys as the outermost level. If multiple levels passed, should contain tuples. @@ -147,12 +144,11 @@ Set logic on the other axes When gluing together multiple DataFrames, you have a choice of how to handle the other axes (other than the one being concatenated). This can be done in -the following three ways: +the following two ways: * Take the union of them all, ``join='outer'``. This is the default option as it results in zero information loss. * Take the intersection, ``join='inner'``. -* Use a specific index, as passed to the ``join_axes`` argument. Here is an example of each of these methods. First, the default ``join='outer'`` behavior: @@ -202,7 +198,13 @@ DataFrame: .. 
ipython:: python - result = pd.concat([df1, df4], axis=1, join_axes=[df1.index]) + result = pd.concat([df1, df4], axis=1).reindex(df1.index) + +Similarly, we could index before the concatenation: + +.. ipython:: python + + pd.concat([df1, df4.reindex(df1.index)], axis=1) .. ipython:: python :suppress: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index e9d23cfd8efc1..713bae34d31dd 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -744,6 +744,7 @@ Other deprecations - The deprecated ``.ix[]`` indexer now raises a more visible ``FutureWarning`` instead of ``DeprecationWarning`` (:issue:`26438`). - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`) +- :func:`pandas.concat` has deprecated the ``join_axes`` keyword. Instead, use :meth:`DataFrame.reindex` or :meth:`DataFrame.reindex_like` on the result or on the inputs (:issue:`21951`) - The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. 
(:issue:`24416`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ff3fff22f4f0..0e7bb1211fb65 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6806,12 +6806,12 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', # join indexes only using concat if can_concat: if how == 'left': - how = 'outer' - join_axes = [self.index] + res = concat(frames, axis=1, join='outer', + verify_integrity=True) + return res.reindex(self.index, copy=False) else: - join_axes = None - return concat(frames, axis=1, join=how, join_axes=join_axes, - verify_integrity=True) + return concat(frames, axis=1, join=how, + verify_integrity=True) joined = frames[0] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 822428c6787be..00429c31ee70b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9706,7 +9706,8 @@ def describe_1d(data): if name not in names: names.append(name) - d = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1) + d = pd.concat([x.reindex(names, copy=False) for x in ldesc], + axis=1, sort=False) d.columns = data.columns.copy() return d diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 210e82837118c..9e7dcafc0b1a4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -562,8 +562,10 @@ def _transform_general(self, func, *args, **kwargs): applied.append(res) concat_index = obj.columns if self.axis == 0 else obj.index - concatenated = concat(applied, join_axes=[concat_index], - axis=self.axis, verify_integrity=False) + other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1 + concatenated = concat(applied, axis=self.axis, verify_integrity=False) + concatenated = concatenated.reindex(concat_index, axis=other_axis, + copy=False) return self._set_result_index_ordered(concatenated) @Substitution(klass='DataFrame', selected='') diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 
4523a6ad48f19..d4272cf6e406d 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -2,6 +2,8 @@ concat routines """ +import warnings + import numpy as np import pandas.core.dtypes.concat as _concat @@ -44,8 +46,11 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, join : {'inner', 'outer'}, default 'outer' How to handle indexes on other axis (or axes). join_axes : list of Index objects + .. deprecated:: 0.25.0 + Specific indexes to use for the other n - 1 axes instead of performing - inner/outer set logic. + inner/outer set logic. Use .reindex() before or after concatenation + as a replacement. ignore_index : bool, default False If True, do not use the index values along the concatenation axis. The resulting axis will be labeled 0, ..., n - 1. This is useful if you are @@ -221,11 +226,11 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, ... ValueError: Indexes have overlapping values: ['a'] """ - op = _Concatenator(objs, axis=axis, join_axes=join_axes, - ignore_index=ignore_index, join=join, - keys=keys, levels=levels, names=names, - verify_integrity=verify_integrity, + op = _Concatenator(objs, axis=axis, ignore_index=ignore_index, join=join, + join_axes=join_axes, keys=keys, levels=levels, + names=names, verify_integrity=verify_integrity, copy=copy, sort=sort) + return op.get_result() @@ -234,10 +239,9 @@ class _Concatenator: Orchestrates a concatenation operation for BlockManagers """ - def __init__(self, objs, axis=0, join='outer', join_axes=None, - keys=None, levels=None, names=None, - ignore_index=False, verify_integrity=False, copy=True, - sort=False): + def __init__(self, objs, axis=0, join='outer', join_axes=None, keys=None, + levels=None, names=None, ignore_index=False, + verify_integrity=False, copy=True, sort=False): if isinstance(objs, (NDFrame, str)): raise TypeError('first argument must be an iterable of pandas ' 'objects, you passed an object of type ' @@ -310,9 +314,7 
@@ def __init__(self, objs, axis=0, join='outer', join_axes=None, if sum(obj.shape) > 0 or isinstance(obj, Series)] if (len(non_empties) and (keys is None and names is None and - levels is None and - join_axes is None and - not self.intersect)): + levels is None and not self.intersect)): objs = non_empties sample = objs[0] @@ -446,7 +448,14 @@ def _get_new_axes(self): if i == self.axis: continue new_axes[i] = self._get_comb_axis(i) + else: + # GH 21951 + warnings.warn( + 'The join_axes-keyword is deprecated. Use .reindex or ' + '.reindex_like on the result to achieve the same ' + 'functionality.', FutureWarning, stacklevel=4) + if len(self.join_axes) != ndim - 1: raise AssertionError("length of join_axes must be equal " "to {length}".format(length=ndim - 1)) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 74ede682dfb5f..031f3abf31b16 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -722,6 +722,26 @@ def test_concat_categorical_empty(self): tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + def test_concat_join_axes_deprecated(self, axis): + # GH21951 + one = pd.DataFrame([[0., 1.], [2., 3.]], columns=list('ab')) + two = pd.DataFrame([[10., 11.], [12., 13.]], index=[1, 2], + columns=list('bc')) + + expected = pd.concat([one, two], + axis=1, sort=False).reindex(index=two.index) + with tm.assert_produces_warning(expected_warning=FutureWarning): + result = pd.concat([one, two], + axis=1, sort=False, join_axes=[two.index]) + tm.assert_frame_equal(result, expected) + + expected = pd.concat([one, two], + axis=0, sort=False).reindex(columns=two.columns) + with tm.assert_produces_warning(expected_warning=FutureWarning): + result = pd.concat([one, two], + axis=0, sort=False, join_axes=[two.columns]) + tm.assert_frame_equal(result, expected) + class TestAppend: