From 154ab0e8c982b7a34608beb5f79e9726119f1bae Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 28 Jun 2019 20:07:09 +0200 Subject: [PATCH 1/3] DEPR: join_axes-kwarg in pd.concat --- doc/source/user_guide/merging.rst | 9 ++-- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/frame.py | 11 +++-- pandas/core/generic.py | 3 +- pandas/core/groupby/generic.py | 6 ++- pandas/core/reshape/concat.py | 57 +++++++++++----------- pandas/tests/resample/test_resample_api.py | 3 ++ pandas/tests/reshape/test_concat.py | 20 ++++++++ 8 files changed, 69 insertions(+), 41 deletions(-) diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index 43d44ff30c64a..6c75ae83d74d3 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -88,7 +88,9 @@ some configurable handling of "what to do with the other axes": concatenation axis does not have meaningful indexing information. Note the index values on the other axes are still respected in the join. * ``join_axes`` : list of Index objects. Specific indexes to use for the other - n - 1 axes instead of performing inner/outer set logic. + n - 1 axes instead of performing inner/outer set logic. This keyword is + deprecated, please use ``.reindex`` or ``.reindex_like`` on the result (or + the individual DataFrames before concatenating). * ``keys`` : sequence, default None. Construct hierarchical index using the passed keys as the outermost level. If multiple levels passed, should contain tuples. @@ -147,12 +149,11 @@ Set logic on the other axes When gluing together multiple DataFrames, you have a choice of how to handle the other axes (other than the one being concatenated). This can be done in -the following three ways: +the following two ways: * Take the union of them all, ``join='outer'``. This is the default option as it results in zero information loss. * Take the intersection, ``join='inner'``. -* Use a specific index, as passed to the ``join_axes`` argument. 
Here is an example of each of these methods. First, the default ``join='outer'`` behavior: @@ -202,7 +203,7 @@ DataFrame: .. ipython:: python - result = pd.concat([df1, df4], axis=1, join_axes=[df1.index]) + result = pd.concat([df1, df4], axis=1).reindex(df1.index) .. ipython:: python :suppress: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1fe808e098860..bfc21359cdd5a 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -595,6 +595,7 @@ Other deprecations - The deprecated ``.ix[]`` indexer now raises a more visible ``FutureWarning`` instead of ``DeprecationWarning`` (:issue:`26438`). - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`) +- :meth:`pandas.concat` has deprecated the ``join_axes``-keyword. Instead, use :meth:`DataFrame.reindex` or :meth:`DataFrame.reindex_like` on the result (:issue:`21951`) - The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. 
(:issue:`24416`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index df7003ecf000e..48b1aadfb95c6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6879,12 +6879,13 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', # join indexes only using concat if can_concat: if how == 'left': - how = 'outer' - join_axes = [self.index] + res = concat(frames, axis=1, join='outer', + verify_integrity=True) + res = res.reindex(self.index, copy=False) + return res else: - join_axes = None - return concat(frames, axis=1, join=how, join_axes=join_axes, - verify_integrity=True) + return concat(frames, axis=1, join=how, + verify_integrity=True) joined = frames[0] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1af3e9449f3da..c489f23838ef0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9845,7 +9845,8 @@ def describe_1d(data): if name not in names: names.append(name) - d = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1) + d = pd.concat([x.reindex(names, copy=False) for x in ldesc], + axis=1, sort=False) d.columns = data.columns.copy() return d diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 72c8d330170d4..99aaf6920a60b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -562,8 +562,10 @@ def _transform_general(self, func, *args, **kwargs): applied.append(res) concat_index = obj.columns if self.axis == 0 else obj.index - concatenated = concat(applied, join_axes=[concat_index], - axis=self.axis, verify_integrity=False) + other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1 + concatenated = concat(applied, axis=self.axis, verify_integrity=False) + concatenated = concatenated.reindex(concat_index, axis=other_axis, + copy=False) return self._set_result_index_ordered(concatenated) @Substitution(klass='DataFrame', selected='') diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 
4523a6ad48f19..6b8e8a93b8bfd 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -2,6 +2,8 @@ concat routines """ +import warnings + import numpy as np import pandas.core.dtypes.concat as _concat @@ -221,12 +223,25 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, ... ValueError: Indexes have overlapping values: ['a'] """ - op = _Concatenator(objs, axis=axis, join_axes=join_axes, - ignore_index=ignore_index, join=join, + op = _Concatenator(objs, axis=axis, ignore_index=ignore_index, join=join, keys=keys, levels=levels, names=names, - verify_integrity=verify_integrity, - copy=copy, sort=sort) - return op.get_result() + verify_integrity=verify_integrity, copy=copy, sort=sort) + + res = op.get_result() + + if join_axes is not None: + # GH 21951 + warnings.warn('The join_axes-keyword is deprecated. Use .reindex or ' + '.reindex_like on the result to achieve the same ' + 'functionality.', FutureWarning, stacklevel=2) + ndim = res.ndim + if len(join_axes) != ndim - 1: + raise AssertionError("join_axes must be a list of indexes of " + "length {length}".format(length=ndim - 1)) + if ndim == 2: + other_axis = 1 if axis == 0 else 0 # switches between 0 & 1 + res = res.reindex(join_axes[0], axis=other_axis, copy=False) + return res class _Concatenator: @@ -234,10 +249,9 @@ class _Concatenator: Orchestrates a concatenation operation for BlockManagers """ - def __init__(self, objs, axis=0, join='outer', join_axes=None, - keys=None, levels=None, names=None, - ignore_index=False, verify_integrity=False, copy=True, - sort=False): + def __init__(self, objs, axis=0, join='outer', keys=None, levels=None, + names=None, ignore_index=False, verify_integrity=False, + copy=True, sort=False): if isinstance(objs, (NDFrame, str)): raise TypeError('first argument must be an iterable of pandas ' 'objects, you passed an object of type ' @@ -310,9 +324,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, if sum(obj.shape) > 
0 or isinstance(obj, Series)] if (len(non_empties) and (keys is None and names is None and - levels is None and - join_axes is None and - not self.intersect)): + levels is None and not self.intersect)): objs = non_empties sample = objs[0] @@ -368,7 +380,6 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, # note: this is the BlockManager axis (since DataFrame is transposed) self.axis = axis - self.join_axes = join_axes self.keys = keys self.names = names or getattr(keys, 'names', None) self.levels = levels @@ -441,22 +452,10 @@ def _get_new_axes(self): ndim = self._get_result_dim() new_axes = [None] * ndim - if self.join_axes is None: - for i in range(ndim): - if i == self.axis: - continue - new_axes[i] = self._get_comb_axis(i) - else: - if len(self.join_axes) != ndim - 1: - raise AssertionError("length of join_axes must be equal " - "to {length}".format(length=ndim - 1)) - - # ufff... - indices = list(range(ndim)) - indices.remove(self.axis) - - for i, ax in zip(indices, self.join_axes): - new_axes[i] = ax + for i in range(ndim): + if i == self.axis: + continue + new_axes[i] = self._get_comb_axis(i) new_axes[self.axis] = self._get_concat_axis() return new_axes diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index ca2fb1acb6afa..63e216762b2e3 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -4,6 +4,8 @@ import numpy as np import pytest +from pandas.compat import PY36 + import pandas as pd from pandas import DataFrame, Series from pandas.core.indexes.datetimes import date_range @@ -233,6 +235,7 @@ def test_apply_without_aggregation(): assert_series_equal(result, test_series) +@pytest.mark.skipif(not PY36, reason='Insertion order for Python>=3.6') def test_agg_consistency(): # make sure that we are consistent across diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 4f65251ebd923..f1f3c8bce9ee9 100644 
--- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -722,6 +722,26 @@ def test_concat_categorical_empty(self): tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + def test_concat_join_axes_deprecated(self, axis): + # GH21951 + one = pd.DataFrame([[0., 1.], [2., 3.]], columns=list('ab')) + two = pd.DataFrame([[10., 11.], [12., 13.]], index=[1, 2], + columns=list('bc')) + + expected = pd.concat([one, two], + axis=1, sort=False).reindex(index=two.index) + with tm.assert_produces_warning(expected_warning=FutureWarning): + result = pd.concat([one, two], + axis=1, sort=False, join_axes=[two.index]) + tm.assert_frame_equal(result, expected) + + expected = pd.concat([one, two], + axis=0, sort=False).reindex(columns=two.columns) + with tm.assert_produces_warning(expected_warning=FutureWarning): + result = pd.concat([one, two], + axis=0, sort=False, join_axes=[two.columns]) + tm.assert_frame_equal(result, expected) + class TestAppend: From e3e8118fef42307b86e2c7a73978901a86568945 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sat, 29 Jun 2019 01:44:16 +0200 Subject: [PATCH 2/3] review (jreback) --- doc/source/user_guide/merging.rst | 15 ++++++++------- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/core/frame.py | 3 +-- pandas/core/reshape/concat.py | 2 ++ pandas/tests/resample/test_resample_api.py | 3 --- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index 6c75ae83d74d3..6e63e672bb968 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -70,9 +70,8 @@ some configurable handling of "what to do with the other axes": :: - pd.concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, - keys=None, levels=None, names=None, verify_integrity=False, - copy=True) + pd.concat(objs, axis=0, join='outer', ignore_index=False, keys=None, + levels=None, names=None, verify_integrity=False, copy=True) * ``objs`` : a sequence or mapping of Series or DataFrame objects. If a dict is passed, the sorted keys will be used as the `keys` argument, unless @@ -87,10 +86,6 @@ some configurable handling of "what to do with the other axes": n - 1. This is useful if you are concatenating objects where the concatenation axis does not have meaningful indexing information. Note the index values on the other axes are still respected in the join. -* ``join_axes`` : list of Index objects. Specific indexes to use for the other - n - 1 axes instead of performing inner/outer set logic. This keyword is - deprecated, please use ``.reindex`` or ``.reindex_like`` on the result (or - the individual DataFrames before concatenating). * ``keys`` : sequence, default None. Construct hierarchical index using the passed keys as the outermost level. If multiple levels passed, should contain tuples. @@ -205,6 +200,12 @@ DataFrame: result = pd.concat([df1, df4], axis=1).reindex(df1.index) +Similarly, we could index before the concatenation: + +.. 
ipython:: python + + pd.concat([df1, df4.reindex(df1.index)], axis=1) + .. ipython:: python :suppress: diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 01bef6ce64706..a7a66963a7ff5 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -595,7 +595,7 @@ Other deprecations - The deprecated ``.ix[]`` indexer now raises a more visible ``FutureWarning`` instead of ``DeprecationWarning`` (:issue:`26438`). - Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`) -- :meth:`pandas.concat` has deprecated the ``join_axes``-keyword. Instead, use :meth:`DataFrame.reindex` or :meth:`DataFrame.reindex_like` on the result (:issue:`21951`) +- :func:`pandas.concat` has deprecated the ``join_axes`` keyword. Instead, use :meth:`DataFrame.reindex` or :meth:`DataFrame.reindex_like` on the result or on the inputs (:issue:`21951`) - The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). - The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. 
(:issue:`24416`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 48b1aadfb95c6..a58cd22fa1e0a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6881,8 +6881,7 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', if how == 'left': res = concat(frames, axis=1, join='outer', verify_integrity=True) - res = res.reindex(self.index, copy=False) - return res + return res.reindex(self.index, copy=False) else: return concat(frames, axis=1, join=how, verify_integrity=True) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 6b8e8a93b8bfd..2487fe5762f13 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -46,6 +46,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, join : {'inner', 'outer'}, default 'outer' How to handle indexes on other axis (or axes). join_axes : list of Index objects + .. deprecated:: 0.25.0 + Specific indexes to use for the other n - 1 axes instead of performing inner/outer set logic. 
ignore_index : bool, default False diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 63e216762b2e3..ca2fb1acb6afa 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas.compat import PY36 - import pandas as pd from pandas import DataFrame, Series from pandas.core.indexes.datetimes import date_range @@ -235,7 +233,6 @@ def test_apply_without_aggregation(): assert_series_equal(result, test_series) -@pytest.mark.skipif(not PY36, reason='Insertion order for Python>=3.6') def test_agg_consistency(): # make sure that we are consistent across From 60e623965665000f341106e09adc43f27ebb6109 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 2 Jul 2019 19:00:04 -0400 Subject: [PATCH 3/3] restore original location --- pandas/core/reshape/concat.py | 60 ++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 2487fe5762f13..d4272cf6e406d 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -49,7 +49,8 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, .. deprecated:: 0.25.0 Specific indexes to use for the other n - 1 axes instead of performing - inner/outer set logic. + inner/outer set logic. Use .reindex() before or after concatenation + as a replacement. ignore_index : bool, default False If True, do not use the index values along the concatenation axis. The resulting axis will be labeled 0, ..., n - 1. 
This is useful if you are @@ -226,24 +227,11 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, ValueError: Indexes have overlapping values: ['a'] """ op = _Concatenator(objs, axis=axis, ignore_index=ignore_index, join=join, - keys=keys, levels=levels, names=names, - verify_integrity=verify_integrity, copy=copy, sort=sort) - - res = op.get_result() - - if join_axes is not None: - # GH 21951 - warnings.warn('The join_axes-keyword is deprecated. Use .reindex or ' - '.reindex_like on the result to achieve the same ' - 'functionality.', FutureWarning, stacklevel=2) - ndim = res.ndim - if len(join_axes) != ndim - 1: - raise AssertionError("join_axes must be a list of indexes of " - "length {length}".format(length=ndim - 1)) - if ndim == 2: - other_axis = 1 if axis == 0 else 0 # switches between 0 & 1 - res = res.reindex(join_axes[0], axis=other_axis, copy=False) - return res + join_axes=join_axes, keys=keys, levels=levels, + names=names, verify_integrity=verify_integrity, + copy=copy, sort=sort) + + return op.get_result() class _Concatenator: @@ -251,9 +239,9 @@ class _Concatenator: Orchestrates a concatenation operation for BlockManagers """ - def __init__(self, objs, axis=0, join='outer', keys=None, levels=None, - names=None, ignore_index=False, verify_integrity=False, - copy=True, sort=False): + def __init__(self, objs, axis=0, join='outer', join_axes=None, keys=None, + levels=None, names=None, ignore_index=False, + verify_integrity=False, copy=True, sort=False): if isinstance(objs, (NDFrame, str)): raise TypeError('first argument must be an iterable of pandas ' 'objects, you passed an object of type ' @@ -382,6 +370,7 @@ def __init__(self, objs, axis=0, join='outer', keys=None, levels=None, # note: this is the BlockManager axis (since DataFrame is transposed) self.axis = axis + self.join_axes = join_axes self.keys = keys self.names = names or getattr(keys, 'names', None) self.levels = levels @@ -454,10 +443,29 @@ def _get_new_axes(self): 
ndim = self._get_result_dim() new_axes = [None] * ndim - for i in range(ndim): - if i == self.axis: - continue - new_axes[i] = self._get_comb_axis(i) + if self.join_axes is None: + for i in range(ndim): + if i == self.axis: + continue + new_axes[i] = self._get_comb_axis(i) + + else: + # GH 21951 + warnings.warn( + 'The join_axes-keyword is deprecated. Use .reindex or ' + '.reindex_like on the result to achieve the same ' + 'functionality.', FutureWarning, stacklevel=4) + + if len(self.join_axes) != ndim - 1: + raise AssertionError("length of join_axes must be equal " + "to {length}".format(length=ndim - 1)) + + # pair each non-concatenation axis with its join_axes entry + indices = list(range(ndim)) + indices.remove(self.axis) + + for i, ax in zip(indices, self.join_axes): + new_axes[i] = ax new_axes[self.axis] = self._get_concat_axis() return new_axes