diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 98b497bd6988b..3a2cadd6b1cd5 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -200,6 +200,7 @@ Other Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`) +- :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`) - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`) - :meth:`DataFrame.groupby` with ``as_index=False`` and aggregation methods will no longer exclude from the result the groupings that do not arise from the input (:issue:`49519`) - :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 3a34481ab3f33..17e68b0e19a68 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -8,12 +8,10 @@ TYPE_CHECKING, cast, ) -import warnings import numpy as np from pandas._libs import lib -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import ( @@ -101,28 +99,10 @@ def concat_compat( # Creating an empty array directly is tempting, but the winnings would be # marginal given that it would still require shape & dtype calculation and # np.concatenate which has them both implemented is compiled. - orig = to_concat non_empties = [x for x in to_concat if _is_nonempty(x, axis)] - if non_empties and axis == 0 and not ea_compat_axis: - # ea_compat_axis see GH#39574 - to_concat = non_empties any_ea, kinds, target_dtype = _get_result_dtype(to_concat, non_empties) - if len(to_concat) < len(orig): - _, _, alt_dtype = _get_result_dtype(orig, non_empties) - if alt_dtype != target_dtype: - # GH#39122 - warnings.warn( - "The behavior of array concatenation with empty entries is " - "deprecated. In a future version, this will no longer exclude " - "empty items when determining the result dtype. " - "To retain the old behavior, exclude the empty entries before " - "the concat operation.", - FutureWarning, - stacklevel=find_stack_level(), - ) - if target_dtype is not None: to_concat = [astype_array(arr, target_dtype, copy=False) for arr in to_concat] diff --git a/pandas/tests/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py index 1652c9254061b..d4fe6c5264007 100644 --- a/pandas/tests/dtypes/test_concat.py +++ b/pandas/tests/dtypes/test_concat.py @@ -12,12 +12,9 @@ def test_concat_mismatched_categoricals_with_empty(): ser1 = Series(["a", "b", "c"], dtype="category") ser2 = Series([], dtype="category") - msg = "The behavior of array concatenation with empty entries is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = _concat.concat_compat([ser1._values, ser2._values]) - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = pd.concat([ser1, ser2])._values - tm.assert_categorical_equal(result, expected) + result = _concat.concat_compat([ser1._values, ser2._values]) + expected = pd.concat([ser1, ser2])._values + tm.assert_numpy_array_equal(result, expected) def test_concat_single_dataframe_tz_aware(): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index bcad88bdecabb..be8f5d73fe7e8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -224,8 +224,6 @@ def f3(x): df2 = DataFrame({"a": [3, 2, 2, 2], "b": range(4), "c": range(5, 9)}) - depr_msg = "The behavior of array concatenation with empty entries is deprecated" - # correct result msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): @@ -245,8 +243,7 @@ def f3(x): with pytest.raises(AssertionError, match=msg): df.groupby("a").apply(f3) with pytest.raises(AssertionError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df2.groupby("a").apply(f3) + df2.groupby("a").apply(f3) def test_attr_wrapper(ts): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index beee14197bfb8..997276ef544f7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -636,9 +636,7 @@ def test_append_empty_preserve_name(self, name, expected): left = Index([], name="foo") right = Index([1, 2, 3], name=name) - msg = "The behavior of array concatenation with empty entries is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = left.append(right) + result = left.append(right) assert result.name == expected @pytest.mark.parametrize( diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index 3fb6a3fb61396..81b5914fef402 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -162,9 +162,7 @@ def test_append_preserve_index_name(self): df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"]) df2 = df2.set_index(["A"]) - msg = "The behavior of array concatenation with empty entries is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df1._append(df2) + result = df1._append(df2) assert result.index.name == "A" indexes_can_append = [ diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index 31c3ef3176222..c831cb8293943 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -691,15 +691,12 @@ def test_concat_categorical_empty(self): s1 = Series([], dtype="category") s2 = Series([1, 2], dtype="category") + exp = s2.astype(object) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) - msg = "The behavior of array concatenation with empty entries is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) - tm.assert_series_equal(s1._append(s2, ignore_index=True), s2) - - with tm.assert_produces_warning(FutureWarning, match=msg): - tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) - tm.assert_series_equal(s2._append(s1, ignore_index=True), s2) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) s1 = Series([], dtype="category") s2 = Series([], dtype="category") @@ -719,15 +716,12 @@ def test_concat_categorical_empty(self): s1 = Series([], dtype="category") s2 = Series([np.nan, np.nan]) - # empty Series is ignored - exp = Series([np.nan, np.nan]) - with tm.assert_produces_warning(FutureWarning, match=msg): - tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) - tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) + exp = Series([np.nan, np.nan], dtype=object) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1._append(s2, ignore_index=True), exp) - with tm.assert_produces_warning(FutureWarning, match=msg): - tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) - tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2._append(s1, ignore_index=True), exp) def test_categorical_concat_append(self): cat = Categorical(["a", "b"], categories=["a", "b"]) diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 30ef0a934157b..06d57c48df817 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -62,11 +62,9 @@ def test_concat_empty_series(self): s1 = Series([1, 2, 3], name="x") s2 = Series(name="y", dtype="float64") - msg = "The behavior of array concatenation with empty entries is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = concat([s1, s2], axis=0) + res = concat([s1, s2], axis=0) # name will be reset - exp = Series([1, 2, 3]) + exp = Series([1, 2, 3], dtype="float64") tm.assert_series_equal(res, exp) # empty Series with no name diff --git a/pandas/tests/reshape/concat/test_series.py b/pandas/tests/reshape/concat/test_series.py index 6ac51c0b55c4e..3523340bb2858 100644 --- a/pandas/tests/reshape/concat/test_series.py +++ b/pandas/tests/reshape/concat/test_series.py @@ -43,10 +43,8 @@ def test_concat_empty_and_non_empty_series_regression(self): s1 = Series([1]) s2 = Series([], dtype=object) - expected = s1 - msg = "The behavior of array concatenation with empty entries is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = concat([s1, s2]) + expected = s1.astype(object) + result = concat([s1, s2]) tm.assert_series_equal(result, expected) def test_concat_series_axis1(self): diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index e1ec8afda33a9..0f2f533c8feff 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -63,11 +63,9 @@ def test_combine_first(self): # corner case ser = Series([1.0, 2, 3], index=[0, 1, 2]) empty = Series([], index=[], dtype=object) - msg = "The behavior of array concatenation with empty entries is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = ser.combine_first(empty) + result = ser.combine_first(empty) ser.index = ser.index.astype("O") - tm.assert_series_equal(ser, result) + tm.assert_series_equal(result, ser.astype(object)) def test_combine_first_dt64(self, unit): s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit) @@ -112,10 +110,8 @@ def test_combine_first_timezone_series_with_empty_series(self): ) s1 = Series(range(10), index=time_index) s2 = Series(index=time_index) - msg = "The behavior of array concatenation with empty entries is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s1.combine_first(s2) - tm.assert_series_equal(result, s1) + result = s1.combine_first(s2) + tm.assert_series_equal(result, s1.astype(np.float64)) def test_combine_first_preserves_dtype(self): # GH51764