Skip to content

Commit 2c9c402

Browse files
authored
DEPR: ignoring empty entries in pd.concat (#58056)
1 parent c7b998e commit 2c9c402

File tree

10 files changed: +25 / -68 lines changed

10 files changed: +25 / -68 lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,7 @@ Other Deprecations
200200
Removal of prior version deprecations/changes
201201
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
202202
- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`)
203+
- :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`)
203204
- :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
204205
- :meth:`DataFrame.groupby` with ``as_index=False`` and aggregation methods will no longer exclude from the result the groupings that do not arise from the input (:issue:`49519`)
205206
- :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`)

pandas/core/dtypes/concat.py

-20
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,10 @@
88
TYPE_CHECKING,
99
cast,
1010
)
11-
import warnings
1211

1312
import numpy as np
1413

1514
from pandas._libs import lib
16-
from pandas.util._exceptions import find_stack_level
1715

1816
from pandas.core.dtypes.astype import astype_array
1917
from pandas.core.dtypes.cast import (
@@ -101,28 +99,10 @@ def concat_compat(
10199
# Creating an empty array directly is tempting, but the winnings would be
102100
# marginal given that it would still require shape & dtype calculation and
103101
# np.concatenate which has them both implemented is compiled.
104-
orig = to_concat
105102
non_empties = [x for x in to_concat if _is_nonempty(x, axis)]
106-
if non_empties and axis == 0 and not ea_compat_axis:
107-
# ea_compat_axis see GH#39574
108-
to_concat = non_empties
109103

110104
any_ea, kinds, target_dtype = _get_result_dtype(to_concat, non_empties)
111105

112-
if len(to_concat) < len(orig):
113-
_, _, alt_dtype = _get_result_dtype(orig, non_empties)
114-
if alt_dtype != target_dtype:
115-
# GH#39122
116-
warnings.warn(
117-
"The behavior of array concatenation with empty entries is "
118-
"deprecated. In a future version, this will no longer exclude "
119-
"empty items when determining the result dtype. "
120-
"To retain the old behavior, exclude the empty entries before "
121-
"the concat operation.",
122-
FutureWarning,
123-
stacklevel=find_stack_level(),
124-
)
125-
126106
if target_dtype is not None:
127107
to_concat = [astype_array(arr, target_dtype, copy=False) for arr in to_concat]
128108

pandas/tests/dtypes/test_concat.py

+3-6
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,9 @@ def test_concat_mismatched_categoricals_with_empty():
1212
ser1 = Series(["a", "b", "c"], dtype="category")
1313
ser2 = Series([], dtype="category")
1414

15-
msg = "The behavior of array concatenation with empty entries is deprecated"
16-
with tm.assert_produces_warning(FutureWarning, match=msg):
17-
result = _concat.concat_compat([ser1._values, ser2._values])
18-
with tm.assert_produces_warning(FutureWarning, match=msg):
19-
expected = pd.concat([ser1, ser2])._values
20-
tm.assert_categorical_equal(result, expected)
15+
result = _concat.concat_compat([ser1._values, ser2._values])
16+
expected = pd.concat([ser1, ser2])._values
17+
tm.assert_numpy_array_equal(result, expected)
2118

2219

2320
def test_concat_single_dataframe_tz_aware():

pandas/tests/groupby/test_groupby.py

+1-4
Original file line number | Diff line number | Diff line change
@@ -224,8 +224,6 @@ def f3(x):
224224

225225
df2 = DataFrame({"a": [3, 2, 2, 2], "b": range(4), "c": range(5, 9)})
226226

227-
depr_msg = "The behavior of array concatenation with empty entries is deprecated"
228-
229227
# correct result
230228
msg = "DataFrameGroupBy.apply operated on the grouping columns"
231229
with tm.assert_produces_warning(DeprecationWarning, match=msg):
@@ -245,8 +243,7 @@ def f3(x):
245243
with pytest.raises(AssertionError, match=msg):
246244
df.groupby("a").apply(f3)
247245
with pytest.raises(AssertionError, match=msg):
248-
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
249-
df2.groupby("a").apply(f3)
246+
df2.groupby("a").apply(f3)
250247

251248

252249
def test_attr_wrapper(ts):

pandas/tests/indexes/test_base.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -636,9 +636,7 @@ def test_append_empty_preserve_name(self, name, expected):
636636
left = Index([], name="foo")
637637
right = Index([1, 2, 3], name=name)
638638

639-
msg = "The behavior of array concatenation with empty entries is deprecated"
640-
with tm.assert_produces_warning(FutureWarning, match=msg):
641-
result = left.append(right)
639+
result = left.append(right)
642640
assert result.name == expected
643641

644642
@pytest.mark.parametrize(

pandas/tests/reshape/concat/test_append.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -162,9 +162,7 @@ def test_append_preserve_index_name(self):
162162
df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"])
163163
df2 = df2.set_index(["A"])
164164

165-
msg = "The behavior of array concatenation with empty entries is deprecated"
166-
with tm.assert_produces_warning(FutureWarning, match=msg):
167-
result = df1._append(df2)
165+
result = df1._append(df2)
168166
assert result.index.name == "A"
169167

170168
indexes_can_append = [

pandas/tests/reshape/concat/test_append_common.py

+10-16
Original file line number | Diff line number | Diff line change
@@ -691,15 +691,12 @@ def test_concat_categorical_empty(self):
691691

692692
s1 = Series([], dtype="category")
693693
s2 = Series([1, 2], dtype="category")
694+
exp = s2.astype(object)
695+
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
696+
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
694697

695-
msg = "The behavior of array concatenation with empty entries is deprecated"
696-
with tm.assert_produces_warning(FutureWarning, match=msg):
697-
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
698-
tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
699-
700-
with tm.assert_produces_warning(FutureWarning, match=msg):
701-
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
702-
tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)
698+
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
699+
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
703700

704701
s1 = Series([], dtype="category")
705702
s2 = Series([], dtype="category")
@@ -719,15 +716,12 @@ def test_concat_categorical_empty(self):
719716
s1 = Series([], dtype="category")
720717
s2 = Series([np.nan, np.nan])
721718

722-
# empty Series is ignored
723-
exp = Series([np.nan, np.nan])
724-
with tm.assert_produces_warning(FutureWarning, match=msg):
725-
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
726-
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
719+
exp = Series([np.nan, np.nan], dtype=object)
720+
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
721+
tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
727722

728-
with tm.assert_produces_warning(FutureWarning, match=msg):
729-
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
730-
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
723+
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
724+
tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
731725

732726
def test_categorical_concat_append(self):
733727
cat = Categorical(["a", "b"], categories=["a", "b"])

pandas/tests/reshape/concat/test_empty.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -62,11 +62,9 @@ def test_concat_empty_series(self):
6262

6363
s1 = Series([1, 2, 3], name="x")
6464
s2 = Series(name="y", dtype="float64")
65-
msg = "The behavior of array concatenation with empty entries is deprecated"
66-
with tm.assert_produces_warning(FutureWarning, match=msg):
67-
res = concat([s1, s2], axis=0)
65+
res = concat([s1, s2], axis=0)
6866
# name will be reset
69-
exp = Series([1, 2, 3])
67+
exp = Series([1, 2, 3], dtype="float64")
7068
tm.assert_series_equal(res, exp)
7169

7270
# empty Series with no name

pandas/tests/reshape/concat/test_series.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,8 @@ def test_concat_empty_and_non_empty_series_regression(self):
4343
s1 = Series([1])
4444
s2 = Series([], dtype=object)
4545

46-
expected = s1
47-
msg = "The behavior of array concatenation with empty entries is deprecated"
48-
with tm.assert_produces_warning(FutureWarning, match=msg):
49-
result = concat([s1, s2])
46+
expected = s1.astype(object)
47+
result = concat([s1, s2])
5048
tm.assert_series_equal(result, expected)
5149

5250
def test_concat_series_axis1(self):

pandas/tests/series/methods/test_combine_first.py

+4-8
Original file line number | Diff line number | Diff line change
@@ -63,11 +63,9 @@ def test_combine_first(self):
6363
# corner case
6464
ser = Series([1.0, 2, 3], index=[0, 1, 2])
6565
empty = Series([], index=[], dtype=object)
66-
msg = "The behavior of array concatenation with empty entries is deprecated"
67-
with tm.assert_produces_warning(FutureWarning, match=msg):
68-
result = ser.combine_first(empty)
66+
result = ser.combine_first(empty)
6967
ser.index = ser.index.astype("O")
70-
tm.assert_series_equal(ser, result)
68+
tm.assert_series_equal(result, ser.astype(object))
7169

7270
def test_combine_first_dt64(self, unit):
7371
s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit)
@@ -112,10 +110,8 @@ def test_combine_first_timezone_series_with_empty_series(self):
112110
)
113111
s1 = Series(range(10), index=time_index)
114112
s2 = Series(index=time_index)
115-
msg = "The behavior of array concatenation with empty entries is deprecated"
116-
with tm.assert_produces_warning(FutureWarning, match=msg):
117-
result = s1.combine_first(s2)
118-
tm.assert_series_equal(result, s1)
113+
result = s1.combine_first(s2)
114+
tm.assert_series_equal(result, s1.astype(np.float64))
119115

120116
def test_combine_first_preserves_dtype(self):
121117
# GH51764

0 commit comments

Comments
 (0)