Skip to content

Commit 47f5fdf

Browse files
API: concatting DataFrames does not skip empty objects (#39035)
* Revert "BUG: casting on concat with empties (#38907)"
  This reverts commit 04282c7.
* Revert "BUG: inconsistent concat casting EA vs non-EA (#38843)"
  This reverts commit 2362df9.
1 parent 6259b5a commit 47f5fdf

File tree

7 files changed

+16
-32
lines changed

7 files changed

+16
-32
lines changed

doc/source/whatsnew/v1.3.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -312,8 +312,8 @@ Reshaping
312312
- Bug in :func:`merge` raising an error when performing an inner join with a partial index and ``right_index`` when there is no overlap between indices (:issue:`33814`)
313313
- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`)
314314
- Bug in :func:`join` over :class:`MultiIndex` returning the wrong result when either of the indexes had only one level (:issue:`36909`)
315-
- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`, :issue:`38907`)
316315
- :meth:`merge_asof` raises ``ValueError`` instead of cryptic ``TypeError`` in case of non-numerical merge columns (:issue:`29130`)
316+
-
317317

318318
Sparse
319319
^^^^^^

pandas/core/dtypes/concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def is_nonempty(x) -> bool:
127127
# marginal given that it would still require shape & dtype calculation and
128128
# np.concatenate which has them both implemented is compiled.
129129
non_empties = [x for x in to_concat if is_nonempty(x)]
130-
if non_empties:
130+
if non_empties and axis == 0:
131131
to_concat = non_empties
132132

133133
typs = _get_dtype_kinds(to_concat)

pandas/core/internals/concat.py

-6
Original file line numberDiff line numberDiff line change
@@ -318,12 +318,6 @@ def _concatenate_join_units(
318318
# Concatenating join units along ax0 is handled in _merge_blocks.
319319
raise AssertionError("Concatenating join units along axis0")
320320

321-
nonempties = [
322-
x for x in join_units if x.block is None or x.block.shape[concat_axis] > 0
323-
]
324-
if nonempties:
325-
join_units = nonempties
326-
327321
empty_dtype, upcasted_na = _get_empty_dtype_and_na(join_units)
328322

329323
to_concat = [

pandas/tests/indexing/test_partial.py

+4-15
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,7 @@ def test_partial_setting_mixed_dtype(self):
154154
# columns will align
155155
df = DataFrame(columns=["A", "B"])
156156
df.loc[0] = Series(1, index=range(4))
157-
expected = DataFrame(columns=["A", "B"], index=[0], dtype=int)
158-
tm.assert_frame_equal(df, expected)
157+
tm.assert_frame_equal(df, DataFrame(columns=["A", "B"], index=[0]))
159158

160159
# columns will align
161160
df = DataFrame(columns=["A", "B"])
@@ -171,21 +170,11 @@ def test_partial_setting_mixed_dtype(self):
171170
with pytest.raises(ValueError, match=msg):
172171
df.loc[0] = [1, 2, 3]
173172

174-
@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
175-
def test_loc_setitem_expanding_empty(self, dtype):
173+
# TODO: #15657, these are left as object and not coerced
176174
df = DataFrame(columns=["A", "B"])
175+
df.loc[3] = [6, 7]
177176

178-
value = [6, 7]
179-
if dtype == "int64":
180-
value = np.array(value, dtype=dtype)
181-
elif dtype == "Int64":
182-
value = pd.array(value, dtype=dtype)
183-
184-
df.loc[3] = value
185-
186-
exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=dtype)
187-
if dtype is not None:
188-
exp = exp.astype(dtype)
177+
exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object")
189178
tm.assert_frame_equal(df, exp)
190179

191180
def test_series_partial_set(self):

pandas/tests/reshape/concat/test_append.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ def test_append_length0_frame(self, sort):
8282
df5 = df.append(df3, sort=sort)
8383

8484
expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
85-
expected["C"] = expected["C"].astype(np.float64)
8685
tm.assert_frame_equal(df5, expected)
8786

8887
def test_append_records(self):
@@ -341,11 +340,16 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self):
341340
expected = DataFrame(
342341
[[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
343342
)
343+
# These columns get cast to object after append
344+
expected["c"] = expected["c"].astype(object)
345+
expected["d"] = expected["d"].astype(object)
344346
tm.assert_frame_equal(result_a, expected)
345347

346348
expected = DataFrame(
347349
[[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"]
348350
)
351+
expected["c"] = expected["c"].astype(object)
352+
expected["d"] = expected["d"].astype(object)
349353

350354
result_b = result_a.append(s, ignore_index=True)
351355
tm.assert_frame_equal(result_b, expected)

pandas/tests/reshape/concat/test_concat.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -474,12 +474,11 @@ def test_concat_will_upcast(dt, pdt):
474474
assert x.values.dtype == "float64"
475475

476476

477-
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
478-
def test_concat_empty_and_non_empty_frame_regression(dtype):
477+
def test_concat_empty_and_non_empty_frame_regression():
479478
# GH 18178 regression test
480-
df1 = DataFrame({"foo": [1]}).astype(dtype)
479+
df1 = DataFrame({"foo": [1]})
481480
df2 = DataFrame({"foo": []})
482-
expected = df1
481+
expected = DataFrame({"foo": [1.0]})
483482
result = pd.concat([df1, df2])
484483
tm.assert_frame_equal(result, expected)
485484

pandas/tests/reshape/concat/test_empty.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -202,14 +202,12 @@ def test_concat_empty_series_dtypes_sparse(self):
202202
expected = pd.SparseDtype("object")
203203
assert result.dtype == expected
204204

205-
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
206-
def test_concat_empty_df_object_dtype(self, dtype):
205+
def test_concat_empty_df_object_dtype(self):
207206
# GH 9149
208207
df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
209-
df_1["Row"] = df_1["Row"].astype(dtype)
210208
df_2 = DataFrame(columns=df_1.columns)
211209
result = pd.concat([df_1, df_2], axis=0)
212-
expected = df_1.copy()
210+
expected = df_1.astype(object)
213211
tm.assert_frame_equal(result, expected)
214212

215213
def test_concat_empty_dataframe_dtypes(self):

0 commit comments

Comments
 (0)