Skip to content

Commit 2362df9

Browse files
authored
BUG: inconsistent concat casting EA vs non-EA (#38843)
1 parent 02166b7 commit 2362df9

File tree

5 files changed

+25
-10
lines changed

5 files changed

+25
-10
lines changed

doc/source/whatsnew/v1.3.0.rst

+2 -1
Original file line number | Diff line number | Diff line change
@@ -293,10 +293,11 @@ Groupby/resample/rolling
293293

294294
Reshaping
295295
^^^^^^^^^
296-
297296
- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`)
297+
- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`)
298298
-
299299

300+
300301
Sparse
301302
^^^^^^
302303

pandas/core/dtypes/concat.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ def is_nonempty(x) -> bool:
128128
# marginal given that it would still require shape & dtype calculation and
129129
# np.concatenate which has them both implemented is compiled.
130130
non_empties = [x for x in to_concat if is_nonempty(x)]
131-
if non_empties and axis == 0:
131+
if non_empties:
132132
to_concat = non_empties
133133

134134
typs = _get_dtype_kinds(to_concat)

pandas/tests/indexing/test_partial.py

+13 -3
Original file line number | Diff line number | Diff line change
@@ -170,11 +170,21 @@ def test_partial_setting_mixed_dtype(self):
170170
with pytest.raises(ValueError, match=msg):
171171
df.loc[0] = [1, 2, 3]
172172

173-
# TODO: #15657, these are left as object and not coerced
173+
@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
174+
def test_loc_setitem_expanding_empty(self, dtype):
174175
df = DataFrame(columns=["A", "B"])
175-
df.loc[3] = [6, 7]
176176

177-
exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object")
177+
value = [6, 7]
178+
if dtype == "int64":
179+
value = np.array(value, dtype=dtype)
180+
elif dtype == "Int64":
181+
value = pd.array(value, dtype=dtype)
182+
183+
df.loc[3] = value
184+
185+
exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=dtype)
186+
if dtype is not None:
187+
exp = exp.astype(dtype)
178188
tm.assert_frame_equal(df, exp)
179189

180190
def test_series_partial_set(self):

pandas/tests/reshape/concat/test_concat.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -474,11 +474,12 @@ def test_concat_will_upcast(dt, pdt):
474474
assert x.values.dtype == "float64"
475475

476476

477-
def test_concat_empty_and_non_empty_frame_regression():
477+
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
478+
def test_concat_empty_and_non_empty_frame_regression(dtype):
478479
# GH 18178 regression test
479-
df1 = DataFrame({"foo": [1]})
480+
df1 = DataFrame({"foo": [1]}).astype(dtype)
480481
df2 = DataFrame({"foo": []})
481-
expected = DataFrame({"foo": [1.0]})
482+
expected = df1
482483
result = pd.concat([df1, df2])
483484
tm.assert_frame_equal(result, expected)
484485

pandas/tests/reshape/concat/test_empty.py

+5 -2
Original file line number | Diff line number | Diff line change
@@ -202,12 +202,15 @@ def test_concat_empty_series_dtypes_sparse(self):
202202
expected = pd.SparseDtype("object")
203203
assert result.dtype == expected
204204

205-
def test_concat_empty_df_object_dtype(self):
205+
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
206+
def test_concat_empty_df_object_dtype(self, dtype):
206207
# GH 9149
207208
df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
209+
df_1["Row"] = df_1["Row"].astype(dtype)
208210
df_2 = DataFrame(columns=df_1.columns)
209211
result = pd.concat([df_1, df_2], axis=0)
210-
expected = df_1.astype(object)
212+
expected = df_1.copy()
213+
expected["EmptyCol"] = expected["EmptyCol"].astype(object) # TODO: why?
211214
tm.assert_frame_equal(result, expected)
212215

213216
def test_concat_empty_dataframe_dtypes(self):

0 commit comments

Comments
 (0)