Skip to content

Commit ccc1a25

Browse files
authored
Bug in concat casting all na levels to float (pandas-dev#44902)
1 parent 01268aa commit ccc1a25

File tree

4 files changed

+36
-1
lines changed

4 files changed

+36
-1
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -809,6 +809,7 @@ Reshaping
809809
- Bug in :func:`crosstab` would fail when inputs are lists or tuples (:issue:`44076`)
810810
- Bug in :meth:`DataFrame.append` failing to retain ``index.name`` when appending a list of :class:`Series` objects (:issue:`44109`)
811811
- Fixed metadata propagation in :meth:`DataFrame.apply` method, consequently fixing the same issue for :meth:`DataFrame.transform`, :meth:`DataFrame.nunique` and :meth:`DataFrame.mode` (:issue:`28283`)
812+
- Bug in :func:`concat` casting levels of :class:`MultiIndex` to float if they only consist of missing values (:issue:`44900`)
812813
- Bug in :meth:`DataFrame.stack` with ``ExtensionDtype`` columns incorrectly raising (:issue:`43561`)
813814
- Bug in :meth:`Series.unstack` with object doing unwanted type inference on resulting columns (:issue:`44595`)
814815
- Bug in :class:`MultiIndex` failing join operations with overlapping ``IntervalIndex`` levels (:issue:`44096`)

pandas/core/arrays/categorical.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,14 @@ def __init__(
422422
# We remove null values here, then below will re-insert
423423
# them, grep "full_codes"
424424
arr_list = [values[idx] for idx in np.where(~null_mask)[0]]
425-
arr = sanitize_array(arr_list, None)
425+
426+
# GH#44900 Do not cast to float if we have only missing values
427+
if arr_list or arr.dtype == "object":
428+
sanitize_dtype = None
429+
else:
430+
sanitize_dtype = arr.dtype
431+
432+
arr = sanitize_array(arr_list, None, dtype=sanitize_dtype)
426433
values = arr
427434

428435
if dtype.categories is None:

pandas/tests/arrays/categorical/test_missing.py

+14
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,17 @@ def test_compare_categorical_with_missing(self, a1, a2, categories):
197197
result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type)
198198
expected = Series(a1) == Series(a2)
199199
tm.assert_series_equal(result, expected)
200+
201+
@pytest.mark.parametrize(
202+
"na_value, dtype",
203+
[
204+
(pd.NaT, "datetime64[ns]"),
205+
(None, "float64"),
206+
(np.nan, "float64"),
207+
(pd.NA, "float64"),
208+
],
209+
)
210+
def test_categorical_only_missing_values_no_cast(self, na_value, dtype):
211+
# GH#44900
212+
result = Categorical([na_value, na_value])
213+
tm.assert_index_equal(result.categories, Index([], dtype=dtype))

pandas/tests/reshape/concat/test_datetimes.py

+13
Original file line numberDiff line numberDiff line change
@@ -528,3 +528,16 @@ def test_concat_timedelta64_block():
528528
result = concat([df, df])
529529
tm.assert_frame_equal(result.iloc[:10], df)
530530
tm.assert_frame_equal(result.iloc[10:], df)
531+
532+
533+
def test_concat_multiindex_datetime_nat():
534+
# GH#44900
535+
left = DataFrame({"a": 1}, index=MultiIndex.from_tuples([(1, pd.NaT)]))
536+
right = DataFrame(
537+
{"b": 2}, index=MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)])
538+
)
539+
result = concat([left, right], axis="columns")
540+
expected = DataFrame(
541+
{"a": [1.0, np.nan], "b": 2}, MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)])
542+
)
543+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)