diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 822428c6787be..c350e7e2546c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9644,6 +9644,7 @@ def describe_categorical_1d(data): objcounts = data.value_counts() count_unique = len(objcounts[objcounts != 0]) result = [data.count(), count_unique] + dtype = None if result[1] > 0: top, freq = objcounts.index[0], objcounts.iloc[0] @@ -9668,9 +9669,10 @@ def describe_categorical_1d(data): # to maintain output shape consistency else: names += ['top', 'freq'] - result += [None, None] + result += [np.nan, np.nan] + dtype = 'object' - return pd.Series(result, index=names, name=data.name) + return pd.Series(result, index=names, name=data.name, dtype=dtype) def describe_1d(data): if is_bool_dtype(data): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 01a398584b5e1..9921d91d6de8c 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -525,6 +525,17 @@ def test_bool_describe_in_mixed_frame(self): index=['count', 'unique', 'top', 'freq']) tm.assert_frame_equal(result, expected) + def test_describe_empty_object(self): + # https://github.com/pandas-dev/pandas/issues/27183 + df = pd.DataFrame({"A": [None, None]}, dtype=object) + result = df.describe() + expected = pd.DataFrame({"A": [0, 0, np.nan, np.nan]}, dtype=object, + index=['count', 'unique', 'top', 'freq']) + tm.assert_frame_equal(result, expected) + + result = df.iloc[:0].describe() + tm.assert_frame_equal(result, expected) + def test_describe_bool_frame(self): # GH 13891 df = pd.DataFrame({ @@ -590,13 +601,17 @@ def test_describe_categorical(self): def test_describe_empty_categorical_column(self): # GH 26397 - # Ensure the index of an an empty categoric DataFrame column + # Ensure the index of an empty categorical DataFrame column # also contains (count, unique, top, freq) df = pd.DataFrame({"empty_col": Categorical([])}) result = df.describe() - 
expected = DataFrame({'empty_col': [0, 0, None, None]}, - index=['count', 'unique', 'top', 'freq']) + expected = DataFrame({'empty_col': [0, 0, np.nan, np.nan]}, + index=['count', 'unique', 'top', 'freq'], + dtype='object') tm.assert_frame_equal(result, expected) + # ensure NaN, not None + assert np.isnan(result.iloc[2, 0]) + assert np.isnan(result.iloc[3, 0]) def test_describe_categorical_columns(self): # GH 11558 diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index df69bb35115cf..e48fd9ce11a7d 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -42,6 +42,20 @@ def test_describe(self): index=['count', 'unique', 'top', 'freq']) tm.assert_series_equal(result, expected) + def test_describe_empty_object(self): + # https://github.com/pandas-dev/pandas/issues/27183 + s = pd.Series([None, None], dtype=object) + result = s.describe() + expected = pd.Series([0, 0, np.nan, np.nan], dtype=object, + index=['count', 'unique', 'top', 'freq']) + tm.assert_series_equal(result, expected) + + result = s[:0].describe() + tm.assert_series_equal(result, expected) + # ensure NaN, not None + assert np.isnan(result.iloc[2]) + assert np.isnan(result.iloc[3]) + def test_describe_with_tz(self, tz_naive_fixture): # GH 21332 tz = tz_naive_fixture