diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 1ae76984484af..6dee1de31dd7e 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -257,6 +257,7 @@ Sparse
 ExtensionArray
 ^^^^^^^^^^^^^^
 - Bug in :meth:`IntegerArray.searchsorted` and :meth:`FloatingArray.searchsorted` returning inconsistent results when acting on ``np.nan`` (:issue:`45255`)
+- Bug in :class:`Series` construction with index and empty data when :class:`ExtensionDtype` has ``na_value`` of None (:issue:`44602`)
 -
 
 Styler
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 4e69596539bed..a92f0333098bb 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -74,7 +74,6 @@
     is_list_like,
     is_object_dtype,
     is_scalar,
-    pandas_dtype,
     validate_all_hashable,
 )
 from pandas.core.dtypes.generic import ABCDataFrame
@@ -100,6 +99,7 @@
 from pandas.core.arrays.sparse import SparseAccessor
 import pandas.core.common as com
 from pandas.core.construction import (
+    construct_1d_arraylike_from_scalar,
     create_series_with_explicit_dtype,
     extract_array,
     is_empty_data,
@@ -495,7 +495,9 @@ def _init_dict(
         elif index is not None:
             # fastpath for Series(data=None). Just use broadcasting a scalar
             # instead of reindexing.
-            values = na_value_for_dtype(pandas_dtype(dtype), compat=False)
+            values = na_value_for_dtype(dtype, compat=False)
+            if values is None:
+                values = construct_1d_arraylike_from_scalar(values, len(index), dtype)
             keys = index
         else:
             keys, values = (), []
diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py
index e43650c291200..b5da0b0eab9ce 100644
--- a/pandas/tests/extension/test_common.py
+++ b/pandas/tests/extension/test_common.py
@@ -1,3 +1,6 @@
+from collections import abc
+import numbers
+
 import numpy as np
 import pytest
 
@@ -9,30 +12,81 @@
 from pandas.core.arrays import ExtensionArray
 
 
-class DummyDtype(dtypes.ExtensionDtype):
+class DummyClass:
     pass
 
 
+class DummyDtype(dtypes.ExtensionDtype):
+
+    type = DummyClass
+    name = "dummy"
+
+    @classmethod
+    def construct_from_string(cls, string):
+        if string == cls.name:
+            return cls()
+        else:
+            raise TypeError(f"Cannot construct a '{cls}' from '{string}'")
+
+    @classmethod
+    def construct_array_type(cls):
+        return DummyArray
+
+
 class DummyArray(ExtensionArray):
+
+    _dtype = DummyDtype
+
     def __init__(self, data):
-        self.data = data
+        self.data = np.array(data)
 
-    def __array__(self, dtype):
+    def __array__(self, dtype=None):
         return self.data
 
     @property
     def dtype(self):
-        return DummyDtype()
+        return DummyArray._dtype()
 
     def astype(self, dtype, copy=True):
         # we don't support anything but a single dtype
-        if isinstance(dtype, DummyDtype):
+        if isinstance(dtype, self._dtype):
             if copy:
                 return type(self)(self.data)
             return self
 
         return np.array(self, dtype=dtype, copy=copy)
 
+    def __len__(self):
+        return len(self.data)
+
+    @classmethod
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        if isinstance(scalars, cls._dtype.type):
+            scalars = [scalars]
+        return DummyArray(scalars)
+
+    def take(self, indices, allow_fill=False, fill_value=None):
+        from pandas.core.algorithms import take
+
+        data = self.astype(object)
+
+        if allow_fill and fill_value is None:
+            fill_value = self.dtype.na_value
+
+        result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)
+        return self._from_sequence(result, dtype=self.dtype)
+
+    def isna(self):
+        return np.array([x is self.dtype.na_value for x in self.data], dtype="bool")
+
+    def __getitem__(self, idx):
+        if isinstance(idx, numbers.Integral):
+            return self.data[idx]
+        elif isinstance(idx, (abc.Iterable, slice)):
+            return DummyArray(self.data[idx])
+        else:
+            raise TypeError("Index type not supported", idx)
+
 
 class TestExtensionArrayDtype:
     @pytest.mark.parametrize(
@@ -79,3 +133,11 @@ def test_astype_no_copy():
 def test_is_extension_array_dtype(dtype):
     assert isinstance(dtype, dtypes.ExtensionDtype)
     assert is_extension_array_dtype(dtype)
+
+
+@pytest.mark.parametrize("na_value", [np.nan, pd.NA, None])
+def test_empty_series_construction(monkeypatch, na_value):
+    monkeypatch.setattr(DummyDtype, "na_value", na_value)
+    result = pd.Series(index=[1, 2, 3], dtype=DummyDtype())
+    expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=DummyDtype())
+    tm.assert_series_equal(result, expected)