From 61bcefc162cab173622610726923f2a6f6081112 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Thu, 25 Nov 2021 22:45:10 +1100 Subject: [PATCH 01/10] Bugfix for constructing empty Series, with index, using ExtensionDtype with na_value of None --- pandas/core/series.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index f0f5bd7c3e2b2..c4e58e04dcda3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -486,6 +486,8 @@ def _init_dict( # fastpath for Series(data=None). Just use broadcasting a scalar # instead of reindexing. values = na_value_for_dtype(pandas_dtype(dtype), compat=False) + if values is None: + values = [None]*len(index) keys = index else: keys, values = (), [] From 67b7072d7678773e7accb815b1365c80d28fa185 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Thu, 25 Nov 2021 22:47:56 +1100 Subject: [PATCH 02/10] pep8 compliance --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c4e58e04dcda3..5cddb787744dd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -487,7 +487,7 @@ def _init_dict( # instead of reindexing. values = na_value_for_dtype(pandas_dtype(dtype), compat=False) if values is None: - values = [None]*len(index) + values = [None] * len(index) keys = index else: keys, values = (), [] From 3014b9662b1dc4bfaded45fa2f86009d11e42ecf Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Mon, 13 Dec 2021 15:26:14 +1100 Subject: [PATCH 03/10] added tests for empty Series construction with Extension Dtype --- pandas/tests/extension/test_common.py | 81 +++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 5 deletions(-) diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index e43650c291200..8ae0f54fb8e1c 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -1,3 +1,6 @@ +from collections.abc import Iterable +import numbers + import numpy as np import pytest @@ -9,30 +12,81 @@ from pandas.core.arrays import ExtensionArray -class DummyDtype(dtypes.ExtensionDtype): +class DummyClass: pass +class DummyDtype(dtypes.ExtensionDtype): + + type = DummyClass + name = "dummy" + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError("Cannot construct a '{}' from " "'{}'".format(cls, string)) + + @classmethod + def construct_array_type(cls): + return DummyArray + + class DummyArray(ExtensionArray): + + _dtype = DummyDtype + def __init__(self, data): - self.data = data + self.data = np.array(data) - def __array__(self, dtype): + def __array__(self, dtype=None): return self.data @property def dtype(self): - return DummyDtype() + return DummyArray._dtype() def astype(self, dtype, copy=True): # we don't support anything but a single dtype - if isinstance(dtype, DummyDtype): + if isinstance(dtype, self._dtype): if copy: return type(self)(self.data) return self return np.array(self, dtype=dtype, copy=copy) + def __len__(self): + return len(self.data) + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + if isinstance(scalars, cls._dtype.type): + scalars = [scalars] + return DummyArray(scalars) + + def take(self, indices, allow_fill=False, fill_value=None): + from pandas.core.algorithms import take + + data = self.astype(object) + + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) + return self._from_sequence(result, dtype=self.dtype) + + def isna(self): + return np.array([x is self.dtype.na_value for x in self.data], dtype="bool") + + def __getitem__(self, idx): + if isinstance(idx, numbers.Integral): + return self.data[idx] + elif isinstance(idx, (Iterable, slice)): + return DummyArray(self.data[idx]) + else: + raise TypeError("Index type not supported", idx) + class TestExtensionArrayDtype: @pytest.mark.parametrize( @@ -79,3 +133,20 @@ def test_astype_no_copy(): def test_is_extension_array_dtype(dtype): assert isinstance(dtype, dtypes.ExtensionDtype) assert is_extension_array_dtype(dtype) + + +@pytest.mark.parametrize("na_value", [np.nan, pd.NA, None]) +def test_empty_series_construction(na_value): + class TempDType(DummyDtype): + @classmethod + def construct_array_type(cls): + return TempArray + + TempDType.na_value = na_value + + class TempArray(DummyArray): + _dtype = TempDType + + result = pd.Series(index=[1, 2, 3], dtype=TempDType()) + expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=TempDType()) + tm.assert_series_equal(result, expected) From cabfad4cf6e336f47de7208c91bf9d51cbac2367 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Mon, 13 Dec 2021 17:07:55 +1100 Subject: [PATCH 04/10] linting --- pandas/tests/extension/test_common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 8ae0f54fb8e1c..d67c1ae6ceb4e 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -1,4 +1,4 @@ -from collections.abc import Iterable +from collections import abc import numbers import numpy as np @@ -26,7 +26,7 @@ def construct_from_string(cls, string): if string == cls.name: return cls() else: - raise TypeError("Cannot construct a '{}' from " "'{}'".format(cls, string)) + raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string)) @classmethod def construct_array_type(cls): @@ -82,7 +82,7 @@ def isna(self): def __getitem__(self, idx): if isinstance(idx, numbers.Integral): return self.data[idx] - elif isinstance(idx, (Iterable, slice)): + elif isinstance(idx, (abc.Iterable, slice)): return DummyArray(self.data[idx]) else: raise TypeError("Index type not supported", idx) From 5fee7b4f70c6c980492ed577b7032acad1f5e8a5 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Mon, 13 Dec 2021 17:21:36 +1100 Subject: [PATCH 05/10] linting --- pandas/tests/extension/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index d67c1ae6ceb4e..43914e1f890a1 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -26,7 +26,7 @@ def construct_from_string(cls, string): if string == cls.name: return cls() else: - raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string)) + raise TypeError(f"Cannot construct a '{cls}' from '{string}'") @classmethod def construct_array_type(cls): From 69117c1ebd1621a69164129169232c83a7439b19 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Tue, 21 Dec 2021 11:52:39 +1100 Subject: [PATCH 06/10] Using construct_1d_arraylike_from_scalar to construct array of na_value --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/core/series.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 9ead1e4a75d01..855e40e0d28e5 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -724,6 +724,7 @@ ExtensionArray - NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`) - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`) - Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`) +- Bug in :class:`Series` construction with index and empty data when :class:`ExtensionDtype` has ``na_value`` of None (:issue:`44602`) - Styler diff --git a/pandas/core/series.py b/pandas/core/series.py index 5cddb787744dd..0efd066d3c788 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -102,6 +102,7 @@ from pandas.core.arrays.sparse import SparseAccessor import pandas.core.common as com from pandas.core.construction import ( + construct_1d_arraylike_from_scalar, create_series_with_explicit_dtype, extract_array, is_empty_data, @@ -485,9 +486,11 @@ def _init_dict( elif index is not None: # fastpath for Series(data=None). Just use broadcasting a scalar # instead of reindexing. - values = na_value_for_dtype(pandas_dtype(dtype), compat=False) + dtype_ = pandas_dtype(dtype) + values = na_value_for_dtype(dtype_, compat=False) if values is None: - values = [None] * len(index) + # bugfix for GH#44602 + values = construct_1d_arraylike_from_scalar(values, len(index), dtype_) keys = index else: keys, values = (), [] From 49f8a68d1a5d2703bed35d4c9a0838a1398a03bd Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Thu, 20 Jan 2022 12:06:33 +1100 Subject: [PATCH 07/10] Moved whatsnew entry to 1.5.0 --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/series.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 1ae76984484af..6dee1de31dd7e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -257,6 +257,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ - Bug in :meth:`IntegerArray.searchsorted` and :meth:`FloatingArray.searchsorted` returning inconsistent results when acting on ``np.nan`` (:issue:`45255`) +- Bug in :class:`Series` construction with index and empty data when :class:`ExtensionDtype` has ``na_value`` of None (:issue:`44602`) - Styler diff --git a/pandas/core/series.py b/pandas/core/series.py index cf0582d6c89b8..d40a21bc45d68 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -499,7 +499,6 @@ def _init_dict( dtype_ = pandas_dtype(dtype) values = na_value_for_dtype(dtype_, compat=False) if values is None: - # bugfix for GH#44602 values = construct_1d_arraylike_from_scalar(values, len(index), dtype_) keys = index else: From 48db30e2b072727a32969f9a86b7e1c708aa77c8 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Tue, 25 Jan 2022 12:30:36 +1100 Subject: [PATCH 08/10] Removed redundant call to pandas_dtype --- pandas/core/series.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d40a21bc45d68..0238db14433be 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -496,8 +496,7 @@ def _init_dict( elif index is not None: # fastpath for Series(data=None). Just use broadcasting a scalar # instead of reindexing. - dtype_ = pandas_dtype(dtype) - values = na_value_for_dtype(dtype_, compat=False) + values = na_value_for_dtype(dtype, compat=False) if values is None: values = construct_1d_arraylike_from_scalar(values, len(index), dtype_) keys = index From 8b6b01f56f80497b45870d9704c0a24265377662 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Tue, 25 Jan 2022 12:30:36 +1100 Subject: [PATCH 09/10] Removed redundant call to pandas_dtype --- pandas/core/series.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d40a21bc45d68..a92f0333098bb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -74,7 +74,6 @@ is_list_like, is_object_dtype, is_scalar, - pandas_dtype, validate_all_hashable, ) from pandas.core.dtypes.generic import ABCDataFrame @@ -496,10 +495,9 @@ def _init_dict( elif index is not None: # fastpath for Series(data=None). Just use broadcasting a scalar # instead of reindexing. - dtype_ = pandas_dtype(dtype) - values = na_value_for_dtype(dtype_, compat=False) + values = na_value_for_dtype(dtype, compat=False) if values is None: - values = construct_1d_arraylike_from_scalar(values, len(index), dtype_) + values = construct_1d_arraylike_from_scalar(values, len(index), dtype) keys = index else: keys, values = (), [] From bdac79183ac5c74ecfb0206e9f24cbca10ab9c63 Mon Sep 17 00:00:00 2001 From: Riley Clement Date: Tue, 1 Mar 2022 11:17:38 +1100 Subject: [PATCH 10/10] using monkeypatch to set dtype na_value --- pandas/tests/extension/test_common.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 43914e1f890a1..b5da0b0eab9ce 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -136,17 +136,8 @@ def test_is_extension_array_dtype(dtype): @pytest.mark.parametrize("na_value", [np.nan, pd.NA, None]) -def test_empty_series_construction(na_value): - class TempDType(DummyDtype): - @classmethod - def construct_array_type(cls): - return TempArray - - TempDType.na_value = na_value - - class TempArray(DummyArray): - _dtype = TempDType - - result = pd.Series(index=[1, 2, 3], dtype=TempDType()) - expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=TempDType()) +def test_empty_series_construction(monkeypatch, na_value): + monkeypatch.setattr(DummyDtype, "na_value", na_value) + result = pd.Series(index=[1, 2, 3], dtype=DummyDtype()) + expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=DummyDtype()) tm.assert_series_equal(result, expected)