diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 932618ba1df21..2a0e6f9ad4702 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -848,3 +848,4 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) +- Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`) diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 4beb24f07c21c..ef1b4654209cd 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -9,22 +9,30 @@ from cpython cimport ( cdef class cache_readonly(object): cdef readonly: - object func, name, allow_setting + object func, name, allow_setting, __doc__ def __init__(self, func=None, allow_setting=False): if func is not None: self.func = func self.name = func.__name__ + self.__doc__ = func.__doc__ self.allow_setting = allow_setting def __call__(self, func, doc=None): self.func = func self.name = func.__name__ + if doc is not None: + self.__doc__ = doc + else: + self.__doc__ = func.__doc__ return self def __get__(self, obj, typ): - # Get the cache or set a default one if needed + if obj is None: + # accessed on the class, not the instance + return self + # Get the cache or set a default one if needed cache = getattr(obj, '_cache', None) if cache is None: try: @@ -55,6 +63,66 @@ cdef class cache_readonly(object): PyDict_SetItem(cache, self.name, value) + +cdef class maybe_cache(object): + """ + A configurable property-like class that acts like a cache_readonly + if an "_immutable" flag is True and a like property otherwise + """ + + cdef readonly: + object func, name, __doc__ + + def __init__(self, func=None): + if func is not None: + self.func = func + self.name = func.__name__ + self.__doc__ = func.__doc__ + + def __call__(self, func, doc=None): + self.func = func + self.name = func.__name__ + if doc is not None: + self.__doc__ = doc + else: + self.__doc__ = func.__doc__ + return self + + def __get__(self, obj, typ): + cdef: + bint immutable + + if obj is None: + # accessed on the class, not the instance + return self + + # Default to non-caching + immutable = getattr(typ, '_immutable', False) + if not immutable: + # behave like a property + val = self.func(obj) + return val + + # Get the cache or set a default one if needed + cache = getattr(obj, '_cache', None) + if cache is None: + try: + cache = obj._cache = {} + except AttributeError: + return + + if PyDict_Contains(cache, self.name): + # not necessary to Py_INCREF + val = PyDict_GetItem(cache, self.name) + else: + val = self.func(obj) + PyDict_SetItem(cache, self.name, val) + return val + + def __set__(self, obj, value): + raise Exception("cannot set values for [%s]" % self.name) + + cdef class AxisProperty(object): cdef: Py_ssize_t axis diff --git a/pandas/core/series.py b/pandas/core/series.py index 655eaa5373f5a..bb94a637ae7db 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -145,6 +145,8 @@ class Series(base.IndexOpsMixin, generic.NDFrame): ['asobject', 'sortlevel', 'reshape', 'get_value', 'set_value', 'from_csv', 'valid']) + hasnans = property(base.IndexOpsMixin.hasnans.func) + def __init__(self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False): diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py index 79e23459ac992..fff43c8a7cd7e 100644 --- a/pandas/tests/series/test_internals.py +++ b/pandas/tests/series/test_internals.py @@ -8,6 +8,7 @@ from numpy import nan import numpy as np +import pandas as pd from pandas import Series from pandas.core.indexes.datetimes import Timestamp import pandas._libs.lib as lib @@ -309,3 +310,16 @@ def test_convert_preserve_all_bool(self): r = s._convert(datetime=True, numeric=True) e = Series([False, True, False, False], dtype=bool) tm.assert_series_equal(r, e) + + +def test_hasnans_unchached_for_series(): + # GH#19700 + idx = pd.Index([0, 1]) + assert not idx.hasnans + assert 'hasnans' in idx._cache + ser = idx.to_series() + assert not ser.hasnans + assert not hasattr(ser, '_cache') + ser.iloc[-1] = np.nan + assert ser.hasnans + assert 'enables' in pd.Series.hasnans.__doc__ diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 502f0c3bced61..919262be6dbaf 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -5,6 +5,7 @@ import numpy as np from pandas._libs import lib, writers as libwriters import pandas.util.testing as tm +import pandas as pd class TestMisc(object): @@ -198,3 +199,10 @@ def test_get_reverse_indexer(self): result = lib.get_reverse_indexer(indexer, 5) expected = np.array([4, 2, 3, 6, 7], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) + + +def test_cache_readonly_preserve_docstrings(): + # GH#19700 accessing class attributes that are cache_readonly + # should a) not return None and b) reflect original docstrings + assert "perf" in pd.Index.hasnans.__doc__ + assert "dtype str " in pd.Index.dtype_str.__doc__