From 16e4ab78d29f4ce28e47406756c5bb8cd94dd3d9 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 22 Jul 2020 21:57:37 +0100 Subject: [PATCH 1/3] CLN: move cached_data to _cache['_data'] --- pandas/core/indexes/range.py | 14 ++------ pandas/tests/indexes/ranges/test_range.py | 43 +++++++++++++---------- 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index eee610681087d..10265d2f6645f 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -78,8 +78,6 @@ class RangeIndex(Int64Index): _engine_type = libindex.Int64Engine _range: range - # check whether self._data has been called - _cached_data: Optional[np.ndarray] = None # -------------------------------------------------------------------- # Constructors @@ -150,20 +148,14 @@ def _constructor(self): """ return the class to use for construction """ return Int64Index - @property + @cache_readonly def _data(self): """ An int array that for performance reasons is created only when needed. - The constructed array is saved in ``_cached_data``. This allows us to - check if the array has been created without accessing ``_data`` and - triggering the construction. + The constructed array is saved in ``_cache``. """ - if self._cached_data is None: - self._cached_data = np.arange( - self.start, self.stop, self.step, dtype=np.int64 - ) - return self._cached_data + return np.arange(self.start, self.stop, self.step, dtype=np.int64) @cache_readonly def _int64index(self) -> Int64Index: diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 5b6f9cb358b7d..d325ed3d8790d 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -137,53 +137,58 @@ def test_dtype(self): index = self.create_index() assert index.dtype == np.int64 - def test_cached_data(self): + def test_cache(self): # GH 26565, GH26617 - # Calling RangeIndex._data caches an int64 array of the same length at - # self._cached_data. This test checks whether _cached_data has been set + # This test checks whether _cache has been set. + # Calling RangeIndex._cache["_data"] creates an int64 array of the same length + # as the RangeIndex and stores it in _cache. idx = RangeIndex(0, 100, 10) - assert idx._cached_data is None + assert idx._cache == {} repr(idx) - assert idx._cached_data is None + assert idx._cache == {} str(idx) - assert idx._cached_data is None + assert idx._cache == {} idx.get_loc(20) - assert idx._cached_data is None + assert idx._cache == {} - 90 in idx - assert idx._cached_data is None + 90 in idx # True + assert idx._cache == {} - 91 in idx - assert idx._cached_data is None + 91 in idx # False + assert idx._cache == {} idx.all() - assert idx._cached_data is None + assert idx._cache == {} idx.any() - assert idx._cached_data is None + assert idx._cache == {} df = pd.DataFrame({"a": range(10)}, index=idx) df.loc[50] - assert idx._cached_data is None + assert idx._cache == {} with pytest.raises(KeyError, match="51"): df.loc[51] - assert idx._cached_data is None + assert idx._cache == {} df.loc[10:50] - assert idx._cached_data is None + assert idx._cache == {} df.iloc[5:10] - assert idx._cached_data is None + assert idx._cache == {} - # actually calling idx._data + # idx._cache should contain a _data entry after call to idx._data + idx._data assert isinstance(idx._data, np.ndarray) - assert isinstance(idx._cached_data, np.ndarray) + assert idx._data is idx._data # check cached value is reused + assert len(idx._cache) == 4 + expected = np.arange(0, 100, 10, dtype="int64") + tm.assert_numpy_array_equal(idx._cache["_data"], expected) def test_is_monotonic(self): index = RangeIndex(0, 20, 2) From 6318ffa13b17bda5e2ac524701d3c00e8cb17b88 Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 28 Jul 2020 07:32:42 +0100 Subject: [PATCH 2/3] add GH number --- pandas/tests/indexes/ranges/test_range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index d325ed3d8790d..ef4bb9a0869b0 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -138,7 +138,7 @@ def test_dtype(self): assert index.dtype == np.int64 def test_cache(self): - # GH 26565, GH26617 + # GH 26565, GH26617, GH35432 # This test checks whether _cache has been set. # Calling RangeIndex._cache["_data"] creates an int64 array of the same length # as the RangeIndex and stores it in _cache. From f24b28ff50f155694bccc4feae96927cdb3ffb5d Mon Sep 17 00:00:00 2001 From: tp Date: Tue, 28 Jul 2020 09:05:15 +0100 Subject: [PATCH 3/3] flake8 cleanup --- pandas/core/indexes/range.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 10265d2f6645f..1dc4fc1e91462 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -1,7 +1,7 @@ from datetime import timedelta import operator from sys import getsizeof -from typing import Any, Optional +from typing import Any import warnings import numpy as np