Skip to content

Commit a0ca4d7

Browse files
authored
Remove offset/DTI caching (disabled since 0.14) (#23118)
* Remove offset/DTI caching (disabled since 0.14) * remove unused import * remove cached_range tests * fixup remove unused imports * whatsnew
1 parent 2f05cc8 commit a0ca4d7

File tree

7 files changed

+31
-254
lines changed

7 files changed

+31
-254
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,7 @@ Other API Changes
688688
- :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
689689
- :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)
690690
- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
691+
- :class:`DateOffset` attribute ``_cacheable`` and method ``_should_cache`` have been removed (:issue:`23118`)
691692

692693
.. _whatsnew_0240.deprecations:
693694

pandas/_libs/tslibs/offsets.pyx

-11
Original file line numberDiff line numberDiff line change
@@ -282,11 +282,6 @@ class ApplyTypeError(TypeError):
282282
pass
283283

284284

285-
# TODO: unused. remove?
286-
class CacheableOffset(object):
287-
_cacheable = True
288-
289-
290285
# ---------------------------------------------------------------------
291286
# Base Classes
292287

@@ -296,8 +291,6 @@ class _BaseOffset(object):
296291
and will (after pickle errors are resolved) go into a cdef class.
297292
"""
298293
_typ = "dateoffset"
299-
_normalize_cache = True
300-
_cacheable = False
301294
_day_opt = None
302295
_attributes = frozenset(['n', 'normalize'])
303296

@@ -386,10 +379,6 @@ class _BaseOffset(object):
386379
# that allows us to use methods that can go in a `cdef class`
387380
return self * 1
388381

389-
# TODO: this is never true. fix it or get rid of it
390-
def _should_cache(self):
391-
return self.isAnchored() and self._cacheable
392-
393382
def __repr__(self):
394383
className = getattr(self, '_outputName', type(self).__name__)
395384

pandas/core/arrays/datetimes.py

+17-32
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
resolution as libresolution)
1414

1515
from pandas.util._decorators import cache_readonly
16-
from pandas.errors import PerformanceWarning, AbstractMethodError
16+
from pandas.errors import PerformanceWarning
1717
from pandas import compat
1818

1919
from pandas.core.dtypes.common import (
@@ -268,27 +268,22 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
268268
end, end.tz, start.tz, freq, tz
269269
)
270270
if freq is not None:
271-
if cls._use_cached_range(freq, _normalized, start, end):
272-
# Currently always False; never hit
273-
# Should be reimplemented as a part of GH#17914
274-
index = cls._cached_range(start, end, periods=periods,
275-
freq=freq)
276-
else:
277-
index = _generate_regular_range(cls, start, end, periods, freq)
278-
279-
if tz is not None and getattr(index, 'tz', None) is None:
280-
arr = conversion.tz_localize_to_utc(
281-
ensure_int64(index.values),
282-
tz, ambiguous=ambiguous)
283-
284-
index = cls(arr)
285-
286-
# index is localized datetime64 array -> have to convert
287-
# start/end as well to compare
288-
if start is not None:
289-
start = start.tz_localize(tz).asm8
290-
if end is not None:
291-
end = end.tz_localize(tz).asm8
271+
# TODO: consider re-implementing _cached_range; GH#17914
272+
index = _generate_regular_range(cls, start, end, periods, freq)
273+
274+
if tz is not None and getattr(index, 'tz', None) is None:
275+
arr = conversion.tz_localize_to_utc(
276+
ensure_int64(index.values),
277+
tz, ambiguous=ambiguous)
278+
279+
index = cls(arr)
280+
281+
# index is localized datetime64 array -> have to convert
282+
# start/end as well to compare
283+
if start is not None:
284+
start = start.tz_localize(tz).asm8
285+
if end is not None:
286+
end = end.tz_localize(tz).asm8
292287
else:
293288
# Create a linearly spaced date_range in local time
294289
arr = np.linspace(start.value, end.value, periods)
@@ -303,16 +298,6 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
303298

304299
return cls._simple_new(index.values, freq=freq, tz=tz)
305300

306-
@classmethod
307-
def _use_cached_range(cls, freq, _normalized, start, end):
308-
# DatetimeArray is mutable, so is not cached
309-
return False
310-
311-
@classmethod
312-
def _cached_range(cls, start=None, end=None,
313-
periods=None, freq=None, **kwargs):
314-
raise AbstractMethodError(cls)
315-
316301
# -----------------------------------------------------------------
317302
# Descriptive Properties
318303

pandas/core/indexes/datetimes.py

+11-101
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin,
4141
wrap_field_accessor, wrap_array_method)
4242
from pandas.tseries.offsets import (
43-
generate_range, CDay, prefix_mapping)
43+
CDay, prefix_mapping)
4444

4545
from pandas.core.tools.timedeltas import to_timedelta
4646
from pandas.util._decorators import Appender, cache_readonly, Substitution
@@ -326,13 +326,6 @@ def _generate_range(cls, start, end, periods, name=None, freq=None,
326326
out.name = name
327327
return out
328328

329-
@classmethod
330-
def _use_cached_range(cls, freq, _normalized, start, end):
331-
# Note: This always returns False
332-
return (freq._should_cache() and
333-
not (freq._normalize_cache and not _normalized) and
334-
_naive_in_cache_range(start, end))
335-
336329
def _convert_for_op(self, value):
337330
""" Convert value to be insertable to ndarray """
338331
if self._has_same_tz(value):
@@ -410,71 +403,6 @@ def nbytes(self):
410403
# for TZ-aware
411404
return self._ndarray_values.nbytes
412405

413-
@classmethod
414-
def _cached_range(cls, start=None, end=None, periods=None, freq=None,
415-
name=None):
416-
if start is None and end is None:
417-
# I somewhat believe this should never be raised externally
418-
raise TypeError('Must specify either start or end.')
419-
if start is not None:
420-
start = Timestamp(start)
421-
if end is not None:
422-
end = Timestamp(end)
423-
if (start is None or end is None) and periods is None:
424-
raise TypeError(
425-
'Must either specify period or provide both start and end.')
426-
427-
if freq is None:
428-
# This can't happen with external-facing code
429-
raise TypeError('Must provide freq.')
430-
431-
drc = _daterange_cache
432-
if freq not in _daterange_cache:
433-
xdr = generate_range(offset=freq, start=_CACHE_START,
434-
end=_CACHE_END)
435-
436-
arr = tools.to_datetime(list(xdr), box=False)
437-
438-
cachedRange = DatetimeIndex._simple_new(arr)
439-
cachedRange.freq = freq
440-
cachedRange = cachedRange.tz_localize(None)
441-
cachedRange.name = None
442-
drc[freq] = cachedRange
443-
else:
444-
cachedRange = drc[freq]
445-
446-
if start is None:
447-
if not isinstance(end, Timestamp):
448-
raise AssertionError('end must be an instance of Timestamp')
449-
450-
end = freq.rollback(end)
451-
452-
endLoc = cachedRange.get_loc(end) + 1
453-
startLoc = endLoc - periods
454-
elif end is None:
455-
if not isinstance(start, Timestamp):
456-
raise AssertionError('start must be an instance of Timestamp')
457-
458-
start = freq.rollforward(start)
459-
460-
startLoc = cachedRange.get_loc(start)
461-
endLoc = startLoc + periods
462-
else:
463-
if not freq.onOffset(start):
464-
start = freq.rollforward(start)
465-
466-
if not freq.onOffset(end):
467-
end = freq.rollback(end)
468-
469-
startLoc = cachedRange.get_loc(start)
470-
endLoc = cachedRange.get_loc(end) + 1
471-
472-
indexSlice = cachedRange[startLoc:endLoc]
473-
indexSlice.name = name
474-
indexSlice.freq = freq
475-
476-
return indexSlice
477-
478406
def _mpl_repr(self):
479407
# how to represent ourselves to matplotlib
480408
return libts.ints_to_pydatetime(self.asi8, self.tz)
@@ -832,22 +760,19 @@ def _fast_union(self, other):
832760
else:
833761
left, right = other, self
834762

835-
left_start, left_end = left[0], left[-1]
763+
left_end = left[-1]
836764
right_end = right[-1]
837765

838-
if not self.freq._should_cache():
839-
# concatenate dates
840-
if left_end < right_end:
841-
loc = right.searchsorted(left_end, side='right')
842-
right_chunk = right.values[loc:]
843-
dates = _concat._concat_compat((left.values, right_chunk))
844-
return self._shallow_copy(dates)
845-
else:
846-
return left
766+
# TODO: consider re-implementing freq._should_cache for fastpath
767+
768+
# concatenate dates
769+
if left_end < right_end:
770+
loc = right.searchsorted(left_end, side='right')
771+
right_chunk = right.values[loc:]
772+
dates = _concat._concat_compat((left.values, right_chunk))
773+
return self._shallow_copy(dates)
847774
else:
848-
return type(self)(start=left_start,
849-
end=max(left_end, right_end),
850-
freq=left.freq)
775+
return left
851776

852777
def _wrap_union_result(self, other, result):
853778
name = self.name if self.name == other.name else None
@@ -1724,21 +1649,6 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None,
17241649
closed=closed, **kwargs)
17251650

17261651

1727-
_CACHE_START = Timestamp(datetime(1950, 1, 1))
1728-
_CACHE_END = Timestamp(datetime(2030, 1, 1))
1729-
1730-
_daterange_cache = {}
1731-
1732-
1733-
def _naive_in_cache_range(start, end):
1734-
if start is None or end is None:
1735-
return False
1736-
else:
1737-
if start.tzinfo is not None or end.tzinfo is not None:
1738-
return False
1739-
return start > _CACHE_START and end < _CACHE_END
1740-
1741-
17421652
def _time_to_micros(time):
17431653
seconds = time.hour * 60 * 60 + 60 * time.minute + time.second
17441654
return 1000000 * seconds + time.microsecond

pandas/tests/indexes/datetimes/test_date_range.py

-40
Original file line numberDiff line numberDiff line change
@@ -616,23 +616,6 @@ def test_naive_aware_conflicts(self):
616616
with tm.assert_raises_regex(TypeError, msg):
617617
aware.join(naive)
618618

619-
def test_cached_range(self):
620-
DatetimeIndex._cached_range(START, END, freq=BDay())
621-
DatetimeIndex._cached_range(START, periods=20, freq=BDay())
622-
DatetimeIndex._cached_range(end=START, periods=20, freq=BDay())
623-
624-
with tm.assert_raises_regex(TypeError, "freq"):
625-
DatetimeIndex._cached_range(START, END)
626-
627-
with tm.assert_raises_regex(TypeError, "specify period"):
628-
DatetimeIndex._cached_range(START, freq=BDay())
629-
630-
with tm.assert_raises_regex(TypeError, "specify period"):
631-
DatetimeIndex._cached_range(end=END, freq=BDay())
632-
633-
with tm.assert_raises_regex(TypeError, "start or end"):
634-
DatetimeIndex._cached_range(periods=20, freq=BDay())
635-
636619
def test_misc(self):
637620
end = datetime(2009, 5, 13)
638621
dr = bdate_range(end=end, periods=20)
@@ -693,29 +676,6 @@ def test_constructor(self):
693676
with tm.assert_raises_regex(TypeError, msg):
694677
bdate_range('2011-1-1', '2012-1-1', 'C')
695678

696-
def test_cached_range(self):
697-
DatetimeIndex._cached_range(START, END, freq=CDay())
698-
DatetimeIndex._cached_range(START, periods=20,
699-
freq=CDay())
700-
DatetimeIndex._cached_range(end=START, periods=20,
701-
freq=CDay())
702-
703-
# with pytest.raises(TypeError):
704-
with tm.assert_raises_regex(TypeError, "freq"):
705-
DatetimeIndex._cached_range(START, END)
706-
707-
# with pytest.raises(TypeError):
708-
with tm.assert_raises_regex(TypeError, "specify period"):
709-
DatetimeIndex._cached_range(START, freq=CDay())
710-
711-
# with pytest.raises(TypeError):
712-
with tm.assert_raises_regex(TypeError, "specify period"):
713-
DatetimeIndex._cached_range(end=END, freq=CDay())
714-
715-
# with pytest.raises(TypeError):
716-
with tm.assert_raises_regex(TypeError, "start or end"):
717-
DatetimeIndex._cached_range(periods=20, freq=CDay())
718-
719679
def test_misc(self):
720680
end = datetime(2009, 5, 13)
721681
dr = bdate_range(end=end, periods=20, freq='C')

0 commit comments

Comments
 (0)