Skip to content

Commit 8ceec8b

Browse files
authored
PERF: make DTA/TDA/PA _ndarray the attribute, _data the property (#40007)
1 parent bd9a702 commit 8ceec8b

File tree

5 files changed

+60
-38
lines changed

5 files changed

+60
-38
lines changed

pandas/core/arrays/datetimelike.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray):
156156
_infer_matches: Tuple[str, ...]
157157
_is_recognized_dtype: Callable[[DtypeObj], bool]
158158
_recognized_scalars: Tuple[Type, ...]
159-
_data: np.ndarray
159+
_ndarray: np.ndarray
160160

161161
def __init__(self, data, dtype: Optional[Dtype] = None, freq=None, copy=False):
162162
raise AbstractMethodError(self)
@@ -253,9 +253,24 @@ def _check_compatible_with(
253253
# ------------------------------------------------------------------
254254
# NDArrayBackedExtensionArray compat
255255

256+
def __setstate__(self, state):
    """
    Restore instance attributes from an unpickled ``state``.

    Parameters
    ----------
    state : dict or tuple
        Either a mapping of attribute name to value, or a one-element
        tuple wrapping such a mapping. The tuple form is unwrapped first;
        the original code notes it occurs for PeriodArray because that
        class mixes in a cython class.

    Raises
    ------
    TypeError
        If ``state`` is neither a dict nor a one-element tuple that
        (possibly after repeated unwrapping) contains a dict.

    Notes
    -----
    A state that has ``"_data"`` but no ``"_ndarray"`` is treated as coming
    from before the attribute/property swap: its ``"_data"`` value is
    assigned to ``_ndarray`` instead. The caller's mapping is never
    modified.
    """
    # PeriodArray wraps its state in a 1-tuple because it mixes in a
    # cython class; peel such wrappers off iteratively.
    while isinstance(state, tuple) and len(state) == 1:
        state = state[0]

    if not isinstance(state, dict):
        raise TypeError(
            "state must be a dict or a 1-tuple wrapping a dict, "
            f"got {type(state).__name__}"
        )

    if "_data" in state and "_ndarray" not in state:
        # backward compat, changed what is property vs attribute.
        # Work on a shallow copy so the mapping handed to us (which may be
        # another object's live __dict__) is left untouched.
        state = dict(state)
        state["_ndarray"] = state.pop("_data")

    for key, value in state.items():
        setattr(self, key, value)
270+
256271
@cache_readonly
257-
def _ndarray(self) -> np.ndarray:
258-
return self._data
272+
def _data(self) -> np.ndarray:
273+
return self._ndarray
259274

260275
def _from_backing_data(
261276
self: DatetimeLikeArrayT, arr: np.ndarray
@@ -294,7 +309,7 @@ def asi8(self) -> np.ndarray:
294309
An ndarray with int64 dtype.
295310
"""
296311
# do not cache or you'll create a memory leak
297-
return self._data.view("i8")
312+
return self._ndarray.view("i8")
298313

299314
# ----------------------------------------------------------------
300315
# Rendering Methods

pandas/core/arrays/datetimes.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False):
261261

262262
if freq is None:
263263
freq = values.freq
264-
values = values._data
264+
values = values._ndarray
265265

266266
if not isinstance(values, np.ndarray):
267267
raise ValueError(
@@ -303,7 +303,7 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False):
303303
# be incorrect(ish?) for the array as a whole
304304
dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))
305305

306-
self._data = values
306+
self._ndarray = values
307307
self._dtype = dtype
308308
self._freq = freq
309309

@@ -320,7 +320,7 @@ def _simple_new(
320320
values = values.view(DT64NS_DTYPE)
321321

322322
result = object.__new__(cls)
323-
result._data = values
323+
result._ndarray = values
324324
result._freq = freq
325325
result._dtype = dtype
326326
return result
@@ -618,7 +618,7 @@ def astype(self, dtype, copy=True):
618618

619619
elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype:
620620
# unit conversion e.g. datetime64[s]
621-
return self._data.astype(dtype)
621+
return self._ndarray.astype(dtype)
622622

623623
elif is_period_dtype(dtype):
624624
return self.to_period(freq=dtype.freq)
@@ -1138,7 +1138,7 @@ def to_period(self, freq=None):
11381138

11391139
freq = res
11401140

1141-
return PeriodArray._from_datetime64(self._data, freq, tz=self.tz)
1141+
return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)
11421142

11431143
def to_perioddelta(self, freq):
11441144
"""

pandas/core/arrays/period.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,8 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps):
181181
_datetimelike_ops = _field_ops + _object_ops + _bool_ops
182182
_datetimelike_methods = ["strftime", "to_timestamp", "asfreq"]
183183

184+
__setstate__ = dtl.DatelikeOps.__setstate__
185+
184186
# --------------------------------------------------------------------
185187
# Constructors
186188

@@ -201,10 +203,10 @@ def __init__(self, values, dtype: Optional[Dtype] = None, freq=None, copy=False)
201203
if isinstance(values, type(self)):
202204
if freq is not None and freq != values.freq:
203205
raise raise_on_incompatible(values, freq)
204-
values, freq = values._data, values.freq
206+
values, freq = values._ndarray, values.freq
205207

206208
values = np.array(values, dtype="int64", copy=copy)
207-
self._data = values
209+
self._ndarray = values
208210
if freq is None:
209211
raise ValueError("freq is not specified and cannot be inferred")
210212
self._dtype = PeriodDtype(freq)
@@ -347,7 +349,7 @@ def __arrow_array__(self, type=None):
347349

348350
if type is not None:
349351
if pyarrow.types.is_integer(type):
350-
return pyarrow.array(self._data, mask=self.isna(), type=type)
352+
return pyarrow.array(self._ndarray, mask=self.isna(), type=type)
351353
elif isinstance(type, ArrowPeriodType):
352354
# ensure we have the same freq
353355
if self.freqstr != type.freq:
@@ -361,7 +363,7 @@ def __arrow_array__(self, type=None):
361363
)
362364

363365
period_type = ArrowPeriodType(self.freqstr)
364-
storage_array = pyarrow.array(self._data, mask=self.isna(), type="int64")
366+
storage_array = pyarrow.array(self._ndarray, mask=self.isna(), type="int64")
365367
return pyarrow.ExtensionArray.from_storage(period_type, storage_array)
366368

367369
# --------------------------------------------------------------------

pandas/core/arrays/timedeltas.py

+28-23
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ def dtype(self) -> np.dtype:
166166
# ----------------------------------------------------------------
167167
# Constructors
168168

169+
_freq = None
170+
169171
def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False):
170172
values = extract_array(values)
171173

@@ -182,7 +184,7 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False):
182184
elif freq and values.freq:
183185
freq = to_offset(freq)
184186
freq, _ = dtl.validate_inferred_freq(freq, values.freq, False)
185-
values = values._data
187+
values = values._ndarray
186188

187189
if not isinstance(values, np.ndarray):
188190
msg = (
@@ -214,7 +216,7 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False):
214216
if freq:
215217
freq = to_offset(freq)
216218

217-
self._data = values
219+
self._ndarray = values
218220
self._dtype = dtype
219221
self._freq = freq
220222

@@ -232,7 +234,7 @@ def _simple_new(
232234
values = values.view(TD64NS_DTYPE)
233235

234236
result = object.__new__(cls)
235-
result._data = values
237+
result._ndarray = values
236238
result._freq = to_offset(freq)
237239
result._dtype = TD64NS_DTYPE
238240
return result
@@ -344,7 +346,7 @@ def astype(self, dtype, copy: bool = True):
344346
dtype = pandas_dtype(dtype)
345347

346348
if dtype.kind == "m":
347-
return astype_td64_unit_conversion(self._data, dtype, copy=copy)
349+
return astype_td64_unit_conversion(self._ndarray, dtype, copy=copy)
348350

349351
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
350352

@@ -418,8 +420,8 @@ def _formatter(self, boxed=False):
418420
def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
419421
from pandas.io.formats.format import get_format_timedelta64
420422

421-
formatter = get_format_timedelta64(self._data, na_rep)
422-
return np.array([formatter(x) for x in self._data])
423+
formatter = get_format_timedelta64(self._ndarray, na_rep)
424+
return np.array([formatter(x) for x in self._ndarray])
423425

424426
# ----------------------------------------------------------------
425427
# Arithmetic Methods
@@ -488,7 +490,7 @@ def _addsub_object_array(self, other, op):
488490
def __mul__(self, other) -> TimedeltaArray:
489491
if is_scalar(other):
490492
# numpy will accept float and int, raise TypeError for others
491-
result = self._data * other
493+
result = self._ndarray * other
492494
freq = None
493495
if self.freq is not None and not isna(other):
494496
freq = self.freq * other
@@ -511,7 +513,7 @@ def __mul__(self, other) -> TimedeltaArray:
511513
return type(self)(result)
512514

513515
# numpy will accept float or int dtype, raise TypeError for others
514-
result = self._data * other
516+
result = self._ndarray * other
515517
return type(self)(result)
516518

517519
__rmul__ = __mul__
@@ -529,11 +531,11 @@ def __truediv__(self, other):
529531
return result
530532

531533
# otherwise, dispatch to Timedelta implementation
532-
return self._data / other
534+
return self._ndarray / other
533535

534536
elif lib.is_scalar(other):
535537
# assume it is numeric
536-
result = self._data / other
538+
result = self._ndarray / other
537539
freq = None
538540
if self.freq is not None:
539541
# Tick division is not implemented, so operate on Timedelta
@@ -549,7 +551,7 @@ def __truediv__(self, other):
549551

550552
elif is_timedelta64_dtype(other.dtype):
551553
# let numpy handle it
552-
return self._data / other
554+
return self._ndarray / other
553555

554556
elif is_object_dtype(other.dtype):
555557
# We operate on raveled arrays to avoid problems in inference
@@ -571,7 +573,7 @@ def __truediv__(self, other):
571573
return result
572574

573575
else:
574-
result = self._data / other
576+
result = self._ndarray / other
575577
return type(self)(result)
576578

577579
@unpack_zerodim_and_defer("__rtruediv__")
@@ -586,7 +588,7 @@ def __rtruediv__(self, other):
586588
return result
587589

588590
# otherwise, dispatch to Timedelta implementation
589-
return other / self._data
591+
return other / self._ndarray
590592

591593
elif lib.is_scalar(other):
592594
raise TypeError(
@@ -602,7 +604,7 @@ def __rtruediv__(self, other):
602604

603605
elif is_timedelta64_dtype(other.dtype):
604606
# let numpy handle it
605-
return other / self._data
607+
return other / self._ndarray
606608

607609
elif is_object_dtype(other.dtype):
608610
# Note: unlike in __truediv__, we do not _need_ to do type
@@ -629,7 +631,7 @@ def __floordiv__(self, other):
629631
return result
630632

631633
# dispatch to Timedelta implementation
632-
result = other.__rfloordiv__(self._data)
634+
result = other.__rfloordiv__(self._ndarray)
633635
return result
634636

635637
# at this point we should only have numeric scalars; anything
@@ -673,7 +675,7 @@ def __floordiv__(self, other):
673675
return result
674676

675677
elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype):
676-
result = self._data // other
678+
result = self._ndarray // other
677679
return type(self)(result)
678680

679681
else:
@@ -693,7 +695,7 @@ def __rfloordiv__(self, other):
693695
return result
694696

695697
# dispatch to Timedelta implementation
696-
result = other.__floordiv__(self._data)
698+
result = other.__floordiv__(self._ndarray)
697699
return result
698700

699701
raise TypeError(
@@ -763,15 +765,15 @@ def __rdivmod__(self, other):
763765

764766
def __neg__(self) -> TimedeltaArray:
765767
if self.freq is not None:
766-
return type(self)(-self._data, freq=-self.freq)
767-
return type(self)(-self._data)
768+
return type(self)(-self._ndarray, freq=-self.freq)
769+
return type(self)(-self._ndarray)
768770

769771
def __pos__(self) -> TimedeltaArray:
770-
return type(self)(self._data, freq=self.freq)
772+
return type(self)(self._ndarray, freq=self.freq)
771773

772774
def __abs__(self) -> TimedeltaArray:
773775
# Note: freq is not preserved
774-
return type(self)(np.abs(self._data))
776+
return type(self)(np.abs(self._ndarray))
775777

776778
# ----------------------------------------------------------------
777779
# Conversion Methods - Vectorized analogues of Timedelta methods
@@ -949,9 +951,12 @@ def sequence_to_td64ns(data, copy=False, unit=None, errors="raise"):
949951
data = np.array(data, copy=False)
950952
elif isinstance(data, ABCSeries):
951953
data = data._values
952-
elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArray)):
954+
elif isinstance(data, ABCTimedeltaIndex):
955+
inferred_freq = data.freq
956+
data = data._data._ndarray
957+
elif isinstance(data, TimedeltaArray):
953958
inferred_freq = data.freq
954-
data = data._data
959+
data = data._ndarray
955960
elif isinstance(data, IntegerArray):
956961
data = data.to_numpy("int64", na_value=tslibs.iNaT)
957962
elif is_categorical_dtype(data.dtype):

pandas/core/indexes/datetimelike.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def _simple_new(
150150
result._cache = {}
151151

152152
# For groupby perf. See note in indexes/base about _index_data
153-
result._index_data = values._data
153+
result._index_data = values._ndarray
154154

155155
result._reset_identity()
156156
return result
@@ -165,7 +165,7 @@ def _is_all_dates(self) -> bool:
165165
@property
166166
def values(self) -> np.ndarray:
167167
# Note: PeriodArray overrides this to return an ndarray of objects.
168-
return self._data._data
168+
return self._data._ndarray
169169

170170
def __array_wrap__(self, result, context=None):
171171
"""

0 commit comments

Comments
 (0)