Skip to content

Commit e93bc80

Browse files
PERF: improve access of .array (#31037) (#31275)
1 parent cf2e9b9 commit e93bc80

File tree

5 files changed

+77
-27
lines changed

5 files changed

+77
-27
lines changed

pandas/core/arrays/numpy_.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ class PandasDtype(ExtensionDtype):
4343
def __init__(self, dtype):
4444
dtype = np.dtype(dtype)
4545
self._dtype = dtype
46-
self._name = dtype.name
4746
self._type = dtype.type
4847

4948
def __repr__(self) -> str:
@@ -56,7 +55,7 @@ def numpy_dtype(self):
5655

5756
@property
5857
def name(self):
59-
return self._name
58+
return self._dtype.name
6059

6160
@property
6261
def type(self):

pandas/core/base.py

+1-22
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,11 @@
1818
from pandas.core.dtypes.cast import is_nested_object
1919
from pandas.core.dtypes.common import (
2020
is_categorical_dtype,
21-
is_datetime64_ns_dtype,
2221
is_dict_like,
2322
is_extension_array_dtype,
2423
is_list_like,
2524
is_object_dtype,
2625
is_scalar,
27-
is_timedelta64_ns_dtype,
2826
needs_i8_conversion,
2927
)
3028
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
@@ -747,26 +745,7 @@ def array(self) -> ExtensionArray:
747745
[a, b, a]
748746
Categories (2, object): [a, b]
749747
"""
750-
# As a mixin, we depend on the mixing class having _values.
751-
# Special mixin syntax may be developed in the future:
752-
# https://github.com/python/typing/issues/246
753-
result = self._values # type: ignore
754-
755-
if is_datetime64_ns_dtype(result.dtype):
756-
from pandas.arrays import DatetimeArray
757-
758-
result = DatetimeArray(result)
759-
elif is_timedelta64_ns_dtype(result.dtype):
760-
from pandas.arrays import TimedeltaArray
761-
762-
result = TimedeltaArray(result)
763-
764-
elif not is_extension_array_dtype(result.dtype):
765-
from pandas.core.arrays.numpy_ import PandasArray
766-
767-
result = PandasArray(result)
768-
769-
return result
748+
raise AbstractMethodError(self)
770749

771750
def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs):
772751
"""

pandas/core/indexes/base.py

+10
Original file line numberDiff line numberDiff line change
@@ -3640,6 +3640,16 @@ def values(self):
36403640
"""
36413641
return self._data.view(np.ndarray)
36423642

3643+
@cache_readonly
3644+
@Appender(IndexOpsMixin.array.__doc__) # type: ignore
3645+
def array(self) -> ExtensionArray:
3646+
array = self._data
3647+
if isinstance(array, np.ndarray):
3648+
from pandas.core.arrays.numpy_ import PandasArray
3649+
3650+
array = PandasArray(array)
3651+
return array
3652+
36433653
@property
36443654
def _values(self) -> Union[ExtensionArray, ABCIndexClass, np.ndarray]:
36453655
# TODO(EA): remove index types as they become extension arrays

pandas/core/internals/blocks.py

+31-2
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,14 @@
6666
)
6767

6868
import pandas.core.algorithms as algos
69-
from pandas.core.arrays import Categorical, DatetimeArray, PandasDtype, TimedeltaArray
69+
from pandas.core.arrays import (
70+
Categorical,
71+
DatetimeArray,
72+
ExtensionArray,
73+
PandasArray,
74+
PandasDtype,
75+
TimedeltaArray,
76+
)
7077
from pandas.core.base import PandasObject
7178
import pandas.core.common as com
7279
from pandas.core.construction import extract_array
@@ -193,7 +200,14 @@ def is_categorical_astype(self, dtype):
193200
return False
194201

195202
def external_values(self, dtype=None):
196-
""" return an outside world format, currently just the ndarray """
203+
"""
204+
The array that Series.values returns (public attribute).
205+
206+
This has some historical constraints, and is overridden in block
207+
subclasses to return the correct array (e.g. period returns
208+
object ndarray and datetimetz a datetime64[ns] ndarray instead of
209+
a proper extension array).
210+
"""
197211
return self.values
198212

199213
def internal_values(self, dtype=None):
@@ -202,6 +216,12 @@ def internal_values(self, dtype=None):
202216
"""
203217
return self.values
204218

219+
def array_values(self) -> ExtensionArray:
220+
"""
221+
The array that Series.array returns. Always an ExtensionArray.
222+
"""
223+
return PandasArray(self.values)
224+
205225
def get_values(self, dtype=None):
206226
"""
207227
return an internal format, currently just the ndarray
@@ -1770,6 +1790,9 @@ def get_values(self, dtype=None):
17701790
values = values.reshape((1,) + values.shape)
17711791
return values
17721792

1793+
def array_values(self) -> ExtensionArray:
1794+
return self.values
1795+
17731796
def to_dense(self):
17741797
return np.asarray(self.values)
17751798

@@ -2233,6 +2256,9 @@ def set(self, locs, values):
22332256
def external_values(self):
22342257
return np.asarray(self.values.astype("datetime64[ns]", copy=False))
22352258

2259+
def array_values(self) -> ExtensionArray:
2260+
return DatetimeArray._simple_new(self.values)
2261+
22362262

22372263
class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
22382264
""" implement a datetime64 block with a tz attribute """
@@ -2490,6 +2516,9 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
24902516
def external_values(self, dtype=None):
24912517
return np.asarray(self.values.astype("timedelta64[ns]", copy=False))
24922518

2519+
def array_values(self) -> ExtensionArray:
2520+
return TimedeltaArray._simple_new(self.values)
2521+
24932522

24942523
class BoolBlock(NumericBlock):
24952524
__slots__ = ()

pandas/core/series.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -479,10 +479,43 @@ def values(self):
479479
@property
480480
def _values(self):
481481
"""
482-
Return the internal repr of this data.
482+
Return the internal repr of this data (defined by Block.internal_values).
483+
These are the values as stored in the Block (ndarray or ExtensionArray
484+
depending on the Block class).
485+
486+
Differs from the public ``.values`` for certain data types, because of
487+
historical backwards compatibility of the public attribute (e.g. period
488+
returns object ndarray and datetimetz a datetime64[ns] ndarray for
489+
``.values`` while it returns an ExtensionArray for ``._values`` in those
490+
cases).
491+
492+
Differs from ``.array`` in that this still returns the numpy array if
493+
the Block is backed by a numpy array, while ``.array`` is guaranteed to
494+
return an ExtensionArray.
495+
496+
Differs from ``._ndarray_values``, as that is guaranteed to return a
497+
numpy array (it will call ``_ndarray_values`` on the ExtensionArray, if
498+
the Series was backed by an ExtensionArray).
499+
500+
Overview:
501+
502+
dtype | values | _values | array | _ndarray_values |
503+
----------- | ------------- | ------------- | ------------- | --------------- |
504+
Numeric | ndarray | ndarray | PandasArray | ndarray |
505+
Category | Categorical | Categorical | Categorical | ndarray[int] |
506+
dt64[ns] | ndarray[M8ns] | ndarray[M8ns] | DatetimeArray | ndarray[M8ns] |
507+
dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] |
508+
Period | ndarray[obj] | PeriodArray | PeriodArray | ndarray[int] |
509+
Nullable | EA | EA | EA | ndarray |
510+
483511
"""
484512
return self._data.internal_values()
485513

514+
@Appender(base.IndexOpsMixin.array.__doc__) # type: ignore
515+
@property
516+
def array(self) -> ExtensionArray:
517+
return self._data._block.array_values()
518+
486519
def _internal_get_values(self):
487520
"""
488521
Same as values (but handles sparseness conversions); is a view.

0 commit comments

Comments
 (0)