Skip to content

Commit 38ea154

Browse files
jorisvandenbossche authored and
jreback committed
PERF: improve access of .array (#31037)
1 parent 3b58c2e commit 38ea154

File tree

5 files changed

+70
-26
lines changed

5 files changed

+70
-26
lines changed

pandas/core/arrays/numpy_.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ class PandasDtype(ExtensionDtype):
4343
def __init__(self, dtype):
4444
dtype = np.dtype(dtype)
4545
self._dtype = dtype
46-
self._name = dtype.name
4746
self._type = dtype.type
4847

4948
def __repr__(self) -> str:
@@ -56,7 +55,7 @@ def numpy_dtype(self):
5655

5756
@property
5857
def name(self):
59-
return self._name
58+
return self._dtype.name
6059

6160
@property
6261
def type(self):

pandas/core/base.py

+1-22
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,11 @@
1818
from pandas.core.dtypes.cast import is_nested_object
1919
from pandas.core.dtypes.common import (
2020
is_categorical_dtype,
21-
is_datetime64_ns_dtype,
2221
is_dict_like,
2322
is_extension_array_dtype,
2423
is_list_like,
2524
is_object_dtype,
2625
is_scalar,
27-
is_timedelta64_ns_dtype,
2826
needs_i8_conversion,
2927
)
3028
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
@@ -745,26 +743,7 @@ def array(self) -> ExtensionArray:
745743
[a, b, a]
746744
Categories (2, object): [a, b]
747745
"""
748-
# As a mixin, we depend on the mixing class having _values.
749-
# Special mixin syntax may be developed in the future:
750-
# https://github.com/python/typing/issues/246
751-
result = self._values # type: ignore
752-
753-
if is_datetime64_ns_dtype(result.dtype):
754-
from pandas.arrays import DatetimeArray
755-
756-
result = DatetimeArray(result)
757-
elif is_timedelta64_ns_dtype(result.dtype):
758-
from pandas.arrays import TimedeltaArray
759-
760-
result = TimedeltaArray(result)
761-
762-
elif not is_extension_array_dtype(result.dtype):
763-
from pandas.core.arrays.numpy_ import PandasArray
764-
765-
result = PandasArray(result)
766-
767-
return result
746+
raise AbstractMethodError(self)
768747

769748
def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs):
770749
"""

pandas/core/indexes/base.py

+10
Original file line numberDiff line numberDiff line change
@@ -3923,6 +3923,16 @@ def values(self):
39233923
"""
39243924
return self._data.view(np.ndarray)
39253925

3926+
@cache_readonly
3927+
@Appender(IndexOpsMixin.array.__doc__) # type: ignore
3928+
def array(self) -> ExtensionArray:
3929+
array = self._data
3930+
if isinstance(array, np.ndarray):
3931+
from pandas.core.arrays.numpy_ import PandasArray
3932+
3933+
array = PandasArray(array)
3934+
return array
3935+
39263936
@property
39273937
def _values(self) -> Union[ExtensionArray, ABCIndexClass, np.ndarray]:
39283938
"""

pandas/core/internals/blocks.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,14 @@
6666
)
6767

6868
import pandas.core.algorithms as algos
69-
from pandas.core.arrays import Categorical, DatetimeArray, PandasDtype, TimedeltaArray
69+
from pandas.core.arrays import (
70+
Categorical,
71+
DatetimeArray,
72+
ExtensionArray,
73+
PandasArray,
74+
PandasDtype,
75+
TimedeltaArray,
76+
)
7077
from pandas.core.base import PandasObject
7178
import pandas.core.common as com
7279
from pandas.core.construction import extract_array
@@ -195,6 +202,7 @@ def is_categorical_astype(self, dtype):
195202
def external_values(self):
196203
"""
197204
The array that Series.values returns (public attribute).
205+
198206
This has some historical constraints, and is overridden in block
199207
subclasses to return the correct array (e.g. period returns
200208
object ndarray and datetimetz a datetime64[ns] ndarray instead of
@@ -208,6 +216,12 @@ def internal_values(self):
208216
"""
209217
return self.values
210218

219+
def array_values(self) -> ExtensionArray:
220+
"""
221+
The array that Series.array returns. Always an ExtensionArray.
222+
"""
223+
return PandasArray(self.values)
224+
211225
def get_values(self, dtype=None):
212226
"""
213227
return an internal format, currently just the ndarray
@@ -1780,6 +1794,9 @@ def get_values(self, dtype=None):
17801794
values = values.reshape((1,) + values.shape)
17811795
return values
17821796

1797+
def array_values(self) -> ExtensionArray:
1798+
return self.values
1799+
17831800
def to_dense(self):
17841801
return np.asarray(self.values)
17851802

@@ -2243,6 +2260,9 @@ def set(self, locs, values):
22432260
def external_values(self):
22442261
return np.asarray(self.values.astype("datetime64[ns]", copy=False))
22452262

2263+
def array_values(self) -> ExtensionArray:
2264+
return DatetimeArray._simple_new(self.values)
2265+
22462266

22472267
class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
22482268
""" implement a datetime64 block with a tz attribute """
@@ -2500,6 +2520,9 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
25002520
def external_values(self):
25012521
return np.asarray(self.values.astype("timedelta64[ns]", copy=False))
25022522

2523+
def array_values(self) -> ExtensionArray:
2524+
return TimedeltaArray._simple_new(self.values)
2525+
25032526

25042527
class BoolBlock(NumericBlock):
25052528
__slots__ = ()

pandas/core/series.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -494,10 +494,43 @@ def values(self):
494494
@property
495495
def _values(self):
496496
"""
497-
Return the internal repr of this data.
497+
Return the internal repr of this data (defined by Block.internal_values).
498+
These are the values as stored in the Block (ndarray or ExtensionArray
499+
depending on the Block class).
500+
501+
Differs from the public ``.values`` for certain data types, because of
502+
historical backwards compatibility of the public attribute (e.g. period
503+
returns object ndarray and datetimetz a datetime64[ns] ndarray for
504+
``.values`` while it returns an ExtensionArray for ``._values`` in those
505+
cases).
506+
507+
Differs from ``.array`` in that this still returns the numpy array if
508+
the Block is backed by a numpy array, while ``.array`` is guaranteed to always
509+
return an ExtensionArray.
510+
511+
Differs from ``._ndarray_values``, as that is guaranteed to always return a
512+
numpy array (it will call ``_ndarray_values`` on the ExtensionArray, if
513+
the Series was backed by an ExtensionArray).
514+
515+
Overview:
516+
517+
dtype | values | _values | array | _ndarray_values |
518+
----------- | ------------- | ------------- | ------------- | --------------- |
519+
Numeric | ndarray | ndarray | PandasArray | ndarray |
520+
Category | Categorical | Categorical | Categorical | ndarray[int] |
521+
dt64[ns] | ndarray[M8ns] | ndarray[M8ns] | DatetimeArray | ndarray[M8ns] |
522+
dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] |
523+
Period | ndarray[obj] | PeriodArray | PeriodArray | ndarray[int] |
524+
Nullable | EA | EA | EA | ndarray |
525+
498526
"""
499527
return self._data.internal_values()
500528

529+
@Appender(base.IndexOpsMixin.array.__doc__) # type: ignore
530+
@property
531+
def array(self) -> ExtensionArray:
532+
return self._data._block.array_values()
533+
501534
def _internal_get_values(self):
502535
"""
503536
Same as values (but handles sparseness conversions); is a view.

0 commit comments

Comments
 (0)