Skip to content

Commit 4661928

Browse files
authored
CLN: remove _ndarray_values (#32768)
1 parent cfbd7f6 commit 4661928

18 files changed

+37
-154
lines changed

doc/source/development/internals.rst

+2-8
Original file line numberDiff line numberDiff line change
@@ -89,16 +89,10 @@ pandas extends NumPy's type system with custom types, like ``Categorical`` or
8989
datetimes with a timezone, so we have multiple notions of "values". For 1-D
9090
containers (``Index`` classes and ``Series``) we have the following convention:
9191

92-
* ``cls._ndarray_values`` is *always* a NumPy ``ndarray``. Ideally,
93-
``_ndarray_values`` is cheap to compute. For example, for a ``Categorical``,
94-
this returns the codes, not the array of objects.
9592
* ``cls._values`` refers to the "best possible" array. This could be an
96-
``ndarray``, ``ExtensionArray``, or in ``Index`` subclass (note: we're in the
97-
process of removing the index subclasses here so that it's always an
98-
``ndarray`` or ``ExtensionArray``).
93+
``ndarray`` or ``ExtensionArray``.
9994

100-
So, for example, ``Series[category]._values`` is a ``Categorical``, while
101-
``Series[category]._ndarray_values`` is the underlying codes.
95+
So, for example, ``Series[category]._values`` is a ``Categorical``.
10296

10397
.. _ref-subclassing-pandas:
10498

doc/source/reference/extensions.rst

-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ objects.
3737
api.extensions.ExtensionArray._from_factorized
3838
api.extensions.ExtensionArray._from_sequence
3939
api.extensions.ExtensionArray._from_sequence_of_strings
40-
api.extensions.ExtensionArray._ndarray_values
4140
api.extensions.ExtensionArray._reduce
4241
api.extensions.ExtensionArray._values_for_argsort
4342
api.extensions.ExtensionArray._values_for_factorize

pandas/core/arrays/base.py

-17
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ class ExtensionArray:
9393
_from_factorized
9494
_from_sequence
9595
_from_sequence_of_strings
96-
_ndarray_values
9796
_reduce
9897
_values_for_argsort
9998
_values_for_factorize
@@ -1046,22 +1045,6 @@ def _concat_same_type(
10461045
# of objects
10471046
_can_hold_na = True
10481047

1049-
@property
1050-
def _ndarray_values(self) -> np.ndarray:
1051-
"""
1052-
Internal pandas method for lossy conversion to a NumPy ndarray.
1053-
1054-
This method is not part of the pandas interface.
1055-
1056-
The expectation is that this is cheap to compute, and is primarily
1057-
used for interacting with our indexers.
1058-
1059-
Returns
1060-
-------
1061-
array : ndarray
1062-
"""
1063-
return np.array(self)
1064-
10651048
def _reduce(self, name, skipna=True, **kwargs):
10661049
"""
10671050
Return a scalar result of performing the reduction operation.

pandas/core/arrays/categorical.py

+2-11
Original file line numberDiff line numberDiff line change
@@ -451,10 +451,6 @@ def dtype(self) -> CategoricalDtype:
451451
"""
452452
return self._dtype
453453

454-
@property
455-
def _ndarray_values(self) -> np.ndarray:
456-
return self.codes
457-
458454
@property
459455
def _constructor(self) -> Type["Categorical"]:
460456
return Categorical
@@ -2567,12 +2563,7 @@ def _get_codes_for_values(values, categories):
25672563
"""
25682564
dtype_equal = is_dtype_equal(values.dtype, categories.dtype)
25692565

2570-
if dtype_equal:
2571-
# To prevent erroneous dtype coercion in _get_data_algo, retrieve
2572-
# the underlying numpy array. gh-22702
2573-
values = getattr(values, "_ndarray_values", values)
2574-
categories = getattr(categories, "_ndarray_values", categories)
2575-
elif is_extension_array_dtype(categories.dtype) and is_object_dtype(values):
2566+
if is_extension_array_dtype(categories.dtype) and is_object_dtype(values):
25762567
# Support inferring the correct extension dtype from an array of
25772568
# scalar objects. e.g.
25782569
# Categorical(array[Period, Period], categories=PeriodIndex(...))
@@ -2582,7 +2573,7 @@ def _get_codes_for_values(values, categories):
25822573
# exception raised in _from_sequence
25832574
values = ensure_object(values)
25842575
categories = ensure_object(categories)
2585-
else:
2576+
elif not dtype_equal:
25862577
values = ensure_object(values)
25872578
categories = ensure_object(categories)
25882579

pandas/core/arrays/datetimelike.py

-4
Original file line numberDiff line numberDiff line change
@@ -456,10 +456,6 @@ def asi8(self) -> np.ndarray:
456456
# do not cache or you'll create a memory leak
457457
return self._data.view("i8")
458458

459-
@property
460-
def _ndarray_values(self):
461-
return self._data
462-
463459
# ----------------------------------------------------------------
464460
# Rendering Methods
465461

pandas/core/arrays/integer.py

-12
Original file line numberDiff line numberDiff line change
@@ -478,18 +478,6 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
478478
data = self.to_numpy(dtype=dtype, **kwargs)
479479
return astype_nansafe(data, dtype, copy=False)
480480

481-
@property
482-
def _ndarray_values(self) -> np.ndarray:
483-
"""
484-
Internal pandas method for lossy conversion to a NumPy ndarray.
485-
486-
This method is not part of the pandas interface.
487-
488-
The expectation is that this is cheap to compute, and is primarily
489-
used for interacting with our indexers.
490-
"""
491-
return self._data
492-
493481
def _values_for_factorize(self) -> Tuple[np.ndarray, float]:
494482
# TODO: https://github.com/pandas-dev/pandas/issues/30037
495483
# use masked algorithms, rather than object-dtype / np.nan.

pandas/core/base.py

-17
Original file line numberDiff line numberDiff line change
@@ -855,23 +855,6 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs):
855855
result[self.isna()] = na_value
856856
return result
857857

858-
@property
859-
def _ndarray_values(self) -> np.ndarray:
860-
"""
861-
The data as an ndarray, possibly losing information.
862-
863-
The expectation is that this is cheap to compute, and is primarily
864-
used for interacting with our indexers.
865-
866-
- categorical -> codes
867-
"""
868-
if is_extension_array_dtype(self):
869-
return self.array._ndarray_values
870-
# As a mixin, we depend on the mixing class having values.
871-
# Special mixin syntax may be developed in the future:
872-
# https://github.com/python/typing/issues/246
873-
return self.values # type: ignore
874-
875858
@property
876859
def empty(self):
877860
return not self.size

pandas/core/indexes/base.py

+13-18
Original file line numberDiff line numberDiff line change
@@ -464,8 +464,7 @@ def _simple_new(cls, values, name: Label = None):
464464
# _index_data is a (temporary?) fix to ensure that the direct data
465465
# manipulation we do in `_libs/reduction.pyx` continues to work.
466466
# We need access to the actual ndarray, since we're messing with
467-
# data buffers and strides. We don't re-use `_ndarray_values`, since
468-
# we actually set this value too.
467+
# data buffers and strides.
469468
result._index_data = values
470469
result._name = name
471470
result._cache = {}
@@ -625,7 +624,8 @@ def ravel(self, order="C"):
625624
--------
626625
numpy.ndarray.ravel
627626
"""
628-
return self._ndarray_values.ravel(order=order)
627+
values = self._get_engine_target()
628+
return values.ravel(order=order)
629629

630630
def view(self, cls=None):
631631

@@ -3846,29 +3846,24 @@ def _values(self) -> Union[ExtensionArray, np.ndarray]:
38463846
"""
38473847
The best array representation.
38483848
3849-
This is an ndarray or ExtensionArray. This differs from
3850-
``_ndarray_values``, which always returns an ndarray.
3849+
This is an ndarray or ExtensionArray.
38513850
3852-
Both ``_values`` and ``_ndarray_values`` are consistent between
3853-
``Series`` and ``Index`` (except for datetime64[ns], which returns
3854-
a DatetimeArray for _values on the Index, but ndarray[M8ns] on the
3855-
Series).
3851+
``_values`` is consistent between ``Series`` and ``Index``.
38563852
38573853
It may differ from the public '.values' method.
38583854
3859-
index | values | _values | _ndarray_values |
3860-
----------------- | --------------- | ------------- | --------------- |
3861-
Index | ndarray | ndarray | ndarray |
3862-
CategoricalIndex | Categorical | Categorical | ndarray[int] |
3863-
DatetimeIndex | ndarray[M8ns] | DatetimeArray | ndarray[M8ns] |
3864-
DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray | ndarray[M8ns] |
3865-
PeriodIndex | ndarray[object] | PeriodArray | ndarray[int] |
3866-
IntervalIndex | IntervalArray | IntervalArray | ndarray[object] |
3855+
index | values | _values |
3856+
----------------- | --------------- | ------------- |
3857+
Index | ndarray | ndarray |
3858+
CategoricalIndex | Categorical | Categorical |
3859+
DatetimeIndex | ndarray[M8ns] | DatetimeArray |
3860+
DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray |
3861+
PeriodIndex | ndarray[object] | PeriodArray |
3862+
IntervalIndex | IntervalArray | IntervalArray |
38673863
38683864
See Also
38693865
--------
38703866
values
3871-
_ndarray_values
38723867
"""
38733868
return self._data
38743869

pandas/core/indexes/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def sort_values(self, return_indexer=False, ascending=True):
179179
sorted_index = self.take(_as)
180180
return sorted_index, _as
181181
else:
182-
# NB: using asi8 instead of _ndarray_values matters in numpy 1.18
182+
# NB: using asi8 instead of _data matters in numpy 1.18
183183
# because the treatment of NaT has been changed to put NaT last
184184
# instead of first.
185185
sorted_values = np.sort(self.asi8)

pandas/core/indexes/extension.py

-4
Original file line numberDiff line numberDiff line change
@@ -228,10 +228,6 @@ def __iter__(self):
228228
def __array__(self, dtype=None) -> np.ndarray:
229229
return np.asarray(self._data, dtype=dtype)
230230

231-
@property
232-
def _ndarray_values(self) -> np.ndarray:
233-
return self._data._ndarray_values
234-
235231
def _get_engine_target(self) -> np.ndarray:
236232
return self._data._values_for_argsort()
237233

pandas/core/series.py

+9-13
Original file line numberDiff line numberDiff line change
@@ -550,21 +550,17 @@ def _values(self):
550550
timedelta64 dtypes), while ``.array`` is guaranteed to always return an
551551
ExtensionArray.
552552
553-
Differs from ``._ndarray_values``, as that ensures to always return a
554-
numpy array (it will call ``_ndarray_values`` on the ExtensionArray, if
555-
the Series was backed by an ExtensionArray).
556-
557553
Overview:
558554
559-
dtype | values | _values | array | _ndarray_values |
560-
----------- | ------------- | ------------- | ------------- | --------------- |
561-
Numeric | ndarray | ndarray | PandasArray | ndarray |
562-
Category | Categorical | Categorical | Categorical | ndarray[int] |
563-
dt64[ns] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] |
564-
dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] |
565-
td64[ns] | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] | ndarray[m8ns] |
566-
Period | ndarray[obj] | PeriodArray | PeriodArray | ndarray[int] |
567-
Nullable | EA | EA | EA | ndarray |
555+
dtype | values | _values | array |
556+
----------- | ------------- | ------------- | ------------- |
557+
Numeric | ndarray | ndarray | PandasArray |
558+
Category | Categorical | Categorical | Categorical |
559+
dt64[ns] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
560+
dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
561+
td64[ns] | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] |
562+
Period | ndarray[obj] | PeriodArray | PeriodArray |
563+
Nullable | EA | EA | EA |
568564
569565
"""
570566
return self._data.internal_values()

pandas/tests/base/test_conversion.py

-28
Original file line numberDiff line numberDiff line change
@@ -220,34 +220,6 @@ def test_values_consistent(array, expected_type, dtype):
220220
tm.assert_equal(l_values, r_values)
221221

222222

223-
@pytest.mark.parametrize(
224-
"array, expected",
225-
[
226-
(np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)),
227-
(np.array(["0", "1"]), np.array(["0", "1"], dtype=object)),
228-
(pd.Categorical(["a", "a"]), np.array([0, 0], dtype="int8")),
229-
(
230-
pd.DatetimeIndex(["2017-01-01T00:00:00"]),
231-
np.array(["2017-01-01T00:00:00"], dtype="M8[ns]"),
232-
),
233-
(
234-
pd.DatetimeIndex(["2017-01-01T00:00:00"], tz="US/Eastern"),
235-
np.array(["2017-01-01T05:00:00"], dtype="M8[ns]"),
236-
),
237-
(pd.TimedeltaIndex([10 ** 10]), np.array([10 ** 10], dtype="m8[ns]")),
238-
(
239-
pd.PeriodIndex(["2017", "2018"], freq="D"),
240-
np.array([17167, 17532], dtype=np.int64),
241-
),
242-
],
243-
)
244-
def test_ndarray_values(array, expected):
245-
l_values = pd.Series(array)._ndarray_values
246-
r_values = pd.Index(array)._ndarray_values
247-
tm.assert_numpy_array_equal(l_values, r_values)
248-
tm.assert_numpy_array_equal(l_values, expected)
249-
250-
251223
@pytest.mark.parametrize("arr", [np.array([1, 2, 3])])
252224
def test_numpy_array(arr):
253225
ser = pd.Series(arr)

pandas/tests/indexes/common.py

+1-9
Original file line numberDiff line numberDiff line change
@@ -313,16 +313,11 @@ def test_ensure_copied_data(self, indices):
313313
result = result.tz_localize("UTC").tz_convert(indices.tz)
314314

315315
tm.assert_index_equal(indices, result)
316-
tm.assert_numpy_array_equal(
317-
indices._ndarray_values, result._ndarray_values, check_same="copy"
318-
)
319316

320317
if isinstance(indices, PeriodIndex):
321318
# .values an object array of Period, thus copied
322319
result = index_type(ordinal=indices.asi8, copy=False, **init_kwargs)
323-
tm.assert_numpy_array_equal(
324-
indices._ndarray_values, result._ndarray_values, check_same="same"
325-
)
320+
tm.assert_numpy_array_equal(indices.asi8, result.asi8, check_same="same")
326321
elif isinstance(indices, IntervalIndex):
327322
# checked in test_interval.py
328323
pass
@@ -331,9 +326,6 @@ def test_ensure_copied_data(self, indices):
331326
tm.assert_numpy_array_equal(
332327
indices.values, result.values, check_same="same"
333328
)
334-
tm.assert_numpy_array_equal(
335-
indices._ndarray_values, result._ndarray_values, check_same="same"
336-
)
337329

338330
def test_memory_usage(self, indices):
339331
indices._engine.clear_mapping()

pandas/tests/indexes/interval/test_constructors.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def test_constructor_nan(self, constructor, breaks, closed):
9191

9292
assert result.closed == closed
9393
assert result.dtype.subtype == expected_subtype
94-
tm.assert_numpy_array_equal(result._ndarray_values, expected_values)
94+
tm.assert_numpy_array_equal(np.array(result), expected_values)
9595

9696
@pytest.mark.parametrize(
9797
"breaks",
@@ -114,7 +114,7 @@ def test_constructor_empty(self, constructor, breaks, closed):
114114
assert result.empty
115115
assert result.closed == closed
116116
assert result.dtype.subtype == expected_subtype
117-
tm.assert_numpy_array_equal(result._ndarray_values, expected_values)
117+
tm.assert_numpy_array_equal(np.array(result), expected_values)
118118

119119
@pytest.mark.parametrize(
120120
"breaks",

pandas/tests/indexes/interval/test_interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ def test_ensure_copied_data(self, closed):
147147
)
148148

149149
# by-definition make a copy
150-
result = IntervalIndex(index._ndarray_values, copy=False)
150+
result = IntervalIndex(np.array(index), copy=False)
151151
tm.assert_numpy_array_equal(
152152
index.left.values, result.left.values, check_same="copy"
153153
)

pandas/tests/indexes/period/test_constructors.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,9 @@ def test_constructor_fromarraylike(self):
147147

148148
msg = "freq not specified and cannot be inferred"
149149
with pytest.raises(ValueError, match=msg):
150-
PeriodIndex(idx._ndarray_values)
150+
PeriodIndex(idx.asi8)
151151
with pytest.raises(ValueError, match=msg):
152-
PeriodIndex(list(idx._ndarray_values))
152+
PeriodIndex(list(idx.asi8))
153153

154154
msg = "'Period' object is not iterable"
155155
with pytest.raises(TypeError, match=msg):

pandas/tests/indexes/period/test_period.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -161,23 +161,23 @@ def test_values(self):
161161
tm.assert_numpy_array_equal(idx.to_numpy(), exp)
162162

163163
exp = np.array([], dtype=np.int64)
164-
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
164+
tm.assert_numpy_array_equal(idx.asi8, exp)
165165

166166
idx = PeriodIndex(["2011-01", NaT], freq="M")
167167

168168
exp = np.array([Period("2011-01", freq="M"), NaT], dtype=object)
169169
tm.assert_numpy_array_equal(idx.values, exp)
170170
tm.assert_numpy_array_equal(idx.to_numpy(), exp)
171171
exp = np.array([492, -9223372036854775808], dtype=np.int64)
172-
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
172+
tm.assert_numpy_array_equal(idx.asi8, exp)
173173

174174
idx = PeriodIndex(["2011-01-01", NaT], freq="D")
175175

176176
exp = np.array([Period("2011-01-01", freq="D"), NaT], dtype=object)
177177
tm.assert_numpy_array_equal(idx.values, exp)
178178
tm.assert_numpy_array_equal(idx.to_numpy(), exp)
179179
exp = np.array([14975, -9223372036854775808], dtype=np.int64)
180-
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
180+
tm.assert_numpy_array_equal(idx.asi8, exp)
181181

182182
def test_period_index_length(self):
183183
pi = period_range(freq="A", start="1/1/2001", end="12/1/2009")

pandas/tests/reductions/test_reductions.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,7 @@ def test_ops(self, opname, obj):
5555
if not isinstance(obj, PeriodIndex):
5656
expected = getattr(obj.values, opname)()
5757
else:
58-
expected = pd.Period(
59-
ordinal=getattr(obj._ndarray_values, opname)(), freq=obj.freq
60-
)
58+
expected = pd.Period(ordinal=getattr(obj.asi8, opname)(), freq=obj.freq)
6159
try:
6260
assert result == expected
6361
except TypeError:

0 commit comments

Comments
 (0)