Skip to content

Commit 2598709

Browse files
TomAugspurger authored and Pingviinituutti committed
API: Revert breaking .values changes (pandas-dev#24163)
* API: Revert breaking `.values` changes. User-facing change: `Series[period].values` and `Series[interval].values` continue to be an ndarray of objects. Recommend ``.array`` instead. There are a handful of related places in pandas where we assumed that ``Series[EA].values`` was an EA. Part of pandas-dev#23995
1 parent 7b16820 commit 2598709

File tree

9 files changed

+66
-58
lines changed

9 files changed

+66
-58
lines changed

doc/source/whatsnew/v0.24.0.rst

+10-44
Original file line number | Diff line number | Diff line change
@@ -219,17 +219,21 @@ Previously, these would be cast to a NumPy array with object dtype. In general,
219219
this should result in better performance when storing an array of intervals or periods
220220
in a :class:`Series` or column of a :class:`DataFrame`.
221221

222-
Note that the ``.values`` of a ``Series`` containing one of these types is no longer a NumPy
223-
array, but rather an ``ExtensionArray``:
222+
Use :attr:`Series.array` to extract the underlying array of intervals or periods
223+
from the ``Series``:
224224

225225
.. ipython:: python
226226
227-
ser.values
228-
pser.values
227+
ser.array
228+
pser.array
229229
230-
This is the same behavior as ``Series.values`` for categorical data. See
231-
:ref:`whatsnew_0240.api_breaking.interval_values` for more.
230+
.. warning::
232231

232+
For backwards compatibility, :attr:`Series.values` continues to return
233+
a NumPy array of objects for Interval and Period data. We recommend
234+
using :attr:`Series.array` when you need the array of data stored in the
235+
``Series``, and :meth:`Series.to_numpy` when you know you need a NumPy array.
236+
See :ref:`basics.dtypes` and :ref:`dsintro.attrs` for more.
233237

234238
.. _whatsnew_0240.enhancements.styler_pipe:
235239

@@ -507,44 +511,6 @@ New Behavior on Windows:
507511
...: print(f.read())
508512
b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n'
509513
510-
.. _whatsnew_0240.api_breaking.interval_values:
511-
512-
``IntervalIndex.values`` is now an ``IntervalArray``
513-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
514-
515-
The :attr:`~Interval.values` attribute of an :class:`IntervalIndex` now returns an
516-
``IntervalArray``, rather than a NumPy array of :class:`Interval` objects (:issue:`19453`).
517-
518-
Previous Behavior:
519-
520-
.. code-block:: ipython
521-
522-
In [1]: idx = pd.interval_range(0, 4)
523-
524-
In [2]: idx.values
525-
Out[2]:
526-
array([Interval(0, 1, closed='right'), Interval(1, 2, closed='right'),
527-
Interval(2, 3, closed='right'), Interval(3, 4, closed='right')],
528-
dtype=object)
529-
530-
New Behavior:
531-
532-
.. ipython:: python
533-
534-
idx = pd.interval_range(0, 4)
535-
idx.values
536-
537-
This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``.
538-
539-
For situations where you need an ``ndarray`` of ``Interval`` objects, use
540-
:meth:`numpy.asarray`.
541-
542-
.. ipython:: python
543-
544-
np.asarray(idx)
545-
idx.values.astype(object)
546-
547-
548514
.. _whatsnew_0240.api.timezone_offset_parsing:
549515

550516
Parsing Datetime Strings with Timezone Offsets

pandas/core/base.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -913,7 +913,7 @@ def _ndarray_values(self):
913913
- categorical -> codes
914914
"""
915915
if is_extension_array_dtype(self):
916-
return self.values._ndarray_values
916+
return self.array._ndarray_values
917917
return self.values
918918

919919
@property
@@ -1307,12 +1307,12 @@ def memory_usage(self, deep=False):
13071307
Memory usage does not include memory consumed by elements that
13081308
are not components of the array if deep=False or if used on PyPy
13091309
"""
1310-
if hasattr(self.values, 'memory_usage'):
1311-
return self.values.memory_usage(deep=deep)
1310+
if hasattr(self.array, 'memory_usage'):
1311+
return self.array.memory_usage(deep=deep)
13121312

1313-
v = self.values.nbytes
1313+
v = self.array.nbytes
13141314
if deep and is_object_dtype(self) and not PYPY:
1315-
v += lib.memory_usage_of_objects(self.values)
1315+
v += lib.memory_usage_of_objects(self.array)
13161316
return v
13171317

13181318
@Substitution(

pandas/core/indexes/datetimes.py

+9
Original file line number | Diff line number | Diff line change
@@ -326,6 +326,15 @@ def nbytes(self):
326326
# for TZ-aware
327327
return self._ndarray_values.nbytes
328328

329+
def memory_usage(self, deep=False):
330+
# TODO: Remove this when we have a DatetimeTZArray
331+
# Necessary to avoid recursion error since DTI._values is a DTI
332+
# for TZ-aware
333+
result = self._ndarray_values.nbytes
334+
# include our engine hashtable
335+
result += self._engine.sizeof(deep=deep)
336+
return result
337+
329338
@cache_readonly
330339
def _is_dates_only(self):
331340
"""Return a boolean if we are only dates (and don't have a timezone)"""

pandas/core/internals/blocks.py

+17-3
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,9 @@
2121
_NS_DTYPE, _TD_DTYPE, ensure_platform_int, is_bool_dtype, is_categorical,
2222
is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
2323
is_dtype_equal, is_extension_array_dtype, is_extension_type,
24-
is_float_dtype, is_integer, is_integer_dtype, is_list_like,
25-
is_numeric_v_string_like, is_object_dtype, is_re, is_re_compilable,
26-
is_sparse, is_timedelta64_dtype, pandas_dtype)
24+
is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype,
25+
is_list_like, is_numeric_v_string_like, is_object_dtype, is_period_dtype,
26+
is_re, is_re_compilable, is_sparse, is_timedelta64_dtype, pandas_dtype)
2727
import pandas.core.dtypes.concat as _concat
2828
from pandas.core.dtypes.dtypes import (
2929
CategoricalDtype, DatetimeTZDtype, ExtensionDtype, PandasExtensionDtype)
@@ -1996,6 +1996,18 @@ def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
19961996
return blocks, mask
19971997

19981998

1999+
class ObjectValuesExtensionBlock(ExtensionBlock):
2000+
"""
2001+
Block providing backwards-compatibility for `.values`.
2002+
2003+
Used by PeriodArray and IntervalArray to ensure that
2004+
Series[T].values is an ndarray of objects.
2005+
"""
2006+
2007+
def external_values(self, dtype=None):
2008+
return self.values.astype(object)
2009+
2010+
19992011
class NumericBlock(Block):
20002012
__slots__ = ()
20012013
is_numeric = True
@@ -3017,6 +3029,8 @@ def get_block_type(values, dtype=None):
30173029

30183030
if is_categorical(values):
30193031
cls = CategoricalBlock
3032+
elif is_interval_dtype(dtype) or is_period_dtype(dtype):
3033+
cls = ObjectValuesExtensionBlock
30203034
elif is_extension_array_dtype(values):
30213035
cls = ExtensionBlock
30223036
elif issubclass(vtype, np.floating):

pandas/core/internals/managers.py

+11-2
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,9 @@
3030
from pandas.io.formats.printing import pprint_thing
3131

3232
from .blocks import (
33-
Block, CategoricalBlock, DatetimeTZBlock, ExtensionBlock, _extend_blocks,
34-
_merge_blocks, _safe_reshape, get_block_type, make_block)
33+
Block, CategoricalBlock, DatetimeTZBlock, ExtensionBlock,
34+
ObjectValuesExtensionBlock, _extend_blocks, _merge_blocks, _safe_reshape,
35+
get_block_type, make_block)
3536
from .concat import ( # all for concatenate_block_managers
3637
combine_concat_plans, concatenate_join_units, get_mgr_concatenation_plan,
3738
is_uniform_join_units)
@@ -1752,6 +1753,14 @@ def form_blocks(arrays, names, axes):
17521753

17531754
blocks.extend(external_blocks)
17541755

1756+
if len(items_dict['ObjectValuesExtensionBlock']):
1757+
external_blocks = [
1758+
make_block(array, klass=ObjectValuesExtensionBlock, placement=[i])
1759+
for i, _, array in items_dict['ObjectValuesExtensionBlock']
1760+
]
1761+
1762+
blocks.extend(external_blocks)
1763+
17551764
if len(extra_locs):
17561765
shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])
17571766

pandas/core/reshape/reshape.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -433,7 +433,7 @@ def _unstack_extension_series(series, level, fill_value):
433433
level=level, fill_value=-1).get_result()
434434

435435
out = []
436-
values = series.values
436+
values = series.array
437437

438438
for col, indices in result.iteritems():
439439
out.append(Series(values.take(indices.values,

pandas/tests/extension/base/reshaping.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -231,7 +231,7 @@ def test_unstack(self, data, index, obj):
231231

232232
for level in combinations:
233233
result = ser.unstack(level=level)
234-
assert all(isinstance(result[col].values, type(data))
234+
assert all(isinstance(result[col].array, type(data))
235235
for col in result.columns)
236236
expected = ser.astype(object).unstack(level=level)
237237
result = result.astype(object)

pandas/tests/series/test_dtypes.py

+10
Original file line number | Diff line number | Diff line change
@@ -492,3 +492,13 @@ def test_is_homogeneous_type(self):
492492
assert Series()._is_homogeneous_type
493493
assert Series([1, 2])._is_homogeneous_type
494494
assert Series(pd.Categorical([1, 2]))._is_homogeneous_type
495+
496+
@pytest.mark.parametrize("data", [
497+
pd.period_range("2000", periods=4),
498+
pd.IntervalIndex.from_breaks([1, 2, 3, 4])
499+
])
500+
def test_values_compatibility(self, data):
501+
# https://github.com/pandas-dev/pandas/issues/23995
502+
result = pd.Series(data).values
503+
expected = np.array(data.astype(object))
504+
tm.assert_numpy_array_equal(result, expected)

pandas/util/testing.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1340,11 +1340,11 @@ def assert_series_equal(left, right, check_dtype=True,
13401340
assert_numpy_array_equal(left.get_values(), right.get_values(),
13411341
check_dtype=check_dtype)
13421342
elif is_interval_dtype(left) or is_interval_dtype(right):
1343-
assert_interval_array_equal(left.values, right.values)
1343+
assert_interval_array_equal(left.array, right.array)
13441344

13451345
elif (is_extension_array_dtype(left) and not is_categorical_dtype(left) and
13461346
is_extension_array_dtype(right) and not is_categorical_dtype(right)):
1347-
return assert_extension_array_equal(left.values, right.values)
1347+
return assert_extension_array_equal(left.array, right.array)
13481348

13491349
else:
13501350
_testing.assert_almost_equal(left.get_values(), right.get_values(),

0 commit comments

Comments
 (0)