Skip to content

API: Revert breaking .values changes #24163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 10 additions & 44 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -219,17 +219,21 @@ Previously, these would be cast to a NumPy array with object dtype. In general,
this should result in better performance when storing an array of intervals or periods
in a :class:`Series` or column of a :class:`DataFrame`.

Note that the ``.values`` of a ``Series`` containing one of these types is no longer a NumPy
array, but rather an ``ExtensionArray``:
Use :attr:`Series.array` to extract the underlying array of intervals or periods
from the ``Series``:

.. ipython:: python

ser.values
pser.values
ser.array
pser.array

This is the same behavior as ``Series.values`` for categorical data. See
:ref:`whatsnew_0240.api_breaking.interval_values` for more.
.. warning::

For backwards compatibility, :attr:`Series.values` continues to return
a NumPy array of objects for Interval and Period data. We recommend
using :attr:`Series.array` when you need the array of data stored in the
``Series``, and :meth:`Series.to_numpy` when you know you need a NumPy array.
See :ref:`basics.dtypes` and :ref:`dsintro.attrs` for more.

.. _whatsnew_0240.enhancements.styler_pipe:

Expand Down Expand Up @@ -505,44 +509,6 @@ New Behavior on Windows:
...: print(f.read())
b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n'

.. _whatsnew_0240.api_breaking.interval_values:

``IntervalIndex.values`` is now an ``IntervalArray``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The :attr:`~IntervalIndex.values` attribute of an :class:`IntervalIndex` now returns an
``IntervalArray``, rather than a NumPy array of :class:`Interval` objects (:issue:`19453`).

Previous Behavior:

.. code-block:: ipython

In [1]: idx = pd.interval_range(0, 4)

In [2]: idx.values
Out[2]:
array([Interval(0, 1, closed='right'), Interval(1, 2, closed='right'),
Interval(2, 3, closed='right'), Interval(3, 4, closed='right')],
dtype=object)

New Behavior:

.. ipython:: python

idx = pd.interval_range(0, 4)
idx.values

This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``.

For situations where you need an ``ndarray`` of ``Interval`` objects, use
:func:`numpy.asarray`.

.. ipython:: python

np.asarray(idx)
idx.values.astype(object)


.. _whatsnew_0240.api.timezone_offset_parsing:

Parsing Datetime Strings with Timezone Offsets
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,7 +913,7 @@ def _ndarray_values(self):
- categorical -> codes
"""
if is_extension_array_dtype(self):
return self.values._ndarray_values
return self.array._ndarray_values
return self.values

@property
Expand Down Expand Up @@ -1307,12 +1307,12 @@ def memory_usage(self, deep=False):
Memory usage does not include memory consumed by elements that
are not components of the array if deep=False or if used on PyPy
"""
if hasattr(self.values, 'memory_usage'):
return self.values.memory_usage(deep=deep)
if hasattr(self.array, 'memory_usage'):
return self.array.memory_usage(deep=deep)

v = self.values.nbytes
v = self.array.nbytes
if deep and is_object_dtype(self) and not PYPY:
v += lib.memory_usage_of_objects(self.values)
v += lib.memory_usage_of_objects(self.array)
return v

@Substitution(
Expand Down
9 changes: 9 additions & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,15 @@ def nbytes(self):
# for TZ-aware
return self._ndarray_values.nbytes

def memory_usage(self, deep=False):
    """
    Return the number of bytes used by this index.

    The total is the ``nbytes`` of ``self._ndarray_values`` plus the size
    reported by ``self._engine.sizeof``.

    Parameters
    ----------
    deep : bool, default False
        Passed through unchanged to ``self._engine.sizeof``.

    Returns
    -------
    The sum of the two byte counts.
    """
    # TODO: Remove this when we have a DatetimeTZArray
    # This override sidesteps a recursion error: for TZ-aware data,
    # DTI._values is itself a DTI.
    data_bytes = self._ndarray_values.nbytes
    # include our engine hashtable
    engine_bytes = self._engine.sizeof(deep=deep)
    return data_bytes + engine_bytes

@cache_readonly
def _is_dates_only(self):
"""Return a boolean if we are only dates (and don't have a timezone)"""
Expand Down
20 changes: 17 additions & 3 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
_NS_DTYPE, _TD_DTYPE, ensure_platform_int, is_bool_dtype, is_categorical,
is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype,
is_dtype_equal, is_extension_array_dtype, is_extension_type,
is_float_dtype, is_integer, is_integer_dtype, is_list_like,
is_numeric_v_string_like, is_object_dtype, is_re, is_re_compilable,
is_sparse, is_timedelta64_dtype, pandas_dtype)
is_float_dtype, is_integer, is_integer_dtype, is_interval_dtype,
is_list_like, is_numeric_v_string_like, is_object_dtype, is_period_dtype,
is_re, is_re_compilable, is_sparse, is_timedelta64_dtype, pandas_dtype)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.dtypes import (
CategoricalDtype, DatetimeTZDtype, ExtensionDtype, PandasExtensionDtype)
Expand Down Expand Up @@ -1996,6 +1996,18 @@ def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
return blocks, mask


class ObjectValuesExtensionBlock(ExtensionBlock):
    """
    ExtensionBlock variant that keeps `.values` backwards compatible.

    PeriodArray and IntervalArray data are stored in this block so that
    Series[T].values is still an ndarray of objects.
    """

    def external_values(self, dtype=None):
        """
        Return the block's values cast to object dtype.

        The ``dtype`` argument is accepted but not used.
        """
        as_objects = self.values.astype(object)
        return as_objects


class NumericBlock(Block):
__slots__ = ()
is_numeric = True
Expand Down Expand Up @@ -3017,6 +3029,8 @@ def get_block_type(values, dtype=None):

if is_categorical(values):
cls = CategoricalBlock
elif is_interval_dtype(dtype) or is_period_dtype(dtype):
cls = ObjectValuesExtensionBlock
elif is_extension_array_dtype(values):
cls = ExtensionBlock
elif issubclass(vtype, np.floating):
Expand Down
13 changes: 11 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@
from pandas.io.formats.printing import pprint_thing

from .blocks import (
Block, CategoricalBlock, DatetimeTZBlock, ExtensionBlock, _extend_blocks,
_merge_blocks, _safe_reshape, get_block_type, make_block)
Block, CategoricalBlock, DatetimeTZBlock, ExtensionBlock,
ObjectValuesExtensionBlock, _extend_blocks, _merge_blocks, _safe_reshape,
get_block_type, make_block)
from .concat import ( # all for concatenate_block_managers
combine_concat_plans, concatenate_join_units, get_mgr_concatenation_plan,
is_uniform_join_units)
Expand Down Expand Up @@ -1752,6 +1753,14 @@ def form_blocks(arrays, names, axes):

blocks.extend(external_blocks)

if len(items_dict['ObjectValuesExtensionBlock']):
external_blocks = [
make_block(array, klass=ObjectValuesExtensionBlock, placement=[i])
for i, _, array in items_dict['ObjectValuesExtensionBlock']
]

blocks.extend(external_blocks)

if len(extra_locs):
shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ def _unstack_extension_series(series, level, fill_value):
level=level, fill_value=-1).get_result()

out = []
values = series.values
values = series.array

for col, indices in result.iteritems():
out.append(Series(values.take(indices.values,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/reshaping.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def test_unstack(self, data, index, obj):

for level in combinations:
result = ser.unstack(level=level)
assert all(isinstance(result[col].values, type(data))
assert all(isinstance(result[col].array, type(data))
for col in result.columns)
expected = ser.astype(object).unstack(level=level)
result = result.astype(object)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/series/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,3 +492,13 @@ def test_is_homogeneous_type(self):
assert Series()._is_homogeneous_type
assert Series([1, 2])._is_homogeneous_type
assert Series(pd.Categorical([1, 2]))._is_homogeneous_type

@pytest.mark.parametrize("data", [
    pd.period_range("2000", periods=4),
    pd.IntervalIndex.from_breaks([1, 2, 3, 4])
])
def test_values_compatibility(self, data):
    """Series.values for period/interval data equals the object array."""
    # https://github.com/pandas-dev/pandas/issues/23995
    actual = pd.Series(data).values
    object_values = data.astype(object)
    expected = np.array(object_values)
    tm.assert_numpy_array_equal(actual, expected)
4 changes: 2 additions & 2 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1340,11 +1340,11 @@ def assert_series_equal(left, right, check_dtype=True,
assert_numpy_array_equal(left.get_values(), right.get_values(),
check_dtype=check_dtype)
elif is_interval_dtype(left) or is_interval_dtype(right):
assert_interval_array_equal(left.values, right.values)
assert_interval_array_equal(left.array, right.array)

elif (is_extension_array_dtype(left) and not is_categorical_dtype(left) and
is_extension_array_dtype(right) and not is_categorical_dtype(right)):
return assert_extension_array_equal(left.values, right.values)
return assert_extension_array_equal(left.array, right.array)

else:
_testing.assert_almost_equal(left.get_values(), right.get_values(),
Expand Down