Skip to content

CLN: Make Series._values match Index._values #31182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Jan 28, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ def apply_standard(self):
and not self.dtypes.apply(is_extension_array_dtype).any()
# Disallow complex_internals since libreduction shortcut
# cannot handle MultiIndex
and not self.dtypes.apply(lambda x: x.kind in ["m", "M"]).any()
and not isinstance(self.agg_axis, ABCMultiIndex)
):

Expand Down
14 changes: 13 additions & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,16 @@ def array(self) -> ExtensionArray:
[a, b, a]
Categories (2, object): [a, b]
"""
raise AbstractMethodError(self)
# As a mixin, we depend on the mixing class having _values.
# Special mixin syntax may be developed in the future:
# https://github.com/python/typing/issues/246
result = self._values # type: ignore
if isinstance(result, np.ndarray):
from pandas.core.arrays.numpy_ import PandasArray

result = PandasArray(result)

return result

def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs):
"""
Expand Down Expand Up @@ -1249,6 +1258,9 @@ def unique(self):
if hasattr(values, "unique"):

result = values.unique()
if self.dtype.kind in ["m", "M"]:
if getattr(self.dtype, "tz", None) is None:
result = np.asarray(result)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment here explaining why this is needed?

else:
result = unique1d(values)

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,10 @@ def extract_array(obj, extract_numpy: bool = False):
array([1, 2, 3])
"""
if isinstance(obj, (ABCIndexClass, ABCSeries)):
obj = obj.array
arr = obj._values
if not extract_numpy and isinstance(arr, np.ndarray):
return obj.array
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I was also just thinking, while looking at the `.array` implementation above, that we could do the same here instead of going through the "`.array` -> wrap in PandasArray -> extract the numpy array again" route. That would further reduce the overhead of `extract_array(..., extract_numpy=True)`.

We could also do `arr = PandasArray(arr)` here to be explicit (it's not as if that duplicates much from `.array`).

return arr

if extract_numpy and isinstance(obj, ABCPandasArray):
obj = obj.to_numpy()
Expand Down
34 changes: 11 additions & 23 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pandas._libs.internals as libinternals
from pandas._libs.tslibs import Timedelta, conversion
from pandas._libs.tslibs.timezones import tz_compare
from pandas.util._decorators import cache_readonly
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -66,14 +67,7 @@
)

import pandas.core.algorithms as algos
from pandas.core.arrays import (
Categorical,
DatetimeArray,
ExtensionArray,
PandasArray,
PandasDtype,
TimedeltaArray,
)
from pandas.core.arrays import Categorical, DatetimeArray, PandasDtype, TimedeltaArray
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.construction import extract_array
Expand Down Expand Up @@ -216,12 +210,6 @@ def internal_values(self):
"""
return self.values

def array_values(self) -> ExtensionArray:
"""
The array that Series.array returns. Always an ExtensionArray.
"""
return PandasArray(self.values)

def get_values(self, dtype=None):
"""
return an internal format, currently just the ndarray
Expand Down Expand Up @@ -1794,9 +1782,6 @@ def get_values(self, dtype=None):
values = values.reshape((1,) + values.shape)
return values

def array_values(self) -> ExtensionArray:
return self.values

def to_dense(self):
return np.asarray(self.values)

Expand Down Expand Up @@ -2138,6 +2123,13 @@ def get_values(self, dtype=None):
return result.reshape(self.values.shape)
return self.values

def internal_values(self):
return self._ea_values

@cache_readonly
def _ea_values(self):
return self._holder(self.values)


class DatetimeBlock(DatetimeLikeBlockMixin, Block):
__slots__ = ()
Expand Down Expand Up @@ -2174,6 +2166,7 @@ def _maybe_coerce_values(self, values):
values = values._data

assert isinstance(values, np.ndarray), type(values)
assert values.dtype == _NS_DTYPE, values.dtype
return values

def astype(self, dtype, copy: bool = False, errors: str = "raise"):
Expand Down Expand Up @@ -2260,9 +2253,6 @@ def set(self, locs, values):
def external_values(self):
return np.asarray(self.values.astype("datetime64[ns]", copy=False))

def array_values(self) -> ExtensionArray:
return DatetimeArray._simple_new(self.values)


class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
""" implement a datetime64 block with a tz attribute """
Expand All @@ -2271,6 +2261,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
is_datetimetz = True
is_extension = True

internal_values = Block.internal_values
_can_hold_element = DatetimeBlock._can_hold_element
to_native_types = DatetimeBlock.to_native_types
fill_value = np.datetime64("NaT", "ns")
Expand Down Expand Up @@ -2520,9 +2511,6 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
def external_values(self):
return np.asarray(self.values.astype("timedelta64[ns]", copy=False))

def array_values(self) -> ExtensionArray:
return TimedeltaArray._simple_new(self.values)


class BoolBlock(NumericBlock):
__slots__ = ()
Expand Down
5 changes: 0 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,11 +526,6 @@ def _values(self):
"""
return self._data.internal_values()

@Appender(base.IndexOpsMixin.array.__doc__) # type: ignore
@property
def array(self) -> ExtensionArray:
return self._data._block.array_values()

def _internal_get_values(self):
"""
Same as values (but handles sparseness conversions); is a view.
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1583,7 +1583,7 @@ def test_string_na_nat_conversion(self, cache):
for i in range(5):
x = series[i]
if isna(x):
expected[i] = iNaT
expected[i] = pd.NaT
else:
expected[i] = to_datetime(x, cache=cache)

Expand Down
15 changes: 13 additions & 2 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,12 @@ def test_invalid_td64_reductions(self, opname):
)
td = s.diff()

msg = "reduction operation '{op}' not allowed for this dtype"
msg = "|".join(
[
"reduction operation '{op}' not allowed for this dtype",
r"cannot perform {op} with type timedelta64\[ns\]",
]
)
msg = msg.format(op=opname)

with pytest.raises(TypeError, match=msg):
Expand Down Expand Up @@ -648,7 +653,13 @@ def test_ops_consistency_on_empty(self, method):
# timedelta64[ns]
tdser = Series([], dtype="m8[ns]")
if method == "var":
with pytest.raises(TypeError, match="operation 'var' not allowed"):
msg = "|".join(
[
"operation 'var' not allowed",
r"cannot perform var with type timedelta64\[ns\]",
]
)
with pytest.raises(TypeError, match=msg):
getattr(tdser, method)()
else:
result = getattr(tdser, method)()
Expand Down