Skip to content

CLN: Make Series._values match Index._values #31182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Jan 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,9 @@ def apply_standard(self):
if (
self.result_type in ["reduce", None]
and not self.dtypes.apply(is_extension_array_dtype).any()
# Disallow dtypes where setting _index_data will break
# ExtensionArray values, see GH#31182
and not self.dtypes.apply(lambda x: x.kind in ["m", "M"]).any()
# Disallow complex_internals since libreduction shortcut raises a TypeError
and not self.agg_axis._has_complex_internals
):
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1265,6 +1265,10 @@ def unique(self):
if hasattr(values, "unique"):

result = values.unique()
if self.dtype.kind in ["m", "M"] and isinstance(self, ABCSeries):
# GH#31182 Series._values returns EA, unpack for backward-compat
if getattr(self.dtype, "tz", None) is None:
result = np.asarray(result)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment here on why this is needed?

else:
result = unique1d(values)

Expand Down
5 changes: 5 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2151,6 +2151,10 @@ def get_values(self, dtype=None):
return result.reshape(self.values.shape)
return self.values

def internal_values(self):
# Override to return DatetimeArray and TimedeltaArray: delegate to
# array_values() so that the values backing Series._values are the
# ExtensionArray wrapper, matching Index._values (see GH#31182).
return self.array_values()


class DatetimeBlock(DatetimeLikeBlockMixin, Block):
__slots__ = ()
Expand Down Expand Up @@ -2284,6 +2288,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
is_datetimetz = True
is_extension = True

internal_values = Block.internal_values
_can_hold_element = DatetimeBlock._can_hold_element
to_native_types = DatetimeBlock.to_native_types
fill_value = np.datetime64("NaT", "ns")
Expand Down
11 changes: 7 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,8 @@ def _values(self):
"""
Return the internal repr of this data (defined by Block.internal_values).
These are the values as stored in the Block (ndarray or ExtensionArray
depending on the Block class).
depending on the Block class), with datetime64[ns] and timedelta64[ns]
wrapped in ExtensionArrays to match Index._values behavior.

Differs from the public ``.values`` for certain data types, because of
historical backwards compatibility of the public attribute (e.g. period
Expand All @@ -502,8 +503,9 @@ def _values(self):
cases).

Differs from ``.array`` in that this still returns the numpy array if
the Block is backed by a numpy array, while ``.array`` ensures to always
return an ExtensionArray.
the Block is backed by a numpy array (except for datetime64 and
timedelta64 dtypes), while ``.array`` ensures to always return an
ExtensionArray.

Differs from ``._ndarray_values``, as that ensures to always return a
numpy array (it will call ``_ndarray_values`` on the ExtensionArray, if
Expand All @@ -515,8 +517,9 @@ def _values(self):
----------- | ------------- | ------------- | ------------- | --------------- |
Numeric | ndarray | ndarray | PandasArray | ndarray |
Category | Categorical | Categorical | Categorical | ndarray[int] |
dt64[ns] | ndarray[M8ns] | ndarray[M8ns] | DatetimeArray | ndarray[M8ns] |
dt64[ns] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] |
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bit above (beginning of the docstring), the sentence "This are the values as stored in the Block" is no longer adequate I think?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sentence I am quoting (the second line of the docstring) still needs to be updated

dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] |
td64[ns] | ndarray[m8ns] | TimedeltaArray| TimedeltaArray| ndarray[m8ns] |
Period | ndarray[obj] | PeriodArray | PeriodArray | ndarray[int] |
Nullable | EA | EA | EA | ndarray |

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -1583,7 +1583,7 @@ def test_string_na_nat_conversion(self, cache):
for i in range(5):
x = series[i]
if isna(x):
expected[i] = iNaT
expected[i] = pd.NaT
else:
expected[i] = to_datetime(x, cache=cache)

Expand Down
15 changes: 13 additions & 2 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,12 @@ def test_invalid_td64_reductions(self, opname):
)
td = s.diff()

msg = "reduction operation '{op}' not allowed for this dtype"
msg = "|".join(
[
"reduction operation '{op}' not allowed for this dtype",
r"cannot perform {op} with type timedelta64\[ns\]",
]
)
msg = msg.format(op=opname)

with pytest.raises(TypeError, match=msg):
Expand Down Expand Up @@ -648,7 +653,13 @@ def test_ops_consistency_on_empty(self, method):
# timedelta64[ns]
tdser = Series([], dtype="m8[ns]")
if method == "var":
with pytest.raises(TypeError, match="operation 'var' not allowed"):
msg = "|".join(
[
"operation 'var' not allowed",
r"cannot perform var with type timedelta64\[ns\]",
]
)
with pytest.raises(TypeError, match=msg):
getattr(tdser, method)()
else:
result = getattr(tdser, method)()
Expand Down