Skip to content

Commit 8c38283

Browse files
authored
CLN: avoid values_from_object in Series (#32426)
1 parent c910ec3 commit 8c38283

File tree

4 files changed

+39
-33
lines changed

4 files changed

+39
-33
lines changed

pandas/core/frame.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
ensure_platform_int,
7878
infer_dtype_from_object,
7979
is_bool_dtype,
80+
is_datetime64_any_dtype,
8081
is_dict_like,
8182
is_dtype_equal,
8283
is_extension_array_dtype,
@@ -88,6 +89,7 @@
8889
is_list_like,
8990
is_named_tuple,
9091
is_object_dtype,
92+
is_period_dtype,
9193
is_scalar,
9294
is_sequence,
9395
needs_i8_conversion,
@@ -7789,11 +7791,13 @@ def _reduce(
77897791
self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
77907792
):
77917793

7792-
dtype_is_dt = self.dtypes.apply(lambda x: x.kind == "M")
7794+
dtype_is_dt = self.dtypes.apply(
7795+
lambda x: is_datetime64_any_dtype(x) or is_period_dtype(x)
7796+
)
77937797
if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any():
77947798
warnings.warn(
77957799
"DataFrame.mean and DataFrame.median with numeric_only=None "
7796-
"will include datetime64 and datetime64tz columns in a "
7800+
"will include datetime64, datetime64tz, and PeriodDtype columns in a "
77977801
"future version.",
77987802
FutureWarning,
77997803
stacklevel=3,
@@ -7854,6 +7858,10 @@ def blk_func(values):
78547858
assert len(res) == max(list(res.keys())) + 1, res.keys()
78557859
out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype)
78567860
out.index = df.columns
7861+
if axis == 0 and df.dtypes.apply(needs_i8_conversion).any():
7862+
# FIXME: the needs_i8_conversion check is a kludge; it is unclear
7863+
# why it is necessary in this case and this case alone
7864+
out[:] = coerce_to_dtypes(out.values, df.dtypes)
78577865
return out
78587866

78597867
if numeric_only is None:

pandas/core/nanops.py

+26-23
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from pandas._config import get_option
99

10-
from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib
10+
from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT, lib
1111
from pandas._typing import Dtype, Scalar
1212
from pandas.compat._optional import import_optional_dependency
1313

@@ -17,9 +17,7 @@
1717
is_any_int_dtype,
1818
is_bool_dtype,
1919
is_complex,
20-
is_datetime64_dtype,
21-
is_datetime64tz_dtype,
22-
is_datetime_or_timedelta_dtype,
20+
is_datetime64_any_dtype,
2321
is_float,
2422
is_float_dtype,
2523
is_integer,
@@ -28,8 +26,10 @@
2826
is_object_dtype,
2927
is_scalar,
3028
is_timedelta64_dtype,
29+
needs_i8_conversion,
3130
pandas_dtype,
3231
)
32+
from pandas.core.dtypes.dtypes import PeriodDtype
3333
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
3434

3535
from pandas.core.construction import extract_array
@@ -134,10 +134,8 @@ def f(
134134

135135

136136
def _bn_ok_dtype(dtype: Dtype, name: str) -> bool:
137-
# Bottleneck chokes on datetime64
138-
if not is_object_dtype(dtype) and not (
139-
is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype)
140-
):
137+
# Bottleneck chokes on datetime64, PeriodDtype (or any EA)
138+
if not is_object_dtype(dtype) and not needs_i8_conversion(dtype):
141139

142140
# GH 15507
143141
# bottleneck does not properly upcast during the sum
@@ -283,17 +281,16 @@ def _get_values(
283281
# with scalar fill_value. This guarantee is important for the
284282
# maybe_upcast_putmask call below
285283
assert is_scalar(fill_value)
284+
values = extract_array(values, extract_numpy=True)
286285

287286
mask = _maybe_get_mask(values, skipna, mask)
288287

289-
values = extract_array(values, extract_numpy=True)
290288
dtype = values.dtype
291289

292-
if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
290+
if needs_i8_conversion(values):
293291
# changing timedelta64/datetime64 to int64 needs to happen after
294292
# finding `mask` above
295-
values = getattr(values, "asi8", values)
296-
values = values.view(np.int64)
293+
values = np.asarray(values.view("i8"))
297294

298295
dtype_ok = _na_ok_dtype(dtype)
299296

@@ -307,7 +304,8 @@ def _get_values(
307304

308305
if skipna and copy:
309306
values = values.copy()
310-
if dtype_ok:
307+
assert mask is not None # for mypy
308+
if dtype_ok and mask.any():
311309
np.putmask(values, mask, fill_value)
312310

313311
# promote if needed
@@ -325,13 +323,14 @@ def _get_values(
325323

326324

327325
def _na_ok_dtype(dtype) -> bool:
328-
# TODO: what about datetime64tz? PeriodDtype?
329-
return not issubclass(dtype.type, (np.integer, np.timedelta64, np.datetime64))
326+
if needs_i8_conversion(dtype):
327+
return False
328+
return not issubclass(dtype.type, np.integer)
330329

331330

332331
def _wrap_results(result, dtype: Dtype, fill_value=None):
333332
""" wrap our results if needed """
334-
if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
333+
if is_datetime64_any_dtype(dtype):
335334
if fill_value is None:
336335
# GH#24293
337336
fill_value = iNaT
@@ -342,7 +341,8 @@ def _wrap_results(result, dtype: Dtype, fill_value=None):
342341
result = np.nan
343342
result = Timestamp(result, tz=tz)
344343
else:
345-
result = result.view(dtype)
344+
# If we have float dtype, taking a view will give the wrong result
345+
result = result.astype(dtype)
346346
elif is_timedelta64_dtype(dtype):
347347
if not isinstance(result, np.ndarray):
348348
if result == fill_value:
@@ -356,6 +356,14 @@ def _wrap_results(result, dtype: Dtype, fill_value=None):
356356
else:
357357
result = result.astype("m8[ns]").view(dtype)
358358

359+
elif isinstance(dtype, PeriodDtype):
360+
if is_float(result) and result.is_integer():
361+
result = int(result)
362+
if is_integer(result):
363+
result = Period._from_ordinal(result, freq=dtype.freq)
364+
else:
365+
raise NotImplementedError(type(result), result)
366+
359367
return result
360368

361369

@@ -542,12 +550,7 @@ def nanmean(values, axis=None, skipna=True, mask=None):
542550
)
543551
dtype_sum = dtype_max
544552
dtype_count = np.float64
545-
if (
546-
is_integer_dtype(dtype)
547-
or is_timedelta64_dtype(dtype)
548-
or is_datetime64_dtype(dtype)
549-
or is_datetime64tz_dtype(dtype)
550-
):
553+
if is_integer_dtype(dtype) or needs_i8_conversion(dtype):
551554
dtype_sum = np.float64
552555
elif is_float_dtype(dtype):
553556
dtype_sum = dtype

pandas/core/series.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1985,7 +1985,7 @@ def idxmin(self, axis=0, skipna=True, *args, **kwargs):
19851985
nan
19861986
"""
19871987
skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
1988-
i = nanops.nanargmin(com.values_from_object(self), skipna=skipna)
1988+
i = nanops.nanargmin(self._values, skipna=skipna)
19891989
if i == -1:
19901990
return np.nan
19911991
return self.index[i]
@@ -2056,7 +2056,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs):
20562056
nan
20572057
"""
20582058
skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
2059-
i = nanops.nanargmax(com.values_from_object(self), skipna=skipna)
2059+
i = nanops.nanargmax(self._values, skipna=skipna)
20602060
if i == -1:
20612061
return np.nan
20622062
return self.index[i]
@@ -2094,7 +2094,7 @@ def round(self, decimals=0, *args, **kwargs) -> "Series":
20942094
dtype: float64
20952095
"""
20962096
nv.validate_round(args, kwargs)
2097-
result = com.values_from_object(self).round(decimals)
2097+
result = self._values.round(decimals)
20982098
result = self._constructor(result, index=self.index).__finalize__(self)
20992099

21002100
return result

pandas/tests/frame/test_analytics.py

-5
Original file line numberDiff line numberDiff line change
@@ -856,11 +856,6 @@ def test_mean_datetimelike(self):
856856
expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]})
857857
tm.assert_series_equal(result, expected)
858858

859-
@pytest.mark.xfail(
860-
reason="casts to object-dtype and then tries to add timestamps",
861-
raises=TypeError,
862-
strict=True,
863-
)
864859
def test_mean_datetimelike_numeric_only_false(self):
865860
df = pd.DataFrame(
866861
{

0 commit comments

Comments
 (0)