Skip to content

Commit c464695

Browse files
jbrockmendel authored and quintusdias committed
CLN: remove block._coerce_values (pandas-dev#27567)
1 parent af45301 commit c464695

File tree

4 files changed

+41
-95
lines changed

4 files changed

+41
-95
lines changed

pandas/core/groupby/generic.py

+10-8
Original file line number | Diff line number | Diff line change
@@ -148,8 +148,10 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
148148
new_blocks = []
149149
new_items = []
150150
deleted_items = []
151+
no_result = object()
151152
for block in data.blocks:
152-
153+
# Avoid inheriting result from earlier in the loop
154+
result = no_result
153155
locs = block.mgr_locs.as_array
154156
try:
155157
result, _ = self.grouper.aggregate(
@@ -174,15 +176,15 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
174176
except TypeError:
175177
# we may have an exception in trying to aggregate
176178
# continue and exclude the block
177-
pass
178-
179+
deleted_items.append(locs)
180+
continue
179181
finally:
182+
if result is not no_result:
183+
dtype = block.values.dtype
180184

181-
dtype = block.values.dtype
182-
183-
# see if we can cast the block back to the original dtype
184-
result = block._try_coerce_and_cast_result(result, dtype=dtype)
185-
newb = block.make_block(result)
185+
# see if we can cast the block back to the original dtype
186+
result = block._try_coerce_and_cast_result(result, dtype=dtype)
187+
newb = block.make_block(result)
186188

187189
new_items.append(locs)
188190
new_blocks.append(newb)

pandas/core/groupby/groupby.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ class providing the base-class of operations.
4747
SpecificationError,
4848
)
4949
import pandas.core.common as com
50+
from pandas.core.construction import extract_array
5051
from pandas.core.frame import DataFrame
5152
from pandas.core.generic import NDFrame
5253
from pandas.core.groupby import base
@@ -803,10 +804,9 @@ def _try_cast(self, result, obj, numeric_only=False):
803804
# Prior results _may_ have been generated in UTC.
804805
# Ensure we localize to UTC first before converting
805806
# to the target timezone
807+
arr = extract_array(obj)
806808
try:
807-
result = obj._values._from_sequence(
808-
result, dtype="datetime64[ns, UTC]"
809-
)
809+
result = arr._from_sequence(result, dtype="datetime64[ns, UTC]")
810810
result = result.astype(dtype)
811811
except TypeError:
812812
# _try_cast was called at a point where the result

pandas/core/internals/blocks.py

+26-82
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88
import numpy as np
99

10-
from pandas._libs import NaT, lib, tslib, tslibs
10+
from pandas._libs import NaT, Timestamp, lib, tslib, tslibs
1111
import pandas._libs.internals as libinternals
1212
from pandas._libs.tslibs import Timedelta, conversion
1313
from pandas._libs.tslibs.timezones import tz_compare
@@ -715,20 +715,6 @@ def _try_cast_result(self, result, dtype=None):
715715
# may need to change the dtype here
716716
return maybe_downcast_to_dtype(result, dtype)
717717

718-
def _coerce_values(self, values):
719-
"""
720-
Coerce values (usually derived from self.values) for an operation.
721-
722-
Parameters
723-
----------
724-
values : ndarray or ExtensionArray
725-
726-
Returns
727-
-------
728-
ndarray or ExtensionArray
729-
"""
730-
return values
731-
732718
def _try_coerce_args(self, other):
733719
""" provide coercion to our input arguments """
734720

@@ -817,7 +803,7 @@ def replace(
817803
convert=convert,
818804
)
819805

820-
values = self._coerce_values(self.values)
806+
values = self.values
821807
to_replace = self._try_coerce_args(to_replace)
822808

823809
mask = missing.mask_missing(values, to_replace)
@@ -882,7 +868,6 @@ def setitem(self, indexer, value):
882868
if self._can_hold_element(value):
883869
value = self._try_coerce_args(value)
884870

885-
values = self._coerce_values(values)
886871
# can keep its own dtype
887872
if hasattr(value, "dtype") and is_dtype_equal(values.dtype, value.dtype):
888873
dtype = self.dtype
@@ -1229,7 +1214,6 @@ def _interpolate_with_fill(
12291214
return [self.copy()]
12301215

12311216
values = self.values if inplace else self.values.copy()
1232-
values = self._coerce_values(values)
12331217
fill_value = self._try_coerce_args(fill_value)
12341218
values = missing.interpolate_2d(
12351219
values,
@@ -1444,7 +1428,6 @@ def func(cond, values, other):
14441428
else:
14451429
# see if we can operate on the entire block, or need item-by-item
14461430
# or if we are a single block (ndim == 1)
1447-
values = self._coerce_values(values)
14481431
try:
14491432
result = func(cond, values, other)
14501433
except TypeError:
@@ -1548,14 +1531,13 @@ def quantile(self, qs, interpolation="linear", axis=0):
15481531
# We need to operate on i8 values for datetimetz
15491532
# but `Block.get_values()` returns an ndarray of objects
15501533
# right now. We need an API for "values to do numeric-like ops on"
1551-
values = self.values.asi8
1534+
values = self.values.view("M8[ns]")
15521535

15531536
# TODO: NonConsolidatableMixin shape
15541537
# Usual shape inconsistencies for ExtensionBlocks
15551538
values = values[None, :]
15561539
else:
15571540
values = self.get_values()
1558-
values = self._coerce_values(values)
15591541

15601542
is_empty = values.shape[axis] == 0
15611543
orig_scalar = not is_list_like(qs)
@@ -1720,7 +1702,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False)
17201702
# use block's copy logic.
17211703
# .values may be an Index which does shallow copy by default
17221704
new_values = self.values if inplace else self.copy().values
1723-
new_values = self._coerce_values(new_values)
17241705
new = self._try_coerce_args(new)
17251706

17261707
if isinstance(new, np.ndarray) and len(new) == len(mask):
@@ -1919,12 +1900,6 @@ def _try_cast_result(self, result, dtype=None):
19191900
result could also be an EA Array itself, in which case it
19201901
is already a 1-D array
19211902
"""
1922-
try:
1923-
1924-
result = self._holder._from_sequence(result.ravel(), dtype=dtype)
1925-
except Exception:
1926-
pass
1927-
19281903
return result
19291904

19301905
def formatting_values(self):
@@ -2304,8 +2279,8 @@ def _try_coerce_args(self, other):
23042279
if is_valid_nat_for_dtype(other, self.dtype):
23052280
other = np.datetime64("NaT", "ns")
23062281
elif isinstance(other, (datetime, np.datetime64, date)):
2307-
other = self._box_func(other)
2308-
if getattr(other, "tz") is not None:
2282+
other = Timestamp(other)
2283+
if other.tz is not None:
23092284
raise TypeError("cannot coerce a Timestamp with a tz on a naive Block")
23102285
other = other.asm8
23112286
elif hasattr(other, "dtype") and is_datetime64_dtype(other):
@@ -2320,18 +2295,11 @@ def _try_coerce_args(self, other):
23202295

23212296
def _try_coerce_result(self, result):
23222297
""" reverse of try_coerce_args """
2323-
if isinstance(result, np.ndarray):
2324-
if result.dtype.kind in ["i", "f"]:
2325-
result = result.astype("M8[ns]")
2326-
2327-
elif isinstance(result, (np.integer, np.float, np.datetime64)):
2328-
result = self._box_func(result)
2298+
if isinstance(result, np.ndarray) and result.dtype.kind == "i":
2299+
# needed for _interpolate_with_fill
2300+
result = result.view("M8[ns]")
23292301
return result
23302302

2331-
@property
2332-
def _box_func(self):
2333-
return tslibs.Timestamp
2334-
23352303
def to_native_types(
23362304
self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs
23372305
):
@@ -2387,6 +2355,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
23872355
is_extension = True
23882356

23892357
_can_hold_element = DatetimeBlock._can_hold_element
2358+
fill_value = np.datetime64("NaT", "ns")
23902359

23912360
@property
23922361
def _holder(self):
@@ -2442,7 +2411,7 @@ def get_values(self, dtype=None):
24422411
"""
24432412
values = self.values
24442413
if is_object_dtype(dtype):
2445-
values = values._box_values(values._data)
2414+
values = values.astype(object)
24462415

24472416
values = np.asarray(values)
24482417

@@ -2468,9 +2437,6 @@ def _slice(self, slicer):
24682437
return self.values[loc]
24692438
return self.values[slicer]
24702439

2471-
def _coerce_values(self, values):
2472-
return _block_shape(values, ndim=self.ndim)
2473-
24742440
def _try_coerce_args(self, other):
24752441
"""
24762442
localize and return i8 for the values
@@ -2483,17 +2449,7 @@ def _try_coerce_args(self, other):
24832449
-------
24842450
base-type other
24852451
"""
2486-
2487-
if isinstance(other, ABCSeries):
2488-
other = self._holder(other)
2489-
2490-
if isinstance(other, bool):
2491-
raise TypeError
2492-
elif is_datetime64_dtype(other):
2493-
# add the tz back
2494-
other = self._holder(other, dtype=self.dtype)
2495-
2496-
elif is_valid_nat_for_dtype(other, self.dtype):
2452+
if is_valid_nat_for_dtype(other, self.dtype):
24972453
other = np.datetime64("NaT", "ns")
24982454
elif isinstance(other, self._holder):
24992455
if not tz_compare(other.tz, self.values.tz):
@@ -2513,22 +2469,23 @@ def _try_coerce_args(self, other):
25132469
def _try_coerce_result(self, result):
25142470
""" reverse of try_coerce_args """
25152471
if isinstance(result, np.ndarray):
2516-
if result.dtype.kind in ["i", "f"]:
2517-
result = result.astype("M8[ns]")
2472+
if result.ndim == 2:
2473+
# kludge for 2D blocks with 1D EAs
2474+
result = result[0, :]
2475+
if result.dtype == np.float64:
2476+
# needed for post-groupby.median
2477+
result = self._holder._from_sequence(
2478+
result.astype(np.int64), freq=None, dtype=self.values.dtype
2479+
)
2480+
elif result.dtype == "M8[ns]":
2481+
# otherwise we get here via quantile and already have M8[ns]
2482+
result = self._holder._simple_new(
2483+
result, freq=None, dtype=self.values.dtype
2484+
)
25182485

2519-
elif isinstance(result, (np.integer, np.float, np.datetime64)):
2486+
elif isinstance(result, np.datetime64):
2487+
# also for post-quantile
25202488
result = self._box_func(result)
2521-
2522-
if isinstance(result, np.ndarray):
2523-
# allow passing of > 1dim if its trivial
2524-
2525-
if result.ndim > 1:
2526-
result = result.reshape(np.prod(result.shape))
2527-
# GH#24096 new values invalidates a frequency
2528-
result = self._holder._simple_new(
2529-
result, freq=None, dtype=self.values.dtype
2530-
)
2531-
25322489
return result
25332490

25342491
@property
@@ -2627,10 +2584,6 @@ def __init__(self, values, placement, ndim=None):
26272584
def _holder(self):
26282585
return TimedeltaArray
26292586

2630-
@property
2631-
def _box_func(self):
2632-
return lambda x: Timedelta(x, unit="ns")
2633-
26342587
def _can_hold_element(self, element):
26352588
tipo = maybe_infer_dtype_type(element)
26362589
if tipo is not None:
@@ -2688,15 +2641,6 @@ def _try_coerce_args(self, other):
26882641

26892642
def _try_coerce_result(self, result):
26902643
""" reverse of try_coerce_args / try_operate """
2691-
if isinstance(result, np.ndarray):
2692-
mask = isna(result)
2693-
if result.dtype.kind in ["i", "f"]:
2694-
result = result.astype("m8[ns]")
2695-
result[mask] = np.timedelta64("NaT", "ns")
2696-
2697-
elif isinstance(result, (np.integer, np.float)):
2698-
result = self._box_func(result)
2699-
27002644
return result
27012645

27022646
def should_store(self, value):

pandas/tests/indexing/test_datetime.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -51,15 +51,15 @@ def test_indexing_with_datetime_tz(self):
5151
# indexing
5252
result = df.iloc[1]
5353
expected = Series(
54-
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan],
54+
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT],
5555
index=list("ABC"),
5656
dtype="object",
5757
name=1,
5858
)
5959
tm.assert_series_equal(result, expected)
6060
result = df.loc[1]
6161
expected = Series(
62-
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan],
62+
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT],
6363
index=list("ABC"),
6464
dtype="object",
6565
name=1,

0 commit comments

Comments
 (0)