-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN: remove block._coerce_values #27567
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1043a20
dafef8b
f35754c
0a4ed9c
1fb34fd
be2e42a
1ae64ac
512855a
121b140
0b8bdba
10f7817
f46707d
8fc4537
c1e629e
9bb2342
6500ed8
957d866
79128ed
84d0a44
5d73147
6ed302c
544f638
7507a41
df6ac0a
5848937
0b1001f
56f8c07
53585b2
ba043bb
60d428c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ | |
|
||
import numpy as np | ||
|
||
from pandas._libs import NaT, lib, tslib, tslibs | ||
from pandas._libs import NaT, Timestamp, lib, tslib, tslibs | ||
import pandas._libs.internals as libinternals | ||
from pandas._libs.tslibs import Timedelta, conversion | ||
from pandas._libs.tslibs.timezones import tz_compare | ||
|
@@ -715,20 +715,6 @@ def _try_cast_result(self, result, dtype=None): | |
# may need to change the dtype here | ||
return maybe_downcast_to_dtype(result, dtype) | ||
|
||
def _coerce_values(self, values): | ||
""" | ||
Coerce values (usually derived from self.values) for an operation. | ||
|
||
Parameters | ||
---------- | ||
values : ndarray or ExtensionArray | ||
|
||
Returns | ||
------- | ||
ndarray or ExtensionArray | ||
""" | ||
return values | ||
|
||
def _try_coerce_args(self, other): | ||
""" provide coercion to our input arguments """ | ||
|
||
|
@@ -817,7 +803,7 @@ def replace( | |
convert=convert, | ||
) | ||
|
||
values = self._coerce_values(self.values) | ||
values = self.values | ||
to_replace = self._try_coerce_args(to_replace) | ||
|
||
mask = missing.mask_missing(values, to_replace) | ||
|
@@ -882,7 +868,6 @@ def setitem(self, indexer, value): | |
if self._can_hold_element(value): | ||
value = self._try_coerce_args(value) | ||
|
||
values = self._coerce_values(values) | ||
# can keep its own dtype | ||
if hasattr(value, "dtype") and is_dtype_equal(values.dtype, value.dtype): | ||
dtype = self.dtype | ||
|
@@ -1229,7 +1214,6 @@ def _interpolate_with_fill( | |
return [self.copy()] | ||
|
||
values = self.values if inplace else self.values.copy() | ||
values = self._coerce_values(values) | ||
fill_value = self._try_coerce_args(fill_value) | ||
values = missing.interpolate_2d( | ||
values, | ||
|
@@ -1444,7 +1428,6 @@ def func(cond, values, other): | |
else: | ||
# see if we can operate on the entire block, or need item-by-item | ||
# or if we are a single block (ndim == 1) | ||
values = self._coerce_values(values) | ||
try: | ||
result = func(cond, values, other) | ||
except TypeError: | ||
|
@@ -1548,14 +1531,13 @@ def quantile(self, qs, interpolation="linear", axis=0): | |
# We need to operate on i8 values for datetimetz | ||
# but `Block.get_values()` returns an ndarray of objects | ||
# right now. We need an API for "values to do numeric-like ops on" | ||
values = self.values.asi8 | ||
values = self.values.view("M8[ns]") | ||
|
||
# TODO: NonConsolidatableMixin shape | ||
# Usual shape inconsistencies for ExtensionBlocks | ||
values = values[None, :] | ||
else: | ||
values = self.get_values() | ||
values = self._coerce_values(values) | ||
|
||
is_empty = values.shape[axis] == 0 | ||
orig_scalar = not is_list_like(qs) | ||
|
@@ -1720,7 +1702,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) | |
# use block's copy logic. | ||
# .values may be an Index which does shallow copy by default | ||
new_values = self.values if inplace else self.copy().values | ||
new_values = self._coerce_values(new_values) | ||
new = self._try_coerce_args(new) | ||
|
||
if isinstance(new, np.ndarray) and len(new) == len(mask): | ||
|
@@ -1919,12 +1900,6 @@ def _try_cast_result(self, result, dtype=None): | |
result could also be an EA Array itself, in which case it | ||
is already a 1-D array | ||
""" | ||
try: | ||
|
||
result = self._holder._from_sequence(result.ravel(), dtype=dtype) | ||
except Exception: | ||
pass | ||
|
||
return result | ||
|
||
def formatting_values(self): | ||
|
@@ -2304,8 +2279,8 @@ def _try_coerce_args(self, other): | |
if is_valid_nat_for_dtype(other, self.dtype): | ||
other = np.datetime64("NaT", "ns") | ||
elif isinstance(other, (datetime, np.datetime64, date)): | ||
other = self._box_func(other) | ||
if getattr(other, "tz") is not None: | ||
other = Timestamp(other) | ||
if other.tz is not None: | ||
raise TypeError("cannot coerce a Timestamp with a tz on a naive Block") | ||
other = other.asm8 | ||
elif hasattr(other, "dtype") and is_datetime64_dtype(other): | ||
|
@@ -2320,18 +2295,11 @@ def _try_coerce_args(self, other): | |
|
||
def _try_coerce_result(self, result): | ||
""" reverse of try_coerce_args """ | ||
if isinstance(result, np.ndarray): | ||
if result.dtype.kind in ["i", "f"]: | ||
result = result.astype("M8[ns]") | ||
|
||
elif isinstance(result, (np.integer, np.float, np.datetime64)): | ||
result = self._box_func(result) | ||
if isinstance(result, np.ndarray) and result.dtype.kind == "i": | ||
# needed for _interpolate_with_ffill | ||
result = result.view("M8[ns]") | ||
return result | ||
|
||
@property | ||
def _box_func(self): | ||
return tslibs.Timestamp | ||
|
||
def to_native_types( | ||
self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs | ||
): | ||
|
@@ -2387,6 +2355,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): | |
is_extension = True | ||
|
||
_can_hold_element = DatetimeBlock._can_hold_element | ||
fill_value = np.datetime64("NaT", "ns") | ||
|
||
@property | ||
def _holder(self): | ||
|
@@ -2442,7 +2411,7 @@ def get_values(self, dtype=None): | |
""" | ||
values = self.values | ||
if is_object_dtype(dtype): | ||
values = values._box_values(values._data) | ||
values = values.astype(object) | ||
|
||
values = np.asarray(values) | ||
|
||
|
@@ -2468,9 +2437,6 @@ def _slice(self, slicer): | |
return self.values[loc] | ||
return self.values[slicer] | ||
|
||
def _coerce_values(self, values): | ||
return _block_shape(values, ndim=self.ndim) | ||
|
||
def _try_coerce_args(self, other): | ||
""" | ||
localize and return i8 for the values | ||
|
@@ -2483,17 +2449,7 @@ def _try_coerce_args(self, other): | |
------- | ||
base-type other | ||
""" | ||
|
||
if isinstance(other, ABCSeries): | ||
other = self._holder(other) | ||
|
||
if isinstance(other, bool): | ||
raise TypeError | ||
elif is_datetime64_dtype(other): | ||
# add the tz back | ||
other = self._holder(other, dtype=self.dtype) | ||
|
||
elif is_valid_nat_for_dtype(other, self.dtype): | ||
if is_valid_nat_for_dtype(other, self.dtype): | ||
other = np.datetime64("NaT", "ns") | ||
elif isinstance(other, self._holder): | ||
if not tz_compare(other.tz, self.values.tz): | ||
|
@@ -2513,22 +2469,23 @@ def _try_coerce_args(self, other): | |
def _try_coerce_result(self, result): | ||
""" reverse of try_coerce_args """ | ||
if isinstance(result, np.ndarray): | ||
if result.dtype.kind in ["i", "f"]: | ||
result = result.astype("M8[ns]") | ||
if result.ndim == 2: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a comment or 2 here? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
# kludge for 2D blocks with 1D EAs | ||
result = result[0, :] | ||
if result.dtype == np.float64: | ||
# needed for post-groupby.median | ||
result = self._holder._from_sequence( | ||
result.astype(np.int64), freq=None, dtype=self.values.dtype | ||
) | ||
elif result.dtype == "M8[ns]": | ||
# otherwise we get here via quantile and already have M8[ns] | ||
result = self._holder._simple_new( | ||
result, freq=None, dtype=self.values.dtype | ||
) | ||
|
||
elif isinstance(result, (np.integer, np.float, np.datetime64)): | ||
elif isinstance(result, np.datetime64): | ||
# also for post-quantile | ||
result = self._box_func(result) | ||
|
||
if isinstance(result, np.ndarray): | ||
# allow passing of > 1dim if its trivial | ||
|
||
if result.ndim > 1: | ||
result = result.reshape(np.prod(result.shape)) | ||
# GH#24096 new values invalidates a frequency | ||
result = self._holder._simple_new( | ||
result, freq=None, dtype=self.values.dtype | ||
) | ||
|
||
return result | ||
|
||
@property | ||
|
@@ -2627,10 +2584,6 @@ def __init__(self, values, placement, ndim=None): | |
def _holder(self): | ||
return TimedeltaArray | ||
|
||
@property | ||
def _box_func(self): | ||
return lambda x: Timedelta(x, unit="ns") | ||
|
||
def _can_hold_element(self, element): | ||
tipo = maybe_infer_dtype_type(element) | ||
if tipo is not None: | ||
|
@@ -2688,15 +2641,6 @@ def _try_coerce_args(self, other): | |
|
||
def _try_coerce_result(self, result): | ||
""" reverse of try_coerce_args / try_operate """ | ||
if isinstance(result, np.ndarray): | ||
mask = isna(result) | ||
if result.dtype.kind in ["i", "f"]: | ||
result = result.astype("m8[ns]") | ||
result[mask] = np.timedelta64("NaT", "ns") | ||
|
||
elif isinstance(result, (np.integer, np.float)): | ||
result = self._box_func(result) | ||
|
||
return result | ||
|
||
def should_store(self, value): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,15 +51,15 @@ def test_indexing_with_datetime_tz(self): | |
# indexing | ||
result = df.iloc[1] | ||
expected = Series( | ||
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this not work now (the existing code)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the test is incorrect, and tm.assert_series_equal is getting it wrong.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Will open an issue for assert_series_equal. |
||
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], | ||
index=list("ABC"), | ||
dtype="object", | ||
name=1, | ||
) | ||
tm.assert_series_equal(result, expected) | ||
result = df.loc[1] | ||
expected = Series( | ||
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan], | ||
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], | ||
index=list("ABC"), | ||
dtype="object", | ||
name=1, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should probably remove .asi8 on Index, as I agree it's slightly confusing (I don't think we need a deprecation).