Skip to content

CLN: remove block._coerce_values #27567

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Jul 26, 2019
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
1043a20
stop conflating iNaT with td64-NaT
jbrockmendel Jul 16, 2019
dafef8b
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat
jbrockmendel Jul 16, 2019
f35754c
dont allow iNaT in DatetimeBlock
jbrockmendel Jul 17, 2019
0a4ed9c
fix docstring
jbrockmendel Jul 17, 2019
1fb34fd
coerce less
jbrockmendel Jul 17, 2019
be2e42a
cleanup
jbrockmendel Jul 17, 2019
1ae64ac
remove unnecessary
jbrockmendel Jul 17, 2019
512855a
remove _coerce_values
jbrockmendel Jul 17, 2019
121b140
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 17, 2019
0b8bdba
comments
jbrockmendel Jul 17, 2019
10f7817
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 17, 2019
f46707d
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 19, 2019
8fc4537
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 20, 2019
c1e629e
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 23, 2019
9bb2342
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 24, 2019
6500ed8
Cleanup
jbrockmendel Jul 24, 2019
957d866
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 24, 2019
79128ed
use extract_array
jbrockmendel Jul 24, 2019
84d0a44
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 24, 2019
5d73147
blackify
jbrockmendel Jul 24, 2019
6ed302c
isort
jbrockmendel Jul 24, 2019
544f638
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 24, 2019
7507a41
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 25, 2019
df6ac0a
update import
jbrockmendel Jul 25, 2019
5848937
Fix incorrect test
jbrockmendel Jul 25, 2019
0b1001f
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 25, 2019
56f8c07
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 26, 2019
53585b2
comment
jbrockmendel Jul 26, 2019
ba043bb
post merge fix
jbrockmendel Jul 26, 2019
60d428c
Merge branch 'master' of https://github.com/pandas-dev/pandas into inat3
jbrockmendel Jul 26, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,10 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
new_blocks = []
new_items = []
deleted_items = []
no_result = object()
for block in data.blocks:

# Avoid inheriting result from earlier in the loop
result = no_result
locs = block.mgr_locs.as_array
try:
result, _ = self.grouper.aggregate(
Expand All @@ -174,15 +176,15 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
except TypeError:
# we may have an exception in trying to aggregate
# continue and exclude the block
pass

deleted_items.append(locs)
continue
finally:
if result is not no_result:
dtype = block.values.dtype

dtype = block.values.dtype

# see if we can cast the block back to the original dtype
result = block._try_coerce_and_cast_result(result, dtype=dtype)
newb = block.make_block(result)
# see if we can cast the block back to the original dtype
result = block._try_coerce_and_cast_result(result, dtype=dtype)
newb = block.make_block(result)

new_items.append(locs)
new_blocks.append(newb)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class providing the base-class of operations.
SpecificationError,
)
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame
from pandas.core.groupby import base
Expand Down Expand Up @@ -803,10 +804,9 @@ def _try_cast(self, result, obj, numeric_only=False):
# Prior results _may_ have been generated in UTC.
# Ensure we localize to UTC first before converting
# to the target timezone
arr = extract_array(obj)
try:
result = obj._values._from_sequence(
result, dtype="datetime64[ns, UTC]"
)
result = arr._from_sequence(result, dtype="datetime64[ns, UTC]")
result = result.astype(dtype)
except TypeError:
# _try_cast was called at a point where the result
Expand Down
107 changes: 25 additions & 82 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from pandas._libs import NaT, lib, tslib, tslibs
from pandas._libs import NaT, Timestamp, lib, tslib, tslibs
import pandas._libs.internals as libinternals
from pandas._libs.tslibs import Timedelta, conversion
from pandas._libs.tslibs.timezones import tz_compare
Expand Down Expand Up @@ -713,20 +713,6 @@ def _try_cast_result(self, result, dtype=None):
# may need to change the dtype here
return maybe_downcast_to_dtype(result, dtype)

def _coerce_values(self, values):
"""
Coerce values (usually derived from self.values) for an operation.

Parameters
----------
values : ndarray or ExtensionArray

Returns
-------
ndarray or ExtensionArray
"""
return values

def _try_coerce_args(self, other):
""" provide coercion to our input arguments """

Expand Down Expand Up @@ -815,7 +801,7 @@ def replace(
convert=convert,
)

values = self._coerce_values(self.values)
values = self.values
to_replace = self._try_coerce_args(to_replace)

mask = missing.mask_missing(values, to_replace)
Expand Down Expand Up @@ -903,7 +889,6 @@ def setitem(self, indexer, value):
b = self.astype(dtype)
return b.setitem(indexer, value)
else:
values = self._coerce_values(values)
# can keep its own dtype
if hasattr(value, "dtype") and is_dtype_equal(values.dtype, value.dtype):
dtype = self.dtype
Expand Down Expand Up @@ -1228,7 +1213,6 @@ def _interpolate_with_fill(
return [self.copy()]

values = self.values if inplace else self.values.copy()
values = self._coerce_values(values)
fill_value = self._try_coerce_args(fill_value)
values = missing.interpolate_2d(
values,
Expand Down Expand Up @@ -1443,7 +1427,6 @@ def func(cond, values, other):
else:
# see if we can operate on the entire block, or need item-by-item
# or if we are a single block (ndim == 1)
values = self._coerce_values(values)
try:
result = func(cond, values, other)
except TypeError:
Expand Down Expand Up @@ -1547,14 +1530,13 @@ def quantile(self, qs, interpolation="linear", axis=0):
# We need to operate on i8 values for datetimetz
# but `Block.get_values()` returns an ndarray of objects
# right now. We need an API for "values to do numeric-like ops on"
values = self.values.asi8
values = self.values.view("M8[ns]")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should probably remove .asi8 on Index as I agree its sligthly confusing (I don't think we need deprecation)


# TODO: NonConsolidatableMixin shape
# Usual shape inconsistencies for ExtensionBlocks
values = values[None, :]
else:
values = self.get_values()
values = self._coerce_values(values)

is_empty = values.shape[axis] == 0
orig_scalar = not is_list_like(qs)
Expand Down Expand Up @@ -1719,7 +1701,6 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False)
# use block's copy logic.
# .values may be an Index which does shallow copy by default
new_values = self.values if inplace else self.copy().values
new_values = self._coerce_values(new_values)
new = self._try_coerce_args(new)

if isinstance(new, np.ndarray) and len(new) == len(mask):
Expand Down Expand Up @@ -1918,12 +1899,6 @@ def _try_cast_result(self, result, dtype=None):
result could also be an EA Array itself, in which case it
is already a 1-D array
"""
try:

result = self._holder._from_sequence(result.ravel(), dtype=dtype)
except Exception:
pass

return result

def formatting_values(self):
Expand Down Expand Up @@ -2303,8 +2278,8 @@ def _try_coerce_args(self, other):
if is_valid_nat_for_dtype(other, self.dtype):
other = np.datetime64("NaT", "ns")
elif isinstance(other, (datetime, np.datetime64, date)):
other = self._box_func(other)
if getattr(other, "tz") is not None:
other = Timestamp(other)
if other.tz is not None:
raise TypeError("cannot coerce a Timestamp with a tz on a naive Block")
other = other.asm8
elif hasattr(other, "dtype") and is_datetime64_dtype(other):
Expand All @@ -2319,18 +2294,11 @@ def _try_coerce_args(self, other):

def _try_coerce_result(self, result):
""" reverse of try_coerce_args """
if isinstance(result, np.ndarray):
if result.dtype.kind in ["i", "f"]:
result = result.astype("M8[ns]")

elif isinstance(result, (np.integer, np.float, np.datetime64)):
result = self._box_func(result)
if isinstance(result, np.ndarray) and result.dtype.kind == "i":
# needed for _interpolate_with_ffill
result = result.view("M8[ns]")
return result

@property
def _box_func(self):
return tslibs.Timestamp

def to_native_types(
self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs
):
Expand Down Expand Up @@ -2386,6 +2354,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
is_extension = True

_can_hold_element = DatetimeBlock._can_hold_element
fill_value = np.datetime64("NaT", "ns")

@property
def _holder(self):
Expand Down Expand Up @@ -2441,7 +2410,7 @@ def get_values(self, dtype=None):
"""
values = self.values
if is_object_dtype(dtype):
values = values._box_values(values._data)
values = values.astype(object)

values = np.asarray(values)

Expand All @@ -2467,9 +2436,6 @@ def _slice(self, slicer):
return self.values[loc]
return self.values[slicer]

def _coerce_values(self, values):
return _block_shape(values, ndim=self.ndim)

def _try_coerce_args(self, other):
"""
localize and return i8 for the values
Expand All @@ -2482,17 +2448,7 @@ def _try_coerce_args(self, other):
-------
base-type other
"""

if isinstance(other, ABCSeries):
other = self._holder(other)

if isinstance(other, bool):
raise TypeError
elif is_datetime64_dtype(other):
# add the tz back
other = self._holder(other, dtype=self.dtype)

elif is_valid_nat_for_dtype(other, self.dtype):
if is_valid_nat_for_dtype(other, self.dtype):
other = np.datetime64("NaT", "ns")
elif isinstance(other, self._holder):
if not tz_compare(other.tz, self.values.tz):
Expand All @@ -2512,22 +2468,22 @@ def _try_coerce_args(self, other):
def _try_coerce_result(self, result):
""" reverse of try_coerce_args """
if isinstance(result, np.ndarray):
if result.dtype.kind in ["i", "f"]:
result = result.astype("M8[ns]")
if result.ndim == 2:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment or 2 here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

result = result[0, :]
if result.dtype == np.float64:
# needed for post-groupby.median
result = self._holder._from_sequence(
result.astype(np.int64), freq=None, dtype=self.values.dtype
)
elif result.dtype == "M8[ns]":
# otherwise we get here via quantile and already have M8[ns]
result = self._holder._simple_new(
result, freq=None, dtype=self.values.dtype
)

elif isinstance(result, (np.integer, np.float, np.datetime64)):
elif isinstance(result, np.datetime64):
# also for post-quantile
result = self._box_func(result)

if isinstance(result, np.ndarray):
# allow passing of > 1dim if its trivial

if result.ndim > 1:
result = result.reshape(np.prod(result.shape))
# GH#24096 new values invalidates a frequency
result = self._holder._simple_new(
result, freq=None, dtype=self.values.dtype
)

return result

@property
Expand Down Expand Up @@ -2626,10 +2582,6 @@ def __init__(self, values, placement, ndim=None):
def _holder(self):
return TimedeltaArray

@property
def _box_func(self):
return lambda x: Timedelta(x, unit="ns")

def _can_hold_element(self, element):
tipo = maybe_infer_dtype_type(element)
if tipo is not None:
Expand Down Expand Up @@ -2687,15 +2639,6 @@ def _try_coerce_args(self, other):

def _try_coerce_result(self, result):
""" reverse of try_coerce_args / try_operate """
if isinstance(result, np.ndarray):
mask = isna(result)
if result.dtype.kind in ["i", "f"]:
result = result.astype("m8[ns]")
result[mask] = np.timedelta64("NaT", "ns")

elif isinstance(result, (np.integer, np.float)):
result = self._box_func(result)

return result

def should_store(self, value):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexing/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ def test_indexing_with_datetime_tz(self):
# indexing
result = df.iloc[1]
expected = Series(
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this not work now (the existing code)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the test is incorrect, and tm.assert_series_equal is getting it wrong.

now = pd.Timestamp.now("US/Eastern")
left = pd.Series([now, pd.NaT, pd.NaT], dtype=object)
right = pd.Series([now, np.nan, np.nan], dtype=object)

tm.assert_series_equal(left, right)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will open an issue for assert_series_equal

[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT],
index=list("ABC"),
dtype="object",
name=1,
)
tm.assert_series_equal(result, expected)
result = df.loc[1]
expected = Series(
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), np.nan, np.nan],
[Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT],
index=list("ABC"),
dtype="object",
name=1,
Expand Down