From 47dbc1297dffc92132ebc9e84e88c150209ef097 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Jun 2019 18:13:03 -0700 Subject: [PATCH 1/5] separate coerce_values from coerce_args --- pandas/core/internals/blocks.py | 85 +++++++++++++++--------- pandas/tests/internals/test_internals.py | 4 +- 2 files changed, 56 insertions(+), 33 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 92ea936944a3c..cb49ef3266441 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -363,7 +363,9 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): # fillna, but if we cannot coerce, then try again as an ObjectBlock try: - values, _ = self._try_coerce_args(self.values, value) + # Note: we only call try_coerce_args to let it raise + self._try_coerce_args(value) + blocks = self.putmask(mask, value, inplace=inplace) blocks = [b.make_block(values=self._try_coerce_result(b.values)) for b in blocks] @@ -670,7 +672,13 @@ def _try_cast_result(self, result, dtype=None): # may need to change the dtype here return maybe_downcast_to_dtype(result, dtype) - def _try_coerce_args(self, values, other): + def _coerce_values(self, values): + """ + Coerce values (usually derived from self.values) for an operation. + """ + return values + + def _try_coerce_args(self, other): """ provide coercion to our input arguments """ if np.any(notna(other)) and not self._can_hold_element(other): @@ -680,7 +688,7 @@ def _try_coerce_args(self, values, other): type(other).__name__, type(self).__name__.lower().replace('Block', ''))) - return values, other + return other def _try_coerce_result(self, result): """ reverse of try_coerce_args """ @@ -730,8 +738,8 @@ def replace(self, to_replace, value, inplace=False, filter=None, # try to replace, if we raise an error, convert to ObjectBlock and # retry try: - values, to_replace = self._try_coerce_args(self.values, - to_replace) + to_replace = self._try_coerce_args(to_replace) + values = self._coerce_values(self.values) mask = missing.mask_missing(values, to_replace) if filter is not None: filtered_out = ~self.mgr_locs.isin(filter) @@ -788,7 +796,8 @@ def setitem(self, indexer, value): # coerce if block dtype can store value values = self.values try: - values, value = self._try_coerce_args(values, value) + value = self._try_coerce_args(value) + values = self._coerce_values(values) # can keep its own dtype if hasattr(value, 'dtype') and is_dtype_equal(values.dtype, value.dtype): @@ -920,7 +929,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, new = self.fill_value if self._can_hold_element(new): - _, new = self._try_coerce_args(new_values, new) + new = self._try_coerce_args(new) if transpose: new_values = new_values.T @@ -1122,7 +1131,8 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, return [self.copy()] values = self.values if inplace else self.values.copy() - values, fill_value = self._try_coerce_args(values, fill_value) + values = self._coerce_values(values) + fill_value = self._try_coerce_args(fill_value) values = missing.interpolate_2d(values, method=method, axis=axis, limit=limit, fill_value=fill_value, dtype=self.dtype) @@ -1293,7 +1303,8 @@ def func(cond, values, other): if cond.ravel().all(): return values - values, other = self._try_coerce_args(values, other) + values = self._coerce_values(values) + other = self._try_coerce_args(other) try: return self._try_coerce_result(expressions.where( @@ -1418,7 +1429,7 @@ def quantile(self, qs, interpolation='linear', axis=0): values = values[None, :] else: values = self.get_values() - values, _ = self._try_coerce_args(values, values) + values = self._coerce_values(values) is_empty = values.shape[axis] == 0 orig_scalar = not is_list_like(qs) @@ -1570,7 +1581,8 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, # use block's copy logic. # .values may be an Index which does shallow copy by default new_values = self.values if inplace else self.copy().values - new_values, new = self._try_coerce_args(new_values, new) + new_values = self._coerce_values(new_values) + new = self._try_coerce_args(new) if isinstance(new, np.ndarray) and len(new) == len(mask): new = new[mask] @@ -2090,25 +2102,28 @@ def _can_hold_element(self, element): return (is_integer(element) or isinstance(element, datetime) or isna(element)) - def _try_coerce_args(self, values, other): + def _coerce_values(self, values): """ - Coerce values and other to dtype 'i8'. NaN and NaT convert to + Coerce values (usually derived from self.values) for an operation. + """ + return values.view('i8') + + def _try_coerce_args(self, other): + """ + Coerce other to dtype 'i8'. NaN and NaT convert to the smallest i8, and will correctly round-trip to NaT if converted back in _try_coerce_result. values is always ndarray-like, other may not be Parameters ---------- - values : ndarray-like other : ndarray-like or scalar Returns ------- - base-type values, base-type other + base-type other """ - values = values.view('i8') - if isinstance(other, bool): raise TypeError elif is_null_datetimelike(other): @@ -2126,7 +2141,7 @@ def _try_coerce_args(self, values, other): # let higher levels handle raise TypeError(other) - return values, other + return other def _try_coerce_result(self, result): """ reverse of try_coerce_args """ @@ -2275,21 +2290,25 @@ def _slice(self, slicer): return self.values[loc] return self.values[slicer] - def _try_coerce_args(self, values, other): + def _coerce_values(self, values): + """ + Coerce values (usually derived from self.values) for an operation. + """ + # asi8 is a view, needs copy + return _block_shape(values.view("i8"), ndim=self.ndim) + + def _try_coerce_args(self, other): """ localize and return i8 for the values Parameters ---------- - values : ndarray-like other : ndarray-like or scalar Returns ------- - base-type values, base-type other + base-type other """ - # asi8 is a view, needs copy - values = _block_shape(values.view("i8"), ndim=self.ndim) if isinstance(other, ABCSeries): other = self._holder(other) @@ -2317,7 +2336,7 @@ def _try_coerce_args(self, values, other): else: raise TypeError(other) - return values, other + return other def _try_coerce_result(self, result): """ reverse of try_coerce_args """ @@ -2458,21 +2477,25 @@ def fillna(self, value, **kwargs): value = Timedelta(value, unit='s') return super().fillna(value, **kwargs) - def _try_coerce_args(self, values, other): + def _coerce_values(self, values): + """ + Coerce values (usually derived from self.values) for an operation. + """ + return values.view('i8') + + def _try_coerce_args(self, other): """ Coerce values and other to int64, with null values converted to iNaT. values is always ndarray-like, other may not be Parameters ---------- - values : ndarray-like other : ndarray-like or scalar Returns ------- - base-type values, base-type other + base-type other """ - values = values.view('i8') if isinstance(other, bool): raise TypeError @@ -2487,7 +2510,7 @@ def _try_coerce_args(self, values, other): # let higher levels handle raise TypeError(other) - return values, other + return other def _try_coerce_result(self, result): """ reverse of try_coerce_args / try_operate """ @@ -2658,7 +2681,7 @@ def _maybe_downcast(self, blocks, downcast=None): def _can_hold_element(self, element): return True - def _try_coerce_args(self, values, other): + def _try_coerce_args(self, other): """ provide coercion to our input arguments """ if isinstance(other, ABCDatetimeIndex): @@ -2671,7 +2694,7 @@ def _try_coerce_args(self, values, other): # when falling back to ObjectBlock.where other = other.astype(object) - return values, other + return other def should_store(self, value): return not (issubclass(value.dtype.type, diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index fbd821f8ec342..7cbb6d0a0557d 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -299,14 +299,14 @@ def test_try_coerce_arg(self): block = create_block('datetime', [0]) # coerce None - none_coerced = block._try_coerce_args(block.values, None)[1] + none_coerced = block._try_coerce_args(None) assert pd.Timestamp(none_coerced) is pd.NaT # coerce different types of date bojects vals = (np.datetime64('2010-10-10'), datetime(2010, 10, 10), date(2010, 10, 10)) for val in vals: - coerced = block._try_coerce_args(block.values, val)[1] + coerced = block._try_coerce_args(val) assert np.int64 == type(coerced) assert pd.Timestamp('2010-10-10') == pd.Timestamp(coerced) From 13341264e7ad9af3e92204fac5b31ce820b8f151 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 30 Jun 2019 12:05:23 -0500 Subject: [PATCH 2/5] remove redundant method, move non-raising outside try --- pandas/core/internals/blocks.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cb05ac4fc7d13..9faa2f6893e91 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -727,9 +727,9 @@ def replace(self, to_replace, value, inplace=False, filter=None, # try to replace, if we raise an error, convert to ObjectBlock and # retry + values = self._coerce_values(self.values) try: to_replace = self._try_coerce_args(to_replace) - values = self._coerce_values(self.values) except (TypeError, ValueError): # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. @@ -2261,13 +2261,6 @@ def is_view(self): # check the ndarray values of the DatetimeIndex values return self.values._data.base is not None - def copy(self, deep=True): - """ copy constructor """ - values = self.values - if deep: - values = values.copy() - return self.make_block_same_class(values) - def get_values(self, dtype=None): """ Returns an ndarray of values. From 2ea33f98fcc8993f1110c27009ab770c8359c0c7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 30 Jun 2019 14:11:37 -0500 Subject: [PATCH 3/5] remove ndim kwarg from make_block --- pandas/core/internals/blocks.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9faa2f6893e91..cb6f0ae882809 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -202,17 +202,15 @@ def array_dtype(self): """ return self.dtype - def make_block(self, values, placement=None, ndim=None): + def make_block(self, values, placement=None): """ Create a new block, with type inference propagate any values that are not specified """ if placement is None: placement = self.mgr_locs - if ndim is None: - ndim = self.ndim - return make_block(values, placement=placement, ndim=ndim) + return make_block(values, placement=placement, ndim=self.ndim) def make_block_same_class(self, values, placement=None, ndim=None, dtype=None): From 74944dd9aae91996d0a92b262a45186d179f0bd6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 30 Jun 2019 15:14:37 -0500 Subject: [PATCH 4/5] small cleanup --- pandas/core/internals/blocks.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f3b41dfe94b57..5c973e4d7a8ec 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1304,8 +1304,8 @@ def func(cond, values, other): other = self._try_coerce_args(other) try: - return self._try_coerce_result(expressions.where( - cond, values, other)) + fastres = expressions.where(cond, values, other) + return self._try_coerce_result(fastres) except Exception as detail: if errors == 'raise': raise TypeError( @@ -1352,10 +1352,10 @@ def func(cond, values, other): result_blocks = [] for m in [mask, ~mask]: if m.any(): - r = self._try_cast_result(result.take(m.nonzero()[0], - axis=axis)) - result_blocks.append( - self.make_block(r.T, placement=self.mgr_locs[m])) + taken = result.take(m.nonzero()[0], axis=axis) + r = self._try_cast_result(taken) + nb = self.make_block(r.T, placement=self.mgr_locs[m]) + result_blocks.append(nb) return result_blocks From ccd438197c5d46e35417bb08da5fd26d5bfc2916 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 1 Jul 2019 16:59:20 -0700 Subject: [PATCH 5/5] flesh out and de-dup docstrings --- pandas/core/internals/blocks.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d6df378e4f6c1..6cfeb62ef736b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -662,6 +662,14 @@ def _try_cast_result(self, result, dtype=None): def _coerce_values(self, values): """ Coerce values (usually derived from self.values) for an operation. + + Parameters + ---------- + values : ndarray or ExtensionArray + + Returns + ------- + ndarray or ExtensionArray """ return values @@ -2131,9 +2139,6 @@ def _can_hold_element(self, element): isna(element)) def _coerce_values(self, values): - """ - Coerce values (usually derived from self.values) for an operation. - """ return values.view('i8') def _try_coerce_args(self, other): @@ -2312,9 +2317,6 @@ def _slice(self, slicer): return self.values[slicer] def _coerce_values(self, values): - """ - Coerce values (usually derived from self.values) for an operation. - """ # asi8 is a view, needs copy return _block_shape(values.view("i8"), ndim=self.ndim) @@ -2499,9 +2501,6 @@ def fillna(self, value, **kwargs): return super().fillna(value, **kwargs) def _coerce_values(self, values): - """ - Coerce values (usually derived from self.values) for an operation. - """ return values.view('i8') def _try_coerce_args(self, other):