From e18c342ad2f33ada991d4eddf2f78505db95431f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 14:43:53 -0700 Subject: [PATCH 1/5] replace try/except in Block.replace --- pandas/core/internals/blocks.py | 35 ++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f931df25c4fd5..7bb62b21a4915 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -10,6 +10,7 @@ from pandas._libs import NaT, lib, tslib, tslibs import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion, is_null_datetimelike +from pandas._libs.tslibs.timezones import tz_compare from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -727,6 +728,13 @@ def _try_coerce_args(self, other): type(self).__name__.lower().replace("Block", ""), ) ) + if lib.is_scalar(other) and isna(other) and self.is_integer: + raise TypeError( + "cannot convert {} to an {}".format( + type(other).__name__, + type(self).__name__.lower().replace("Block", ""), + ) + ) return other @@ -775,16 +783,13 @@ def replace( inplace = validate_bool_kwarg(inplace, "inplace") original_to_replace = to_replace - # try to replace, if we raise an error, convert to ObjectBlock and + # If we cannot replace with own dtype, convert to ObjectBlock and # retry - values = self._coerce_values(self.values) - try: - to_replace = self._try_coerce_args(to_replace) - except (TypeError, ValueError): + if not self._can_hold_element(to_replace): # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. if is_object_dtype(self): - raise + raise AssertionError # try again with a compatible block block = self.astype(object) @@ -797,6 +802,9 @@ def replace( convert=convert, ) + values = self._coerce_values(self.values) + to_replace = self._try_coerce_args(to_replace) + mask = missing.mask_missing(values, to_replace) if filter is not None: filtered_out = ~self.mgr_locs.isin(filter) @@ -1399,7 +1407,10 @@ def where(self, other, cond, align=True, errors="raise", try_cast=False, axis=0) # our where function def func(cond, values, other): - other = self._try_coerce_args(other) + + if not (self.is_integer and lib.is_scalar(other) and np.isnan(other)): + # TODO: why does this one case behave differently? + other = self._try_coerce_args(other) try: fastres = expressions.where(cond, values, other) @@ -2248,14 +2259,18 @@ def _astype(self, dtype, **kwargs): def _can_hold_element(self, element): tipo = maybe_infer_dtype_type(element) if tipo is not None: + return is_dtype_equal(tipo, self.dtype) return tipo == _NS_DTYPE or tipo == np.int64 + elif element is NaT: + return True elif isinstance(element, datetime): + if self.is_datetimetz: + return tz_compare(element.tzinfo, self.dtype.tz) return element.tzinfo is None elif is_integer(element): return element == tslibs.iNaT - # TODO: shouldnt we exclude timedelta64("NaT")? See GH#27297 - return isna(element) + return isna(element) and not isinstance(element, np.timedelta64) def _coerce_values(self, values): return values.view("i8") @@ -2359,6 +2374,8 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): is_datetimetz = True is_extension = True + _can_hold_element = DatetimeBlock._can_hold_element + @property def _holder(self): return DatetimeArray From 467742934336ab15a5a55bbba7e6978aa483e46b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 15:13:41 -0700 Subject: [PATCH 2/5] comment --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 7bb62b21a4915..4b740195bbf5a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1409,7 +1409,7 @@ def where(self, other, cond, align=True, errors="raise", try_cast=False, axis=0) def func(cond, values, other): if not (self.is_integer and lib.is_scalar(other) and np.isnan(other)): - # TODO: why does this one case behave differently? + # np.where will cast integer array to floats in this case other = self._try_coerce_args(other) try: From 6251c58a5c2629865260203b0a7fc5a4b91d8cf2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 11 Jul 2019 16:15:29 -0700 Subject: [PATCH 3/5] comment --- pandas/core/internals/blocks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4b740195bbf5a..b06cb328efee4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -786,6 +786,8 @@ def replace( # If we cannot replace with own dtype, convert to ObjectBlock and # retry if not self._can_hold_element(to_replace): + # TODO: we should be able to infer at this point that there is + # nothing to replace # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. if is_object_dtype(self): From 21e07ed69a60b24ede6c6ef94f9d76c6b272656c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 15 Jul 2019 11:46:45 -0700 Subject: [PATCH 4/5] remove unreachable --- pandas/core/internals/blocks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a799b513398a9..7716cdc95b86a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2262,7 +2262,6 @@ def _can_hold_element(self, element): tipo = maybe_infer_dtype_type(element) if tipo is not None: return is_dtype_equal(tipo, self.dtype) - return tipo == _NS_DTYPE or tipo == np.int64 elif element is NaT: return True elif isinstance(element, datetime): From 2295ea1b40131755d2acd9065d2c7a85851c8002 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 20 Jul 2019 18:13:41 -0700 Subject: [PATCH 5/5] more general can_hold_na catching --- pandas/core/internals/blocks.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 16c6d5c624c73..78565437c4924 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -733,7 +733,7 @@ def _try_coerce_args(self, other): type(self).__name__.lower().replace("Block", ""), ) ) - if lib.is_scalar(other) and isna(other) and self.is_integer: + if np.any(isna(other)) and not self._can_hold_na: raise TypeError( "cannot convert {} to an {}".format( type(other).__name__, @@ -1411,7 +1411,11 @@ def where(self, other, cond, align=True, errors="raise", try_cast=False, axis=0) # our where function def func(cond, values, other): - if not (self.is_integer and lib.is_scalar(other) and np.isnan(other)): + if not ( + (self.is_integer or self.is_bool) + and lib.is_scalar(other) + and np.isnan(other) + ): # np.where will cast integer array to floats in this case other = self._try_coerce_args(other)