From 7863781fdcb692f896490899480c3b60d5a37ef4 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 2 Nov 2020 11:41:56 +0000 Subject: [PATCH 1/5] refactor core dtypes --- pandas/core/dtypes/cast.py | 50 ++++++++++++++++++--------------- pandas/core/dtypes/common.py | 4 +-- pandas/core/dtypes/concat.py | 7 ++--- pandas/core/dtypes/dtypes.py | 17 ++++++----- pandas/core/dtypes/inference.py | 5 +--- pandas/core/dtypes/missing.py | 23 ++++++++------- 6 files changed, 53 insertions(+), 53 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 692da8f8e021e..fdc4e258ca85c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -126,12 +126,11 @@ def is_nested_object(obj) -> bool: This may not be necessarily be performant. """ - if isinstance(obj, ABCSeries) and is_object_dtype(obj.dtype): - - if any(isinstance(v, ABCSeries) for v in obj._values): - return True - - return False + return bool( + isinstance(obj, ABCSeries) + and is_object_dtype(obj.dtype) + and any(isinstance(v, ABCSeries) for v in obj._values) + ) def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]): @@ -335,10 +334,11 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj: from pandas.core.arrays.boolean import BooleanDtype from pandas.core.arrays.integer import Int64Dtype - if how in ["add", "cumsum", "sum"] and (dtype == np.dtype(bool)): - return np.dtype(np.int64) - elif how in ["add", "cumsum", "sum"] and isinstance(dtype, BooleanDtype): - return Int64Dtype() + if how in ["add", "cumsum", "sum"]: + if dtype == np.dtype(bool): + return np.dtype(np.int64) + elif isinstance(dtype, BooleanDtype): + return Int64Dtype() return dtype @@ -481,9 +481,11 @@ def maybe_casted_values( """ values = index._values - if not isinstance(index, (ABCPeriodIndex, ABCDatetimeIndex)): - if values.dtype == np.object_: - values = lib.maybe_convert_objects(values) + if ( + not isinstance(index, (ABCPeriodIndex, ABCDatetimeIndex)) + and values.dtype == np.object_ + ): + values = lib.maybe_convert_objects(values) # if we have the codes, extract the values with a mask if codes is not None: @@ -725,8 +727,8 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, # a 1-element ndarray if isinstance(val, np.ndarray): - msg = "invalid ndarray passed to infer_dtype_from_scalar" if val.ndim != 0: + msg = "invalid ndarray passed to infer_dtype_from_scalar" raise ValueError(msg) dtype = val.dtype @@ -1552,7 +1554,7 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj: numpy.find_common_type """ - if len(types) == 0: + if not types: raise ValueError("no types given") first = types[0] @@ -1841,12 +1843,16 @@ def validate_numeric_casting(dtype: np.dtype, value: Scalar) -> None: ------ ValueError """ - if issubclass(dtype.type, (np.integer, np.bool_)): - if is_float(value) and np.isnan(value): - raise ValueError("Cannot assign nan to integer series") + if ( + issubclass(dtype.type, (np.integer, np.bool_)) + and is_float(value) + and np.isnan(value) + ): + raise ValueError("Cannot assign nan to integer series") - if issubclass(dtype.type, (np.integer, np.floating, complex)) and not issubclass( - dtype.type, np.bool_ + if ( + issubclass(dtype.type, (np.integer, np.floating, complex)) + and not issubclass(dtype.type, np.bool_) + and is_bool(value) ): - if is_bool(value): - raise ValueError("Cannot assign bool to float/integer series") + raise ValueError("Cannot assign bool to float/integer series") diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 14184f044ae95..86ad9933aa19d 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1698,7 +1698,7 @@ def infer_dtype_from_object(dtype): elif dtype in ["period"]: raise NotImplementedError - if dtype == "datetime" or dtype == "timedelta": + if dtype in ["datetime", "timedelta"]: dtype += "64" try: return infer_dtype_from_object(getattr(np, dtype)) @@ -1733,7 +1733,7 @@ def _validate_date_like_dtype(dtype) -> None: typ = np.datetime_data(dtype)[0] except ValueError as e: raise TypeError(e) from e - if typ != "generic" and typ != "ns": + if typ not in ["generic", "ns"]: raise ValueError( f"{repr(dtype.name)} is too specific of a frequency, " f"try passing {repr(dtype.type.__name__)}" diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 60fd959701821..8f245fd9a429f 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -91,10 +91,9 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: # are not coming from Index/Series._values), eg in BlockManager.quantile arr = array(arr) - if is_extension_array_dtype(dtype): - if isinstance(arr, np.ndarray): - # numpy's astype cannot handle ExtensionDtypes - return array(arr, dtype=dtype, copy=False) + if is_extension_array_dtype(dtype) and isinstance(arr, np.ndarray): + # numpy's astype cannot handle ExtensionDtypes + return array(arr, dtype=dtype, copy=False) return arr.astype(dtype, copy=False) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 01b34187997cb..e853f665abdfd 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -419,14 +419,13 @@ def _hash_categories(categories, ordered: Ordered = True) -> int: categories = list(categories) # breaks if a np.array of categories cat_array = hash_tuples(categories) else: - if categories.dtype == "O": - if len({type(x) for x in categories}) != 1: - # TODO: hash_array doesn't handle mixed types. It casts - # everything to a str first, which means we treat - # {'1', '2'} the same as {'1', 2} - # find a better solution - hashed = hash((tuple(categories), ordered)) - return hashed + if categories.dtype == "O" and len({type(x) for x in categories}) != 1: + # TODO: hash_array doesn't handle mixed types. It casts + # everything to a str first, which means we treat + # {'1', '2'} the same as {'1', 2} + # find a better solution + hashed = hash((tuple(categories), ordered)) + return hashed if DatetimeTZDtype.is_dtype(categories.dtype): # Avoid future warning. @@ -903,7 +902,7 @@ def __hash__(self) -> int: def __eq__(self, other: Any) -> bool: if isinstance(other, str): - return other == self.name or other == self.name.title() + return other in [self.name, self.name.title()] return isinstance(other, PeriodDtype) and self.freq == other.freq diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 329c4445b05bc..8f8ded2ad54b1 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -125,10 +125,7 @@ def is_file_like(obj) -> bool: if not (hasattr(obj, "read") or hasattr(obj, "write")): return False - if not hasattr(obj, "__iter__"): - return False - - return True + return bool(hasattr(obj, "__iter__")) def is_re(obj) -> bool: diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 0b4aab0ac9d88..692c9f5d89e83 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -358,8 +358,8 @@ def isna_compat(arr, fill_value=np.nan) -> bool: ------- True if we can fill using this fill_value """ - dtype = arr.dtype if isna(fill_value): + dtype = arr.dtype return not (is_bool_dtype(dtype) or is_integer_dtype(dtype)) return True @@ -447,9 +447,10 @@ def array_equivalent( right = right.view("i8") # if we have structured dtypes, compare first - if left.dtype.type is np.void or right.dtype.type is np.void: - if left.dtype != right.dtype: - return False + if ( + left.dtype.type is np.void or right.dtype.type is np.void + ) and left.dtype != right.dtype: + return False return np.array_equal(left, right) @@ -484,11 +485,11 @@ def _array_equivalent_object(left, right, strict_nan): if np.any(np.asarray(left_value != right_value)): return False except TypeError as err: - if "Cannot compare tz-naive" in str(err): + if "Cannot compare tz-naive" in str( + err + ) or "boolean value of NA is ambiguous" in str(err): # tzawareness compat failure, see GH#28507 return False - elif "boolean value of NA is ambiguous" in str(err): - return False raise return True @@ -637,8 +638,6 @@ def isna_all(arr: ArrayLike) -> bool: else: checker = lambda x: _isna_ndarraylike(x, inf_as_na=INF_AS_NA) - for i in range(0, total_len, chunk_len): - if not checker(arr[i : i + chunk_len]).all(): - return False - - return True + return all( + checker(arr[i : i + chunk_len]).all() for i in range(0, total_len, chunk_len) + ) From 4e2183125f202cd8dbec75ba07da2f819dd386ea Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 4 Nov 2020 18:11:37 +0000 Subject: [PATCH 2/5] revert check --- pandas/core/dtypes/cast.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fdc4e258ca85c..9f5b13a8884f1 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2,6 +2,8 @@ Routines for casting. """ +from __future__ import annotations + from contextlib import suppress from datetime import date, datetime, timedelta from typing import ( @@ -271,7 +273,7 @@ def trans(x): return result -def maybe_cast_result(result, obj: "Series", numeric_only: bool = False, how: str = ""): +def maybe_cast_result(result, obj: Series, numeric_only: bool = False, how: str = ""): """ Try casting result to a different type if appropriate @@ -1537,7 +1539,7 @@ def maybe_cast_to_datetime(value, dtype: DtypeObj, errors: str = "raise"): return value -def find_common_type(types: List[DtypeObj]) -> DtypeObj: +def find_common_type(types: Union[List[DtypeObj], Series]) -> DtypeObj: """ Find a common data type among the given dtypes. @@ -1554,7 +1556,7 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj: numpy.find_common_type """ - if not types: + if not len(types): raise ValueError("no types given") first = types[0] From 68479a1b80b0337e9e6b70d7b2dec69a829f7909 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 4 Nov 2020 18:12:51 +0000 Subject: [PATCH 3/5] revert --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9f5b13a8884f1..1bbff9ae2413e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1556,7 +1556,7 @@ def find_common_type(types: Union[List[DtypeObj], Series]) -> DtypeObj: numpy.find_common_type """ - if not len(types): + if len(types) == 0: raise ValueError("no types given") first = types[0] From 8aaebe2b908065fa02b28f030f95bc93c312373b Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Fri, 6 Nov 2020 07:20:19 +0000 Subject: [PATCH 4/5] pass list of dtypeobj --- pandas/core/arrays/sparse/accessor.py | 2 +- pandas/core/dtypes/cast.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index ec4b0fd89860c..12f29faab9574 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -329,7 +329,7 @@ def to_coo(self): import_optional_dependency("scipy") from scipy.sparse import coo_matrix - dtype = find_common_type(self._parent.dtypes) + dtype = find_common_type(self._parent.dtypes.to_list()) if isinstance(dtype, SparseDtype): dtype = dtype.subtype diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index aab2be3dbc96c..b85d65bfd00fa 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1580,7 +1580,7 @@ def maybe_cast_to_datetime(value, dtype: DtypeObj, errors: str = "raise"): return value -def find_common_type(types: Union[List[DtypeObj], Series]) -> DtypeObj: +def find_common_type(types: List[DtypeObj]) -> DtypeObj: """ Find a common data type among the given dtypes. @@ -1597,7 +1597,7 @@ def find_common_type(types: Union[List[DtypeObj], Series]) -> DtypeObj: numpy.find_common_type """ - if len(types) == 0: + if not types: raise ValueError("no types given") first = types[0] From 9e1366d35f3cfc3df6ce8a52e0452f8cc85a3c95 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Wed, 9 Dec 2020 18:48:09 +0000 Subject: [PATCH 5/5] coverage --- pandas/core/dtypes/missing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 692c9f5d89e83..137f6593f3eeb 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -485,11 +485,11 @@ def _array_equivalent_object(left, right, strict_nan): if np.any(np.asarray(left_value != right_value)): return False except TypeError as err: - if "Cannot compare tz-naive" in str( - err - ) or "boolean value of NA is ambiguous" in str(err): + if "Cannot compare tz-naive" in str(err): # tzawareness compat failure, see GH#28507 return False + elif "boolean value of NA is ambiguous" in str(err): + return False raise return True