
CLN refactor core dtypes #37584

Merged (17 commits, Jan 3, 2021)
This diff shows changes from 12 of the 17 commits.

Commits (17):
7863781  refactor core dtypes (MarcoGorelli, Nov 2, 2020)
a67b324  Merge remote-tracking branch 'upstream/master' into refactor-core-dtypes (MarcoGorelli, Nov 2, 2020)
4e21831  revert check (MarcoGorelli, Nov 4, 2020)
68479a1  revert (MarcoGorelli, Nov 4, 2020)
9696dae  Merge remote-tracking branch 'upstream/master' into refactor-core-dtypes (MarcoGorelli, Nov 6, 2020)
8aaebe2  pass list of dtypeobj (MarcoGorelli, Nov 6, 2020)
d2b5bef  Merge remote-tracking branch 'upstream/master' into refactor-core-dtypes (MarcoGorelli, Nov 18, 2020)
5e53457  Merge remote-tracking branch 'upstream/master' into refactor-core-dtypes (MarcoGorelli, Nov 18, 2020)
1374cf8  Merge remote-tracking branch 'upstream/master' into refactor-core-dtypes (MarcoGorelli, Nov 23, 2020)
1d92e4f  Merge remote-tracking branch 'upstream/master' into refactor-core-dtypes (MarcoGorelli, Nov 24, 2020)
a4bb6e8  Merge remote-tracking branch 'upstream/master' into refactor-core-dtypes (MarcoGorelli, Nov 29, 2020)
5241029  Merge remote-tracking branch 'upstream/master' into refactor-core-dtypes (MarcoGorelli, Dec 6, 2020)
9e1366d  coverage (MarcoGorelli, Dec 9, 2020)
b6ab2be  Merge branch 'refactor-core-dtypes' of github.com:MarcoGorelli/pandas… (MarcoGorelli, Dec 9, 2020)
f8ed13e  Merge remote-tracking branch 'upstream/master' into refactor-core-dtypes (MarcoGorelli, Dec 9, 2020)
65c5179  Merge remote-tracking branch 'upstream/master' into refactor-core-dtypes (MarcoGorelli, Jan 3, 2021)
f4a1ff1  Merge branch 'refactor-core-dtypes' of github.com:MarcoGorelli/pandas… (MarcoGorelli, Jan 3, 2021)

2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/accessor.py
@@ -329,7 +329,7 @@ def to_coo(self):
import_optional_dependency("scipy")
from scipy.sparse import coo_matrix

dtype = find_common_type(self._parent.dtypes)
dtype = find_common_type(self._parent.dtypes.to_list())
MarcoGorelli (Member, Author) commented:

this is because find_common_type from pandas/core/dtypes/cast.py requires a list

if isinstance(dtype, SparseDtype):
dtype = dtype.subtype

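
To illustrate that comment, here is a minimal sketch (assuming a pandas version where find_common_type still lives in pandas/core/dtypes/cast.py; the example DataFrame is made up):

```python
# Minimal sketch: DataFrame.dtypes returns a Series, while
# pandas.core.dtypes.cast.find_common_type expects a plain list of dtype
# objects, hence the .to_list() call in the change above.
import pandas as pd
from pandas.core.dtypes.cast import find_common_type

df = pd.DataFrame({"a": [1, 2], "b": [1.5, 2.5]})  # hypothetical frame
common = find_common_type(df.dtypes.to_list())
print(common)  # expected: float64
```
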
44 changes: 25 additions & 19 deletions pandas/core/dtypes/cast.py
@@ -2,6 +2,8 @@
Routines for casting.
"""

from __future__ import annotations

from contextlib import suppress
from datetime import date, datetime, timedelta
from typing import (
@@ -126,12 +128,11 @@ def is_nested_object(obj) -> bool:
This may not be necessarily be performant.

"""
if isinstance(obj, ABCSeries) and is_object_dtype(obj.dtype):

if any(isinstance(v, ABCSeries) for v in obj._values):
return True

return False
return bool(
isinstance(obj, ABCSeries)
and is_object_dtype(obj.dtype)
and any(isinstance(v, ABCSeries) for v in obj._values)
)


def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scalar:
@@ -359,10 +360,11 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj:
from pandas.core.arrays.boolean import BooleanDtype
from pandas.core.arrays.integer import Int64Dtype

if how in ["add", "cumsum", "sum"] and (dtype == np.dtype(bool)):
return np.dtype(np.int64)
elif how in ["add", "cumsum", "sum"] and isinstance(dtype, BooleanDtype):
return Int64Dtype()
if how in ["add", "cumsum", "sum"]:
if dtype == np.dtype(bool):
return np.dtype(np.int64)
elif isinstance(dtype, BooleanDtype):
return Int64Dtype()
return dtype
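
For context, a rough sketch of how this dtype mapping shows up in groupby results (assuming a pandas version with the nullable boolean dtype; the frame and column names here are invented):

```python
# Rough sketch: sum-like aggregations on boolean columns are reported with
# integer result dtypes, matching the mapping in maybe_cast_result_dtype.
import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "key": ["a", "a", "b"],
        "np_bool": np.array([True, False, True]),                    # bool -> int64
        "nullable": pd.array([True, False, True], dtype="boolean"),  # BooleanDtype -> Int64
    }
)
print(df.groupby("key").sum().dtypes)  # expected: int64 and Int64
```
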


@@ -749,8 +751,8 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,

# a 1-element ndarray
if isinstance(val, np.ndarray):
msg = "invalid ndarray passed to infer_dtype_from_scalar"
if val.ndim != 0:
msg = "invalid ndarray passed to infer_dtype_from_scalar"
raise ValueError(msg)

dtype = val.dtype
@@ -1555,7 +1557,7 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
numpy.find_common_type

"""
if len(types) == 0:
if not types:
raise ValueError("no types given")

first = types[0]
@@ -1844,12 +1846,16 @@ def validate_numeric_casting(dtype: np.dtype, value: Scalar) -> None:
------
ValueError
"""
if issubclass(dtype.type, (np.integer, np.bool_)):
if is_float(value) and np.isnan(value):
raise ValueError("Cannot assign nan to integer series")
if (
issubclass(dtype.type, (np.integer, np.bool_))
and is_float(value)
and np.isnan(value)
):
raise ValueError("Cannot assign nan to integer series")

if issubclass(dtype.type, (np.integer, np.floating, complex)) and not issubclass(
dtype.type, np.bool_
if (
issubclass(dtype.type, (np.integer, np.floating, complex))
and not issubclass(dtype.type, np.bool_)
and is_bool(value)
):
if is_bool(value):
raise ValueError("Cannot assign bool to float/integer series")
raise ValueError("Cannot assign bool to float/integer series")
4 changes: 2 additions & 2 deletions pandas/core/dtypes/common.py
@@ -1698,7 +1698,7 @@ def infer_dtype_from_object(dtype):
elif dtype in ["period"]:
raise NotImplementedError

if dtype == "datetime" or dtype == "timedelta":
if dtype in ["datetime", "timedelta"]:
dtype += "64"
try:
return infer_dtype_from_object(getattr(np, dtype))
@@ -1733,7 +1733,7 @@ def _validate_date_like_dtype(dtype) -> None:
typ = np.datetime_data(dtype)[0]
except ValueError as e:
raise TypeError(e) from e
if typ != "generic" and typ != "ns":
if typ not in ["generic", "ns"]:
raise ValueError(
f"{repr(dtype.name)} is too specific of a frequency, "
f"try passing {repr(dtype.type.__name__)}"
7 changes: 3 additions & 4 deletions pandas/core/dtypes/concat.py
@@ -91,10 +91,9 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
# are not coming from Index/Series._values), eg in BlockManager.quantile
arr = array(arr)

if is_extension_array_dtype(dtype):
if isinstance(arr, np.ndarray):
# numpy's astype cannot handle ExtensionDtypes
return array(arr, dtype=dtype, copy=False)
if is_extension_array_dtype(dtype) and isinstance(arr, np.ndarray):
# numpy's astype cannot handle ExtensionDtypes
return array(arr, dtype=dtype, copy=False)
return arr.astype(dtype, copy=False)


17 changes: 8 additions & 9 deletions pandas/core/dtypes/dtypes.py
@@ -423,14 +423,13 @@ def _hash_categories(categories, ordered: Ordered = True) -> int:
categories = list(categories) # breaks if a np.array of categories
cat_array = hash_tuples(categories)
else:
if categories.dtype == "O":
if len({type(x) for x in categories}) != 1:
# TODO: hash_array doesn't handle mixed types. It casts
# everything to a str first, which means we treat
# {'1', '2'} the same as {'1', 2}
# find a better solution
hashed = hash((tuple(categories), ordered))
return hashed
if categories.dtype == "O" and len({type(x) for x in categories}) != 1:
# TODO: hash_array doesn't handle mixed types. It casts
# everything to a str first, which means we treat
# {'1', '2'} the same as {'1', 2}
# find a better solution
hashed = hash((tuple(categories), ordered))
return hashed

if DatetimeTZDtype.is_dtype(categories.dtype):
# Avoid future warning.
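
As a side note on the TODO above, a small sketch of the hash_array limitation it describes (assuming hash_array is importable from pandas.core.util.hashing; the example arrays are made up):

```python
# Small sketch: for mixed-type object arrays, hash_array falls back to
# casting every value to str, so the string "1" and the int 1 hash the
# same; that is why _hash_categories hashes the tuple of categories
# instead when the categories are not all of a single type.
import numpy as np
from pandas.core.util.hashing import hash_array

mixed = np.array(["1", 2], dtype=object)
all_str = np.array(["1", "2"], dtype=object)
print(np.array_equal(hash_array(mixed), hash_array(all_str)))  # expected: True
```
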
@@ -907,7 +906,7 @@ def __hash__(self) -> int:

def __eq__(self, other: Any) -> bool:
if isinstance(other, str):
return other == self.name or other == self.name.title()
return other in [self.name, self.name.title()]

return isinstance(other, PeriodDtype) and self.freq == other.freq

5 changes: 1 addition & 4 deletions pandas/core/dtypes/inference.py
@@ -125,10 +125,7 @@ def is_file_like(obj) -> bool:
if not (hasattr(obj, "read") or hasattr(obj, "write")):
return False

if not hasattr(obj, "__iter__"):
return False

return True
return bool(hasattr(obj, "__iter__"))


def is_re(obj) -> bool:
23 changes: 11 additions & 12 deletions pandas/core/dtypes/missing.py
@@ -358,8 +358,8 @@ def isna_compat(arr, fill_value=np.nan) -> bool:
-------
True if we can fill using this fill_value
"""
dtype = arr.dtype
if isna(fill_value):
dtype = arr.dtype
return not (is_bool_dtype(dtype) or is_integer_dtype(dtype))
return True

@@ -447,9 +447,10 @@ def array_equivalent(
right = right.view("i8")

# if we have structured dtypes, compare first
if left.dtype.type is np.void or right.dtype.type is np.void:
if left.dtype != right.dtype:
return False
if (
left.dtype.type is np.void or right.dtype.type is np.void
) and left.dtype != right.dtype:
return False

return np.array_equal(left, right)

@@ -484,11 +485,11 @@ def _array_equivalent_object(left, right, strict_nan):
if np.any(np.asarray(left_value != right_value)):
return False
except TypeError as err:
if "Cannot compare tz-naive" in str(err):
if "Cannot compare tz-naive" in str(
err
) or "boolean value of NA is ambiguous" in str(err):
# tzawareness compat failure, see GH#28507
return False
elif "boolean value of NA is ambiguous" in str(err):
return False
A maintainer (Member) commented:

i think this is for coverage

raise
return True

@@ -637,8 +638,6 @@ def isna_all(arr: ArrayLike) -> bool:
else:
checker = lambda x: _isna_ndarraylike(x, inf_as_na=INF_AS_NA)

for i in range(0, total_len, chunk_len):
if not checker(arr[i : i + chunk_len]).all():
return False

return True
return all(
checker(arr[i : i + chunk_len]).all() for i in range(0, total_len, chunk_len)
)