Skip to content

REF: implement ensure_can_hold_na #39714

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray) -> np.ndarray:
# upcast (possibly), otherwise we DON'T want to upcast (e.g. if we
# have values, say integers, in the success portion then it's ok to not
# upcast)
new_dtype, _ = maybe_promote(result.dtype, np.nan)
new_dtype = ensure_dtype_can_hold_na(result.dtype)

if new_dtype != result.dtype:
result = result.astype(new_dtype, copy=True)

Expand All @@ -484,7 +485,21 @@ def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray) -> np.ndarray:
return result


def maybe_promote(dtype, fill_value=np.nan):
def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
    """
    If we have a dtype that cannot hold NA values, find the best match that can.

    Parameters
    ----------
    dtype : np.dtype or ExtensionDtype
        The dtype to check.

    Returns
    -------
    np.dtype or ExtensionDtype
        * ExtensionDtype instances are returned unchanged.
        * numpy boolean dtypes (kind "b") become ``np.dtype(object)``.
        * numpy signed/unsigned integer dtypes (kind "i"/"u") become
          ``np.dtype(np.float64)``.
        * any other dtype is returned unchanged.
    """
    if isinstance(dtype, ExtensionDtype):
        # TODO: ExtensionDtype.can_hold_na?
        # Extension dtypes are passed through as-is; no per-dtype check is
        # made here.
        return dtype

    kind = dtype.kind
    if kind == "b":
        # Booleans are promoted to object.
        return np.dtype(object)
    if kind in ("i", "u"):
        # Integers are promoted to float64.
        return np.dtype(np.float64)

    # Every other kind is left untouched.
    return dtype


def maybe_promote(dtype: DtypeObj, fill_value=np.nan):
"""
Find the minimal dtype that can hold both the given dtype and fill_value.

Expand Down Expand Up @@ -565,7 +580,7 @@ def maybe_promote(dtype, fill_value=np.nan):
fill_value = np.timedelta64("NaT", "ns")
else:
fill_value = fv.to_timedelta64()
elif is_datetime64tz_dtype(dtype):
elif isinstance(dtype, DatetimeTZDtype):
if isna(fill_value):
fill_value = NaT
elif not isinstance(fill_value, datetime):
Expand Down
11 changes: 5 additions & 6 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@
from pandas._typing import ArrayLike, DtypeObj, Manager, Shape
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.cast import find_common_type, maybe_promote
from pandas.core.dtypes.cast import ensure_dtype_can_hold_na, find_common_type
from pandas.core.dtypes.common import (
get_dtype,
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
Expand Down Expand Up @@ -225,13 +224,13 @@ def needs_filling(self) -> bool:

@cache_readonly
def dtype(self):
if self.block is None:
blk = self.block
if blk is None:
raise AssertionError("Block is None, no dtype")

if not self.needs_filling:
return self.block.dtype
else:
return get_dtype(maybe_promote(self.block.dtype, self.block.fill_value)[0])
return blk.dtype
return ensure_dtype_can_hold_na(blk.dtype)

@cache_readonly
def is_na(self) -> bool:
Expand Down