Skip to content

CLN: move maybe_casted_values from pandas/core/frame.py to pandas/core/dtypes/cast.py #36985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Oct 10, 2020
66 changes: 65 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@
ABCSeries,
)
from pandas.core.dtypes.inference import is_list_like
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
from pandas.core.dtypes.missing import (
is_valid_nat_for_dtype,
isna,
na_value_for_dtype,
notna,
)

if TYPE_CHECKING:
from pandas import Series
Expand Down Expand Up @@ -439,6 +444,65 @@ def changeit():
return result, False


def maybe_casted_values(index, codes=None):
    """
    Convert an index, given directly or as a pair (level, code), to a 1D array.

    Parameters
    ----------
    index : Index
    codes : sequence of integers (optional)
        Positions to take from `index`. Entries equal to ``-1`` are masked
        and filled with a missing value in the result.

    Returns
    -------
    ExtensionArray or ndarray
        If codes is `None`, the values of `index`.
        If codes is passed, an array obtained by taking from `index` the indices
        contained in `codes`.
    """
    values = index._values
    if not isinstance(index, (ABCPeriodIndex, ABCDatetimeIndex)):
        if values.dtype == np.object_:
            values = lib.maybe_convert_objects(values)

    # without codes there is nothing to take or mask
    if codes is None:
        return values

    # Imported here rather than at the top of the file because a module-level
    # import causes a circular import.
    # NOTE(review): replacing the datetime-like check below with
    # ``needs_i8_conversion(values.dtype) and isinstance(values, ExtensionArray)``
    # was suggested during review, but it broke some tests when tried;
    # to be investigated in a follow-up.
    from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin

    # Coerce so that any integer sequence (e.g. a list) yields an elementwise
    # boolean mask; comparing a plain list to -1 gives a scalar ``False``.
    codes = np.asarray(codes)
    mask = codes == -1

    # we can have situations where the whole mask is -1,
    # meaning there is nothing found in codes, so make all nan's
    if mask.size > 0 and mask.all():
        dtype = index.dtype
        fill_value = na_value_for_dtype(dtype)
        values = construct_1d_arraylike_from_scalar(fill_value, len(mask), dtype)
    else:
        values = values.take(codes)

    # TODO(https://github.com/pandas-dev/pandas/issues/24206)
    # Push this into maybe_upcast_putmask?
    # We can't pass EAs there right now. Looks a bit
    # complicated.
    # So we unbox the ndarray_values, op, re-box.
    values_type = type(values)
    values_dtype = values.dtype
    is_datetimelike = isinstance(values, DatetimeLikeArrayMixin)

    if is_datetimelike:
        values = values._data  # TODO: can we de-kludge yet?

    if mask.any():
        values, _ = maybe_upcast_putmask(values, mask, np.nan)

    if is_datetimelike:
        # re-box into the original array type with its original dtype
        values = values_type(values, dtype=values_dtype)  # type: ignore[call-arg]

    return values


def maybe_promote(dtype, fill_value=np.nan):
"""
Find the minimal dtype that can hold both the given dtype and fill_value.
Expand Down
49 changes: 3 additions & 46 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@
infer_dtype_from_scalar,
invalidate_string_dtypes,
maybe_cast_to_datetime,
maybe_casted_values,
maybe_convert_platform,
maybe_downcast_to_dtype,
maybe_infer_to_datetimelike,
maybe_upcast,
maybe_upcast_putmask,
validate_numeric_casting,
)
from pandas.core.dtypes.common import (
Expand All @@ -114,7 +114,7 @@
needs_i8_conversion,
pandas_dtype,
)
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
from pandas.core.dtypes.missing import isna, notna

from pandas.core import algorithms, common as com, nanops, ops
from pandas.core.accessor import CachedAccessor
Expand All @@ -125,15 +125,12 @@
transform,
)
from pandas.core.arrays import Categorical, ExtensionArray
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
from pandas.core.arrays.sparse import SparseFrameAccessor
from pandas.core.construction import extract_array
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.indexes import base as ibase
from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.multi import MultiIndex, maybe_droplevels
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable
from pandas.core.internals import BlockManager
from pandas.core.internals.construction import (
Expand Down Expand Up @@ -4847,46 +4844,6 @@ class max type
else:
new_obj = self.copy()

def _maybe_casted_values(index, labels=None):
    """
    Return the values of ``index`` as a 1D array, optionally taken at ``labels``.

    Parameters
    ----------
    index : Index
        Index whose ``_values`` are extracted. Object-dtype values are passed
        through ``lib.maybe_convert_objects`` unless ``index`` is a
        PeriodIndex or DatetimeIndex.
    labels : optional
        Positions to take from the values; entries equal to ``-1`` are masked
        and filled. Presumably an integer ndarray -- the code relies on
        ``labels == -1`` producing an array with ``size``/``all``/``any``.

    Returns
    -------
    The (possibly converted) values when ``labels`` is None, otherwise the
    taken values with masked positions filled.
    """
    result = index._values
    if not isinstance(index, (PeriodIndex, DatetimeIndex)) and (
        result.dtype == np.object_
    ):
        result = lib.maybe_convert_objects(result)

    # no labels: nothing to take or mask
    if labels is None:
        return result

    # if we have the labels, extract the values with a mask
    missing = labels == -1

    # we can have situations where the whole mask is -1,
    # meaning there is nothing found in labels, so make all nan's
    nothing_found = missing.size > 0 and missing.all()
    if nothing_found:
        index_dtype = index.dtype
        result = construct_1d_arraylike_from_scalar(
            na_value_for_dtype(index_dtype), len(missing), index_dtype
        )
    else:
        result = result.take(labels)

    # TODO(https://github.com/pandas-dev/pandas/issues/24206)
    # Push this into maybe_upcast_putmask?
    # We can't pass EAs there right now. Looks a bit
    # complicated.
    # So we unbox the ndarray_values, op, re-box.
    boxed_type = type(result)
    boxed_dtype = result.dtype
    needs_rebox = issubclass(boxed_type, DatetimeLikeArray)

    if needs_rebox:
        result = result._data  # TODO: can we de-kludge yet?

    if missing.any():
        result, _ = maybe_upcast_putmask(result, missing, np.nan)

    if needs_rebox:
        # restore the original array type and dtype after the ndarray op
        result = boxed_type(result, dtype=boxed_dtype)

    return result

new_index = ibase.default_index(len(new_obj))
if level is not None:
if not isinstance(level, (tuple, list)):
Expand Down Expand Up @@ -4929,7 +4886,7 @@ def _maybe_casted_values(index, labels=None):
name_lst += [col_fill] * missing
name = tuple(name_lst)
# to ndarray and maybe infer different dtype
level_values = _maybe_casted_values(lev, lab)
level_values = maybe_casted_values(lev, lab)
new_obj.insert(0, name, level_values)

new_obj.index = new_index
Expand Down