From 030b90bab9ebc09ff63f13b6c5837c0c4821bb74 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 7 Oct 2020 21:26:56 +0000 Subject: [PATCH 1/9] move core/frame.py -> core/dtype/cast.py --- pandas/core/dtypes/cast.py | 60 +++++++++++++++++++++++++++++++++++++- pandas/core/frame.py | 49 ++----------------------------- 2 files changed, 62 insertions(+), 47 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 48391ab7d9373..57ba9a480c9cd 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -73,7 +73,12 @@ ABCSeries, ) from pandas.core.dtypes.inference import is_list_like -from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna +from pandas.core.dtypes.missing import ( + is_valid_nat_for_dtype, + isna, + na_value_for_dtype, + notna, +) if TYPE_CHECKING: from pandas import Series @@ -439,6 +444,59 @@ def changeit(): return result, False +def maybe_casted_values(index, codes=None): + """ + Convert an index, given directly or as a pair (level, code), to a 1D array. + Parameters + ---------- + index : Index + codes : sequence of integers (optional) + Returns + ------- + ExtensionArray or ndarray + If codes is `None`, the values of `index`. + If codes is passed, an array obtained by taking from `index` the indices + contained in `codes`. + """ + + values = index._values + if not isinstance(index, (ABCPeriodIndex, ABCDatetimeIndex)): + if values.dtype == np.object_: + values = lib.maybe_convert_objects(values) + + # if we have the codes, extract the values with a mask + if codes is not None: + mask = codes == -1 + + # we can have situations where the whole mask is -1, + # meaning there is nothing found in codes, so make all nan's + if mask.size > 0 and mask.all(): + dtype = index.dtype + fill_value = na_value_for_dtype(dtype) + values = construct_1d_arraylike_from_scalar(fill_value, len(mask), dtype) + else: + values = values.take(codes) + + # TODO(https://github.com/pandas-dev/pandas/issues/24206) + # Push this into maybe_upcast_putmask? + # We can't pass EAs there right now. Looks a bit + # complicated. + # So we unbox the ndarray_values, op, re-box. + values_type = type(values) + values_dtype = values.dtype + + if issubclass(values_type, ABCDatetimeArray): + values = values._data # TODO: can we de-kludge yet? + + if mask.any(): + values, _ = maybe_upcast_putmask(values, mask, np.nan) + + if issubclass(values_type, ABCDatetimeArray): + values = values_type(values, dtype=values_dtype) + + return values + + def maybe_promote(dtype, fill_value=np.nan): """ Find the minimal dtype that can hold both the given dtype and fill_value. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8a330e3d595cf..4783e8c3ce427 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -83,11 +83,11 @@ infer_dtype_from_scalar, invalidate_string_dtypes, maybe_cast_to_datetime, + maybe_casted_values, maybe_convert_platform, maybe_downcast_to_dtype, maybe_infer_to_datetimelike, maybe_upcast, - maybe_upcast_putmask, validate_numeric_casting, ) from pandas.core.dtypes.common import ( @@ -114,7 +114,7 @@ needs_i8_conversion, pandas_dtype, ) -from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna +from pandas.core.dtypes.missing import isna, notna from pandas.core import algorithms, common as com, nanops, ops from pandas.core.accessor import CachedAccessor @@ -125,15 +125,12 @@ transform, ) from pandas.core.arrays import Categorical, ExtensionArray -from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray from pandas.core.arrays.sparse import SparseFrameAccessor from pandas.core.construction import extract_array from pandas.core.generic import NDFrame, _shared_docs from pandas.core.indexes import base as ibase from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences -from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.multi import MultiIndex, maybe_droplevels -from pandas.core.indexes.period import PeriodIndex from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable from pandas.core.internals import BlockManager from pandas.core.internals.construction import ( @@ -4847,46 +4844,6 @@ class max type else: new_obj = self.copy() - def _maybe_casted_values(index, labels=None): - values = index._values - if not isinstance(index, (PeriodIndex, DatetimeIndex)): - if values.dtype == np.object_: - values = lib.maybe_convert_objects(values) - - # if we have the labels, extract the values with a mask - if labels is not None: - mask = labels == -1 - - # we can have situations where the whole mask is -1, - # meaning there is nothing found in labels, so make all nan's - if mask.size > 0 and mask.all(): - dtype = index.dtype - fill_value = na_value_for_dtype(dtype) - values = construct_1d_arraylike_from_scalar( - fill_value, len(mask), dtype - ) - else: - values = values.take(labels) - - # TODO(https://github.com/pandas-dev/pandas/issues/24206) - # Push this into maybe_upcast_putmask? - # We can't pass EAs there right now. Looks a bit - # complicated. - # So we unbox the ndarray_values, op, re-box. - values_type = type(values) - values_dtype = values.dtype - - if issubclass(values_type, DatetimeLikeArray): - values = values._data # TODO: can we de-kludge yet? - - if mask.any(): - values, _ = maybe_upcast_putmask(values, mask, np.nan) - - if issubclass(values_type, DatetimeLikeArray): - values = values_type(values, dtype=values_dtype) - - return values - new_index = ibase.default_index(len(new_obj)) if level is not None: if not isinstance(level, (tuple, list)): @@ -4929,7 +4886,7 @@ def _maybe_casted_values(index, labels=None): name_lst += [col_fill] * missing name = tuple(name_lst) # to ndarray and maybe infer different dtype - level_values = _maybe_casted_values(lev, lab) + level_values = maybe_casted_values(lev, lab) new_obj.insert(0, name, level_values) new_obj.index = new_index From fc28afa01148783b89afa6ac1795760a89488f63 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 9 Oct 2020 03:20:00 +0000 Subject: [PATCH 2/9] feedback: fix docstring formatting --- pandas/core/dtypes/cast.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 57ba9a480c9cd..c3d7a9cc6769b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -447,10 +447,12 @@ def changeit(): def maybe_casted_values(index, codes=None): """ Convert an index, given directly or as a pair (level, code), to a 1D array. + Parameters ---------- index : Index codes : sequence of integers (optional) + Returns ------- ExtensionArray or ndarray From 3cce7057b27c0678e78a181356aa8effd102806c Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 9 Oct 2020 16:55:55 +0000 Subject: [PATCH 3/9] add DatetimeLikeArrayMixin import in maybe_casted_values --- pandas/core/dtypes/cast.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c3d7a9cc6769b..5745a63e5e2bc 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -487,13 +487,15 @@ def maybe_casted_values(index, codes=None): values_type = type(values) values_dtype = values.dtype - if issubclass(values_type, ABCDatetimeArray): + from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin + + if issubclass(values_type, DatetimeLikeArrayMixin): values = values._data # TODO: can we de-kludge yet? if mask.any(): values, _ = maybe_upcast_putmask(values, mask, np.nan) - if issubclass(values_type, ABCDatetimeArray): + if issubclass(values_type, DatetimeLikeArrayMixin): values = values_type(values, dtype=values_dtype) return values From 87af5e2ce20af6076f0fd5b588717a228aa7de8c Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 9 Oct 2020 17:29:25 +0000 Subject: [PATCH 4/9] attempt to silence mypy --- pandas/core/dtypes/cast.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5745a63e5e2bc..d7867f8c90bb1 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -487,15 +487,17 @@ def maybe_casted_values(index, codes=None): values_type = type(values) values_dtype = values.dtype - from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin + from pandas.core.arrays.datetimelike import ( + DatetimeLikeArrayMixin as DatetimeLikeArray, + ) - if issubclass(values_type, DatetimeLikeArrayMixin): + if issubclass(values_type, DatetimeLikeArray): values = values._data # TODO: can we de-kludge yet? if mask.any(): values, _ = maybe_upcast_putmask(values, mask, np.nan) - if issubclass(values_type, DatetimeLikeArrayMixin): + if issubclass(values_type, DatetimeLikeArray): values = values_type(values, dtype=values_dtype) return values From 340af7a0eac20262ac77a187f144654c3ca62fc7 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 9 Oct 2020 17:39:21 +0000 Subject: [PATCH 5/9] rewrite if statement to appease mypy --- pandas/core/dtypes/cast.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d7867f8c90bb1..39802e641cce4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -497,7 +497,9 @@ def maybe_casted_values(index, codes=None): if mask.any(): values, _ = maybe_upcast_putmask(values, mask, np.nan) - if issubclass(values_type, DatetimeLikeArray): + if not issubclass(values_type, DatetimeLikeArray): + pass + else: values = values_type(values, dtype=values_dtype) return values From 2b6e15f062dbf71ffee4053c845b2aa7b18a8638 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 10 Oct 2020 00:45:45 +0000 Subject: [PATCH 6/9] feedback: replace issubclass(., DatetimeLikeArray) --- pandas/core/dtypes/cast.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 39802e641cce4..605af85a879c4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -55,6 +55,7 @@ is_timedelta64_dtype, is_timedelta64_ns_dtype, is_unsigned_integer_dtype, + needs_i8_conversion, pandas_dtype, ) from pandas.core.dtypes.dtypes import ( @@ -487,19 +488,13 @@ def maybe_casted_values(index, codes=None): values_type = type(values) values_dtype = values.dtype - from pandas.core.arrays.datetimelike import ( - DatetimeLikeArrayMixin as DatetimeLikeArray, - ) - - if issubclass(values_type, DatetimeLikeArray): + if needs_i8_conversion(values.dtype) and isinstance(values, ExtensionArray): values = values._data # TODO: can we de-kludge yet? if mask.any(): values, _ = maybe_upcast_putmask(values, mask, np.nan) - if not issubclass(values_type, DatetimeLikeArray): - pass - else: + if needs_i8_conversion(values.dtype) and isinstance(values, ExtensionArray): values = values_type(values, dtype=values_dtype) return values From b5cd73ccfe352639ef831a2ca5d302f74ffe6b2b Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 10 Oct 2020 05:03:48 +0000 Subject: [PATCH 7/9] add missing import --- pandas/core/dtypes/cast.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 605af85a879c4..89f6f759d3956 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -488,12 +488,16 @@ def maybe_casted_values(index, codes=None): values_type = type(values) values_dtype = values.dtype - if needs_i8_conversion(values.dtype) and isinstance(values, ExtensionArray): + from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin + + if isinstance(values, DatetimeLikeArrayMixin): values = values._data # TODO: can we de-kludge yet? if mask.any(): values, _ = maybe_upcast_putmask(values, mask, np.nan) + from pandas.core.arrays import ExtensionArray + if needs_i8_conversion(values.dtype) and isinstance(values, ExtensionArray): values = values_type(values, dtype=values_dtype) From 20c26206a63d297aeca3d3b0b034bb6f5b295281 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 10 Oct 2020 16:36:37 +0000 Subject: [PATCH 8/9] fix mypy error with # type: ignore --- pandas/core/dtypes/cast.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 89f6f759d3956..6be947efca193 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -55,7 +55,6 @@ is_timedelta64_dtype, is_timedelta64_ns_dtype, is_unsigned_integer_dtype, - needs_i8_conversion, pandas_dtype, ) from pandas.core.dtypes.dtypes import ( @@ -496,10 +495,8 @@ def maybe_casted_values(index, codes=None): if mask.any(): values, _ = maybe_upcast_putmask(values, mask, np.nan) - from pandas.core.arrays import ExtensionArray - - if needs_i8_conversion(values.dtype) and isinstance(values, ExtensionArray): - values = values_type(values, dtype=values_dtype) + if issubclass(values_type, DatetimeLikeArrayMixin): + values = values_type(values, dtype=values_dtype) # type: ignore return values From d6581535c915781d9cc3a65c0e59d3cd63c14b54 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 10 Oct 2020 17:10:57 +0000 Subject: [PATCH 9/9] add error code to '# type: ignore' --- pandas/core/dtypes/cast.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6be947efca193..4b746a4ee9b21 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -496,7 +496,9 @@ def maybe_casted_values(index, codes=None): values, _ = maybe_upcast_putmask(values, mask, np.nan) if issubclass(values_type, DatetimeLikeArrayMixin): - values = values_type(values, dtype=values_dtype) # type: ignore + values = values_type( + values, dtype=values_dtype + ) # type: ignore[call-arg] return values