From 4850b9a6e303bea0b935fc6ba4b02221eea1e0e5 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 2 Jan 2021 15:07:33 -0800 Subject: [PATCH 1/2] REF: simplify Index.__new__ --- pandas/core/indexes/base.py | 46 +++++++++++++++-------------- pandas/core/indexes/datetimelike.py | 2 ++ 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b0c89000a53a9..2db803e5c1b19 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6,6 +6,7 @@ TYPE_CHECKING, Any, Callable, + Dict, FrozenSet, Hashable, List, @@ -131,6 +132,11 @@ _Identity = NewType("_Identity", object) +def disallow_kwargs(kwargs: Dict[str, Any]): + if kwargs: + raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") + + def _new_Index(cls, d): """ This is called upon unpickling, rather than the default which doesn't @@ -296,13 +302,19 @@ def __new__( return result.astype(dtype, copy=False) return result - if is_ea_or_datetimelike_dtype(dtype): + elif is_ea_or_datetimelike_dtype(dtype): # non-EA dtype indexes have special casting logic, so we punt here klass = cls._dtype_to_subclass(dtype) if klass is not Index: return klass(data, dtype=dtype, copy=copy, name=name, **kwargs) - if is_ea_or_datetimelike_dtype(data_dtype): + ea_cls = dtype.construct_array_type() + data = ea_cls._from_sequence(data, dtype=dtype, copy=copy) + data = np.asarray(data, dtype=object) + disallow_kwargs(kwargs) + return Index._simple_new(data, name=name) + + elif is_ea_or_datetimelike_dtype(data_dtype): klass = cls._dtype_to_subclass(data_dtype) if klass is not Index: result = klass(data, copy=copy, name=name, **kwargs) @@ -310,18 +322,9 @@ def __new__( return result.astype(dtype, copy=False) return result - # extension dtype - if is_extension_array_dtype(data_dtype) or is_extension_array_dtype(dtype): - if not (dtype is None or is_object_dtype(dtype)): - # coerce to the provided dtype - ea_cls = dtype.construct_array_type() - data = ea_cls._from_sequence(data, dtype=dtype, copy=False) - else: - data = np.asarray(data, dtype=object) - - # coerce to the object dtype - data = data.astype(object) - return Index(data, dtype=object, copy=copy, name=name, **kwargs) + data = np.array(data, dtype=object, copy=copy) + disallow_kwargs(kwargs) + return Index._simple_new(data, name=name) # index-like elif isinstance(data, (np.ndarray, Index, ABCSeries)): @@ -333,7 +336,7 @@ def __new__( # should not be coerced # GH 11836 data = _maybe_cast_with_dtype(data, dtype, copy) - dtype = data.dtype # TODO: maybe not for object? + dtype = data.dtype if data.dtype.kind in ["i", "u", "f"]: # maybe coerce to a sub-class @@ -342,16 +345,15 @@ def __new__( arr = com.asarray_tuplesafe(data, dtype=object) if dtype is None: - new_data = _maybe_cast_data_without_dtype(arr) - new_dtype = new_data.dtype - return cls( - new_data, dtype=new_dtype, copy=copy, name=name, **kwargs - ) + arr = _maybe_cast_data_without_dtype(arr) + dtype = arr.dtype + + if kwargs: + return cls(arr, dtype, copy=copy, name=name, **kwargs) klass = cls._dtype_to_subclass(arr.dtype) arr = klass._ensure_array(arr, dtype, copy) - if kwargs: - raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") + disallow_kwargs(kwargs) return klass._simple_new(arr, name) elif is_scalar(data): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 249e9707be328..450c786f1fbc1 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -634,6 +634,8 @@ class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index): _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique + _ensure_array = Index._ensure_array + def _with_freq(self, freq): arr = self._data._with_freq(freq) return type(self)._simple_new(arr, name=self.name) From 78b9456e4bd74b9c644d4fd6c0533b3ea5755c18 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 2 Jan 2021 17:44:03 -0800 Subject: [PATCH 2/2] mypy fixup --- pandas/core/indexes/datetimelike.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 450c786f1fbc1..7d214829b1871 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -623,7 +623,7 @@ def _convert_arr_indexer(self, keyarr): return com.asarray_tuplesafe(keyarr) -class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index): +class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin): """ Mixin class for methods shared by DatetimeIndex and TimedeltaIndex, but not PeriodIndex @@ -634,8 +634,6 @@ class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index): _is_monotonic_decreasing = Index.is_monotonic_decreasing _is_unique = Index.is_unique - _ensure_array = Index._ensure_array - def _with_freq(self, freq): arr = self._data._with_freq(freq) return type(self)._simple_new(arr, name=self.name) @@ -818,11 +816,7 @@ def _union(self, other, sort): i8self = Int64Index._simple_new(self.asi8) i8other = Int64Index._simple_new(other.asi8) i8result = i8self._union(i8other, sort=sort) - # pandas\core\indexes\datetimelike.py:887: error: Unexpected - # keyword argument "freq" for "DatetimeTimedeltaMixin" [call-arg] - result = type(self)( - i8result, dtype=self.dtype, freq="infer" # type: ignore[call-arg] - ) + result = type(self)(i8result, dtype=self.dtype, freq="infer") return result # --------------------------------------------------------------------