TYP: overload fillna pandas-dev#40737 (pandas-dev#40887)

LarWong · MarcoGorelli · yeshsurya · commit 630832b3addd · 2021-05-06T14:25:04.000+05:30
* TYP: Added overloads for fillna() in frame.py and series.py
* TYP: Added overloads for fillna() in frame.py and series.py pandas-dev#40737
* TYP: Added fillna() overloads to generic.py pandas-dev#40727
* TYP: removed generic overloads pandas-dev#40737
* fixed redundant cast error
* reverting prior changes
* remove cast again
* removed unnecessary overloads in frame.py and series.py
* fixed overloads
* reverted value typing
* remove extra types (let's keep this to overloads)

Co-authored-by: Marco Gorelli <marcogorelli@protonmail.com>
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -60,7 +60,7 @@
 from pandas.core.dtypes.cast import (
     convert_dtypes,
     maybe_box_native,
-    maybe_cast_pointwise_result,
+    maybe_cast_result,
     validate_numeric_casting,
 )
 from pandas.core.dtypes.common import (
@@ -100,7 +100,6 @@
 import pandas.core.common as com
 from pandas.core.construction import (
     create_series_with_explicit_dtype,
-    ensure_wrapped_if_datetimelike,
     extract_array,
     is_empty_data,
     sanitize_array,
@@ -113,15 +112,15 @@
 from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
 from pandas.core.indexes.api import (
     CategoricalIndex,
-    DatetimeIndex,
     Float64Index,
     Index,
     MultiIndex,
-    PeriodIndex,
-    TimedeltaIndex,
     ensure_index,
 )
 import pandas.core.indexes.base as ibase
+from pandas.core.indexes.datetimes import DatetimeIndex
+from pandas.core.indexes.period import PeriodIndex
+from pandas.core.indexes.timedeltas import TimedeltaIndex
 from pandas.core.indexing import check_bool_indexer
 from pandas.core.internals import (
     SingleArrayManager,
@@ -864,7 +863,7 @@ def take(self, indices, axis=0, is_copy=None, **kwargs) -> Series:
         result = self._constructor(new_values, index=new_index, fastpath=True)
         return result.__finalize__(self, method="take")
 
-    def _take_with_is_copy(self, indices, axis=0) -> Series:
+    def _take_with_is_copy(self, indices, axis=0):
         """
         Internal version of the `take` method that sets the `_is_copy`
         attribute to keep track of the parent dataframe (using in indexing
@@ -1020,7 +1019,7 @@ def _get_value(self, label, takeable: bool = False):
         loc = self.index.get_loc(label)
         return self.index._get_values_for_loc(self, loc, label)
 
-    def __setitem__(self, key, value) -> None:
+    def __setitem__(self, key, value):
         key = com.apply_if_callable(key, self)
         cacher_needs_updating = self._check_is_chained_assignment_possible()
 
@@ -1059,7 +1058,7 @@ def __setitem__(self, key, value) -> None:
         if cacher_needs_updating:
             self._maybe_update_cacher()
 
-    def _set_with_engine(self, key, value) -> None:
+    def _set_with_engine(self, key, value):
         # fails with AttributeError for IntervalIndex
         loc = self.index._engine.get_loc(key)
         # error: Argument 1 to "validate_numeric_casting" has incompatible type
@@ -1095,15 +1094,15 @@ def _set_with(self, key, value):
             else:
                 self.loc[key] = value
 
-    def _set_labels(self, key, value) -> None:
+    def _set_labels(self, key, value):
         key = com.asarray_tuplesafe(key)
         indexer: np.ndarray = self.index.get_indexer(key)
         mask = indexer == -1
         if mask.any():
             raise KeyError(f"{key[mask]} not in index")
         self._set_values(indexer, value)
 
-    def _set_values(self, key, value) -> None:
+    def _set_values(self, key, value):
         if isinstance(key, Series):
             key = key._values
 
@@ -1892,7 +1891,7 @@ def count(self, level=None):
         2
         """
         if level is None:
-            return notna(self._values).sum().astype("int64")
+            return notna(self._values).sum()
         else:
             warnings.warn(
                 "Using the level keyword in DataFrame and Series aggregations is "
@@ -1994,12 +1993,15 @@ def unique(self) -> ArrayLike:
         ['2016-01-01 00:00:00-05:00']
         Length: 1, dtype: datetime64[ns, US/Eastern]
 
-        An Categorical will return categories in the order of
-        appearance and with the same dtype.
+        An unordered Categorical will return categories in the order of
+        appearance.
 
         >>> pd.Series(pd.Categorical(list('baabc'))).unique()
         ['b', 'a', 'c']
-        Categories (3, object): ['a', 'b', 'c']
+        Categories (3, object): ['b', 'a', 'c']
+
+        An ordered Categorical preserves the category ordering.
+
         >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
         ...                          ordered=True)).unique()
         ['b', 'a', 'c']
@@ -2754,15 +2756,13 @@ def __rmatmul__(self, other):
         return self.dot(np.transpose(other))
 
     @doc(base.IndexOpsMixin.searchsorted, klass="Series")
-    def searchsorted(self, value, side="left", sorter=None) -> np.ndarray:
+    def searchsorted(self, value, side="left", sorter=None):
         return algorithms.searchsorted(self._values, value, side=side, sorter=sorter)
 
     # -------------------------------------------------------------------
     # Combination
 
-    def append(
-        self, to_append, ignore_index: bool = False, verify_integrity: bool = False
-    ):
+    def append(self, to_append, ignore_index=False, verify_integrity=False):
         """
         Concatenate two or more Series.
 
@@ -2846,7 +2846,7 @@ def append(
             to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity
         )
 
-    def _binop(self, other: Series, func, level=None, fill_value=None):
+    def _binop(self, other, func, level=None, fill_value=None):
         """
         Perform generic binary operation with optional fill value.
 
@@ -2873,7 +2873,7 @@ def _binop(self, other: Series, func, level=None, fill_value=None):
         if not self.index.equals(other.index):
             this, other = self.align(other, level=level, join="outer", copy=False)
 
-        this_vals, other_vals = ops.fill_binop(this._values, other._values, fill_value)
+        this_vals, other_vals = ops.fill_binop(this.values, other.values, fill_value)
 
         with np.errstate(all="ignore"):
             result = func(this_vals, other_vals)
@@ -3071,24 +3071,22 @@ def combine(self, other, func, fill_value=None) -> Series:
             # so do this element by element
             new_index = self.index.union(other.index)
             new_name = ops.get_op_result_name(self, other)
-            new_values = np.empty(len(new_index), dtype=object)
-            for i, idx in enumerate(new_index):
+            new_values = []
+            for idx in new_index:
                 lv = self.get(idx, fill_value)
                 rv = other.get(idx, fill_value)
                 with np.errstate(all="ignore"):
-                    new_values[i] = func(lv, rv)
+                    new_values.append(func(lv, rv))
         else:
             # Assume that other is a scalar, so apply the function for
             # each element in the Series
             new_index = self.index
-            new_values = np.empty(len(new_index), dtype=object)
             with np.errstate(all="ignore"):
-                new_values[:] = [func(lv, other) for lv in self._values]
+                new_values = [func(lv, other) for lv in self._values]
             new_name = self.name
 
-        # try_float=False is to match _aggregate_series_pure_python
-        npvalues = lib.maybe_convert_objects(new_values, try_float=False)
-        res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
+        res_values = sanitize_array(new_values, None)
+        res_values = maybe_cast_result(res_values, self.dtype, same_dtype=False)
         return self._constructor(res_values, index=new_index, name=new_name)
 
     def combine_first(self, other) -> Series:
@@ -3611,7 +3609,7 @@ def argsort(self, axis=0, kind="quicksort", order=None) -> Series:
 
         Returns
         -------
-        Series[np.intp]
+        Series
             Positions of values within the sort order with -1 indicating
             nan values.
 
@@ -3732,7 +3730,7 @@ def nlargest(self, n=5, keep="first") -> Series:
         """
         return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()
 
-    def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
+    def nsmallest(self, n=5, keep="first") -> Series:
         """
         Return the smallest `n` elements.
 
@@ -3944,7 +3942,7 @@ def explode(self, ignore_index: bool = False) -> Series:
 
         return self._constructor(values, index=index, name=self.name)
 
-    def unstack(self, level=-1, fill_value=None) -> DataFrame:
+    def unstack(self, level=-1, fill_value=None):
         """
         Unstack, also known as pivot, Series with MultiIndex to produce DataFrame.
 
@@ -4169,8 +4167,7 @@ def apply(
             Python function or NumPy ufunc to apply.
         convert_dtype : bool, default True
             Try to find better dtype for elementwise function results. If
-            False, leave as dtype=object. Note that the dtype is always
-            preserved for extension array dtypes, such as Categorical.
+            False, leave as dtype=object.
         args : tuple
             Positional arguments passed to func after the series value.
         **kwargs
@@ -4190,7 +4187,7 @@ def apply(
         Notes
         -----
         Functions that mutate the passed object can produce unexpected
-        behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
+        behavior or errors and are not supported. See :ref:`udf-mutation`
         for more details.
 
         Examples
@@ -4297,11 +4294,7 @@ def _reduce(
             with np.errstate(all="ignore"):
                 return op(delegate, skipna=skipna, **kwds)
 
-    def _reindex_indexer(
-        self, new_index: Index | None, indexer: np.ndarray | None, copy: bool
-    ) -> Series:
-        # Note: new_index is None iff indexer is None
-        # if not None, indexer is np.intp
+    def _reindex_indexer(self, new_index, indexer, copy):
         if indexer is None:
             if copy:
                 return self.copy()
@@ -4319,9 +4312,8 @@ def _needs_reindex_multi(self, axes, method, level) -> bool:
         """
         return False
 
-    # error: Cannot determine type of 'align'
     @doc(
-        NDFrame.align,  # type: ignore[has-type]
+        NDFrame.align,
         klass=_shared_doc_kwargs["klass"],
         axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
     )
@@ -4473,9 +4465,8 @@ def set_axis(self, labels, axis: Axis = ..., inplace: bool = ...) -> Series | No
     def set_axis(self, labels, axis: Axis = 0, inplace: bool = False):
         return super().set_axis(labels, axis=axis, inplace=inplace)
 
-    # error: Cannot determine type of 'reindex'
     @doc(
-        NDFrame.reindex,  # type: ignore[has-type]
+        NDFrame.reindex,
         klass=_shared_doc_kwargs["klass"],
         axes=_shared_doc_kwargs["axes"],
         optional_labels=_shared_doc_kwargs["optional_labels"],
@@ -4705,8 +4696,7 @@ def fillna(
     ) -> Series | None:
         ...
 
-    # error: Cannot determine type of 'fillna'
-    @doc(NDFrame.fillna, **_shared_doc_kwargs)  # type: ignore[has-type]
+    @doc(NDFrame.fillna, **_shared_doc_kwargs)
     def fillna(
         self,
         value=None,
@@ -4752,9 +4742,8 @@ def pop(self, item: Hashable) -> Any:
         """
         return super().pop(item=item)
 
-    # error: Cannot determine type of 'replace'
     @doc(
-        NDFrame.replace,  # type: ignore[has-type]
+        NDFrame.replace,
         klass=_shared_doc_kwargs["klass"],
         inplace=_shared_doc_kwargs["inplace"],
         replace_iloc=_shared_doc_kwargs["replace_iloc"],
@@ -4802,8 +4791,7 @@ def _replace_single(self, to_replace, method: str, inplace: bool, limit):
 
         return result
 
-    # error: Cannot determine type of 'shift'
-    @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"])
     def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> Series:
         return super().shift(
             periods=periods, freq=freq, axis=axis, fill_value=fill_value
@@ -5038,23 +5026,19 @@ def _convert_dtypes(
             result = input_series.copy()
         return result
 
-    # error: Cannot determine type of 'isna'
-    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
     def isna(self) -> Series:
         return generic.NDFrame.isna(self)
 
-    # error: Cannot determine type of 'isna'
-    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])
     def isnull(self) -> Series:
         return super().isnull()
 
-    # error: Cannot determine type of 'notna'
-    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])
     def notna(self) -> Series:
         return super().notna()
 
-    # error: Cannot determine type of 'notna'
-    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
+    @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])
     def notnull(self) -> Series:
         return super().notnull()
 
@@ -5149,8 +5133,7 @@ def dropna(self, axis=0, inplace=False, how=None):
     # ----------------------------------------------------------------------
     # Time series-oriented methods
 
-    # error: Cannot determine type of 'asfreq'
-    @doc(NDFrame.asfreq, **_shared_doc_kwargs)  # type: ignore[has-type]
+    @doc(NDFrame.asfreq, **_shared_doc_kwargs)
     def asfreq(
         self,
         freq,
@@ -5167,8 +5150,7 @@ def asfreq(
             fill_value=fill_value,
         )
 
-    # error: Cannot determine type of 'resample'
-    @doc(NDFrame.resample, **_shared_doc_kwargs)  # type: ignore[has-type]
+    @doc(NDFrame.resample, **_shared_doc_kwargs)
     def resample(
         self,
         rule,
@@ -5313,8 +5295,6 @@ def _arith_method(self, other, op):
 
         lvalues = self._values
         rvalues = extract_array(other, extract_numpy=True, extract_range=True)
-        rvalues = ops.maybe_prepare_scalar_for_op(rvalues, lvalues.shape)
-        rvalues = ensure_wrapped_if_datetimelike(rvalues)
 
         with np.errstate(all="ignore"):
             result = ops.arithmetic_op(lvalues, rvalues, op)