pandas-dev
diff --git a/‎ci/code_checks.sh
+7-4 b/‎ci/code_checks.sh
+7-4
diff --git a/‎doc/source/user_guide/missing_data.rst
-26 b/‎doc/source/user_guide/missing_data.rst
-26
diff --git a/‎doc/source/whatsnew/v1.2.0.rst
+4-1 b/‎doc/source/whatsnew/v1.2.0.rst
+4-1
diff --git a/‎pandas/_typing.py
+1-1 b/‎pandas/_typing.py
+1-1
diff --git a/‎pandas/core/algorithms.py
+3-4 b/‎pandas/core/algorithms.py
+3-4
diff --git a/‎pandas/core/array_algos/replace.py
+95 b/‎pandas/core/array_algos/replace.py
+95
diff --git a/‎pandas/core/arrays/categorical.py
+14-1 b/‎pandas/core/arrays/categorical.py
+14-1
diff --git a/‎pandas/core/arrays/datetimelike.py
+2-3 b/‎pandas/core/arrays/datetimelike.py
+2-3
diff --git a/‎pandas/core/construction.py
+4-2 b/‎pandas/core/construction.py
+4-2
diff --git a/‎pandas/core/dtypes/cast.py
+2-2 b/‎pandas/core/dtypes/cast.py
+2-2
diff --git a/‎pandas/core/dtypes/dtypes.py
+2-2 b/‎pandas/core/dtypes/dtypes.py
+2-2
diff --git a/‎pandas/core/generic.py
-14 b/‎pandas/core/generic.py
-14
diff --git a/‎pandas/core/groupby/categorical.py
+4-2 b/‎pandas/core/groupby/categorical.py
+4-2
diff --git a/‎pandas/core/groupby/generic.py
+2 b/‎pandas/core/groupby/generic.py
+2
diff --git a/‎pandas/core/groupby/groupby.py
+2 b/‎pandas/core/groupby/groupby.py
+2
@@ -187,6 +187,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include="*.py" -E "super\(\w*, (self|cls)\)" pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Check for use of builtin filter function' ; echo $MSG
+    invgrep -R --include="*.py" -P '(?<!def)[\(\s]filter\(' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     # Check for the following code in testing: `np.testing` and `np.array_equal`
     MSG='Check for invalid testing' ; echo $MSG
     invgrep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/
@@ -238,10 +242,9 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    # https://github.com/python/mypy/issues/7384
-    # MSG='Check for missing error codes with # type: ignore' ; echo $MSG
-    # invgrep -R --include="*.py" -P '# type: ignore(?!\[)' pandas
-    # RET=$(($RET + $?)) ; echo $MSG "DONE"
+    MSG='Check for missing error codes with # type: ignore' ; echo $MSG
+    invgrep -R --include="*.py" -P '# type:\s?ignore(?!\[)' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
     invgrep -R --include=*.{py,pyx} '\.__class__' pandas
 
@@ -689,32 +689,6 @@ You can also operate on the DataFrame in place:
 
    df.replace(1.5, np.nan, inplace=True)
 
-.. warning::
-
-   When replacing multiple ``bool`` or ``datetime64`` objects, the first
-   argument to ``replace`` (``to_replace``) must match the type of the value
-   being replaced. For example,
-
-   .. code-block:: python
-
-      >>> s = pd.Series([True, False, True])
-      >>> s.replace({'a string': 'new value', True: False})  # raises
-      TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
-
-   will raise a ``TypeError`` because one of the ``dict`` keys is not of the
-   correct type for replacement.
-
-   However, when replacing a *single* object such as,
-
-   .. ipython:: python
-
-      s = pd.Series([True, False, True])
-      s.replace('a string', 'another string')
-
-   the original ``NDFrame`` object will be returned untouched. We're working on
-   unifying this API, but for backwards compatibility reasons we cannot break
-   the latter behavior. See :issue:`6354` for more details.
-
 Missing data casting rules and indexing
 ---------------------------------------
 
 
@@ -214,7 +214,8 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
-
+- Bug in :meth:`DataFrameGroupBy.apply` raising an error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
+-
 
 Categorical
 ^^^^^^^^^^^
@@ -311,6 +312,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupby.apply` would drop a :class:`CategoricalIndex` when grouped on. (:issue:`35792`)
 - Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']])``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`)
 - Bug in :meth:`DataFrameGroupby.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
+- Bug in :meth:`DataFrame.groupby` where the column index name was not always maintained for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
 -
 
 Reshaping
@@ -337,6 +339,7 @@ ExtensionArray
 Other
 ^^^^^
 - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
+- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
 -
 
 .. ---------------------------------------------------------------------------
 
@@ -62,7 +62,7 @@
 # other
 
 Dtype = Union[
-    "ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool]]
+    "ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool, object]]
 ]
 DtypeObj = Union[np.dtype, "ExtensionDtype"]
 FilePathOrBuffer = Union[str, Path, IO[AnyStr], IOBase]
 
@@ -6,7 +6,7 @@
 
 import operator
 from textwrap import dedent
-from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union, cast
 from warnings import catch_warnings, simplefilter, warn
 
 import numpy as np
@@ -60,7 +60,7 @@
 from pandas.core.indexers import validate_indices
 
 if TYPE_CHECKING:
-    from pandas import DataFrame, Series
+    from pandas import Categorical, DataFrame, Series
 
 _shared_docs: Dict[str, str] = {}
 
@@ -429,8 +429,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
     if is_categorical_dtype(comps):
         # TODO(extension)
         # handle categoricals
-        # error: "ExtensionArray" has no attribute "isin"  [attr-defined]
-        return comps.isin(values)  # type: ignore[attr-defined]
+        return cast("Categorical", comps).isin(values)
 
     comps, dtype = _ensure_data(comps)
     values, _ = _ensure_data(values, dtype=dtype)
 
@@ -0,0 +1,95 @@
+"""
+Methods used by Block.replace and related methods.
+"""
+import operator
+import re
+from typing import Optional, Pattern, Union
+
+import numpy as np
+
+from pandas._typing import ArrayLike, Scalar
+
+from pandas.core.dtypes.common import (
+    is_datetimelike_v_numeric,
+    is_numeric_v_string_like,
+    is_scalar,
+)
+from pandas.core.dtypes.missing import isna
+
+
+def compare_or_regex_search(
+    a: ArrayLike,
+    b: Union[Scalar, Pattern],
+    regex: bool = False,
+    mask: Optional[ArrayLike] = None,
+) -> Union[ArrayLike, bool]:
+    """
+    Compare two array_like inputs of the same shape or two scalar values
+
+    Calls operator.eq or re.search, depending on regex argument. If regex is
+    True, perform an element-wise regex matching.
+
+    Parameters
+    ----------
+    a : array_like
+    b : scalar or regex pattern
+    regex : bool, default False
+    mask : array_like or None (default)
+
+    Returns
+    -------
+    mask : array_like of bool
+    """
+
+    def _check_comparison_types(
+        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
+    ):
+        """
+        Raises an error if the two arrays (a,b) cannot be compared.
+        Otherwise, returns the comparison result as expected.
+        """
+        if is_scalar(result) and isinstance(a, np.ndarray):
+            type_names = [type(a).__name__, type(b).__name__]
+
+            if isinstance(a, np.ndarray):
+                type_names[0] = f"ndarray(dtype={a.dtype})"
+
+            raise TypeError(
+                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
+            )
+
+    if not regex:
+        op = lambda x: operator.eq(x, b)
+    else:
+        op = np.vectorize(
+            lambda x: bool(re.search(b, x))
+            if isinstance(x, str) and isinstance(b, (str, Pattern))
+            else False
+        )
+
+    # GH#32621 use mask to avoid comparing to NAs
+    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
+        mask = np.reshape(~(isna(a)), a.shape)
+    if isinstance(a, np.ndarray):
+        a = a[mask]
+
+    if is_numeric_v_string_like(a, b):
+        # GH#29553 avoid deprecation warnings from numpy
+        return np.zeros(a.shape, dtype=bool)
+
+    elif is_datetimelike_v_numeric(a, b):
+        # GH#29553 avoid deprecation warnings from numpy
+        _check_comparison_types(False, a, b)
+        return False
+
+    result = op(a)
+
+    if isinstance(result, np.ndarray) and mask is not None:
+        # The shape of the mask can differ to that of the result
+        # since we may compare only a subset of a's or b's elements
+        tmp = np.zeros(mask.shape, dtype=np.bool_)
+        tmp[mask] = result
+        result = tmp
+
+    _check_comparison_types(result, a, b)
+    return result
@@ -280,6 +280,19 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject):
     ['a', 'b', 'c', 'a', 'b', 'c']
     Categories (3, object): ['a', 'b', 'c']
 
+    Missing values are not included as a category.
+
+    >>> c = pd.Categorical([1, 2, 3, 1, 2, 3, np.nan])
+    >>> c
+    [1, 2, 3, 1, 2, 3, NaN]
+    Categories (3, int64): [1, 2, 3]
+
+    However, their presence is indicated in the `codes` attribute
+    by code `-1`.
+
+    >>> c.codes
+    array([ 0,  1,  2,  0,  1,  2, -1], dtype=int8)
+
     Ordered `Categoricals` can be sorted according to the custom order
     of the categories and can have a min and max value.
 
@@ -2316,7 +2329,7 @@ def _concat_same_type(self, to_concat):
 
         return union_categoricals(to_concat)
 
-    def isin(self, values):
+    def isin(self, values) -> np.ndarray:
         """
         Check whether `values` are contained in Categorical.
 
 
@@ -468,10 +468,9 @@ def _ndarray(self) -> np.ndarray:
 
     def _from_backing_data(self: _T, arr: np.ndarray) -> _T:
         # Note: we do not retain `freq`
+        # error: Too many arguments for "NDArrayBackedExtensionArray"
         # error: Unexpected keyword argument "dtype" for "NDArrayBackedExtensionArray"
-        # TODO: add my error code
-        # https://github.com/python/mypy/issues/7384
-        return type(self)(arr, dtype=self.dtype)  # type: ignore
+        return type(self)(arr, dtype=self.dtype)  # type: ignore[call-arg]
 
     # ------------------------------------------------------------------
 
 
@@ -335,7 +335,7 @@ def array(
     return result
 
 
-def extract_array(obj, extract_numpy: bool = False):
+def extract_array(obj: AnyArrayLike, extract_numpy: bool = False) -> ArrayLike:
     """
     Extract the ndarray or ExtensionArray from a Series or Index.
 
@@ -383,7 +383,9 @@ def extract_array(obj, extract_numpy: bool = False):
     if extract_numpy and isinstance(obj, ABCPandasArray):
         obj = obj.to_numpy()
 
-    return obj
+    # error: Incompatible return value type (got "Index", expected "ExtensionArray")
+    # error: Incompatible return value type (got "Series", expected "ExtensionArray")
+    return obj  # type: ignore[return-value]
 
 
 def sanitize_array(
 
@@ -1488,7 +1488,7 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
     if has_bools:
         for t in types:
             if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t):
-                return object
+                return np.dtype("object")
 
     return np.find_common_type(types, [])
 
@@ -1550,7 +1550,7 @@ def construct_1d_arraylike_from_scalar(
         elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):
             # we need to coerce to object dtype to allow numpy
             # to take our string as a scalar value
-            dtype = object
+            dtype = np.dtype("object")
             if not isna(value):
                 value = ensure_str(value)
 
 
@@ -395,7 +395,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int:
         from pandas.core.dtypes.common import DT64NS_DTYPE, is_datetime64tz_dtype
 
         from pandas.core.util.hashing import (
-            _combine_hash_arrays,
+            combine_hash_arrays,
             hash_array,
             hash_tuples,
         )
@@ -427,7 +427,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int:
             )
         else:
             cat_array = [cat_array]
-        hashed = _combine_hash_arrays(iter(cat_array), num_items=len(cat_array))
+        hashed = combine_hash_arrays(iter(cat_array), num_items=len(cat_array))
         return np.bitwise_xor.reduce(hashed)
 
     @classmethod
 
@@ -6561,20 +6561,6 @@ def replace(
         1   new  new
         2  bait  xyz
 
-        Note that when replacing multiple ``bool`` or ``datetime64`` objects,
-        the data types in the `to_replace` parameter must match the data
-        type of the value being replaced:
-
-        >>> df = pd.DataFrame({{'A': [True, False, True],
-        ...                    'B': [False, True, False]}})
-        >>> df.replace({{'a string': 'new value', True: False}})  # raises
-        Traceback (most recent call last):
-            ...
-        TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
-
-        This raises a ``TypeError`` because one of the ``dict`` keys is not of
-        the correct type for replacement.
-
         Compare the behavior of ``s.replace({{'a': None}})`` and
         ``s.replace('a', None)`` to understand the peculiarities
         of the `to_replace` parameter:
 
@@ -98,8 +98,10 @@ def recode_from_groupby(
     """
     # we re-order to the original category orderings
     if sort:
-        return ci.set_categories(c.categories)  # type: ignore [attr-defined]
+        # error: "CategoricalIndex" has no attribute "set_categories"
+        return ci.set_categories(c.categories)  # type: ignore[attr-defined]
 
     # we are not sorting, so add unobserved to the end
     new_cats = c.categories[~c.categories.isin(ci.categories)]
-    return ci.add_categories(new_cats)  # type: ignore [attr-defined]
+    # error: "CategoricalIndex" has no attribute "add_categories"
+    return ci.add_categories(new_cats)  # type: ignore[attr-defined]
@@ -1084,6 +1084,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike:
                     assert how == "ohlc"
                     raise
 
+                # We get here with a) EADtypes and b) object dtype
                 obj: Union[Series, DataFrame]
                 # call our grouper again with only this block
                 if isinstance(bvalues, ExtensionArray):
@@ -1694,6 +1695,7 @@ def _wrap_transformed_output(
         """
         indexed_output = {key.position: val for key, val in output.items()}
         columns = Index(key.label for key in output)
+        columns.name = self.obj.columns.name
 
         result = self.obj._constructor(indexed_output)
         result.columns = columns
 
@@ -1012,6 +1012,8 @@ def _agg_general(
                     # raised in _get_cython_function, in some cases can
                     #  be trimmed by implementing cython funcs for more dtypes
                     pass
+                else:
+                    raise
 
             # apply a non-cython aggregation
             result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
Original file line number	Diff line number	Diff line change
`@@ -62,7 +62,7 @@`
`62`	`62`	`# other`
`63`	`63`
`64`	`64`	`Dtype = Union[`
`65`		`- "ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool]]`
	`65`	`+ "ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool, object]]`
`66`	`66`	`]`
`67`	`67`	`DtypeObj = Union[np.dtype, "ExtensionDtype"]`
`68`	`68`	`FilePathOrBuffer = Union[str, Path, IO[AnyStr], IOBase]`