Merge pull request #251 from pandas-dev/master

sthagen · web-flow · commit 49ddb565e3d2 · 2021-08-04T12:57:07.000+02:00
Sync Fork from Upstream Repo
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -110,7 +110,7 @@ repos:
         entry: python scripts/generate_pip_deps_from_conda.py
         files: ^(environment.yml|requirements-dev.txt)$
         pass_filenames: false
-        additional_dependencies: [pyyaml]
+        additional_dependencies: [pyyaml, toml]
     -   id: sync-flake8-versions
         name: Check flake8 version is synced across flake8, yesqa, and environment.yml
         language: python
diff --git a/doc/source/development/contributing_environment.rst b/doc/source/development/contributing_environment.rst
@@ -189,11 +189,8 @@ Creating a Python environment (pip)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 If you aren't using conda for your development environment, follow these instructions.
-You'll need to have at least the :ref:`minimum Python version <install.version>` that pandas supports. If your Python version
-is 3.8.0 (or later), you might need to update your ``setuptools`` to version 42.0.0 (or later)
-in your development environment before installing the build dependencies::
-
-      pip install --upgrade setuptools
+You'll need to have at least the :ref:`minimum Python version <install.version>` that pandas supports.
+You also need to have ``setuptools`` 51.0.0 or later to build pandas.
 
 **Unix**/**macOS with virtualenv**
 
diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst
@@ -22,7 +22,7 @@ Fixed regressions
 - Regression in :meth:`DataFrame.drop` does nothing if :class:`MultiIndex` has duplicates and indexer is a tuple or list of tuples (:issue:`42771`)
 - Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`)
 - Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`)
--
+- Fixed regression in :meth:`.Styler.highlight_min` and :meth:`.Styler.highlight_max` where ``pandas.NA`` was not ignored (:issue:`42650`)
 
 .. ---------------------------------------------------------------------------
 
@@ -31,7 +31,7 @@ Fixed regressions
 Bug fixes
 ~~~~~~~~~
 - 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (:issue:`42430`)
--
+- :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -270,6 +270,7 @@ Groupby/resample/rolling
 - Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
 - Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`)
 - Bug in :meth:`GroupBy.shift` that would return the grouping columns if ``fill_value`` was not None (:issue:`41556`)
+- Bug in :meth:`pandas.DataFrame.ewm` where non-float64 dtypes were silently failing (:issue:`42452`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py
@@ -11,8 +11,10 @@
     pa_version_under2p0 = _palv < Version("2.0.0")
     pa_version_under3p0 = _palv < Version("3.0.0")
     pa_version_under4p0 = _palv < Version("4.0.0")
+    pa_version_under5p0 = _palv < Version("5.0.0")
 except ImportError:
     pa_version_under1p0 = True
     pa_version_under2p0 = True
     pa_version_under3p0 = True
     pa_version_under4p0 = True
+    pa_version_under5p0 = True
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
@@ -8,13 +8,16 @@
     TYPE_CHECKING,
     Any,
     TypeVar,
+    cast,
+    overload,
 )
 
 import numpy as np
 
 from pandas._libs.hashtable import object_hash
 from pandas._typing import (
     DtypeObj,
+    npt,
     type_t,
 )
 from pandas.errors import AbstractMethodError
@@ -29,7 +32,7 @@
     from pandas.core.arrays import ExtensionArray
 
     # To parameterize on same ExtensionDtype
-    E = TypeVar("E", bound="ExtensionDtype")
+    ExtensionDtypeT = TypeVar("ExtensionDtypeT", bound="ExtensionDtype")
 
 
 class ExtensionDtype:
@@ -206,7 +209,9 @@ def construct_array_type(cls) -> type_t[ExtensionArray]:
         raise AbstractMethodError(cls)
 
     @classmethod
-    def construct_from_string(cls, string: str):
+    def construct_from_string(
+        cls: type_t[ExtensionDtypeT], string: str
+    ) -> ExtensionDtypeT:
         r"""
         Construct this type from a string.
 
@@ -368,7 +373,7 @@ def _can_hold_na(self) -> bool:
         return True
 
 
-def register_extension_dtype(cls: type[E]) -> type[E]:
+def register_extension_dtype(cls: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]:
     """
     Register an ExtensionType with pandas as class decorator.
 
@@ -409,9 +414,9 @@ class Registry:
     """
 
     def __init__(self):
-        self.dtypes: list[type[ExtensionDtype]] = []
+        self.dtypes: list[type_t[ExtensionDtype]] = []
 
-    def register(self, dtype: type[ExtensionDtype]) -> None:
+    def register(self, dtype: type_t[ExtensionDtype]) -> None:
         """
         Parameters
         ----------
@@ -422,22 +427,46 @@ def register(self, dtype: type[ExtensionDtype]) -> None:
 
         self.dtypes.append(dtype)
 
-    def find(self, dtype: type[ExtensionDtype] | str) -> type[ExtensionDtype] | None:
+    @overload
+    def find(self, dtype: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]:
+        ...
+
+    @overload
+    def find(self, dtype: ExtensionDtypeT) -> ExtensionDtypeT:
+        ...
+
+    @overload
+    def find(self, dtype: str) -> ExtensionDtype | None:
+        ...
+
+    @overload
+    def find(
+        self, dtype: npt.DTypeLike
+    ) -> type_t[ExtensionDtype] | ExtensionDtype | None:
+        ...
+
+    def find(
+        self, dtype: type_t[ExtensionDtype] | ExtensionDtype | npt.DTypeLike
+    ) -> type_t[ExtensionDtype] | ExtensionDtype | None:
         """
         Parameters
         ----------
-        dtype : Type[ExtensionDtype] or str
+        dtype : ExtensionDtype class or instance or str or numpy dtype or python type
 
         Returns
         -------
         return the first matching dtype, otherwise return None
         """
         if not isinstance(dtype, str):
-            dtype_type = dtype
+            dtype_type: type_t
             if not isinstance(dtype, type):
                 dtype_type = type(dtype)
+            else:
+                dtype_type = dtype
             if issubclass(dtype_type, ExtensionDtype):
-                return dtype
+                # cast needed here as mypy doesn't know we have figured
+                # out it is an ExtensionDtype or type_t[ExtensionDtype]
+                return cast("ExtensionDtype | type_t[ExtensionDtype]", dtype)
 
             return None
 
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -1765,9 +1765,7 @@ def pandas_dtype(dtype) -> DtypeObj:
     # registered extension types
     result = registry.find(dtype)
     if result is not None:
-        # error: Incompatible return value type (got "Type[ExtensionDtype]",
-        # expected "Union[dtype, ExtensionDtype]")
-        return result  # type: ignore[return-value]
+        return result
 
     # try a numpy dtype
     # raise a consistent TypeError if failed
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4280,6 +4280,11 @@ def check_int_infer_dtype(dtypes):
                     # error: Argument 1 to "append" of "list" has incompatible type
                     # "Type[signedinteger[Any]]"; expected "Type[signedinteger[Any]]"
                     converted_dtypes.append(np.int64)  # type: ignore[arg-type]
+                elif dtype == "float" or dtype is float:
+                    # GH#42452: np.dtype("float") coerces to np.float64 as of NumPy 1.20
+                    converted_dtypes.extend(
+                        [np.float64, np.float32]  # type: ignore[list-item]
+                    )
                 else:
                     # error: Argument 1 to "append" of "list" has incompatible type
                     # "Union[dtype[Any], ExtensionDtype]"; expected
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -2347,15 +2347,15 @@ def highlight_max(
         Styler.highlight_quantile: Highlight values defined by a quantile with a style.
         """
 
-        def f(data: FrameOrSeries, props: str) -> np.ndarray:
-            return np.where(data == np.nanmax(data.to_numpy()), props, "")
-
         if props is None:
             props = f"background-color: {color};"
         # error: Argument 1 to "apply" of "Styler" has incompatible type
         # "Callable[[FrameOrSeries, str], ndarray]"; expected "Callable[..., Styler]"
         return self.apply(
-            f, axis=axis, subset=subset, props=props  # type: ignore[arg-type]
+            partial(_highlight_value, op="max"),  # type: ignore[arg-type]
+            axis=axis,
+            subset=subset,
+            props=props,
         )
 
     def highlight_min(
@@ -2398,15 +2398,15 @@ def highlight_min(
         Styler.highlight_quantile: Highlight values defined by a quantile with a style.
         """
 
-        def f(data: FrameOrSeries, props: str) -> np.ndarray:
-            return np.where(data == np.nanmin(data.to_numpy()), props, "")
-
         if props is None:
             props = f"background-color: {color};"
         # error: Argument 1 to "apply" of "Styler" has incompatible type
         # "Callable[[FrameOrSeries, str], ndarray]"; expected "Callable[..., Styler]"
         return self.apply(
-            f, axis=axis, subset=subset, props=props  # type: ignore[arg-type]
+            partial(_highlight_value, op="min"),  # type: ignore[arg-type]
+            axis=axis,
+            subset=subset,
+            props=props,
         )
 
     def highlight_between(
@@ -2912,6 +2912,16 @@ def _highlight_between(
     return np.where(g_left & l_right, props, "")
 
 
+def _highlight_value(data: FrameOrSeries, op: str, props: str) -> np.ndarray:
+    """
+    Return an array of css strings based on the condition of values matching an op.
+    """
+    value = getattr(data, op)(skipna=True)
+    if isinstance(data, DataFrame):  # min/max must be done twice to return scalar
+        value = getattr(value, op)(skipna=True)
+    return np.where(data == value, props, "")
+
+
 def _bar(
     data: FrameOrSeries,
     align: str | float | int | Callable,
diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py
@@ -353,6 +353,7 @@ def _translate_header(
             self.data.index.names
             and com.any_not_none(*self.data.index.names)
             and not self.hide_index_
+            and not self.hide_columns_
         ):
             index_names = [
                 _element(
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -407,3 +407,37 @@ def test_select_dtypes_numeric_nullable_string(self, nullable_string_dtype):
         df = DataFrame(arr)
         is_selected = df.select_dtypes(np.number).shape == df.shape
         assert not is_selected
+
+    @pytest.mark.parametrize(
+        "expected, float_dtypes",
+        [
+            [
+                DataFrame(
+                    {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)}
+                ).astype(dtype={"A": float, "B": np.float64, "C": np.float32}),
+                float,
+            ],
+            [
+                DataFrame(
+                    {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)}
+                ).astype(dtype={"A": float, "B": np.float64, "C": np.float32}),
+                "float",
+            ],
+            [DataFrame({"C": range(10, 7, -1)}, dtype=np.float32), np.float32],
+            [
+                DataFrame({"A": range(3), "B": range(5, 8)}).astype(
+                    dtype={"A": float, "B": np.float64}
+                ),
+                np.float64,
+            ],
+        ],
+    )
+    def test_select_dtypes_float_dtype(self, expected, float_dtypes):
+        # GH#42452
+        dtype_dict = {"A": float, "B": np.float64, "C": np.float32}
+        df = DataFrame(
+            {"A": range(3), "B": range(5, 8), "C": range(10, 7, -1)},
+        )
+        df = df.astype(dtype_dict)
+        result = df.select_dtypes(include=float_dtypes)
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/formats/style/test_highlight.py b/pandas/tests/io/formats/style/test_highlight.py
@@ -2,10 +2,10 @@
 import pytest
 
 from pandas import (
+    NA,
     DataFrame,
     IndexSlice,
 )
-import pandas._testing as tm
 
 pytest.importorskip("jinja2")
 
@@ -55,9 +55,7 @@ def test_highlight_minmax_basic(df, f):
     }
     if f == "highlight_min":
         df = -df
-    with tm.assert_produces_warning(RuntimeWarning):
-        # All-NaN slice encountered
-        result = getattr(df.style, f)(axis=1, color="red")._compute().ctx
+    result = getattr(df.style, f)(axis=1, color="red")._compute().ctx
     assert result == expected
 
 
@@ -78,6 +76,26 @@ def test_highlight_minmax_ext(df, f, kwargs):
     assert result == expected
 
 
+@pytest.mark.parametrize("f", ["highlight_min", "highlight_max"])
+@pytest.mark.parametrize("axis", [None, 0, 1])
+def test_highlight_minmax_nulls(f, axis):
+    # GH 42750
+    expected = {
+        (1, 0): [("background-color", "yellow")],
+        (1, 1): [("background-color", "yellow")],
+    }
+    if axis == 1:
+        expected.update({(2, 1): [("background-color", "yellow")]})
+
+    if f == "highlight_max":
+        df = DataFrame({"a": [NA, 1, None], "b": [np.nan, 1, -1]})
+    else:
+        df = DataFrame({"a": [NA, -1, None], "b": [np.nan, -1, 1]})
+
+    result = getattr(df.style, f)(axis=axis)._compute().ctx
+    assert result == expected
+
+
 @pytest.mark.parametrize(
     "kwargs",
     [
diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py
@@ -1131,7 +1131,7 @@ def test_hide_column_headers(self):
 
         self.df.index.name = "some_name"
         ctx = self.df.style.hide_columns()._translate(True, True)
-        assert len(ctx["head"]) == 1  # only a single row for index names: no col heads
+        assert len(ctx["head"]) == 0  # no header for index names, changed in #42101
 
     def test_hide_single_index(self):
         # GH 14194
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
diff --git a/pyproject.toml b/pyproject.toml
diff --git a/requirements-dev.txt b/requirements-dev.txt
diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py