diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 17d8c79994dbe..2deb3295d3c68 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -304,6 +304,7 @@ Numeric - Bug in :meth:`DataFrame.mode` and :meth:`Series.mode` not keeping consistent integer :class:`Index` for empty input (:issue:`33321`) - Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`) - Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising ``IndexError`` (:issue:`38932`) +- Bug in :meth:`DataFrame.select_dtypes` behaving differently on Windows and Linux with ``include="int"`` (:issue:`36596`) - Conversion diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6357b8feb348b..2e4a6f0928f94 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3786,8 +3786,21 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame: raise ValueError("at least one of include or exclude must be nonempty") # convert the myriad valid dtypes object to a single representation - include = frozenset(infer_dtype_from_object(x) for x in include) - exclude = frozenset(infer_dtype_from_object(x) for x in exclude) + def check_int_infer_dtype(dtypes): + converted_dtypes = [] + for dtype in dtypes: + # NumPy maps int to different types (int32, int64) on Windows and Linux + # see https://github.com/numpy/numpy/issues/9464 + if (isinstance(dtype, str) and dtype == "int") or (dtype is int): + converted_dtypes.append(np.int32) + converted_dtypes.append(np.int64) + else: + converted_dtypes.append(infer_dtype_from_object(dtype)) + return frozenset(converted_dtypes) + + include = check_int_infer_dtype(include) + exclude = check_int_infer_dtype(exclude) + for dtypes in (include, exclude): invalidate_string_dtypes(dtypes) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 434df5ccccaf7..2a94b18b806f8 100644 
--- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -110,7 +110,7 @@ def test_select_dtypes_exclude_include_using_list_like(self): { "a": list("abc"), "b": list(range(1, 4)), - "c": np.arange(3, 6).astype("u1"), + "c": np.arange(3, 6, dtype="u1"), "d": np.arange(4.0, 7.0, dtype="float64"), "e": [True, False, True], "f": pd.date_range("now", periods=3).values, @@ -128,6 +128,26 @@ def test_select_dtypes_exclude_include_using_list_like(self): e = df[["b", "e"]] tm.assert_frame_equal(r, e) + @pytest.mark.parametrize( + "include", [(np.bool_, "int"), (np.bool_, "integer"), ("bool", int)] + ) + def test_select_dtypes_exclude_include_int(self, include): + # GH#36596: include="int" must select int32 and int64 on every platform + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6, dtype="int32"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("now", periods=3).values, + } + ) + exclude = (np.datetime64,) + result = df.select_dtypes(include=include, exclude=exclude) + expected = df[["b", "c", "e"]] + tm.assert_frame_equal(result, expected) + def test_select_dtypes_include_using_scalars(self): df = DataFrame( {