Skip to content

Commit 8d48776

Browse files
author
Oleh Kozynets
authored
Fix select_dtypes(include='int') for Windows. (#36808)
1 parent f10da19 commit 8d48776

File tree

3 files changed

+37
-3
lines changed

3 files changed

+37
-3
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ Numeric
305305
- Bug in :meth:`DataFrame.mode` and :meth:`Series.mode` not keeping consistent integer :class:`Index` for empty input (:issue:`33321`)
306306
- Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
307307
- Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising ``IndexError`` (:issue:`38932`)
308+
- Bug in :meth:`DataFrame.select_dtypes` behaving differently on Windows and Linux with ``include="int"`` (:issue:`36569`)
308309
-
309310

310311
Conversion

pandas/core/frame.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -3777,8 +3777,21 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame:
37773777
raise ValueError("at least one of include or exclude must be nonempty")
37783778

37793779
# convert the myriad valid dtypes object to a single representation
3780-
include = frozenset(infer_dtype_from_object(x) for x in include)
3781-
exclude = frozenset(infer_dtype_from_object(x) for x in exclude)
3780+
def check_int_infer_dtype(dtypes):
3781+
converted_dtypes = []
3782+
for dtype in dtypes:
3783+
# NumPy maps int to different types (int32, int64) on Windows and Linux
3784+
# see https://github.com/numpy/numpy/issues/9464
3785+
if (isinstance(dtype, str) and dtype == "int") or (dtype is int):
3786+
converted_dtypes.append(np.int32)
3787+
converted_dtypes.append(np.int64)
3788+
else:
3789+
converted_dtypes.append(infer_dtype_from_object(dtype))
3790+
return frozenset(converted_dtypes)
3791+
3792+
include = check_int_infer_dtype(include)
3793+
exclude = check_int_infer_dtype(exclude)
3794+
37823795
for dtypes in (include, exclude):
37833796
invalidate_string_dtypes(dtypes)
37843797

pandas/tests/frame/methods/test_select_dtypes.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ def test_select_dtypes_exclude_include_using_list_like(self):
110110
{
111111
"a": list("abc"),
112112
"b": list(range(1, 4)),
113-
"c": np.arange(3, 6).astype("u1"),
113+
"c": np.arange(3, 6, dtype="u1"),
114114
"d": np.arange(4.0, 7.0, dtype="float64"),
115115
"e": [True, False, True],
116116
"f": pd.date_range("now", periods=3).values,
@@ -128,6 +128,26 @@ def test_select_dtypes_exclude_include_using_list_like(self):
128128
e = df[["b", "e"]]
129129
tm.assert_frame_equal(r, e)
130130

131+
@pytest.mark.parametrize(
132+
"include", [(np.bool_, "int"), (np.bool_, "integer"), ("bool", int)]
133+
)
134+
def test_select_dtypes_exclude_include_int(self, include):
135+
# select_dtypes(include="int") must select the same columns on Windows and Linux, see #36596
136+
df = DataFrame(
137+
{
138+
"a": list("abc"),
139+
"b": list(range(1, 4)),
140+
"c": np.arange(3, 6, dtype="int32"),
141+
"d": np.arange(4.0, 7.0, dtype="float64"),
142+
"e": [True, False, True],
143+
"f": pd.date_range("now", periods=3).values,
144+
}
145+
)
146+
exclude = (np.datetime64,)
147+
result = df.select_dtypes(include=include, exclude=exclude)
148+
expected = df[["b", "c", "e"]]
149+
tm.assert_frame_equal(result, expected)
150+
131151
def test_select_dtypes_include_using_scalars(self):
132152
df = DataFrame(
133153
{

0 commit comments

Comments
 (0)