Skip to content

BUG: Allow numeric ExtensionDtypes in DataFrame.select_dtypes #38246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 36 commits into from
Dec 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
503f9fc
select dtypes
andrewgsavage Jul 19, 2020
ea1796f
flake8
andrewgsavage Jul 19, 2020
3932391
flake8
andrewgsavage Jul 19, 2020
7cf6e3e
black
andrewgsavage Jul 19, 2020
0cb170a
checks
andrewgsavage Jul 19, 2020
83c84ba
create compat func
andrewgsavage Jul 31, 2020
1087ff7
create compat func
andrewgsavage Jul 31, 2020
7fe272f
create compat func
andrewgsavage Jul 31, 2020
77243fd
test
andrewgsavage Aug 1, 2020
eb475a8
lint
andrewgsavage Aug 1, 2020
9b5db9a
lint
andrewgsavage Aug 1, 2020
9cbf294
remove import from pandas.core
andrewgsavage Oct 11, 2020
4c1d67a
Merge branch 'master' into select_dtypes
andrewgsavage Oct 11, 2020
14fabfc
move files, add docstring and examples
andrewgsavage Oct 19, 2020
da6cf68
isort, remove pint_pandas example
andrewgsavage Oct 19, 2020
3801310
isort, remove pint_pandas example
andrewgsavage Oct 19, 2020
9e23d20
remove unused type comment@
andrewgsavage Oct 19, 2020
13b1531
merge master
arw2019 Dec 2, 2020
41fd75e
pd namespace usage
arw2019 Dec 2, 2020
85d0ae8
remove tests from old location
arw2019 Dec 2, 2020
4445207
unused import
arw2019 Dec 2, 2020
31ef92f
consolidate return
arw2019 Dec 2, 2020
5d9d2c2
whatsnew
arw2019 Dec 2, 2020
5d60908
minimize diff
arw2019 Dec 3, 2020
b8495c3
Merge branch 'master' of https://github.com/pandas-dev/pandas into HEAD
arw2019 Dec 3, 2020
75ea3a2
don't make separate method + ignore mypy
arw2019 Dec 5, 2020
e84ecb0
Merge branch 'master' of https://github.com/pandas-dev/pandas into GH…
arw2019 Dec 5, 2020
1df34f5
review comment
arw2019 Dec 13, 2020
0b5a9ef
Merge branch 'master' of https://github.com/pandas-dev/pandas into GH…
arw2019 Dec 13, 2020
ce63fa7
move whatsnew to 1.3
arw2019 Dec 13, 2020
93c7403
parametrize test
arw2019 Dec 13, 2020
ef095b5
add
arw2019 Dec 13, 2020
7c25c80
Merge branch 'master' of https://github.com/pandas-dev/pandas into GH…
arw2019 Dec 14, 2020
9bbd980
review comment
arw2019 Dec 14, 2020
57b6126
remove commented code
arw2019 Dec 14, 2020
f0902be
Merge branch 'master' of https://github.com/pandas-dev/pandas into GH…
arw2019 Dec 14, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ Timezones
Numeric
^^^^^^^
- Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`)
-
- Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` dropping numeric ``ExtensionDtype`` columns; such columns are now retained (:issue:`35340`)
-

Conversion
Expand Down
14 changes: 8 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3719,12 +3719,14 @@ def extract_unique_dtypes_from_dtypes_set(
extracted_dtypes = [
unique_dtype
for unique_dtype in unique_dtypes
# error: Argument 1 to "tuple" has incompatible type
# "FrozenSet[Union[ExtensionDtype, str, Any, Type[str],
# Type[float], Type[int], Type[complex], Type[bool]]]";
# expected "Iterable[Union[type, Tuple[Any, ...]]]"
if issubclass(
unique_dtype.type, tuple(dtypes_set) # type: ignore[arg-type]
if (
issubclass(
unique_dtype.type, tuple(dtypes_set) # type: ignore[arg-type]
)
or (
np.number in dtypes_set
and getattr(unique_dtype, "_is_numeric", False)
)
)
]
return extracted_dtypes
Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/frame/methods/test_select_dtypes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,46 @@
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import ExtensionDtype

import pandas as pd
from pandas import DataFrame, Timestamp
import pandas._testing as tm
from pandas.core.arrays import ExtensionArray


class DummyDtype(ExtensionDtype):
    """
    Test-only extension dtype whose numeric flag is chosen per instance.

    Parameters
    ----------
    numeric : object
        Stored as-is and handed back by the ``_is_numeric`` property.
    """

    # Class-level ``type`` attribute; every instance reports ``int``.
    type = int

    def __init__(self, numeric):
        self._numeric = numeric

    # Constant dtype name, the same for every instance.
    name = property(lambda self: "Dummy")

    # Read-only access to the flag supplied at construction time.
    _is_numeric = property(lambda self: self._numeric)


class DummyArray(ExtensionArray):
    """
    Bare-bones extension array used to exercise ``select_dtypes``.

    The wrapped ``data`` is stored untouched and the array reports whatever
    ``dtype`` object it was constructed with.

    Parameters
    ----------
    data : sized object
        Values to wrap; ``len(data)`` is used as the array length.
    dtype : object
        Returned verbatim by the ``dtype`` property.
    """

    def __init__(self, data, dtype):
        self.data = data
        self._dtype = dtype

    def __array__(self, dtype=None, copy=None):
        """
        Return the wrapped data as a NumPy array.

        ``dtype`` is optional because NumPy calls ``__array__()`` with no
        arguments when the caller did not request a dtype, and the protocol
        requires an ndarray to be returned rather than the raw wrapped data.
        """
        result = np.asarray(self.data, dtype=dtype)
        if copy and result is self.data:
            # asarray handed back the wrapped ndarray itself; honour the
            # explicit copy request so callers do not alias our storage.
            result = result.copy()
        return result

    @property
    def dtype(self):
        """The dtype object supplied at construction time."""
        return self._dtype

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, item):
        # Deliberate stub: always returns None. Item access is not exercised
        # by the test visible in this file.
        pass


class TestSelectDtypes:
Expand Down Expand Up @@ -322,3 +359,20 @@ def test_select_dtypes_typecodes(self):
expected = df
FLOAT_TYPES = list(np.typecodes["AllFloat"])
tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected)

@pytest.mark.parametrize(
    "arr,expected",
    [
        # plain NumPy integer data
        (np.array([1, 2], dtype=np.int32), True),
        # pandas nullable integer extension array
        (pd.array([1, 2], dtype="Int32"), True),
        # pandas string extension array
        (pd.array(["a", "b"], dtype="string"), False),
        # custom extension dtype reporting _is_numeric=True
        (DummyArray([1, 2], dtype=DummyDtype(numeric=True)), True),
        # custom extension dtype reporting _is_numeric=False
        (DummyArray([1, 2], dtype=DummyDtype(numeric=False)), False),
    ],
)
def test_select_dtypes_numeric(self, arr, expected):
    """Check whether ``select_dtypes(np.number)`` keeps a frame built from ``arr``."""
    # GH 35340
    frame = DataFrame(arr)
    numeric_part = frame.select_dtypes(np.number)

    # The data counts as "selected" when filtering leaves the shape unchanged.
    kept_everything = numeric_part.shape == frame.shape
    assert kept_everything == expected