-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Select numeric ExtensionDtypes with DataFrame.select_dtypes #35341
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
503f9fc
ea1796f
3932391
7cf6e3e
0cb170a
83c84ba
1087ff7
7fe272f
77243fd
eb475a8
9b5db9a
9cbf294
4c1d67a
14fabfc
da6cf68
3801310
9e23d20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,8 @@ | |
|
||
import numpy as np | ||
|
||
from pandas.core.dtypes.common import is_extension_array_dtype | ||
|
||
# numpy versioning | ||
_np_version = np.__version__ | ||
_nlv = LooseVersion(_np_version) | ||
|
@@ -62,6 +64,16 @@ def np_array_datetime64_compat(arr, *args, **kwargs): | |
return np.array(arr, *args, **kwargs) | ||
|
||
|
||
def np_issubclass_compat(unique_dtype, dtypes_set):
    """
    Check whether a dtype matches any of the requested dtype classes.

    A dtype matches when its scalar ``type`` is a subclass of one of the
    classes in ``dtypes_set``.  In addition, when ``np.number`` is one of the
    requested classes, an extension dtype that reports itself as numeric
    through ``_is_numeric`` also matches, even if its scalar type does not
    derive from ``np.number``.

    Parameters
    ----------
    unique_dtype : numpy.dtype or ExtensionDtype
        The dtype to test; only its ``type`` attribute and, for extension
        dtypes, its ``_is_numeric`` attribute are consulted.
    dtypes_set : collection of type
        The classes to match against.  It must support ``in`` and be
        convertible to a tuple.

    Returns
    -------
    bool
        True if ``unique_dtype`` matches, False otherwise.

    Examples
    --------
    >>> np_issubclass_compat(np.dtype("int64"), {np.number})
    True
    >>> np_issubclass_compat(np.dtype("object"), {np.number})
    False

    Notes
    -----
    Review feedback on this change: the helper would fit better in
    ``pandas/core/dtypes/common.py`` (with a full docstring and examples),
    and it is close to an ``is_numeric`` check apart from the special
    handling of ``np.number``.
    """
    if issubclass(unique_dtype.type, tuple(dtypes_set)):  # type: ignore
        return True

    # The scalar type of an extension dtype need not be a NumPy scalar class,
    # so the subclass test above can miss it; fall back to the dtype's own
    # numeric flag, but only when numeric dtypes were actually requested.
    return bool(
        np.number in dtypes_set
        and is_extension_array_dtype(unique_dtype)
        and unique_dtype._is_numeric
    )
|
||
__all__ = [ | ||
"np", | ||
"_np_version", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import pytest | ||
import numpy as np | ||
|
||
import pandas as pd | ||
from pandas.core.arrays import ExtensionArray | ||
from pandas.core.dtypes.dtypes import ExtensionDtype | ||
|
||
|
||
class DummyDtype(ExtensionDtype):
    """
    Minimal extension dtype whose numeric-ness can be toggled in tests.

    ``_is_numeric`` reports the current value of ``_numeric``, so a test can
    flip ``_numeric`` to exercise both the numeric and the non-numeric path.
    """

    # Scalar type reported for elements of the array.
    type = int
    # Backing flag for ``_is_numeric``; non-numeric unless a test changes it.
    _numeric = False

    @property
    def name(self):
        """Return the string identifier of this dtype."""
        return "Dummy"

    @property
    def _is_numeric(self):
        """Return the current value of the ``_numeric`` flag."""
        return self._numeric
|
||
|
||
class DummyArray(ExtensionArray):
    """
    Minimal extension array backed by a plain sequence, for dtype tests.

    Only the pieces of the extension-array interface needed by the tests in
    this file are provided.
    """

    # A single dtype instance shared by every DummyArray; changing its
    # ``_numeric`` flag therefore affects all arrays of this class.
    _dtype = DummyDtype()

    def __init__(self, data):
        self.data = data

    def __array__(self, dtype=None):
        # NumPy calls ``__array__()`` with no arguments when no dtype is
        # requested and expects an ndarray back, so ``dtype`` must be
        # optional and the stored sequence must be converted.
        return np.asarray(self.data, dtype=dtype)

    @property
    def dtype(self):
        """Return the shared dtype instance of this array class."""
        return self._dtype

    def __len__(self) -> int:
        return len(self.data)

    def __getitem__(self, item):
        # An integer position yields the stored scalar; any other indexer
        # (slice, mask, positions) yields a new array of the same class.
        if isinstance(item, (int, np.integer)):
            return self.data[item]
        return type(self)(np.asarray(self.data)[item])
|
||
|
||
def test_select_dtypes_numeric():
    """
    Check that ``select_dtypes(np.number)`` honours ``_is_numeric``.

    A column backed by an extension dtype is expected to be selected when the
    dtype reports itself as numeric and dropped when it does not.
    """
    # NOTE(review): reviewer feedback asks for all of this to live in
    # tests/frame/methods/test_select_dtypes.py.

    # The dtype instance is shared at class level, so remember its flag and
    # restore it afterwards to avoid leaking state into other tests.
    shared_dtype = DummyArray._dtype
    original_flag = shared_dtype._numeric
    try:
        da = DummyArray([1, 2])
        da._dtype._numeric = True
        df = pd.DataFrame(da)
        assert df.select_dtypes(np.number).shape == df.shape

        da = DummyArray([1, 2])
        da._dtype._numeric = False
        df = pd.DataFrame(da)
        assert df.select_dtypes(np.number).shape != df.shape
    finally:
        shared_dtype._numeric = original_flag
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If there's any viable way to avoid this, we'd rather not import from pandas.core here.