-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH - Modify Dataframe.select_dtypes to accept scalar values #16860
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2285,18 +2285,16 @@ def select_dtypes(self, include=None, exclude=None): | |
|
||
Parameters | ||
---------- | ||
include, exclude : list-like | ||
A list of dtypes or strings to be included/excluded. You must pass | ||
in a non-empty sequence for at least one of these. | ||
include, exclude : scalar or list-like | ||
A selection of dtypes or strings to be included/excluded. At least | ||
one of these parameters must be supplied. | ||
|
||
Raises | ||
------ | ||
ValueError | ||
* If both of ``include`` and ``exclude`` are empty | ||
* If ``include`` and ``exclude`` have overlapping elements | ||
* If any kind of string dtype is passed in. | ||
TypeError | ||
* If either of ``include`` or ``exclude`` is not a sequence | ||
|
||
Returns | ||
------- | ||
|
@@ -2348,10 +2346,15 @@ def select_dtypes(self, include=None, exclude=None): | |
4 True | ||
5 False | ||
""" | ||
include, exclude = include or (), exclude or () | ||
if not (is_list_like(include) and is_list_like(exclude)): | ||
raise TypeError('include and exclude must both be non-string' | ||
' sequences') | ||
|
||
# GH16855 - If either include or exclude is a non-None scalar then | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. comment not needed |
||
# convert to a tuple of length 1 and continue. | ||
# This allows, for example, df.select_dtypes(include='object'). | ||
if not is_list_like(include): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. use
and similar There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using this logic doesn't cover the case when |
||
include = (include,) if include is not None else () | ||
if not is_list_like(exclude): | ||
exclude = (exclude,) if exclude is not None else () | ||
|
||
selection = tuple(map(frozenset, (include, exclude))) | ||
|
||
if not any(selection): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -149,6 +149,10 @@ def test_select_dtypes_include(self): | |
ei = df[['k']] | ||
assert_frame_equal(ri, ei) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. add as a separate test, try both with include and exclude as scalars (separately) |
||
ri = df.select_dtypes(include='category') | ||
ei = df[['f']] | ||
assert_frame_equal(ri, ei) | ||
|
||
pytest.raises(NotImplementedError, | ||
lambda: df.select_dtypes(include=['period'])) | ||
|
||
|
@@ -162,6 +166,10 @@ def test_select_dtypes_exclude(self): | |
ee = df[['a', 'e']] | ||
assert_frame_equal(re, ee) | ||
|
||
re = df.select_dtypes(exclude=np.number) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. add this test in there |
||
ee = df[['a', 'e']] | ||
assert_frame_equal(re, ee) | ||
|
||
def test_select_dtypes_exclude_include(self): | ||
df = DataFrame({'a': list('abc'), | ||
'b': list(range(1, 4)), | ||
|
@@ -181,6 +189,12 @@ def test_select_dtypes_exclude_include(self): | |
e = df[['b', 'e']] | ||
assert_frame_equal(r, e) | ||
|
||
exclude = 'datetime' | ||
include = 'bool' | ||
r = df.select_dtypes(include=include, exclude=exclude) | ||
e = df[['e']] | ||
assert_frame_equal(r, e) | ||
|
||
def test_select_dtypes_not_an_attr_but_still_valid_dtype(self): | ||
df = DataFrame({'a': list('abc'), | ||
'b': list(range(1, 4)), | ||
|
@@ -205,18 +219,6 @@ def test_select_dtypes_empty(self): | |
'must be nonempty'): | ||
df.select_dtypes() | ||
|
||
def test_select_dtypes_raises_on_string(self): | ||
df = DataFrame({'a': list('abc'), 'b': list(range(1, 4))}) | ||
with tm.assert_raises_regex(TypeError, 'include and exclude ' | ||
'.+ non-'): | ||
df.select_dtypes(include='object') | ||
with tm.assert_raises_regex(TypeError, 'include and exclude ' | ||
'.+ non-'): | ||
df.select_dtypes(exclude='object') | ||
with tm.assert_raises_regex(TypeError, 'include and exclude ' | ||
'.+ non-'): | ||
df.select_dtypes(include=int, exclude='object') | ||
|
||
def test_select_dtypes_bad_datetime64(self): | ||
df = DataFrame({'a': list('abc'), | ||
'b': list(range(1, 4)), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a reason you changed style.ipynb?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It got changed automatically by something while I was trying to build the docs. I found it had actually been deleted at some point, so I restored it from a previous commit, but the spacing was different for some reason.