From 503f9fc6b6875e39583690ff9285bbdf7ab78c12 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sun, 19 Jul 2020 11:39:38 +0100 Subject: [PATCH 01/29] select dtypes --- pandas/core/frame.py | 8 ++++ .../extension/test_select_dtypes_numeric.py | 47 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 pandas/tests/extension/test_select_dtypes_numeric.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f52341ed782d8..3b9dd06356587 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3547,6 +3547,7 @@ def select_dtypes(self, include=None, exclude=None) -> "DataFrame": 4 True 1.0 5 False 2.0 """ + if not is_list_like(include): include = (include,) if include is not None else () if not is_list_like(exclude): @@ -3578,7 +3579,14 @@ def extract_unique_dtypes_from_dtypes_set( unique_dtype for unique_dtype in unique_dtypes if issubclass(unique_dtype.type, tuple(dtypes_set)) # type: ignore + or ( + np.number in dtypes_set + and is_extension_array_dtype(unique_dtype) + and unique_dtype._is_numeric + ) ] + if np.number in dtypes_set: + extracted_dtypes.extend([]) return extracted_dtypes unique_dtypes = self.dtypes.unique() diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py new file mode 100644 index 0000000000000..39b2e130fc89d --- /dev/null +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -0,0 +1,47 @@ +from pandas.api.extensions import ExtensionArray, ExtensionDtype +from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.dtypes import ExtensionDtype + +import pandas as pd +import numpy as np + + +class DummyDtype(ExtensionDtype): + type = int + + @property + def name(self): + return "Dummy" + + @property + def _is_numeric(self): + # type: () -> bool + return self._numeric + + +class DummyArray(ExtensionArray): + _dtype = DummyDtype() + + def __init__(self, data): + self.data = data + + def __array__(self, dtype): + return self.data + + @property + def dtype(self): + return self._dtype + + def __len__(self) -> int: + return len(self.data) + + def __getitem__(self, item): + pass + + +@pytest.mark.parametrize("numeric", [True, False]) +def test_select_dtypes_numeric(numeric): + da = DummyArray([1, 2]) + da._dtype._numeric = numeric + df = pd.DataFrame(da) + assert df.select_dtypes(np.number).shape == df.shape From ea1796f5be144878c299ddcea9805a0033837db6 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sun, 19 Jul 2020 12:07:56 +0100 Subject: [PATCH 02/29] flake8 --- .../extension/test_select_dtypes_numeric.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index 39b2e130fc89d..ec5c9f9d6cc2a 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -1,27 +1,25 @@ -from pandas.api.extensions import ExtensionArray, ExtensionDtype from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.dtypes import ExtensionDtype +import pytest import pandas as pd import numpy as np - class DummyDtype(ExtensionDtype): type = int - + @property def name(self): - return "Dummy" + return 'Dummy' @property def _is_numeric(self): # type: () -> bool return self._numeric - class DummyArray(ExtensionArray): _dtype = DummyDtype() - + def __init__(self, data): self.data = data @@ -31,17 +29,18 @@ def __array__(self, dtype): @property def dtype(self): return self._dtype - + def __len__(self) -> int: return len(self.data) - + def __getitem__(self, item): pass - @pytest.mark.parametrize("numeric", [True, False]) def test_select_dtypes_numeric(numeric): - da = DummyArray([1, 2]) + da = DummyArray([1,2]) da._dtype._numeric = numeric df = pd.DataFrame(da) assert df.select_dtypes(np.number).shape == df.shape + + \ No newline at end of file From 39323916a15ef68aebfe33568151819707168496 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sun, 19 Jul 2020 12:14:49 +0100 Subject: [PATCH 03/29] flake8 --- .../tests/extension/test_select_dtypes_numeric.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index ec5c9f9d6cc2a..c0464b23bde27 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -5,9 +5,10 @@ import pandas as pd import numpy as np + class DummyDtype(ExtensionDtype): type = int - + @property def name(self): return 'Dummy' @@ -17,9 +18,10 @@ def _is_numeric(self): # type: () -> bool return self._numeric + class DummyArray(ExtensionArray): _dtype = DummyDtype() - + def __init__(self, data): self.data = data @@ -29,18 +31,17 @@ def __array__(self, dtype): @property def dtype(self): return self._dtype - + def __len__(self) -> int: return len(self.data) - + def __getitem__(self, item): pass + @pytest.mark.parametrize("numeric", [True, False]) def test_select_dtypes_numeric(numeric): - da = DummyArray([1,2]) + da = DummyArray([1, 2]) da._dtype._numeric = numeric df = pd.DataFrame(da) assert df.select_dtypes(np.number).shape == df.shape - - \ No newline at end of file From 7cf6e3edaf05d7c5a36f8aeb121b53040960f6fe Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sun, 19 Jul 2020 12:20:30 +0100 Subject: [PATCH 04/29] black --- pandas/tests/extension/test_select_dtypes_numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index c0464b23bde27..abcf6ff963d1a 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -11,7 +11,7 @@ class DummyDtype(ExtensionDtype): @property def name(self): - return 'Dummy' + return "Dummy" @property def _is_numeric(self): From 0cb170afa05fe8bc65e2b16b82a5848d4158fdfe Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sun, 19 Jul 2020 12:43:10 +0100 Subject: [PATCH 05/29] checks --- .../tests/extension/test_select_dtypes_numeric.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index abcf6ff963d1a..aadafb228fadd 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -1,13 +1,14 @@ -from pandas.core.arrays import ExtensionArray -from pandas.core.dtypes.dtypes import ExtensionDtype - import pytest -import pandas as pd import numpy as np +import pandas as pd +from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.dtypes import ExtensionDtype + class DummyDtype(ExtensionDtype): type = int + _numeric = False @property def name(self): @@ -15,7 +16,6 @@ def name(self): @property def _is_numeric(self): - # type: () -> bool return self._numeric @@ -44,4 +44,7 @@ def test_select_dtypes_numeric(numeric): da = DummyArray([1, 2]) da._dtype._numeric = numeric df = pd.DataFrame(da) - assert df.select_dtypes(np.number).shape == df.shape + if numeric: + assert df.select_dtypes(np.number).shape == df.shape + else: + assert df.select_dtypes(np.number).shape != df.shape From 83c84badd8c6c4fb92b668e269b051ad01a5a072 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Fri, 31 Jul 2020 19:42:21 +0100 Subject: [PATCH 06/29] create compat func --- pandas/compat/numpy/__init__.py | 11 +++++++++++ pandas/core/frame.py | 9 +-------- .../tests/extension/test_select_dtypes_numeric.py | 13 +++++++------ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 789a4668b6fee..74e1d0f7917b9 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -5,6 +5,8 @@ import numpy as np +from pandas.core.dtypes.common import is_extension_array_dtype + # numpy versioning _np_version = np.__version__ _nlv = LooseVersion(_np_version) @@ -61,6 +63,15 @@ def np_array_datetime64_compat(arr, *args, **kwargs): return np.array(arr, *args, **kwargs) +def np_issubclass_compat(unique_dtype, dtypes_set): + if (issubclass(unique_dtype.type, tuple(dtypes_set)) # type: ignore + or ( + np.number in dtypes_set + and is_extension_array_dtype(unique_dtype) + and unique_dtype._is_numeric + )): + return True + return False __all__ = [ "np", diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3b9dd06356587..d827933102892 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3578,15 +3578,8 @@ def extract_unique_dtypes_from_dtypes_set( extracted_dtypes = [ unique_dtype for unique_dtype in unique_dtypes - if issubclass(unique_dtype.type, tuple(dtypes_set)) # type: ignore - or ( - np.number in dtypes_set - and is_extension_array_dtype(unique_dtype) - and unique_dtype._is_numeric - ) + if np_issubclass_compat(unique_dtype, dtypes_set) ] - if np.number in dtypes_set: - extracted_dtypes.extend([]) return extracted_dtypes unique_dtypes = self.dtypes.unique() diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index aadafb228fadd..3069ecf1113c6 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -39,12 +39,13 @@ def __getitem__(self, item): pass -@pytest.mark.parametrize("numeric", [True, False]) def test_select_dtypes_numeric(numeric): da = DummyArray([1, 2]) - da._dtype._numeric = numeric + da._dtype._numeric = True df = pd.DataFrame(da) - if numeric: - assert df.select_dtypes(np.number).shape == df.shape - else: - assert df.select_dtypes(np.number).shape != df.shape + assert df.select_dtypes(np.number).shape == df.shape + + da = DummyArray([1, 2]) + da._dtype._numeric = True + df = pd.DataFrame(da) + assert df.select_dtypes(np.number).shape != df.shape From 1087ff7f1cb5848dd0a1a772b038a66de095a07a Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Fri, 31 Jul 2020 19:46:48 +0100 Subject: [PATCH 07/29] create compat func --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d827933102892..6aa35622cd9eb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -60,6 +60,7 @@ from pandas.compat import PY37 from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv +from pandas.compat.numpy import np_issubclass_compat from pandas.util._decorators import ( Appender, Substitution, From 7fe272f33941fae2c49bbca42c9be7bbdce9877e Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Fri, 31 Jul 2020 21:43:14 +0100 Subject: [PATCH 08/29] create compat func --- pandas/tests/extension/test_select_dtypes_numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index 3069ecf1113c6..741eb1b662b00 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -39,7 +39,7 @@ def __getitem__(self, item): pass -def test_select_dtypes_numeric(numeric): +def test_select_dtypes_numeric(): da = DummyArray([1, 2]) da._dtype._numeric = True df = pd.DataFrame(da) From 77243fd18e26fdc2a517f4014d4748aa835b095a Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sat, 1 Aug 2020 09:11:33 +0100 Subject: [PATCH 09/29] test --- pandas/tests/extension/test_select_dtypes_numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index 741eb1b662b00..10c6675efcdac 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -46,6 +46,6 @@ def test_select_dtypes_numeric(): assert df.select_dtypes(np.number).shape == df.shape da = DummyArray([1, 2]) - da._dtype._numeric = True + da._dtype._numeric = False df = pd.DataFrame(da) assert df.select_dtypes(np.number).shape != df.shape From eb475a8308bdb622a6c343014e7d4678e71ffdc4 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sat, 1 Aug 2020 11:30:57 +0100 Subject: [PATCH 10/29] lint --- pandas/compat/numpy/__init__.py | 11 ++++++----- pandas/tests/extension/test_select_dtypes_numeric.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 74e1d0f7917b9..c3bf356ccb24f 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -63,13 +63,14 @@ def np_array_datetime64_compat(arr, *args, **kwargs): return np.array(arr, *args, **kwargs) + def np_issubclass_compat(unique_dtype, dtypes_set): if (issubclass(unique_dtype.type, tuple(dtypes_set)) # type: ignore - or ( - np.number in dtypes_set - and is_extension_array_dtype(unique_dtype) - and unique_dtype._is_numeric - )): + or ( + np.number in dtypes_set + and is_extension_array_dtype(unique_dtype) + and unique_dtype._is_numeric + )): return True return False diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index 10c6675efcdac..d33b7d542f659 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -44,7 +44,7 @@ def test_select_dtypes_numeric(): da._dtype._numeric = True df = pd.DataFrame(da) assert df.select_dtypes(np.number).shape == df.shape - + da = DummyArray([1, 2]) da._dtype._numeric = False df = pd.DataFrame(da) From 9b5db9a51a3f0ec68c3ee0b24588f38fa489a1ad Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sat, 1 Aug 2020 11:31:57 +0100 Subject: [PATCH 11/29] lint --- pandas/compat/numpy/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index c3bf356ccb24f..36d89d23e43d9 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -70,7 +70,7 @@ def np_issubclass_compat(unique_dtype, dtypes_set): np.number in dtypes_set and is_extension_array_dtype(unique_dtype) and unique_dtype._is_numeric - )): + )): return True return False From 9cbf294faed3e79c274fd5b3fc517339bff31e1b Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sun, 11 Oct 2020 17:57:01 +0100 Subject: [PATCH 12/29] remove import from pandas.core --- pandas/compat/numpy/__init__.py | 4 ++-- pandas/tests/extension/test_select_dtypes_numeric.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 36d89d23e43d9..f993f9200baec 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -5,7 +5,6 @@ import numpy as np -from pandas.core.dtypes.common import is_extension_array_dtype # numpy versioning _np_version = np.__version__ @@ -68,12 +67,13 @@ def np_issubclass_compat(unique_dtype, dtypes_set): if (issubclass(unique_dtype.type, tuple(dtypes_set)) # type: ignore or ( np.number in dtypes_set - and is_extension_array_dtype(unique_dtype) + and hasattr(unique_dtype, "_is_numeric" # is an extensionarray and unique_dtype._is_numeric )): return True return False + __all__ = [ "np", "_np_version", diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index d33b7d542f659..bfe823a8ff206 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -1,4 +1,3 @@ -import pytest import numpy as np import pandas as pd From 14fabfc553947e486b209b66b821e81a55dc8954 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Mon, 19 Oct 2020 12:48:59 +0100 Subject: [PATCH 13/29] move files, add docstring and examples --- pandas/compat/numpy/__init__.py | 11 ----- pandas/core/dtypes/common.py | 43 +++++++++++++++++ pandas/core/frame.py | 2 +- .../extension/test_select_dtypes_numeric.py | 15 +++--- .../tests/frame/methods/test_select_dtypes.py | 47 +++++++++++++++++++ 5 files changed, 98 insertions(+), 20 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index da958c7a69ed9..afbe39e92f7ed 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -63,17 +63,6 @@ def np_array_datetime64_compat(arr, *args, **kwargs): return np.array(arr, *args, **kwargs) -def np_issubclass_compat(unique_dtype, dtypes_set): - if (issubclass(unique_dtype.type, tuple(dtypes_set)) # type: ignore - or ( - np.number in dtypes_set - and hasattr(unique_dtype, "_is_numeric" # is an extensionarray - and unique_dtype._is_numeric - )): - return True - return False - - __all__ = [ "np", "_np_version", diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 14184f044ae95..cd56040388067 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1231,6 +1231,49 @@ def needs_i8_conversion(arr_or_dtype) -> bool: ) +def np_issubclass_compat(unique_dtype, dtypes_set) -> bool: + """ + Check whether the provided dtype is a subclass of, or has an attribute + (e.g. _is_numeric) indiciating it is a subclass of any of the dtypes in + dtypes_set. + + Parameters + ---------- + unique_dtype : dtype + The dtype to check. + dtypes_set : array-like + The dtypes to check unique_dtype is a sublass of. + + Returns + ------- + boolean + Whether or not the unique_dtype is a subclass of dtype_set. + + Examples + -------- + >>> np_issubclass_compat(pd.Int16Dtype(), [np.bool_, np.float]) + False + >>> np_issubclass_compat(pd.Int16Dtype(), [np.integer]) + True + >>> np_issubclass_compat(pd.BooleanDtype(), [np.bool_]) + True + >>> np_issubclass_compat(pd.Float64Dtype(), [np.float]) + True + >>> np_issubclass_compat(pd.Float64Dtype(), [np.number]) + True + >>> import pint_pandas + >>> np_issubclass_compat(pint_pandas.PintType("meter"), [np.number]) + True + """ + if issubclass(unique_dtype.type, tuple(dtypes_set)) or ( # type: ignore + np.number in dtypes_set + and hasattr(unique_dtype, "_is_numeric") # is an extensionarray + and unique_dtype._is_numeric + ): + return True + return False + + def is_numeric_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a numeric dtype. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d202a184f9701..0a018b82501a6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -62,7 +62,6 @@ ) from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv -from pandas.compat.numpy import np_issubclass_compat from pandas.util._decorators import ( Appender, Substitution, @@ -114,6 +113,7 @@ is_sequence, needs_i8_conversion, pandas_dtype, + np_issubclass_compat, ) from pandas.core.dtypes.missing import isna, notna diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index bfe823a8ff206..e64f9a7475cd5 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -7,7 +7,9 @@ class DummyDtype(ExtensionDtype): type = int - _numeric = False + + def __init__(self, numeric): + self._numeric = numeric @property def name(self): @@ -19,10 +21,9 @@ def _is_numeric(self): class DummyArray(ExtensionArray): - _dtype = DummyDtype() - - def __init__(self, data): + def __init__(self, data, dtype): self.data = data + self._dtype = dtype def __array__(self, dtype): return self.data @@ -39,12 +40,10 @@ def __getitem__(self, item): def test_select_dtypes_numeric(): - da = DummyArray([1, 2]) - da._dtype._numeric = True + da = DummyArray([1, 2], dtype=DummyDtype(numeric=True)) df = pd.DataFrame(da) assert df.select_dtypes(np.number).shape == df.shape - da = DummyArray([1, 2]) - da._dtype._numeric = False + da = DummyArray([1, 2], dtype=DummyDtype(numeric=False)) df = pd.DataFrame(da) assert df.select_dtypes(np.number).shape != df.shape diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 4599761909c33..5cdb1d6bbf7c8 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -5,6 +5,43 @@ from pandas import DataFrame, Timestamp import pandas._testing as tm +from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.dtypes import ExtensionDtype + + +class DummyDtype(ExtensionDtype): + type = int + + def __init__(self, numeric): + self._numeric = numeric + + @property + def name(self): + return "Dummy" + + @property + def _is_numeric(self): + return self._numeric + + +class DummyArray(ExtensionArray): + def __init__(self, data, dtype): + self.data = data + self._dtype = dtype + + def __array__(self, dtype): + return self.data + + @property + def dtype(self): + return self._dtype + + def __len__(self) -> int: + return len(self.data) + + def __getitem__(self, item): + pass + class TestSelectDtypes: def test_select_dtypes_include_using_list_like(self): @@ -324,3 +361,13 @@ def test_select_dtypes_typecodes(self): expected = df FLOAT_TYPES = list(np.typecodes["AllFloat"]) tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) + + def test_select_dtypes_numeric(self): + # GH 35340 + da = DummyArray([1, 2], dtype=DummyDtype(numeric=True)) + df = pd.DataFrame(da) + assert df.select_dtypes(np.number).shape == df.shape + + da = DummyArray([1, 2], dtype=DummyDtype(numeric=False)) + df = pd.DataFrame(da) + assert df.select_dtypes(np.number).shape != df.shape From da6cf688d25c74f2a1a012a019c134cbedd37e97 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Mon, 19 Oct 2020 13:22:14 +0100 Subject: [PATCH 14/29] isort, remove pint_pandas example --- pandas/compat/numpy/__init__.py | 1 - pandas/tests/extension/test_select_dtypes_numeric.py | 3 ++- pandas/tests/frame/methods/test_select_dtypes.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index afbe39e92f7ed..a2444b7ba5a0d 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -5,7 +5,6 @@ import numpy as np - # numpy versioning _np_version = np.__version__ _nlv = LooseVersion(_np_version) diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py index e64f9a7475cd5..b65ef9f3b2b03 100644 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ b/pandas/tests/extension/test_select_dtypes_numeric.py @@ -1,8 +1,9 @@ import numpy as np +from pandas.core.dtypes.dtypes import ExtensionDtype + import pandas as pd from pandas.core.arrays import ExtensionArray -from pandas.core.dtypes.dtypes import ExtensionDtype class DummyDtype(ExtensionDtype): diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 5cdb1d6bbf7c8..509c5c754af34 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -1,12 +1,12 @@ import numpy as np import pytest +from pandas.core.dtypes.dtypes import ExtensionDtype + import pandas as pd from pandas import DataFrame, Timestamp import pandas._testing as tm - from pandas.core.arrays import ExtensionArray -from pandas.core.dtypes.dtypes import ExtensionDtype class DummyDtype(ExtensionDtype): From 38013100bdc3cc9cc8b2c0f9e9bf82c6b2a01170 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Mon, 19 Oct 2020 14:10:08 +0100 Subject: [PATCH 15/29] isort, remove pint_pandas example --- pandas/core/dtypes/common.py | 3 --- pandas/core/frame.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index cd56040388067..dda2b60807334 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1261,9 +1261,6 @@ def np_issubclass_compat(unique_dtype, dtypes_set) -> bool: True >>> np_issubclass_compat(pd.Float64Dtype(), [np.number]) True - >>> import pint_pandas - >>> np_issubclass_compat(pint_pandas.PintType("meter"), [np.number]) - True """ if issubclass(unique_dtype.type, tuple(dtypes_set)) or ( # type: ignore np.number in dtypes_set diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0a018b82501a6..50855769f17ec 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -112,8 +112,8 @@ is_scalar, is_sequence, needs_i8_conversion, - pandas_dtype, np_issubclass_compat, + pandas_dtype, ) from pandas.core.dtypes.missing import isna, notna From 9e23d20be36151f234548ec8c2ba9f530995e862 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Mon, 19 Oct 2020 14:14:06 +0100 Subject: [PATCH 16/29] remove unused type comment@ --- pandas/core/dtypes/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index dda2b60807334..2469f2a1927ed 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1262,7 +1262,7 @@ def np_issubclass_compat(unique_dtype, dtypes_set) -> bool: >>> np_issubclass_compat(pd.Float64Dtype(), [np.number]) True """ - if issubclass(unique_dtype.type, tuple(dtypes_set)) or ( # type: ignore + if issubclass(unique_dtype.type, tuple(dtypes_set)) or ( np.number in dtypes_set and hasattr(unique_dtype, "_is_numeric") # is an extensionarray and unique_dtype._is_numeric From 41fd75e4930bc0c9d1dd1cab314c657bc96996fb Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Wed, 2 Dec 2020 13:36:08 -0500 Subject: [PATCH 17/29] pd namespace usage --- pandas/tests/frame/methods/test_select_dtypes.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 5882170ecfb57..f2df4ac484070 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -362,10 +362,11 @@ def test_select_dtypes_typecodes(self): def test_select_dtypes_numeric(self): # GH 35340 + da = DummyArray([1, 2], dtype=DummyDtype(numeric=True)) - df = pd.DataFrame(da) + df = DataFrame(da) assert df.select_dtypes(np.number).shape == df.shape da = DummyArray([1, 2], dtype=DummyDtype(numeric=False)) - df = pd.DataFrame(da) + df = DataFrame(da) assert df.select_dtypes(np.number).shape != df.shape From 85d0ae82e2a23c5ef74a0f4e4ed7d8b9212ae544 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Wed, 2 Dec 2020 13:40:24 -0500 Subject: [PATCH 18/29] remove tests from old location --- .../extension/test_select_dtypes_numeric.py | 50 ------------------- 1 file changed, 50 deletions(-) delete mode 100644 pandas/tests/extension/test_select_dtypes_numeric.py diff --git a/pandas/tests/extension/test_select_dtypes_numeric.py b/pandas/tests/extension/test_select_dtypes_numeric.py deleted file mode 100644 index b65ef9f3b2b03..0000000000000 --- a/pandas/tests/extension/test_select_dtypes_numeric.py +++ /dev/null @@ -1,50 +0,0 @@ -import numpy as np - -from pandas.core.dtypes.dtypes import ExtensionDtype - -import pandas as pd -from pandas.core.arrays import ExtensionArray - - -class DummyDtype(ExtensionDtype): - type = int - - def __init__(self, numeric): - self._numeric = numeric - - @property - def name(self): - return "Dummy" - - @property - def _is_numeric(self): - return self._numeric - - -class DummyArray(ExtensionArray): - def __init__(self, data, dtype): - self.data = data - self._dtype = dtype - - def __array__(self, dtype): - return self.data - - @property - def dtype(self): - return self._dtype - - def __len__(self) -> int: - return len(self.data) - - def __getitem__(self, item): - pass - - -def test_select_dtypes_numeric(): - da = DummyArray([1, 2], dtype=DummyDtype(numeric=True)) - df = pd.DataFrame(da) - assert df.select_dtypes(np.number).shape == df.shape - - da = DummyArray([1, 2], dtype=DummyDtype(numeric=False)) - df = pd.DataFrame(da) - assert df.select_dtypes(np.number).shape != df.shape From 4445207989f073ba0f32eef3b93a0446d048925b Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Wed, 2 Dec 2020 13:40:43 -0500 Subject: [PATCH 19/29] unused import --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e527e981dc59b..0c8df07efa42c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -114,7 +114,6 @@ is_object_dtype, is_scalar, is_sequence, - needs_i8_conversion, np_issubclass_compat, pandas_dtype, ) From 31ef92f9513063d7fd4f8cf8d57009765a69ab51 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Wed, 2 Dec 2020 13:43:32 -0500 Subject: [PATCH 20/29] consolidate return --- pandas/core/dtypes/common.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 82d03254b40ca..e1c75c698985e 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1262,13 +1262,11 @@ def np_issubclass_compat(unique_dtype, dtypes_set) -> bool: >>> np_issubclass_compat(pd.Float64Dtype(), [np.number]) True """ - if issubclass(unique_dtype.type, tuple(dtypes_set)) or ( + return issubclass(unique_dtype.type, tuple(dtypes_set)) or ( np.number in dtypes_set and hasattr(unique_dtype, "_is_numeric") # is an extensionarray and unique_dtype._is_numeric - ): - return True - return False + ) def is_numeric_dtype(arr_or_dtype) -> bool: From 5d9d2c2411add095f741cbb1eb1c38cf5d938d77 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Wed, 2 Dec 2020 13:54:16 -0500 Subject: [PATCH 21/29] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 5c7ccd256a84c..3512dc96ff1fa 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -622,6 +622,7 @@ Numeric - Bug in :meth:`DataFrame.std` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`37392`) - Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with ``datetime64`` dtype and ``skipna=False`` (:issue:`36907`) - Bug in :meth:`DataFrame.idxmax` and :meth:`DataFrame.idxmin` with mixed dtypes incorrectly raising ``TypeError`` (:issue:`38195`) +- Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` now retains numeric ``ExtensionDtype`` columns (:issue:`35340`) Conversion ^^^^^^^^^^ From 5d60908d97711303b587eb5c9c23176a3e0ba8c0 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 3 Dec 2020 13:01:18 -0500 Subject: [PATCH 22/29] minimize diff --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0c8df07efa42c..f0e6daf158b4c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3682,7 +3682,6 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame: 4 True 1.0 5 False 2.0 """ - if not is_list_like(include): include = (include,) if include is not None else () if not is_list_like(exclude): From 75ea3a21e59144f6307d5437eaa1db25aed50529 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sat, 5 Dec 2020 01:07:21 -0500 Subject: [PATCH 23/29] don't make separate method + ignore mypy --- pandas/core/dtypes/common.py | 38 ------------------------------------ pandas/core/frame.py | 12 ++++++++++-- 2 files changed, 10 insertions(+), 40 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index e1c75c698985e..b4f6d587c6642 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1231,44 +1231,6 @@ def needs_i8_conversion(arr_or_dtype) -> bool: ) -def np_issubclass_compat(unique_dtype, dtypes_set) -> bool: - """ - Check whether the provided dtype is a subclass of, or has an attribute - (e.g. _is_numeric) indiciating it is a subclass of any of the dtypes in - dtypes_set. - - Parameters - ---------- - unique_dtype : dtype - The dtype to check. - dtypes_set : array-like - The dtypes to check unique_dtype is a sublass of. - - Returns - ------- - boolean - Whether or not the unique_dtype is a subclass of dtype_set. - - Examples - -------- - >>> np_issubclass_compat(pd.Int16Dtype(), [np.bool_, np.float]) - False - >>> np_issubclass_compat(pd.Int16Dtype(), [np.integer]) - True - >>> np_issubclass_compat(pd.BooleanDtype(), [np.bool_]) - True - >>> np_issubclass_compat(pd.Float64Dtype(), [np.float]) - True - >>> np_issubclass_compat(pd.Float64Dtype(), [np.number]) - True - """ - return issubclass(unique_dtype.type, tuple(dtypes_set)) or ( - np.number in dtypes_set - and hasattr(unique_dtype, "_is_numeric") # is an extensionarray - and unique_dtype._is_numeric - ) - - def is_numeric_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a numeric dtype. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f0e6daf158b4c..c6a22eee963af 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -114,7 +114,6 @@ is_object_dtype, is_scalar, is_sequence, - np_issubclass_compat, pandas_dtype, ) from pandas.core.dtypes.missing import isna, notna @@ -3712,7 +3711,16 @@ def extract_unique_dtypes_from_dtypes_set( extracted_dtypes = [ unique_dtype for unique_dtype in unique_dtypes - if np_issubclass_compat(unique_dtype, dtypes_set) + if ( + issubclass( + unique_dtype.type, tuple(dtypes_set) # type: ignore[arg-type] + ) + or ( + np.number in dtypes_set + and hasattr(unique_dtype, "_is_numeric") # is an extensionarray + and unique_dtype._is_numeric + ) + ) ] return extracted_dtypes From 1df34f55971925cc015d2d2fdf4fdffd39788896 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 13 Dec 2020 17:00:37 -0500 Subject: [PATCH 24/29] review comment --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5fb8bbeac978d..bf1e84597c1a7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3717,7 +3717,7 @@ def extract_unique_dtypes_from_dtypes_set( ) or ( np.number in dtypes_set - and hasattr(unique_dtype, "_is_numeric") # is an extensionarray + and is_extension_array_dtype(unique_dtype) and unique_dtype._is_numeric ) ) From ce63fa74b917bfea05bc69ca0718b6e3e465ef2c Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 13 Dec 2020 17:29:30 -0500 Subject: [PATCH 25/29] move whatsnew to 1.3 --- doc/source/whatsnew/v1.2.0.rst | 1 - doc/source/whatsnew/v1.3.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index c9215a7106389..bc7f5b8174573 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -640,7 +640,6 @@ Numeric - Bug in :meth:`DataFrame.std` with ``timedelta64`` dtype and ``skipna=False`` (:issue:`37392`) - Bug in :meth:`DataFrame.min` and :meth:`DataFrame.max` with ``datetime64`` dtype and ``skipna=False`` (:issue:`36907`) - Bug in :meth:`DataFrame.idxmax` and :meth:`DataFrame.idxmin` with mixed dtypes incorrectly raising ``TypeError`` (:issue:`38195`) -- Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` now retains numeric ``ExtensionDtype`` columns (:issue:`35340`) Conversion ^^^^^^^^^^ diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 70b07e08cf760..301ce483f9d00 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -174,7 +174,7 @@ Timezones Numeric ^^^^^^^ - Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`) -- +- Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` now retains numeric ``ExtensionDtype`` columns (:issue:`35340`) - Conversion From 93c7403d80161fc62505506084bbf04ffb0d74e0 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 13 Dec 2020 17:35:37 -0500 Subject: [PATCH 26/29] parametrize test --- .../tests/frame/methods/test_select_dtypes.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index f2df4ac484070..072d1f17cf139 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -360,13 +360,20 @@ def test_select_dtypes_typecodes(self): FLOAT_TYPES = list(np.typecodes["AllFloat"]) tm.assert_frame_equal(df.select_dtypes(FLOAT_TYPES), expected) - def test_select_dtypes_numeric(self): + @pytest.mark.parametrize( + "arr,expected", + ( + (DummyArray([1, 2], dtype=DummyDtype(numeric=True)), True), + (DummyArray([1, 2], dtype=DummyDtype(numeric=False)), False), + ), + ) + def test_select_dtypes_numeric(self, arr, expected): # GH 35340 - da = DummyArray([1, 2], dtype=DummyDtype(numeric=True)) - df = DataFrame(da) - assert df.select_dtypes(np.number).shape == df.shape + df = DataFrame(arr) + is_selected = df.select_dtypes(np.number).shape == df.shape + assert is_selected == expected - da = DummyArray([1, 2], dtype=DummyDtype(numeric=False)) - df = DataFrame(da) - assert df.select_dtypes(np.number).shape != df.shape + # da = DummyArray([1, 2], dtype=DummyDtype(numeric=False)) + # df = DataFrame(da) + # assert df.select_dtypes(np.number).shape != df.shape From ef095b5483e92b1111ebac4c7aca39444f8f924e Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 13 Dec 2020 17:38:05 -0500 Subject: [PATCH 27/29] add --- pandas/tests/frame/methods/test_select_dtypes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 072d1f17cf139..afdab30397f72 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -363,6 +363,9 @@ def test_select_dtypes_typecodes(self): @pytest.mark.parametrize( "arr,expected", ( + (np.array([1, 2], dtype=np.int32), True), + (pd.array([1, 2], dtype="Int32"), True), + (pd.array(["a", "b"], dtype="string"), False), (DummyArray([1, 2], dtype=DummyDtype(numeric=True)), True), (DummyArray([1, 2], dtype=DummyDtype(numeric=False)), False), ), From 9bbd9802a3b6fdc042be0b7ebb2af12a00c54147 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 14 Dec 2020 11:05:47 -0500 Subject: [PATCH 28/29] review comment --- pandas/core/frame.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 658fa41ba2f05..baf79f2872b44 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3722,8 +3722,7 @@ def extract_unique_dtypes_from_dtypes_set( ) or ( np.number in dtypes_set - and is_extension_array_dtype(unique_dtype) - and unique_dtype._is_numeric + and getattr(unique_dtype, "_is_numeric", False) ) ) ] From 57b61267608780e9e7d5ccd9991687de18a92591 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 14 Dec 2020 11:06:59 -0500 Subject: [PATCH 29/29] remove commented code --- pandas/tests/frame/methods/test_select_dtypes.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index afdab30397f72..f2dbe4a799a17 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -376,7 +376,3 @@ def test_select_dtypes_numeric(self, arr, expected): df = DataFrame(arr) is_selected = df.select_dtypes(np.number).shape == df.shape assert is_selected == expected - - # da = DummyArray([1, 2], dtype=DummyDtype(numeric=False)) - # df = DataFrame(da) - # assert df.select_dtypes(np.number).shape != df.shape