From e8e5308993d17b75d461d68e8457b231dea17e7e Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sat, 29 Jun 2019 13:26:25 +0100 Subject: [PATCH 1/8] prevent filtering extensionarray columns when plotting --- pandas/plotting/_matplotlib/core.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 5fb4d201223bd..0c5ea63c379bd 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -330,13 +330,12 @@ def _compute_plot_data(self): data = data.to_frame(name=label) # GH16953, _convert is needed as fallback, for ``Series`` - # with ``dtype == object`` + # with ``dtype == object`` data = data._convert(datetime=True, timedelta=True) - numeric_data = data.select_dtypes(include=[np.number, - "datetime", - "datetimetz", - "timedelta"]) - + # Numeric categorical data gets caught by including np.number in select_dtypes + numeric_dtypes = [dtype for dtype in set(data.dtypes) + if hasattr(dtype, "_is_numeric") and dtype._is_numeric and not dtype.name is "category"] + numeric_data = data.select_dtypes(include=[np.number] + numeric_dtypes) try: is_empty = numeric_data.empty except AttributeError: From 910cefc5ecf9e6df750c421d70a50adb20686e83 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sat, 29 Jun 2019 13:38:26 +0100 Subject: [PATCH 2/8] lint --- pandas/plotting/_matplotlib/core.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 0c5ea63c379bd..7c426053edf87 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -330,11 +330,14 @@ def _compute_plot_data(self): data = data.to_frame(name=label) # GH16953, _convert is needed as fallback, for ``Series`` - # with ``dtype == object`` + # with ``dtype == object`` data = data._convert(datetime=True, timedelta=True) - # Numeric categorical data gets caught by including np.number in select_dtypes + # Numeric categorical data gets caught by including np.number + # in select_dtypes numeric_dtypes = [dtype for dtype in set(data.dtypes) - if hasattr(dtype, "_is_numeric") and dtype._is_numeric and not dtype.name is "category"] + if hasattr(dtype, "_is_numeric") and + dtype._is_numeric and + dtype.name is not "category"] numeric_data = data.select_dtypes(include=[np.number] + numeric_dtypes) try: is_empty = numeric_data.empty From 7004ef0803628aaab245222e3fc42a4f5934a8f4 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sat, 29 Jun 2019 13:43:52 +0100 Subject: [PATCH 3/8] lint --- pandas/plotting/_matplotlib/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 7c426053edf87..2183f1b8a15e1 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -334,9 +334,9 @@ def _compute_plot_data(self): data = data._convert(datetime=True, timedelta=True) # Numeric categorical data gets caught by including np.number # in select_dtypes - numeric_dtypes = [dtype for dtype in set(data.dtypes) - if hasattr(dtype, "_is_numeric") and - dtype._is_numeric and + numeric_dtypes = [dtype for dtype in set(data.dtypes) + if hasattr(dtype, "_is_numeric") and + dtype._is_numeric and dtype.name is not "category"] numeric_data = data.select_dtypes(include=[np.number] + numeric_dtypes) try: From 9a9d88d823a9e9f6b2c8e7f2b683c7e9071b1774 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sat, 29 Jun 2019 13:48:22 +0100 Subject: [PATCH 4/8] reference to issue --- pandas/plotting/_matplotlib/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 2183f1b8a15e1..d93ab5bd522eb 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -332,6 +332,7 @@ def _compute_plot_data(self): # GH16953, _convert is needed as fallback, for ``Series`` # with ``dtype == object`` data = data._convert(datetime=True, timedelta=True) + # GH26173, don't filter out numeric extension types # Numeric categorical data gets caught by including np.number # in select_dtypes numeric_dtypes = [dtype for dtype in set(data.dtypes) From 63c66956a9f45f9fc99fcab59ec14bb90511922c Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sat, 29 Jun 2019 21:39:49 +0100 Subject: [PATCH 5/8] move to select_dtypes --- pandas/core/dtypes/common.py | 5 ++++- pandas/core/frame.py | 19 ++++++++++++++++--- pandas/plotting/_matplotlib/core.py | 13 +++++-------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index b2b74e2a70ca9..aeae503d87452 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1576,7 +1576,10 @@ def is_numeric_dtype(arr_or_dtype): >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) False """ - + if is_extension_array_dtype(arr_or_dtype): + dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) + return dtype._is_numeric + return _is_dtype_type( arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_)) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index df7003ecf000e..cbf39a7812c9d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -41,8 +41,8 @@ is_bool_dtype, is_datetime64_any_dtype, is_datetime64tz_dtype, is_dict_like, is_dtype_equal, is_extension_array_dtype, is_extension_type, is_float_dtype, is_integer, is_integer_dtype, is_iterator, is_list_like, - is_named_tuple, is_nested_list_like, is_object_dtype, is_scalar, - is_sequence, needs_i8_conversion) + is_named_tuple, is_nested_list_like, is_numeric_dtype, is_object_dtype, + is_scalar, is_sequence, needs_i8_conversion) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, ABCMultiIndex, ABCSeries) from pandas.core.dtypes.missing import isna, notna @@ -3264,7 +3264,20 @@ def _get_info_slice(obj, indexer): lambda x: frozenset(map(infer_dtype_from_object, x)), selection) for dtypes in (include, exclude): invalidate_string_dtypes(dtypes) - + + def add_extension_types(dtypes, search_dtype, func): + """Adds bool or numeric extension types to include/exclude""" + extension_dtypes = [dtype.type for dtype in self.dtypes + if func(dtype)] + if search_dtype in dtypes: + return frozenset(dtypes.union(extension_dtypes)) + else: + return dtypes + + include = add_extension_types(include, np.number, is_numeric_dtype) + exclude = add_extension_types(exclude, np.number, is_numeric_dtype) + include = add_extension_types(include, np.bool_, is_bool_dtype) + exclude = add_extension_types(exclude, np.bool_, is_bool_dtype) # can't both include AND exclude! if not include.isdisjoint(exclude): raise ValueError('include and exclude overlap on {inc_ex}'.format( diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index d93ab5bd522eb..5fb4d201223bd 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -332,14 +332,11 @@ def _compute_plot_data(self): # GH16953, _convert is needed as fallback, for ``Series`` # with ``dtype == object`` data = data._convert(datetime=True, timedelta=True) - # GH26173, don't filter out numeric extension types - # Numeric categorical data gets caught by including np.number - # in select_dtypes - numeric_dtypes = [dtype for dtype in set(data.dtypes) - if hasattr(dtype, "_is_numeric") and - dtype._is_numeric and - dtype.name is not "category"] - numeric_data = data.select_dtypes(include=[np.number] + numeric_dtypes) + numeric_data = data.select_dtypes(include=[np.number, + "datetime", + "datetimetz", + "timedelta"]) + try: is_empty = numeric_data.empty except AttributeError: From ce684d5af8b979c22f3f5744ae2e365be1441320 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sat, 29 Jun 2019 21:42:27 +0100 Subject: [PATCH 6/8] lint --- pandas/core/dtypes/common.py | 1 - pandas/core/frame.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index aeae503d87452..ae0744f25c57d 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1579,7 +1579,6 @@ def is_numeric_dtype(arr_or_dtype): if is_extension_array_dtype(arr_or_dtype): dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) return dtype._is_numeric - return _is_dtype_type( arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_)) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cbf39a7812c9d..f41827c9088b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3264,16 +3264,14 @@ def _get_info_slice(obj, indexer): lambda x: frozenset(map(infer_dtype_from_object, x)), selection) for dtypes in (include, exclude): invalidate_string_dtypes(dtypes) - def add_extension_types(dtypes, search_dtype, func): """Adds bool or numeric extension types to include/exclude""" extension_dtypes = [dtype.type for dtype in self.dtypes - if func(dtype)] + if func(dtype)] if search_dtype in dtypes: return frozenset(dtypes.union(extension_dtypes)) else: return dtypes - include = add_extension_types(include, np.number, is_numeric_dtype) exclude = add_extension_types(exclude, np.number, is_numeric_dtype) include = add_extension_types(include, np.bool_, is_bool_dtype) From 4a5bd358ab111785e2eb9790972217b557128e76 Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sat, 29 Jun 2019 21:43:58 +0100 Subject: [PATCH 7/8] lint --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f41827c9088b9..bec820129d3bc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3264,6 +3264,7 @@ def _get_info_slice(obj, indexer): lambda x: frozenset(map(infer_dtype_from_object, x)), selection) for dtypes in (include, exclude): invalidate_string_dtypes(dtypes) + def add_extension_types(dtypes, search_dtype, func): """Adds bool or numeric extension types to include/exclude""" extension_dtypes = [dtype.type for dtype in self.dtypes From 16c7f9f5db62e39b46a4f75e1261453e9585c2eb Mon Sep 17 00:00:00 2001 From: andrewgsavage Date: Sun, 30 Jun 2019 08:12:21 +0100 Subject: [PATCH 8/8] only add ext dtypes --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bec820129d3bc..254f096fb8020 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3268,7 +3268,8 @@ def _get_info_slice(obj, indexer): def add_extension_types(dtypes, search_dtype, func): """Adds bool or numeric extension types to include/exclude""" extension_dtypes = [dtype.type for dtype in self.dtypes - if func(dtype)] + if is_extension_array_dtype(dtype) and + func(dtype)] if search_dtype in dtypes: return frozenset(dtypes.union(extension_dtypes)) else: