From f1b940863e174dfe0cbfbd4aae63898bc3b7208f Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Mon, 15 Jul 2019 11:37:17 +0800 Subject: [PATCH 1/7] infer integer-na in infer_dtype --- pandas/_libs/lib.pyx | 25 +++++++++++++++++++++++-- pandas/core/arrays/integer.py | 1 + pandas/core/indexes/base.py | 8 +++++--- pandas/core/indexes/datetimes.py | 1 + pandas/core/indexes/timedeltas.py | 1 + 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 1936404b75602..57edd488be4fb 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1250,7 +1250,10 @@ def infer_dtype(value: object, skipna: object=None) -> str: if is_integer_array(values): return 'integer' elif is_integer_float_array(values): - return 'mixed-integer-float' + if is_integer_na_array(values): + return 'integer-na' + else: + return 'mixed-integer-float' return 'mixed-integer' elif PyDateTime_Check(val): @@ -1275,7 +1278,10 @@ def infer_dtype(value: object, skipna: object=None) -> str: if is_float_array(values): return 'floating' elif is_integer_float_array(values): - return 'mixed-integer-float' + if is_integer_na_array(values): + return 'integer-na' + else: + return 'mixed-integer-float' elif util.is_bool_object(val): if is_bool_array(values, skipna=skipna): @@ -1511,6 +1517,21 @@ cpdef bint is_integer_array(ndarray values): return validator.validate(values) +cdef class IntegerNaValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_integer_object(value) or util.is_nan(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.integer) + + +cdef bint is_integer_na_array(ndarray values): + cdef: + IntegerNaValidator validator = IntegerNaValidator(len(values), + values.dtype) + return validator.validate(values) + + cdef class IntegerFloatValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return util.is_integer_object(value) or util.is_float_object(value) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 867122964fe59..eca3d257d68fe 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -186,6 +186,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): "floating", "integer", "mixed-integer", + "integer-na", "mixed-integer-float", ]: raise TypeError( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e084f99ec5a2c..f384f8753f226 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -472,7 +472,7 @@ def __new__( pass return Index(subarr, copy=copy, dtype=object, name=name) - elif inferred in ["floating", "mixed-integer-float"]: + elif inferred in ["floating", "mixed-integer-float", "integer-na"]: from .numeric import Float64Index return Float64Index(subarr, copy=copy, name=name) @@ -1810,7 +1810,7 @@ def is_integer(self): return self.inferred_type in ["integer"] def is_floating(self): - return self.inferred_type in ["floating", "mixed-integer-float"] + return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"] def is_numeric(self): return self.inferred_type in ["integer", "floating"] @@ -3119,6 +3119,7 @@ def _convert_scalar_indexer(self, key, kind=None): if self.inferred_type not in [ "floating", "mixed-integer-float", + "integer-na", "string", "unicode", "mixed", @@ -3196,7 +3197,8 @@ def is_int(v): self.get_loc(stop) is_positional = False except KeyError: - if self.inferred_type == "mixed-integer-float": + if (self.inferred_type == "mixed-integer-float" or + self.inferred_type == "integer-na"): raise if is_null_slicer: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5024eebe03bb4..be8aefb448311 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -778,6 +778,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) not in ( "floating", "integer", + "integer-na", "mixed-integer", "mixed-integer-float", "mixed", diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ecadd11894bfb..77f7e959d6478 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -727,6 +727,7 @@ def _is_convertible_to_index(other): "floating", "mixed-integer", "integer", + "integer-na", "mixed-integer-float", "mixed", ): From 514a7c0ec5c4f72a551f55eb7c2d9d7974f28739 Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Mon, 15 Jul 2019 16:34:49 +0800 Subject: [PATCH 2/7] add test for integer-na --- pandas/core/indexes/base.py | 6 ++++-- pandas/tests/dtypes/test_inference.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f384f8753f226..a1d4b5f3ea035 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3197,8 +3197,10 @@ def is_int(v): self.get_loc(stop) is_positional = False except KeyError: - if (self.inferred_type == "mixed-integer-float" or - self.inferred_type == "integer-na"): + if ( + self.inferred_type == "mixed-integer-float" + or self.inferred_type == "integer-na" + ): raise if is_null_slicer: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 0b440e0186fbc..dad8256eef591 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -577,6 +577,16 @@ def test_integers(self): result = lib.infer_dtype(arr, skipna=True) assert result == "integer" + # GH 27392 + def test_integer_na(self): + arr = np.array([1, 2, np.nan, np.nan, 3], dtype="O") + result = lib.infer_dtype(arr, skipna=False) + assert result == "integer-na" + + arr = np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O") + result = lib.infer_dtype(arr, skipna=False) + assert result == "integer-na" + def test_deprecation(self): # GH 24050 arr = np.array([1, 2, 3], dtype=object) From fdcaf6e1f334b7130bd29e9e50a8e4ff4863a7cf Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Tue, 23 Jul 2019 11:27:49 +0800 Subject: [PATCH 3/7] make required changes --- pandas/_libs/lib.pyx | 6 ++---- pandas/core/indexes/base.py | 6 ++---- pandas/tests/dtypes/test_inference.py | 14 ++++++++------ 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 57edd488be4fb..059728a236d55 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1519,10 +1519,8 @@ cpdef bint is_integer_array(ndarray values): cdef class IntegerNaValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: - return util.is_integer_object(value) or util.is_nan(value) - - cdef inline bint is_array_typed(self) except -1: - return issubclass(self.dtype.type, np.integer) + return util.is_integer_object(value) or (util.is_nan(value) + and util.is_float_object(value)) cdef bint is_integer_na_array(ndarray values): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a1d4b5f3ea035..3d942bc5627b6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -473,6 +473,7 @@ def __new__( return Index(subarr, copy=copy, dtype=object, name=name) elif inferred in ["floating", "mixed-integer-float", "integer-na"]: + # TODO: Returns IntegerArray for integer-na case in the future from .numeric import Float64Index return Float64Index(subarr, copy=copy, name=name) @@ -3197,10 +3198,7 @@ def is_int(v): self.get_loc(stop) is_positional = False except KeyError: - if ( - self.inferred_type == "mixed-integer-float" - or self.inferred_type == "integer-na" - ): + if self.inferred_type in ["mixed-integer-float", "integer-na"]: raise if is_null_slicer: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index dad8256eef591..c27ac35342719 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -577,15 +577,17 @@ def test_integers(self): result = lib.infer_dtype(arr, skipna=True) assert result == "integer" - # GH 27392 - def test_integer_na(self): + @pytest.mark.parametrize("skipna", [True, False]) + def test_integer_na(self, skipna): + # GH 27392 + expected = "integer" if skipna else "integer-na" arr = np.array([1, 2, np.nan, np.nan, 3], dtype="O") - result = lib.infer_dtype(arr, skipna=False) - assert result == "integer-na" + result = lib.infer_dtype(arr, skipna=skipna) + assert result == expected arr = np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O") - result = lib.infer_dtype(arr, skipna=False) - assert result == "integer-na" + result = lib.infer_dtype(arr, skipna=skipna) + assert result == expected def test_deprecation(self): # GH 24050 From 1f5fd1a5dfdc2e3746267503f3433b3b2dac9ac2 Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Wed, 24 Jul 2019 14:10:39 +0800 Subject: [PATCH 4/7] add whatsnew entry in v1.0.0 --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 982d1bb5af4d4..b45edf5a027bc 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -45,6 +45,7 @@ Backwards incompatible API changes Other API changes ^^^^^^^^^^^^^^^^^ +- :meth:`infer_type` will now return "integer-na" for integer and np.nan mix (:issue:`27283`) - - From 8123014d37f59791ef257e1c31138810c41cf365 Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Thu, 25 Jul 2019 09:44:55 +0800 Subject: [PATCH 5/7] correct indentation --- pandas/_libs/lib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 35e8bf8ff77bf..10f65870bfec2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1530,8 +1530,8 @@ cpdef bint is_integer_array(ndarray values): cdef class IntegerNaValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: - return util.is_integer_object(value) or (util.is_nan(value) - and util.is_float_object(value)) + return (util.is_integer_object(value) + or (util.is_nan(value) and util.is_float_object(value))) cdef bint is_integer_na_array(ndarray values): From 78c105d4f44c602069b0e2037b8f3a39700ccedb Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Fri, 26 Jul 2019 09:05:13 +0800 Subject: [PATCH 6/7] update whatsnew --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b45edf5a027bc..a14d66ff8472c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -45,7 +45,7 @@ Backwards incompatible API changes Other API changes ^^^^^^^^^^^^^^^^^ -- :meth:`infer_type` will now return "integer-na" for integer and np.nan mix (:issue:`27283`) +- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix (:issue:`27283`) - - From fad37c8d931f0992f76c89694018032388deae25 Mon Sep 17 00:00:00 2001 From: Jiang Yue Date: Mon, 29 Jul 2019 10:33:31 +0800 Subject: [PATCH 7/7] parameterization of tests --- pandas/tests/dtypes/test_inference.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index a87e993231cca..f05efb45ccdd7 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -577,16 +577,19 @@ def test_integers(self): result = lib.infer_dtype(arr, skipna=True) assert result == "integer" - @pytest.mark.parametrize("skipna", [True, False]) - def test_integer_na(self, skipna): + @pytest.mark.parametrize( + "arr, skipna", + [ + (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), False), + (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), True), + (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), False), + (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), True), + ], + ) + def test_integer_na(self, arr, skipna): # GH 27392 - expected = "integer" if skipna else "integer-na" - arr = np.array([1, 2, np.nan, np.nan, 3], dtype="O") - result = lib.infer_dtype(arr, skipna=skipna) - assert result == expected - - arr = np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O") result = lib.infer_dtype(arr, skipna=skipna) + expected = "integer" if skipna else "integer-na" assert result == expected def test_deprecation(self):