diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 982d1bb5af4d4..a14d66ff8472c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -45,6 +45,7 @@ Backwards incompatible API changes Other API changes ^^^^^^^^^^^^^^^^^ +- :func:`pandas.api.types.infer_dtype` will now return ``"integer-na"`` for a mix of integers and ``np.nan`` (:issue:`27283`) - - diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6b4f45fabc665..10f65870bfec2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1261,7 +1261,10 @@ def infer_dtype(value: object, skipna: object=None) -> str: if is_integer_array(values): return 'integer' elif is_integer_float_array(values): - return 'mixed-integer-float' + if is_integer_na_array(values): + return 'integer-na' + else: + return 'mixed-integer-float' return 'mixed-integer' elif PyDateTime_Check(val): @@ -1286,7 +1289,10 @@ def infer_dtype(value: object, skipna: object=None) -> str: if is_float_array(values): return 'floating' elif is_integer_float_array(values): - return 'mixed-integer-float' + if is_integer_na_array(values): + return 'integer-na' + else: + return 'mixed-integer-float' elif util.is_bool_object(val): if is_bool_array(values, skipna=skipna): @@ -1522,6 +1528,19 @@ cpdef bint is_integer_array(ndarray values): return validator.validate(values) +cdef class IntegerNaValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return (util.is_integer_object(value) + or (util.is_nan(value) and util.is_float_object(value))) + + +cdef bint is_integer_na_array(ndarray values): + cdef: + IntegerNaValidator validator = IntegerNaValidator(len(values), + values.dtype) + return validator.validate(values) + + cdef class IntegerFloatValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: return util.is_integer_object(value) or util.is_float_object(value) diff --git a/pandas/core/arrays/integer.py 
b/pandas/core/arrays/integer.py index 62b1a8a184946..1f14bd169a228 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -187,6 +187,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): "floating", "integer", "mixed-integer", + "integer-na", "mixed-integer-float", ]: raise TypeError( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b30d262f6304a..04c8dbb8e2068 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -452,7 +452,8 @@ def __new__( pass return Index(subarr, copy=copy, dtype=object, name=name) - elif inferred in ["floating", "mixed-integer-float"]: + elif inferred in ["floating", "mixed-integer-float", "integer-na"]: + # TODO: Returns IntegerArray for integer-na case in the future from .numeric import Float64Index return Float64Index(subarr, copy=copy, name=name) @@ -1789,7 +1790,7 @@ def is_integer(self): return self.inferred_type in ["integer"] def is_floating(self): - return self.inferred_type in ["floating", "mixed-integer-float"] + return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"] def is_numeric(self): return self.inferred_type in ["integer", "floating"] @@ -3098,6 +3099,7 @@ def _convert_scalar_indexer(self, key, kind=None): if self.inferred_type not in [ "floating", "mixed-integer-float", + "integer-na", "string", "unicode", "mixed", @@ -3175,7 +3177,7 @@ def is_int(v): self.get_loc(stop) is_positional = False except KeyError: - if self.inferred_type == "mixed-integer-float": + if self.inferred_type in ["mixed-integer-float", "integer-na"]: raise if is_null_slicer: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e9296eea2b8a3..1d40fbe798c54 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -778,6 +778,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) not in ( "floating", "integer", + "integer-na", "mixed-integer", 
"mixed-integer-float", "mixed", diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 1c2a8c4f0c10c..d39dabb9f471c 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -702,6 +702,7 @@ def _is_convertible_to_index(other): "floating", "mixed-integer", "integer", + "integer-na", "mixed-integer-float", "mixed", ): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 4d688976cd50b..f05efb45ccdd7 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -577,6 +577,21 @@ def test_integers(self): result = lib.infer_dtype(arr, skipna=True) assert result == "integer" + @pytest.mark.parametrize( + "arr, skipna", + [ + (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), False), + (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), True), + (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), False), + (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), True), + ], + ) + def test_integer_na(self, arr, skipna): + # GH 27392 + result = lib.infer_dtype(arr, skipna=skipna) + expected = "integer" if skipna else "integer-na" + assert result == expected + def test_deprecation(self): # GH 24050 arr = np.array([1, 2, 3], dtype=object)