Skip to content

ENH: Infer integer-na in infer_dtype #27392

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Backwards incompatible API changes
Other API changes
^^^^^^^^^^^^^^^^^

- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for a mix of integers and ``np.nan`` (:issue:`27283`)
-
-

Expand Down
23 changes: 21 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1261,7 +1261,10 @@ def infer_dtype(value: object, skipna: object=None) -> str:
if is_integer_array(values):
return 'integer'
elif is_integer_float_array(values):
return 'mixed-integer-float'
if is_integer_na_array(values):
return 'integer-na'
else:
return 'mixed-integer-float'
return 'mixed-integer'

elif PyDateTime_Check(val):
Expand All @@ -1286,7 +1289,10 @@ def infer_dtype(value: object, skipna: object=None) -> str:
if is_float_array(values):
return 'floating'
elif is_integer_float_array(values):
return 'mixed-integer-float'
if is_integer_na_array(values):
return 'integer-na'
else:
return 'mixed-integer-float'

elif util.is_bool_object(val):
if is_bool_array(values, skipna=skipna):
Expand Down Expand Up @@ -1522,6 +1528,19 @@ cpdef bint is_integer_array(ndarray values):
return validator.validate(values)


cdef class IntegerNaValidator(Validator):
    """
    Validator whose per-value check accepts integer objects and float NaNs.

    A value passes when ``util.is_integer_object`` is true for it, or when
    it is both NaN (``util.is_nan``) and a float object
    (``util.is_float_object``).
    """
    cdef inline bint is_value_typed(self, object value) except -1:
        # Integers are accepted outright.
        if util.is_integer_object(value):
            return True
        # Otherwise only a missing value stored as a float NaN qualifies.
        return util.is_nan(value) and util.is_float_object(value)


cdef bint is_integer_na_array(ndarray values):
    """
    Run an ``IntegerNaValidator`` over ``values`` and return its verdict.

    The validator is built from the length and dtype of ``values``; the
    result is whatever ``validator.validate(values)`` reports.
    NOTE(review): presumably ``validate`` applies ``is_value_typed`` to each
    element -- confirm against the ``Validator`` base class.
    """
    cdef IntegerNaValidator validator

    validator = IntegerNaValidator(len(values), values.dtype)
    return validator.validate(values)


cdef class IntegerFloatValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return util.is_integer_object(value) or util.is_float_object(value)
Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
"floating",
"integer",
"mixed-integer",
"integer-na",
"mixed-integer-float",
]:
raise TypeError(
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,8 @@ def __new__(
pass

return Index(subarr, copy=copy, dtype=object, name=name)
elif inferred in ["floating", "mixed-integer-float"]:
elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
# TODO: Return IntegerArray for the integer-na case in the future
from .numeric import Float64Index

return Float64Index(subarr, copy=copy, name=name)
Expand Down Expand Up @@ -1789,7 +1790,7 @@ def is_integer(self):
return self.inferred_type in ["integer"]

def is_floating(self):
return self.inferred_type in ["floating", "mixed-integer-float"]
return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]

def is_numeric(self):
return self.inferred_type in ["integer", "floating"]
Expand Down Expand Up @@ -3098,6 +3099,7 @@ def _convert_scalar_indexer(self, key, kind=None):
if self.inferred_type not in [
"floating",
"mixed-integer-float",
"integer-na",
"string",
"unicode",
"mixed",
Expand Down Expand Up @@ -3175,7 +3177,7 @@ def is_int(v):
self.get_loc(stop)
is_positional = False
except KeyError:
if self.inferred_type == "mixed-integer-float":
if self.inferred_type in ["mixed-integer-float", "integer-na"]:
raise

if is_null_slicer:
Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
not in (
"floating",
"integer",
"integer-na",
"mixed-integer",
"mixed-integer-float",
"mixed",
Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,7 @@ def _is_convertible_to_index(other):
"floating",
"mixed-integer",
"integer",
"integer-na",
"mixed-integer-float",
"mixed",
):
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,21 @@ def test_integers(self):
result = lib.infer_dtype(arr, skipna=True)
assert result == "integer"

@pytest.mark.parametrize("skipna", [False, True])
@pytest.mark.parametrize(
    "arr",
    [
        np.array([1, 2, np.nan, np.nan, 3], dtype="O"),
        np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"),
    ],
)
def test_integer_na(self, arr, skipna):
    """Object arrays of integers plus NaN infer as "integer-na" unless NaNs are skipped."""
    # GH 27392
    if skipna:
        # The NaN entries are ignored, so only the integers are inspected.
        expected = "integer"
    else:
        expected = "integer-na"

    assert lib.infer_dtype(arr, skipna=skipna) == expected

def test_deprecation(self):
# GH 24050
arr = np.array([1, 2, 3], dtype=object)
Expand Down