Skip to content

ENH: Infer integer-na in infer_dtype #27392

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jul 31, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,10 @@ def infer_dtype(value: object, skipna: object=None) -> str:
if is_integer_array(values):
return 'integer'
elif is_integer_float_array(values):
return 'mixed-integer-float'
if is_integer_na_array(values):
return 'integer-na'
else:
return 'mixed-integer-float'
return 'mixed-integer'

elif PyDateTime_Check(val):
Expand All @@ -1275,7 +1278,10 @@ def infer_dtype(value: object, skipna: object=None) -> str:
if is_float_array(values):
return 'floating'
elif is_integer_float_array(values):
return 'mixed-integer-float'
if is_integer_na_array(values):
return 'integer-na'
else:
return 'mixed-integer-float'

elif util.is_bool_object(val):
if is_bool_array(values, skipna=skipna):
Expand Down Expand Up @@ -1511,6 +1517,21 @@ cpdef bint is_integer_array(ndarray values):
return validator.validate(values)


cdef class IntegerNaValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return util.is_integer_object(value) or util.is_nan(value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you need an is_valid_null as well (e.g. see how PeriodValidator is done).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed it to use util.is_nan(value) and util.is_float_object(value) instead.


cdef inline bint is_array_typed(self) except -1:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure you need this one; is it actually hit?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed

return issubclass(self.dtype.type, np.integer)


# Build an IntegerNaValidator from the length and dtype of `values` and
# return the result of its validate() call on the array.
# IntegerNaValidator.is_value_typed, as written in this diff, accepts a value
# that is an integer object or NaN; presumably validate() applies that
# per-value check -- validate() is not defined in this view, so confirm there.
cdef bint is_integer_na_array(ndarray values):
cdef:
IntegerNaValidator validator = IntegerNaValidator(len(values),
values.dtype)
return validator.validate(values)


cdef class IntegerFloatValidator(Validator):
cdef inline bint is_value_typed(self, object value) except -1:
return util.is_integer_object(value) or util.is_float_object(value)
Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
"floating",
"integer",
"mixed-integer",
"integer-na",
"mixed-integer-float",
]:
raise TypeError(
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ def __new__(
pass

return Index(subarr, copy=copy, dtype=object, name=name)
elif inferred in ["floating", "mixed-integer-float"]:
elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
from .numeric import Float64Index

return Float64Index(subarr, copy=copy, name=name)
Expand Down Expand Up @@ -1810,7 +1810,7 @@ def is_integer(self):
return self.inferred_type in ["integer"]

def is_floating(self):
return self.inferred_type in ["floating", "mixed-integer-float"]
return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]

def is_numeric(self):
return self.inferred_type in ["integer", "floating"]
Expand Down Expand Up @@ -3119,6 +3119,7 @@ def _convert_scalar_indexer(self, key, kind=None):
if self.inferred_type not in [
"floating",
"mixed-integer-float",
"integer-na",
"string",
"unicode",
"mixed",
Expand Down Expand Up @@ -3196,7 +3197,10 @@ def is_int(v):
self.get_loc(stop)
is_positional = False
except KeyError:
if self.inferred_type == "mixed-integer-float":
if (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use `self.inferred_type in [...]` here instead of chaining `==` comparisons with `or`.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

self.inferred_type == "mixed-integer-float"
or self.inferred_type == "integer-na"
):
raise

if is_null_slicer:
Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
not in (
"floating",
"integer",
"integer-na",
"mixed-integer",
"mixed-integer-float",
"mixed",
Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,7 @@ def _is_convertible_to_index(other):
"floating",
"mixed-integer",
"integer",
"integer-na",
"mixed-integer-float",
"mixed",
):
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,16 @@ def test_integers(self):
result = lib.infer_dtype(arr, skipna=True)
assert result == "integer"

# GH 27392
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move this comment inside the test definition?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

def test_integer_na(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you parameterize over skipna?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parameterization done

arr = np.array([1, 2, np.nan, np.nan, 3], dtype="O")
result = lib.infer_dtype(arr, skipna=False)
assert result == "integer-na"

arr = np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O")
result = lib.infer_dtype(arr, skipna=False)
assert result == "integer-na"

def test_deprecation(self):
# GH 24050
arr = np.array([1, 2, 3], dtype=object)
Expand Down