-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Infer integer-na in infer_dtype #27392
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
f1b9408
514a7c0
fdcaf6e
08c1b95
1f5fd1a
8123014
78c105d
fad37c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1250,7 +1250,10 @@ def infer_dtype(value: object, skipna: object=None) -> str: | |
if is_integer_array(values): | ||
return 'integer' | ||
elif is_integer_float_array(values): | ||
return 'mixed-integer-float' | ||
if is_integer_na_array(values): | ||
return 'integer-na' | ||
else: | ||
return 'mixed-integer-float' | ||
return 'mixed-integer' | ||
|
||
elif PyDateTime_Check(val): | ||
|
@@ -1275,7 +1278,10 @@ def infer_dtype(value: object, skipna: object=None) -> str: | |
if is_float_array(values): | ||
return 'floating' | ||
elif is_integer_float_array(values): | ||
return 'mixed-integer-float' | ||
if is_integer_na_array(values): | ||
return 'integer-na' | ||
else: | ||
return 'mixed-integer-float' | ||
|
||
elif util.is_bool_object(val): | ||
if is_bool_array(values, skipna=skipna): | ||
|
@@ -1511,6 +1517,21 @@ cpdef bint is_integer_array(ndarray values): | |
return validator.validate(values) | ||
|
||
|
||
cdef class IntegerNaValidator(Validator): | ||
cdef inline bint is_value_typed(self, object value) except -1: | ||
return util.is_integer_object(value) or util.is_nan(value) | ||
|
||
cdef inline bint is_array_typed(self) except -1: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure you need this one; is it actually hit? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed. |
||
return issubclass(self.dtype.type, np.integer) | ||
|
||
|
||
cdef bint is_integer_na_array(ndarray values): | ||
cdef: | ||
IntegerNaValidator validator = IntegerNaValidator(len(values), | ||
values.dtype) | ||
return validator.validate(values) | ||
|
||
|
||
cdef class IntegerFloatValidator(Validator): | ||
cdef inline bint is_value_typed(self, object value) except -1: | ||
return util.is_integer_object(value) or util.is_float_object(value) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -472,7 +472,7 @@ def __new__( | |
pass | ||
|
||
return Index(subarr, copy=copy, dtype=object, name=name) | ||
elif inferred in ["floating", "mixed-integer-float"]: | ||
elif inferred in ["floating", "mixed-integer-float", "integer-na"]: | ||
gfyoung marked this conversation as resolved.
Show resolved
Hide resolved
|
||
from .numeric import Float64Index | ||
|
||
return Float64Index(subarr, copy=copy, name=name) | ||
|
@@ -1810,7 +1810,7 @@ def is_integer(self): | |
return self.inferred_type in ["integer"] | ||
|
||
def is_floating(self): | ||
return self.inferred_type in ["floating", "mixed-integer-float"] | ||
return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"] | ||
|
||
def is_numeric(self): | ||
return self.inferred_type in ["integer", "floating"] | ||
|
@@ -3119,6 +3119,7 @@ def _convert_scalar_indexer(self, key, kind=None): | |
if self.inferred_type not in [ | ||
"floating", | ||
"mixed-integer-float", | ||
"integer-na", | ||
"string", | ||
"unicode", | ||
"mixed", | ||
|
@@ -3196,7 +3197,10 @@ def is_int(v): | |
self.get_loc(stop) | ||
is_positional = False | ||
except KeyError: | ||
if self.inferred_type == "mixed-integer-float": | ||
if ( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use `self.inferred_type in [...]` here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated. |
||
self.inferred_type == "mixed-integer-float" | ||
or self.inferred_type == "integer-na" | ||
): | ||
raise | ||
|
||
if is_null_slicer: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -577,6 +577,16 @@ def test_integers(self): | |
result = lib.infer_dtype(arr, skipna=True) | ||
assert result == "integer" | ||
|
||
# GH 27392 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you move this inside the function definition? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated. |
||
def test_integer_na(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you parameterize over skipna? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Parameterization done. |
||
arr = np.array([1, 2, np.nan, np.nan, 3], dtype="O") | ||
result = lib.infer_dtype(arr, skipna=False) | ||
assert result == "integer-na" | ||
|
||
arr = np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O") | ||
result = lib.infer_dtype(arr, skipna=False) | ||
assert result == "integer-na" | ||
|
||
def test_deprecation(self): | ||
# GH 24050 | ||
arr = np.array([1, 2, 3], dtype=object) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you need an is_valid_null as well (e.g. see how PeriodValidator is done).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I changed to
util.is_nan(value) and util.is_float_object(value)
instead