Skip to content

Commit c7154e7

Browse files
jiangyue12392 authored and quintusdias committed
ENH: Infer integer-na in infer_dtype (pandas-dev#27392)
1 parent aaf4ea3 commit c7154e7

File tree

7 files changed

+45
-5
lines changed

7 files changed

+45
-5
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Backwards incompatible API changes
4545
Other API changes
4646
^^^^^^^^^^^^^^^^^
4747

48+
- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for a mix of integers and ``np.nan`` (:issue:`27283`)
4849
-
4950
-
5051

pandas/_libs/lib.pyx

+21-2
Original file line numberDiff line numberDiff line change
@@ -1265,7 +1265,10 @@ def infer_dtype(value: object, skipna: object=None) -> str:
12651265
if is_integer_array(values):
12661266
return 'integer'
12671267
elif is_integer_float_array(values):
1268-
return 'mixed-integer-float'
1268+
if is_integer_na_array(values):
1269+
return 'integer-na'
1270+
else:
1271+
return 'mixed-integer-float'
12691272
return 'mixed-integer'
12701273

12711274
elif PyDateTime_Check(val):
@@ -1290,7 +1293,10 @@ def infer_dtype(value: object, skipna: object=None) -> str:
12901293
if is_float_array(values):
12911294
return 'floating'
12921295
elif is_integer_float_array(values):
1293-
return 'mixed-integer-float'
1296+
if is_integer_na_array(values):
1297+
return 'integer-na'
1298+
else:
1299+
return 'mixed-integer-float'
12941300

12951301
elif util.is_bool_object(val):
12961302
if is_bool_array(values, skipna=skipna):
@@ -1526,6 +1532,19 @@ cpdef bint is_integer_array(ndarray values):
15261532
return validator.validate(values)
15271533

15281534

1535+
cdef class IntegerNaValidator(Validator):
1536+
cdef inline bint is_value_typed(self, object value) except -1:
1537+
return (util.is_integer_object(value)
1538+
or (util.is_nan(value) and util.is_float_object(value)))
1539+
1540+
1541+
cdef bint is_integer_na_array(ndarray values):
1542+
cdef:
1543+
IntegerNaValidator validator = IntegerNaValidator(len(values),
1544+
values.dtype)
1545+
return validator.validate(values)
1546+
1547+
15291548
cdef class IntegerFloatValidator(Validator):
15301549
cdef inline bint is_value_typed(self, object value) except -1:
15311550
return util.is_integer_object(value) or util.is_float_object(value)

pandas/core/arrays/integer.py

+1
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
187187
"floating",
188188
"integer",
189189
"mixed-integer",
190+
"integer-na",
190191
"mixed-integer-float",
191192
]:
192193
raise TypeError(

pandas/core/indexes/base.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,8 @@ def __new__(
450450
pass
451451

452452
return Index(subarr, copy=copy, dtype=object, name=name)
453-
elif inferred in ["floating", "mixed-integer-float"]:
453+
elif inferred in ["floating", "mixed-integer-float", "integer-na"]:
454+
# TODO: Return an IntegerArray for the integer-na case in the future
454455
from .numeric import Float64Index
455456

456457
return Float64Index(subarr, copy=copy, name=name)
@@ -1787,7 +1788,7 @@ def is_integer(self):
17871788
return self.inferred_type in ["integer"]
17881789

17891790
def is_floating(self):
1790-
return self.inferred_type in ["floating", "mixed-integer-float"]
1791+
return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]
17911792

17921793
def is_numeric(self):
17931794
return self.inferred_type in ["integer", "floating"]
@@ -3096,6 +3097,7 @@ def _convert_scalar_indexer(self, key, kind=None):
30963097
if self.inferred_type not in [
30973098
"floating",
30983099
"mixed-integer-float",
3100+
"integer-na",
30993101
"string",
31003102
"unicode",
31013103
"mixed",
@@ -3173,7 +3175,7 @@ def is_int(v):
31733175
self.get_loc(stop)
31743176
is_positional = False
31753177
except KeyError:
3176-
if self.inferred_type == "mixed-integer-float":
3178+
if self.inferred_type in ["mixed-integer-float", "integer-na"]:
31773179
raise
31783180

31793181
if is_null_slicer:

pandas/core/indexes/datetimes.py

+1
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
778778
not in (
779779
"floating",
780780
"integer",
781+
"integer-na",
781782
"mixed-integer",
782783
"mixed-integer-float",
783784
"mixed",

pandas/core/indexes/timedeltas.py

+1
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,7 @@ def _is_convertible_to_index(other):
704704
"floating",
705705
"mixed-integer",
706706
"integer",
707+
"integer-na",
707708
"mixed-integer-float",
708709
"mixed",
709710
):

pandas/tests/dtypes/test_inference.py

+15
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,21 @@ def test_integers(self):
596596
result = lib.infer_dtype(arr, skipna=True)
597597
assert result == "integer"
598598

599+
@pytest.mark.parametrize(
600+
"arr, skipna",
601+
[
602+
(np.array([1, 2, np.nan, np.nan, 3], dtype="O"), False),
603+
(np.array([1, 2, np.nan, np.nan, 3], dtype="O"), True),
604+
(np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), False),
605+
(np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), True),
606+
],
607+
)
608+
def test_integer_na(self, arr, skipna):
609+
# GH 27392
610+
result = lib.infer_dtype(arr, skipna=skipna)
611+
expected = "integer" if skipna else "integer-na"
612+
assert result == expected
613+
599614
def test_deprecation(self):
600615
# GH 24050
601616
arr = np.array([1, 2, 3], dtype=object)

0 commit comments

Comments
 (0)