-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Use IntegerArray to avoid forced conversion from integer to float #27335
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
475ed6d
f47b60f
57c613a
b5698c0
e2b9803
63d4bdd
a74e473
7672fa6
c24de12
f071bf6
e8591ef
cc179be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -971,6 +971,7 @@ cdef class Seen: | |
bint nat_ # seen nat | ||
bint bool_ # seen_bool | ||
bint null_ # seen_null | ||
bint nan_ # seen_np.nan | ||
bint uint_ # seen_uint (unsigned integer) | ||
bint sint_ # seen_sint (signed integer) | ||
bint float_ # seen_float | ||
|
@@ -995,6 +996,7 @@ cdef class Seen: | |
self.nat_ = 0 | ||
self.bool_ = 0 | ||
self.null_ = 0 | ||
self.nan_ = 0 | ||
self.uint_ = 0 | ||
self.sint_ = 0 | ||
self.float_ = 0 | ||
|
@@ -1956,10 +1958,37 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, | |
@cython.wraparound(False) | ||
def maybe_convert_objects(ndarray[object] objects, bint try_float=0, | ||
bint safe=0, bint convert_datetime=0, | ||
bint convert_timedelta=0): | ||
bint convert_timedelta=0, | ||
bint convert_to_nullable_integer=0): | ||
""" | ||
Type inference function-- convert object array to proper dtype | ||
|
||
Parameters | ||
---------- | ||
objects : ndarray | ||
Array of object elements to convert. | ||
try_float : bool, default False | ||
If an array-like object containing only float or NaN values is | ||
encountered, whether to convert and return an array of float dtype. | ||
safe : bool, default False | ||
Whether to upcast numeric type (e.g. int cast to float). If set to | ||
True, no upcasting will be performed. | ||
convert_datetime : bool, default False | ||
If an array-like object containing only datetime values or NaT is | ||
encountered, whether to convert and return an array of M8[ns] dtype. | ||
convert_timedelta : bool, default False | ||
If an array-like object containing only timedelta values or NaT is | ||
encountered, whether to convert and return an array of m8[ns] dtype. | ||
convert_to_nullable_integer : bool, default False | ||
If an array-like object containing only integer values (and NaN) is | ||
encountered, whether to convert and return an IntegerArray. | ||
|
||
Returns | ||
------- | ||
array : array of converted object values to more specific dtypes if | ||
applicable | ||
""" | ||
|
||
cdef: | ||
Py_ssize_t i, n | ||
ndarray[float64_t] floats | ||
|
@@ -1980,6 +2009,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, | |
ints = np.empty(n, dtype='i8') | ||
uints = np.empty(n, dtype='u8') | ||
bools = np.empty(n, dtype=np.uint8) | ||
mask = np.full(n, False) | ||
|
||
if convert_datetime: | ||
datetimes = np.empty(n, dtype='M8[ns]') | ||
|
@@ -1997,6 +2027,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, | |
if val is None: | ||
seen.null_ = 1 | ||
floats[i] = complexes[i] = fnan | ||
mask[i] = True | ||
elif val is NaT: | ||
seen.nat_ = 1 | ||
if convert_datetime: | ||
|
@@ -2006,6 +2037,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, | |
if not (convert_datetime or convert_timedelta): | ||
seen.object_ = 1 | ||
break | ||
elif val is np.nan: | ||
seen.nan_ = 1 | ||
mask[i] = True | ||
floats[i] = complexes[i] = val | ||
elif util.is_bool_object(val): | ||
seen.bool_ = 1 | ||
bools[i] = val | ||
|
@@ -2087,11 +2122,19 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, | |
|
||
if not seen.object_: | ||
if not safe: | ||
if seen.null_: | ||
if seen.null_ or seen.nan_: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A separate issue is that we should change line 2083 to use a DatetimeArray (can be a separate PR), or do it here if it works out. Can you also update the doc-string (well, add it really :->)? Thanks for working on this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I will leave line 2083 out as it is a separate issue. |
||
if seen.is_float_or_complex: | ||
if seen.complex_: | ||
return complexes | ||
elif seen.float_ or seen.int_: | ||
elif seen.float_: | ||
return floats | ||
elif seen.int_: | ||
if convert_to_nullable_integer: | ||
from pandas.core.arrays import IntegerArray | ||
return IntegerArray(ints, mask) | ||
else: | ||
return floats | ||
elif seen.nan_: | ||
return floats | ||
else: | ||
if not seen.bool_: | ||
|
@@ -2130,7 +2173,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, | |
if seen.complex_: | ||
if not seen.int_: | ||
return complexes | ||
elif seen.float_: | ||
elif seen.float_ or seen.nan_: | ||
if not seen.int_: | ||
return floats | ||
else: | ||
|
@@ -2154,7 +2197,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, | |
if seen.complex_: | ||
if not seen.int_: | ||
return complexes | ||
elif seen.float_: | ||
elif seen.float_ or seen.nan_: | ||
if not seen.int_: | ||
return floats | ||
elif seen.int_: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if you can update this doc-string here e.g. Returns / Parameters
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doc-string added