@@ -2029,7 +2029,8 @@ def maybe_convert_numeric(
2029
2029
set na_values ,
2030
2030
bint convert_empty = True ,
2031
2031
bint coerce_numeric = False ,
2032
- ) -> ndarray:
2032
+ bint convert_to_masked_nullable = False ,
2033
+ ) -> tuple[np.ndarray , np.ndarray | None]:
2033
2034
"""
2034
2035
Convert object array to a numeric array if possible.
2035
2036
@@ -2053,14 +2054,20 @@ def maybe_convert_numeric(
2053
2054
numeric array has no suitable numerical dtype to return (i.e. uint64 ,
2054
2055
int32 , uint8 ). If set to False , the original object array will be
2055
2056
returned. Otherwise , a ValueError will be raised.
2056
-
2057
+ convert_to_masked_nullable : bool , default False
2058
+ Whether to return a mask for the converted values. This also disables
2059
+ upcasting for ints with nulls to float64.
2057
2060
Returns
2058
2061
-------
2059
2062
np.ndarray
2060
2063
Array of converted object values to numerical ones.
2064
+
2065
+ Optional[np.ndarray]
2066
+ If convert_to_masked_nullable is True ,
2067
+ returns a boolean mask for the converted values , otherwise returns None.
2061
2068
"""
2062
2069
if len(values ) == 0:
2063
- return np.array([], dtype = ' i8' )
2070
+ return ( np.array([], dtype = ' i8' ), None )
2064
2071
2065
2072
# fastpath for ints - try to convert all based on first value
2066
2073
cdef:
@@ -2070,7 +2077,7 @@ def maybe_convert_numeric(
2070
2077
try :
2071
2078
maybe_ints = values.astype(' i8' )
2072
2079
if (maybe_ints == values).all():
2073
- return maybe_ints
2080
+ return ( maybe_ints, None )
2074
2081
except (ValueError , OverflowError , TypeError ):
2075
2082
pass
2076
2083
@@ -2084,21 +2091,40 @@ def maybe_convert_numeric(
2084
2091
ndarray[int64_t] ints = np.empty(n, dtype = ' i8' )
2085
2092
ndarray[uint64_t] uints = np.empty(n, dtype = ' u8' )
2086
2093
ndarray[uint8_t] bools = np.empty(n, dtype = ' u1' )
2094
+ ndarray[uint8_t] mask = np.zeros(n, dtype = " u1" )
2087
2095
float64_t fval
2096
+ bint allow_null_in_int = convert_to_masked_nullable
2088
2097
2089
2098
for i in range (n):
2090
2099
val = values[i]
2100
+ # We only want to disable NaNs showing as float if
2101
+ # a) convert_to_masked_nullable = True
2102
+ # b) no floats have been seen ( assuming an int shows up later )
2103
+ # However, if no ints are present (an all-null array), we need to return floats
2104
+ allow_null_in_int = convert_to_masked_nullable and not seen.float_
2091
2105
2092
2106
if val.__hash__ is not None and val in na_values:
2093
- seen.saw_null()
2107
+ if allow_null_in_int:
2108
+ seen.null_ = True
2109
+ mask[i] = 1
2110
+ else :
2111
+ if convert_to_masked_nullable:
2112
+ mask[i] = 1
2113
+ seen.saw_null()
2094
2114
floats[i] = complexes[i] = NaN
2095
2115
elif util.is_float_object(val):
2096
2116
fval = val
2097
2117
if fval != fval:
2098
2118
seen.null_ = True
2099
-
2119
+ if allow_null_in_int:
2120
+ mask[i] = 1
2121
+ else :
2122
+ if convert_to_masked_nullable:
2123
+ mask[i] = 1
2124
+ seen.float_ = True
2125
+ else :
2126
+ seen.float_ = True
2100
2127
floats[i] = complexes[i] = fval
2101
- seen.float_ = True
2102
2128
elif util.is_integer_object(val):
2103
2129
floats[i] = complexes[i] = val
2104
2130
@@ -2121,7 +2147,13 @@ def maybe_convert_numeric(
2121
2147
floats[i] = uints[i] = ints[i] = bools[i] = val
2122
2148
seen.bool_ = True
2123
2149
elif val is None or val is C_NA:
2124
- seen.saw_null()
2150
+ if allow_null_in_int:
2151
+ seen.null_ = True
2152
+ mask[i] = 1
2153
+ else :
2154
+ if convert_to_masked_nullable:
2155
+ mask[i] = 1
2156
+ seen.saw_null()
2125
2157
floats[i] = complexes[i] = NaN
2126
2158
elif hasattr (val, ' __len__' ) and len (val) == 0 :
2127
2159
if convert_empty or seen.coerce_numeric:
@@ -2142,17 +2174,22 @@ def maybe_convert_numeric(
2142
2174
if fval in na_values:
2143
2175
seen.saw_null()
2144
2176
floats[i] = complexes[i] = NaN
2177
+ mask[i] = 1
2145
2178
else :
2146
2179
if fval != fval:
2147
2180
seen.null_ = True
2181
+ mask[i] = 1
2148
2182
2149
2183
floats[i] = fval
2150
2184
2151
2185
if maybe_int:
2152
2186
as_int = int (val)
2153
2187
2154
2188
if as_int in na_values:
2155
- seen.saw_null()
2189
+ mask[i] = 1
2190
+ seen.null_ = True
2191
+ if not allow_null_in_int:
2192
+ seen.float_ = True
2156
2193
else :
2157
2194
seen.saw_int(as_int)
2158
2195
@@ -2180,22 +2217,34 @@ def maybe_convert_numeric(
2180
2217
floats[i] = NaN
2181
2218
2182
2219
if seen.check_uint64_conflict():
2183
- return values
2220
+ return (values, None )
2221
+
2222
+ # This occurs since we disabled float nulls showing as null in anticipation
2223
+ # of seeing ints, but no ints were ever seen. In that case, we return floats
2224
+ if allow_null_in_int and seen.null_ and not seen.int_:
2225
+ seen.float_ = True
2184
2226
2185
2227
if seen.complex_:
2186
- return complexes
2228
+ return ( complexes, None )
2187
2229
elif seen.float_:
2188
- return floats
2230
+ if seen.null_ and convert_to_masked_nullable:
2231
+ return (floats, mask.view(np.bool_))
2232
+ return (floats, None )
2189
2233
elif seen.int_:
2234
+ if seen.null_ and convert_to_masked_nullable:
2235
+ if seen.uint_:
2236
+ return (uints, mask.view(np.bool_))
2237
+ else :
2238
+ return (ints, mask.view(np.bool_))
2190
2239
if seen.uint_:
2191
- return uints
2240
+ return ( uints, None )
2192
2241
else :
2193
- return ints
2242
+ return ( ints, None )
2194
2243
elif seen.bool_:
2195
- return bools.view(np.bool_)
2244
+ return ( bools.view(np.bool_), None )
2196
2245
elif seen.uint_:
2197
- return uints
2198
- return ints
2246
+ return ( uints, None )
2247
+ return ( ints, None )
2199
2248
2200
2249
2201
2250
@ cython.boundscheck (False )
0 commit comments