Skip to content

Commit 008e9ec

Browse files
chris-b1 authored and jreback committed
PERF: maybe_convert_numeric speedup (#16104)
1 parent 72248e7 commit 008e9ec

File tree

1 file changed

+35
-17
lines changed

1 file changed

+35
-17
lines changed

pandas/_libs/src/inference.pyx

+35-17
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ cdef class Seen(object):
9696
encountered when trying to perform type conversions.
9797
"""
9898

99-
cdef public:
99+
cdef:
100100
bint int_ # seen_int
101101
bint bool_ # seen_bool
102102
bint null_ # seen_null
@@ -185,7 +185,7 @@ cdef class Seen(object):
185185
self.null_ = 1
186186
self.float_ = 1
187187

188-
def saw_int(self, val):
188+
cdef saw_int(self, object val):
189189
"""
190190
Set flags indicating that an integer value was encountered.
191191
@@ -196,7 +196,7 @@ cdef class Seen(object):
196196
"""
197197
self.int_ = 1
198198
self.sint_ = self.sint_ or (val < 0)
199-
self.uint_ = self.uint_ or (val > iINT64_MAX)
199+
self.uint_ = self.uint_ or (val > oINT64_MAX)
200200

201201
@property
202202
def numeric_(self):
@@ -908,11 +908,15 @@ cpdef bint is_interval_array(ndarray[object] values):
908908
cdef extern from "parse_helper.h":
909909
inline int floatify(object, double *result, int *maybe_int) except -1
910910

911-
cdef int64_t iINT64_MAX = <int64_t> INT64_MAX
912-
cdef int64_t iINT64_MIN = <int64_t> INT64_MIN
913-
cdef uint64_t iUINT64_MAX = <uint64_t> UINT64_MAX
911+
# constants that will be compared to potentially arbitrarily large
912+
# python int
913+
cdef object oINT64_MAX = <int64_t> INT64_MAX
914+
cdef object oINT64_MIN = <int64_t> INT64_MIN
915+
cdef object oUINT64_MAX = <uint64_t> UINT64_MAX
914916

915917

918+
@cython.boundscheck(False)
919+
@cython.wraparound(False)
916920
def maybe_convert_numeric(ndarray[object] values, set na_values,
917921
bint convert_empty=True, bint coerce_numeric=False):
918922
"""
@@ -943,6 +947,17 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
943947
-------
944948
numeric_array : array of converted object values to numerical ones
945949
"""
950+
# fastpath for ints - try to convert all based on first value
951+
cdef object val = values[0]
952+
if util.is_integer_object(val):
953+
try:
954+
maybe_ints = values.astype('i8')
955+
if (maybe_ints == values).all():
956+
return maybe_ints
957+
except (ValueError, OverflowError, TypeError):
958+
pass
959+
960+
# otherwise, iterate and do full inference
946961
cdef:
947962
int status, maybe_int
948963
Py_ssize_t i, n = values.size
@@ -952,7 +967,6 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
952967
ndarray[int64_t] ints = np.empty(n, dtype='i8')
953968
ndarray[uint64_t] uints = np.empty(n, dtype='u8')
954969
ndarray[uint8_t] bools = np.empty(n, dtype='u1')
955-
object val
956970
float64_t fval
957971

958972
for i in range(n):
@@ -962,21 +976,23 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
962976
seen.saw_null()
963977
floats[i] = complexes[i] = nan
964978
elif util.is_float_object(val):
965-
if val != val:
979+
fval = val
980+
if fval != fval:
966981
seen.null_ = True
967982

968-
floats[i] = complexes[i] = val
983+
floats[i] = complexes[i] = fval
969984
seen.float_ = True
970985
elif util.is_integer_object(val):
971986
floats[i] = complexes[i] = val
972987

973-
as_int = int(val)
974-
seen.saw_int(as_int)
988+
val = int(val)
989+
seen.saw_int(val)
990+
991+
if val >= 0:
992+
uints[i] = val
975993

976-
if as_int >= 0:
977-
uints[i] = as_int
978-
if as_int <= iINT64_MAX:
979-
ints[i] = as_int
994+
if val <= oINT64_MAX:
995+
ints[i] = val
980996
elif util.is_bool_object(val):
981997
floats[i] = uints[i] = ints[i] = bools[i] = val
982998
seen.bool_ = True
@@ -1017,12 +1033,12 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
10171033
seen.saw_int(as_int)
10181034

10191035
if not (seen.float_ or as_int in na_values):
1020-
if as_int < iINT64_MIN or as_int > iUINT64_MAX:
1036+
if as_int < oINT64_MIN or as_int > oUINT64_MAX:
10211037
raise ValueError('Integer out of range.')
10221038

10231039
if as_int >= 0:
10241040
uints[i] = as_int
1025-
if as_int <= iINT64_MAX:
1041+
if as_int <= oINT64_MAX:
10261042
ints[i] = as_int
10271043
else:
10281044
seen.float_ = True
@@ -1053,6 +1069,8 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
10531069
return ints
10541070

10551071

1072+
@cython.boundscheck(False)
1073+
@cython.wraparound(False)
10561074
def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
10571075
bint safe=0, bint convert_datetime=0,
10581076
bint convert_timedelta=0):

0 commit comments

Comments
 (0)