diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f527882a9dc9d..985ddf37a8b5c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1440,25 +1440,28 @@ def infer_dtype(value: object, skipna: bool = True) -> str: from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike values = construct_1d_object_array_from_listlike(value) - # make contiguous - # for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup - values = values.ravel(order="K") - val = _try_infer_map(values.dtype) if val is not None: + # Anything other than object-dtype should return here. return val - if values.dtype != np.object_: - values = values.astype("O") + if values.descr.type_num != NPY_OBJECT: + # i.e. values.dtype != np.object + # This should not be reached + values = values.astype(object) + + # for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup + values = values.ravel(order="K") if skipna: values = values[~isnaobj(values)] - n = len(values) + n = cnp.PyArray_SIZE(values) if n == 0: return "empty" - # try to use a valid value + # Iterate until we find our first valid value. We will use this + # value to decide which of the is_foo_array functions to call. for i in range(n): val = values[i] diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fe9fa4169c547..a6dc8cc16b229 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -227,12 +227,7 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True): dtype = arr.dtype arr = arr.astype(dtype.newbyteorder("<")) - ivalues = arr.view(np.int64).ravel("K") - - result = np.empty_like(arr, dtype=DT64NS_DTYPE) - iresult = result.ravel("K").view(np.int64) - - if len(iresult) == 0: + if arr.size == 0: result = arr.view(DT64NS_DTYPE) if copy: result = result.copy() @@ -245,17 +240,23 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True): raise ValueError("datetime64/timedelta64 must have a unit specified") if unit == NPY_FR_ns: + # Check this before allocating result for perf, might save some memory if copy: - arr = arr.copy() - result = arr - else: - for i in range(n): - if ivalues[i] != NPY_NAT: - pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts) - iresult[i] = dtstruct_to_dt64(&dts) - check_dts_bounds(&dts) - else: - iresult[i] = NPY_NAT + return arr.copy() + return arr + + ivalues = arr.view(np.int64).ravel("K") + + result = np.empty_like(arr, dtype=DT64NS_DTYPE) + iresult = result.ravel("K").view(np.int64) + + for i in range(n): + if ivalues[i] != NPY_NAT: + pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts) + iresult[i] = dtstruct_to_dt64(&dts) + check_dts_bounds(&dts) + else: + iresult[i] = NPY_NAT return result