diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4c72e09a4851b..2387607569431 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -37,6 +37,9 @@ from cython cimport Py_ssize_t import pytz UTC = pytz.utc +from tslibs.arraylike import ( # noqa:F841 + format_array_from_datetime, array_to_datetime, array_with_unit_to_datetime, + ints_to_pytimedelta, ints_to_pydatetime) from tslibs.timedeltas cimport cast_from_unit from tslibs.timedeltas import Timedelta @@ -57,156 +60,6 @@ from tslibs.timestamps cimport (create_timestamp_from_ts, _NS_UPPER_BOUND, _NS_LOWER_BOUND) from tslibs.timestamps import Timestamp -cdef bint PY2 = str == bytes - - -cdef inline object create_datetime_from_ts( - int64_t value, pandas_datetimestruct dts, - object tz, object freq): - """ convenience routine to construct a datetime.datetime from its parts """ - return datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - -cdef inline object create_date_from_ts( - int64_t value, pandas_datetimestruct dts, - object tz, object freq): - """ convenience routine to construct a datetime.date from its parts """ - return date(dts.year, dts.month, dts.day) - -cdef inline object create_time_from_ts( - int64_t value, pandas_datetimestruct dts, - object tz, object freq): - """ convenience routine to construct a datetime.time from its parts """ - return time(dts.hour, dts.min, dts.sec, dts.us) - - -def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, - box="datetime"): - """ - Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp - - Parameters - ---------- - arr : array of i8 - tz : str, default None - convert to this timezone - freq : str/Offset, default None - freq to convert - box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' - If datetime, convert to datetime.datetime - If date, convert to datetime.date - If time, convert to datetime.time - If Timestamp, convert to pandas.Timestamp - - Returns - ------- 
- result : array of dtype specified by box - """ - - cdef: - Py_ssize_t i, n = len(arr) - ndarray[int64_t] trans, deltas - pandas_datetimestruct dts - object dt - int64_t value - ndarray[object] result = np.empty(n, dtype=object) - object (*func_create)(int64_t, pandas_datetimestruct, object, object) - - if box == "date": - assert (tz is None), "tz should be None when converting to date" - - func_create = create_date_from_ts - elif box == "timestamp": - func_create = create_timestamp_from_ts - - if is_string_object(freq): - freq = to_offset(freq) - elif box == "time": - func_create = create_time_from_ts - elif box == "datetime": - func_create = create_datetime_from_ts - else: - raise ValueError("box must be one of 'datetime', 'date', 'time' or" - " 'timestamp'") - - if tz is not None: - if is_utc(tz): - for i in range(n): - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - dt64_to_dtstruct(value, &dts) - result[i] = func_create(value, dts, tz, freq) - elif is_tzlocal(tz) or is_fixed_offset(tz): - for i in range(n): - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - # Python datetime objects do not support nanosecond - # resolution (yet, PEP 564). Need to compute new value - # using the i8 representation. - local_value = tz_convert_utc_to_tzlocal(value, tz) - dt64_to_dtstruct(local_value, &dts) - result[i] = func_create(value, dts, tz, freq) - else: - trans, deltas, typ = get_dst_info(tz) - - for i in range(n): - - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - - # Adjust datetime64 timestamp, recompute datetimestruct - pos = trans.searchsorted(value, side='right') - 1 - if treat_tz_as_pytz(tz): - # find right representation of dst etc in pytz timezone - new_tz = tz._tzinfos[tz._transition_info[pos]] - else: - # no zone-name change for dateutil tzs - dst etc - # represented in single object. 
- new_tz = tz - - dt64_to_dtstruct(value + deltas[pos], &dts) - result[i] = func_create(value, dts, new_tz, freq) - else: - for i in range(n): - - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - dt64_to_dtstruct(value, &dts) - result[i] = func_create(value, dts, None, freq) - - return result - - -def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): - # convert an i8 repr to an ndarray of timedelta or Timedelta (if box == - # True) - - cdef: - Py_ssize_t i, n = len(arr) - int64_t value - ndarray[object] result = np.empty(n, dtype=object) - - for i in range(n): - - value = arr[i] - if value == NPY_NAT: - result[i] = NaT - else: - if box: - result[i] = Timedelta(value) - else: - result[i] = timedelta(microseconds=int(value) / 1000) - - return result - def _test_parse_iso8601(object ts): """ @@ -252,512 +105,6 @@ cpdef inline object _localize_pydatetime(object dt, object tz): return dt.replace(tzinfo=tz) -def format_array_from_datetime(ndarray[int64_t] values, object tz=None, - object format=None, object na_rep=None): - """ - return a np object array of the string formatted values - - Parameters - ---------- - values : a 1-d i8 array - tz : the timezone (or None) - format : optional, default is None - a strftime capable string - na_rep : optional, default is None - a nat format - - """ - cdef: - int64_t val, ns, N = len(values) - ndarray[int64_t] consider_values - bint show_ms = 0, show_us = 0, show_ns = 0, basic_format = 0 - ndarray[object] result = np.empty(N, dtype=object) - object ts, res - pandas_datetimestruct dts - - if na_rep is None: - na_rep = 'NaT' - - # if we don't have a format nor tz, then choose - # a format based on precision - basic_format = format is None and tz is None - if basic_format: - consider_values = values[values != NPY_NAT] - show_ns = (consider_values % 1000).any() - - if not show_ns: - consider_values //= 1000 - show_us = (consider_values % 1000).any() - - if not show_ms: - consider_values //= 1000 - show_ms = 
(consider_values % 1000).any() - - for i in range(N): - val = values[i] - - if val == NPY_NAT: - result[i] = na_rep - elif basic_format: - - dt64_to_dtstruct(val, &dts) - res = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, - dts.month, - dts.day, - dts.hour, - dts.min, - dts.sec) - - if show_ns: - ns = dts.ps / 1000 - res += '.%.9d' % (ns + 1000 * dts.us) - elif show_us: - res += '.%.6d' % dts.us - elif show_ms: - res += '.%.3d' % (dts.us /1000) - - result[i] = res - - else: - - ts = Timestamp(val, tz=tz) - if format is None: - result[i] = str(ts) - else: - - # invalid format string - # requires dates > 1900 - try: - result[i] = ts.strftime(format) - except ValueError: - result[i] = str(ts) - - return result - - -cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): - """ - convert the ndarray according to the unit - if errors: - - raise: return converted values or raise OutOfBoundsDatetime - if out of range on the conversion or - ValueError for other conversions (e.g. a string) - - ignore: return non-convertible values as the same unit - - coerce: NaT for non-convertibles - - """ - cdef: - Py_ssize_t i, j, n=len(values) - int64_t m - ndarray[float64_t] fvalues - ndarray mask - bint is_ignore = errors=='ignore' - bint is_coerce = errors=='coerce' - bint is_raise = errors=='raise' - bint need_to_iterate = True - ndarray[int64_t] iresult - ndarray[object] oresult - - assert is_ignore or is_coerce or is_raise - - if unit == 'ns': - if issubclass(values.dtype.type, np.integer): - return values.astype('M8[ns]') - return array_to_datetime(values.astype(object), errors=errors) - - m = cast_from_unit(None, unit) - - if is_raise: - - # try a quick conversion to i8 - # if we have nulls that are not type-compat - # then need to iterate - try: - iresult = values.astype('i8', casting='same_kind', copy=False) - mask = iresult == iNaT - iresult[mask] = 0 - fvalues = iresult.astype('f8') * m - need_to_iterate = False - except: - pass - - # check the bounds - if 
not need_to_iterate: - - if ((fvalues < _NS_LOWER_BOUND).any() - or (fvalues > _NS_UPPER_BOUND).any()): - raise OutOfBoundsDatetime( - "cannot convert input with unit '{0}'".format(unit)) - result = (iresult * m).astype('M8[ns]') - iresult = result.view('i8') - iresult[mask] = iNaT - return result - - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') - - try: - for i in range(n): - val = values[i] - - if checknull_with_nat(val): - iresult[i] = NPY_NAT - - elif is_integer_object(val) or is_float_object(val): - - if val != val or val == NPY_NAT: - iresult[i] = NPY_NAT - else: - try: - iresult[i] = cast_from_unit(val, unit) - except OverflowError: - if is_raise: - raise OutOfBoundsDatetime( - "cannot convert input {0} with the unit " - "'{1}'".format(val, unit)) - elif is_ignore: - raise AssertionError - iresult[i] = NPY_NAT - - elif is_string_object(val): - if len(val) == 0 or val in nat_strings: - iresult[i] = NPY_NAT - - else: - try: - iresult[i] = cast_from_unit(float(val), unit) - except ValueError: - if is_raise: - raise ValueError( - "non convertible value {0} with the unit " - "'{1}'".format(val, unit)) - elif is_ignore: - raise AssertionError - iresult[i] = NPY_NAT - except: - if is_raise: - raise OutOfBoundsDatetime( - "cannot convert input {0} with the unit " - "'{1}'".format(val, unit)) - elif is_ignore: - raise AssertionError - iresult[i] = NPY_NAT - - else: - - if is_raise: - raise ValueError("unit='{0}' not valid with non-numerical " - "val='{1}'".format(unit, val)) - if is_ignore: - raise AssertionError - - iresult[i] = NPY_NAT - - return result - - except AssertionError: - pass - - # we have hit an exception - # and are in ignore mode - # redo as object - - oresult = np.empty(n, dtype=object) - for i in range(n): - val = values[i] - - if checknull_with_nat(val): - oresult[i] = NaT - elif is_integer_object(val) or is_float_object(val): - - if val != val or val == NPY_NAT: - oresult[i] = NaT - else: - try: - oresult[i] = 
Timestamp(cast_from_unit(val, unit)) - except: - oresult[i] = val - - elif is_string_object(val): - if len(val) == 0 or val in nat_strings: - oresult[i] = NaT - - else: - oresult[i] = val - - return oresult - - -cpdef array_to_datetime(ndarray[object] values, errors='raise', - dayfirst=False, yearfirst=False, - format=None, utc=None, - require_iso8601=False): - cdef: - Py_ssize_t i, n = len(values) - object val, py_dt - ndarray[int64_t] iresult - ndarray[object] oresult - pandas_datetimestruct dts - bint utc_convert = bool(utc) - bint seen_integer = 0 - bint seen_string = 0 - bint seen_datetime = 0 - bint is_raise = errors=='raise' - bint is_ignore = errors=='ignore' - bint is_coerce = errors=='coerce' - _TSObject _ts - int out_local=0, out_tzoffset=0 - - # specify error conditions - assert is_raise or is_ignore or is_coerce - - try: - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') - for i in range(n): - val = values[i] - - if checknull_with_nat(val): - iresult[i] = NPY_NAT - - elif PyDateTime_Check(val): - seen_datetime = 1 - if val.tzinfo is not None: - if utc_convert: - try: - _ts = convert_datetime_to_tsobject(val, None) - iresult[i] = _ts.value - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - else: - raise ValueError('Tz-aware datetime.datetime cannot ' - 'be converted to datetime64 unless ' - 'utc=True') - else: - iresult[i] = pydatetime_to_dt64(val, &dts) - if not PyDateTime_CheckExact(val): - # i.e. 
a Timestamp object - iresult[i] += val.nanosecond - try: - check_dts_bounds(&dts) - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - - elif PyDate_Check(val): - seen_datetime = 1 - iresult[i] = pydate_to_dt64(val, &dts) - try: - check_dts_bounds(&dts) - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - - elif is_datetime64_object(val): - seen_datetime = 1 - if get_datetime64_value(val) == NPY_NAT: - iresult[i] = NPY_NAT - else: - try: - iresult[i] = get_datetime64_nanos(val) - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - - elif is_integer_object(val) or is_float_object(val): - # these must be ns unit by-definition - seen_integer = 1 - - if val != val or val == NPY_NAT: - iresult[i] = NPY_NAT - elif is_raise or is_ignore: - iresult[i] = val - else: - # coerce - # we now need to parse this as if unit='ns' - # we can ONLY accept integers at this point - # if we have previously (or in future accept - # datetimes/strings, then we must coerce) - try: - iresult[i] = cast_from_unit(val, 'ns') - except: - iresult[i] = NPY_NAT - - elif is_string_object(val): - # string - seen_string = 1 - - if len(val) == 0 or val in nat_strings: - iresult[i] = NPY_NAT - continue - if PyUnicode_Check(val) and PY2: - val = val.encode('utf-8') - - try: - _string_to_dts(val, &dts, &out_local, &out_tzoffset) - except ValueError: - # A ValueError at this point is a _parsing_ error - # specifically _not_ OutOfBoundsDatetime - if _parse_today_now(val, &iresult[i]): - continue - elif require_iso8601: - # if requiring iso8601 strings, skip trying - # other formats - if is_coerce: - iresult[i] = NPY_NAT - continue - elif is_raise: - raise ValueError("time data {val} doesn't match " - "format specified" - .format(val=val)) - return values - - try: - py_dt = parse_datetime_string(val, dayfirst=dayfirst, - yearfirst=yearfirst) - except Exception: - if is_coerce: - iresult[i] = NPY_NAT - 
continue - raise TypeError("invalid string coercion to datetime") - - try: - _ts = convert_datetime_to_tsobject(py_dt, None) - iresult[i] = _ts.value - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - except: - # TODO: What exception are we concerned with here? - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - else: - # No error raised by string_to_dts, pick back up - # where we left off - value = dtstruct_to_dt64(&dts) - if out_local == 1: - tz = pytz.FixedOffset(out_tzoffset) - value = tz_convert_single(value, tz, 'UTC') - iresult[i] = value - try: - check_dts_bounds(&dts) - except OutOfBoundsDatetime: - # GH#19382 for just-barely-OutOfBounds falling back to - # dateutil parser will return incorrect result because - # it will ignore nanoseconds - if is_coerce: - iresult[i] = NPY_NAT - continue - elif require_iso8601: - if is_raise: - raise ValueError("time data {val} doesn't " - "match format specified" - .format(val=val)) - return values - raise - - else: - if is_coerce: - iresult[i] = NPY_NAT - else: - raise TypeError("{0} is not convertible to datetime" - .format(type(val))) - - if seen_datetime and seen_integer: - # we have mixed datetimes & integers - - if is_coerce: - # coerce all of the integers/floats to NaT, preserve - # the datetimes and other convertibles - for i in range(n): - val = values[i] - if is_integer_object(val) or is_float_object(val): - result[i] = NPY_NAT - elif is_raise: - raise ValueError( - "mixed datetimes and integers in passed array") - else: - raise TypeError - - return result - except OutOfBoundsDatetime: - if is_raise: - raise - - oresult = np.empty(n, dtype=object) - for i in range(n): - val = values[i] - - # set as nan except if its a NaT - if checknull_with_nat(val): - if PyFloat_Check(val): - oresult[i] = np.nan - else: - oresult[i] = NaT - elif is_datetime64_object(val): - if get_datetime64_value(val) == NPY_NAT: - oresult[i] = NaT - else: - oresult[i] = val.item() - else: - 
oresult[i] = val - return oresult - except TypeError: - oresult = np.empty(n, dtype=object) - - for i in range(n): - val = values[i] - if checknull_with_nat(val): - oresult[i] = val - elif is_string_object(val): - - if len(val) == 0 or val in nat_strings: - oresult[i] = 'NaT' - continue - - try: - oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, - yearfirst=yearfirst) - pydatetime_to_dt64(oresult[i], &dts) - check_dts_bounds(&dts) - except Exception: - if is_raise: - raise - return values - # oresult[i] = val - else: - if is_raise: - raise - return values - - return oresult - - -cdef inline bint _parse_today_now(str val, int64_t* iresult): - # We delay this check for as long as possible - # because it catches relatively rare cases - if val == 'now': - # Note: this is *not* the same as Timestamp('now') - iresult[0] = Timestamp.utcnow().value - return True - elif val == 'today': - iresult[0] = Timestamp.today().value - return True - return False - # ---------------------------------------------------------------------- # Some general helper functions diff --git a/pandas/_libs/tslibs/arraylike.pyx b/pandas/_libs/tslibs/arraylike.pyx new file mode 100644 index 0000000000000..65dc719efbcad --- /dev/null +++ b/pandas/_libs/tslibs/arraylike.pyx @@ -0,0 +1,717 @@ +# -*- coding: utf-8 -*- + +cimport numpy as cnp +from numpy cimport int64_t, ndarray, float64_t +import numpy as np +cnp.import_array() + + +from cpython cimport PyFloat_Check, PyUnicode_Check + +from util cimport (is_integer_object, is_float_object, is_string_object, + is_datetime64_object) + +from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, + PyDateTime_CheckExact, + PyDateTime_IMPORT, + timedelta, datetime, date, time) +# import datetime C API +PyDateTime_IMPORT + + +from np_datetime cimport (check_dts_bounds, + pandas_datetimestruct, + _string_to_dts, + dt64_to_dtstruct, dtstruct_to_dt64, + pydatetime_to_dt64, pydate_to_dt64, + get_datetime64_value) +from np_datetime import 
cpdef array_to_datetime(ndarray[object] values, errors='raise',
                        dayfirst=False, yearfirst=False,
                        format=None, utc=None,
                        require_iso8601=False):
    """
    Convert an object ndarray of datetime-like values to datetime64[ns].

    Parameters
    ----------
    values : ndarray[object]
        Entries may be nulls, datetime / date objects, np.datetime64
        scalars, integers or floats (taken as nanoseconds), or strings.
    errors : {'raise', 'ignore', 'coerce'}, default 'raise'
        - raise: conversion errors propagate
        - ignore: on OutOfBoundsDatetime or TypeError fall back to an
          object ndarray, or hand back ``values`` itself
        - coerce: entries that fail to convert become NaT
    dayfirst, yearfirst : bool, default False
        Forwarded to parse_datetime_string for strings that are not
        parsed by the ISO-8601 fast path.
    format : object, default None
        Not referenced in this function body.
    utc : object, default None
        When truthy, tz-aware datetimes are converted; otherwise a
        tz-aware datetime raises ValueError.
    require_iso8601 : bool, default False
        When True, strings that fail the ISO-8601 parse are not retried
        with parse_datetime_string.

    Returns
    -------
    ndarray
        datetime64[ns] when every entry converts; an object ndarray or
        ``values`` itself on the fallback paths.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val, py_dt
        ndarray[int64_t] iresult
        ndarray[object] oresult
        pandas_datetimestruct dts
        bint utc_convert = bool(utc)
        bint seen_integer = 0
        bint seen_string = 0
        bint seen_datetime = 0
        bint is_raise = errors=='raise'
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'
        _TSObject _ts
        int out_local=0, out_tzoffset=0

    # specify error conditions
    assert is_raise or is_ignore or is_coerce

    try:
        result = np.empty(n, dtype='M8[ns]')
        # iresult is an int64 view onto result; all writes go through it
        iresult = result.view('i8')
        for i in range(n):
            val = values[i]

            if checknull_with_nat(val):
                iresult[i] = NPY_NAT

            elif PyDateTime_Check(val):
                seen_datetime = 1
                if val.tzinfo is not None:
                    if utc_convert:
                        try:
                            _ts = convert_datetime_to_tsobject(val, None)
                            iresult[i] = _ts.value
                        except OutOfBoundsDatetime:
                            if is_coerce:
                                iresult[i] = NPY_NAT
                                continue
                            raise
                    else:
                        raise ValueError('Tz-aware datetime.datetime cannot '
                                         'be converted to datetime64 unless '
                                         'utc=True')
                else:
                    iresult[i] = pydatetime_to_dt64(val, &dts)
                    if not PyDateTime_CheckExact(val):
                        # i.e. a Timestamp object
                        iresult[i] += val.nanosecond
                    try:
                        check_dts_bounds(&dts)
                    except OutOfBoundsDatetime:
                        if is_coerce:
                            iresult[i] = NPY_NAT
                            continue
                        raise

            elif PyDate_Check(val):
                seen_datetime = 1
                iresult[i] = pydate_to_dt64(val, &dts)
                try:
                    check_dts_bounds(&dts)
                except OutOfBoundsDatetime:
                    if is_coerce:
                        iresult[i] = NPY_NAT
                        continue
                    raise

            elif is_datetime64_object(val):
                seen_datetime = 1
                if get_datetime64_value(val) == NPY_NAT:
                    iresult[i] = NPY_NAT
                else:
                    try:
                        iresult[i] = get_datetime64_nanos(val)
                    except OutOfBoundsDatetime:
                        if is_coerce:
                            iresult[i] = NPY_NAT
                            continue
                        raise

            elif is_integer_object(val) or is_float_object(val):
                # these must be ns unit by-definition
                seen_integer = 1

                if val != val or val == NPY_NAT:
                    iresult[i] = NPY_NAT
                elif is_raise or is_ignore:
                    iresult[i] = val
                else:
                    # coerce
                    # we now need to parse this as if unit='ns'
                    # we can ONLY accept integers at this point
                    # if we have previously (or in future accept
                    # datetimes/strings, then we must coerce)
                    try:
                        iresult[i] = cast_from_unit(val, 'ns')
                    except Exception:
                        # narrowed from a bare except so that
                        # KeyboardInterrupt/SystemExit are not swallowed
                        iresult[i] = NPY_NAT

            elif is_string_object(val):
                # string
                seen_string = 1

                if len(val) == 0 or val in nat_strings:
                    iresult[i] = NPY_NAT
                    continue
                if PyUnicode_Check(val) and PY2:
                    val = val.encode('utf-8')

                try:
                    _string_to_dts(val, &dts, &out_local, &out_tzoffset)
                except ValueError:
                    # A ValueError at this point is a _parsing_ error
                    # specifically _not_ OutOfBoundsDatetime
                    if _parse_today_now(val, &iresult[i]):
                        continue
                    elif require_iso8601:
                        # if requiring iso8601 strings, skip trying
                        # other formats
                        if is_coerce:
                            iresult[i] = NPY_NAT
                            continue
                        elif is_raise:
                            raise ValueError("time data {val} doesn't match "
                                             "format specified"
                                             .format(val=val))
                        return values

                    try:
                        py_dt = parse_datetime_string(val, dayfirst=dayfirst,
                                                      yearfirst=yearfirst)
                    except Exception:
                        if is_coerce:
                            iresult[i] = NPY_NAT
                            continue
                        raise TypeError("invalid string coercion to datetime")

                    try:
                        _ts = convert_datetime_to_tsobject(py_dt, None)
                        iresult[i] = _ts.value
                    except OutOfBoundsDatetime:
                        if is_coerce:
                            iresult[i] = NPY_NAT
                            continue
                        raise
                except Exception:
                    # TODO: What exception are we concerned with here?
                    # (narrowed from a bare except so interpreter-exit
                    # signals still propagate)
                    if is_coerce:
                        iresult[i] = NPY_NAT
                        continue
                    raise
                else:
                    # No error raised by string_to_dts, pick back up
                    # where we left off
                    value = dtstruct_to_dt64(&dts)
                    if out_local == 1:
                        tz = pytz.FixedOffset(out_tzoffset)
                        value = tz_convert_single(value, tz, 'UTC')
                    iresult[i] = value
                    try:
                        check_dts_bounds(&dts)
                    except OutOfBoundsDatetime:
                        # GH#19382 for just-barely-OutOfBounds falling back to
                        # dateutil parser will return incorrect result because
                        # it will ignore nanoseconds
                        if is_coerce:
                            iresult[i] = NPY_NAT
                            continue
                        elif require_iso8601:
                            if is_raise:
                                raise ValueError("time data {val} doesn't "
                                                 "match format specified"
                                                 .format(val=val))
                            return values
                        raise

            else:
                if is_coerce:
                    iresult[i] = NPY_NAT
                else:
                    raise TypeError("{0} is not convertible to datetime"
                                    .format(type(val)))

        if seen_datetime and seen_integer:
            # we have mixed datetimes & integers

            if is_coerce:
                # coerce all of the integers/floats to NaT, preserve
                # the datetimes and other convertibles
                for i in range(n):
                    val = values[i]
                    if is_integer_object(val) or is_float_object(val):
                        # write through the typed i8 view, like every other
                        # NaT assignment in this function
                        iresult[i] = NPY_NAT
            elif is_raise:
                raise ValueError(
                    "mixed datetimes and integers in passed array")
            else:
                # ignore mode: handled by the TypeError fallback below
                raise TypeError

        return result
    except OutOfBoundsDatetime:
        if is_raise:
            raise

        # not raising: hand the values back boxed in an object array
        oresult = np.empty(n, dtype=object)
        for i in range(n):
            val = values[i]

            # set as nan except if its a NaT
            if checknull_with_nat(val):
                if PyFloat_Check(val):
                    oresult[i] = np.nan
                else:
                    oresult[i] = NaT
            elif is_datetime64_object(val):
                if get_datetime64_value(val) == NPY_NAT:
                    oresult[i] = NaT
                else:
                    oresult[i] = val.item()
            else:
                oresult[i] = val
        return oresult
    except TypeError:
        oresult = np.empty(n, dtype=object)

        for i in range(n):
            val = values[i]
            if checknull_with_nat(val):
                oresult[i] = val
            elif is_string_object(val):

                if len(val) == 0 or val in nat_strings:
                    oresult[i] = 'NaT'
                    continue

                try:
                    oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
                                                       yearfirst=yearfirst)
                    pydatetime_to_dt64(oresult[i], &dts)
                    check_dts_bounds(&dts)
                except Exception:
                    if is_raise:
                        raise
                    return values
            else:
                if is_raise:
                    raise
                return values

        return oresult


cdef inline bint _parse_today_now(str val, int64_t* iresult):
    """
    Handle the special strings 'now' and 'today'.

    Writes the corresponding Timestamp value to ``iresult[0]`` and returns
    True when ``val`` is one of the two; returns False (writing nothing)
    otherwise.
    """
    # We delay this check for as long as possible
    # because it catches relatively rare cases
    if val == 'now':
        # Note: this is *not* the same as Timestamp('now')
        iresult[0] = Timestamp.utcnow().value
        return True
    elif val == 'today':
        iresult[0] = Timestamp.today().value
        return True
    return False
cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
    """
    Convert the ndarray to datetime64[ns] according to the unit.

    Parameters
    ----------
    values : ndarray
        Numbers (or numeric strings) counted in ``unit``.
    unit : str
        Unit of the input; 'ns' is delegated to ``values.astype('M8[ns]')``
        for integer dtypes and to array_to_datetime otherwise.
    errors : {'raise', 'ignore', 'coerce'}, default 'coerce'
        - raise: return converted values or raise OutOfBoundsDatetime
          if out of range on the conversion or
          ValueError for other conversions (e.g. a string)
        - ignore: return non-convertible values as the same unit
        - coerce: NaT for non-convertibles

    Returns
    -------
    ndarray
        datetime64[ns], or an object ndarray when errors='ignore' and some
        entry could not be converted.
    """
    cdef:
        Py_ssize_t i, n=len(values)
        int64_t m
        ndarray[float64_t] fvalues
        ndarray mask
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'
        bint is_raise = errors=='raise'
        bint need_to_iterate = True
        ndarray[int64_t] iresult
        ndarray[object] oresult

    assert is_ignore or is_coerce or is_raise

    if unit == 'ns':
        if issubclass(values.dtype.type, np.integer):
            return values.astype('M8[ns]')
        return array_to_datetime(values.astype(object), errors=errors)

    m = cast_from_unit(None, unit)

    if is_raise:

        # try a quick conversion to i8
        # if we have nulls that are not type-compat
        # then need to iterate
        try:
            iresult = values.astype('i8', casting='same_kind', copy=False)
            mask = iresult == iNaT
            fvalues = iresult.astype('f8') * m
            # Neutralise the NaT slots for the bounds check in the float
            # temporary.  With copy=False ``iresult`` can be the caller's
            # own array, so it must not be modified in place.
            fvalues[mask] = 0
            need_to_iterate = False
        except Exception:
            # deliberate best-effort: fall through to the element-wise loop
            pass

        # check the bounds
        if not need_to_iterate:

            if ((fvalues < _NS_LOWER_BOUND).any()
                    or (fvalues > _NS_UPPER_BOUND).any()):
                raise OutOfBoundsDatetime(
                    "cannot convert input with unit '{0}'".format(unit))
            result = (iresult * m).astype('M8[ns]')
            iresult = result.view('i8')
            # whatever the multiplication produced for NaT inputs is
            # overwritten here
            iresult[mask] = iNaT
            return result

    result = np.empty(n, dtype='M8[ns]')
    iresult = result.view('i8')

    try:
        for i in range(n):
            val = values[i]

            if checknull_with_nat(val):
                iresult[i] = NPY_NAT

            elif is_integer_object(val) or is_float_object(val):

                if val != val or val == NPY_NAT:
                    iresult[i] = NPY_NAT
                else:
                    try:
                        iresult[i] = cast_from_unit(val, unit)
                    except OverflowError:
                        if is_raise:
                            raise OutOfBoundsDatetime(
                                "cannot convert input {0} with the unit "
                                "'{1}'".format(val, unit))
                        elif is_ignore:
                            # AssertionError is the internal signal to
                            # restart in object mode (caught below)
                            raise AssertionError
                        iresult[i] = NPY_NAT

            elif is_string_object(val):
                if len(val) == 0 or val in nat_strings:
                    iresult[i] = NPY_NAT

                else:
                    try:
                        iresult[i] = cast_from_unit(float(val), unit)
                    except ValueError:
                        if is_raise:
                            raise ValueError(
                                "non convertible value {0} with the unit "
                                "'{1}'".format(val, unit))
                        elif is_ignore:
                            raise AssertionError
                        iresult[i] = NPY_NAT
                    except Exception:
                        # narrowed from a bare except so that
                        # KeyboardInterrupt/SystemExit are not converted
                        if is_raise:
                            raise OutOfBoundsDatetime(
                                "cannot convert input {0} with the unit "
                                "'{1}'".format(val, unit))
                        elif is_ignore:
                            raise AssertionError
                        iresult[i] = NPY_NAT

            else:

                if is_raise:
                    raise ValueError("unit='{0}' not valid with non-numerical "
                                     "val='{1}'".format(unit, val))
                if is_ignore:
                    raise AssertionError

                iresult[i] = NPY_NAT

        return result

    except AssertionError:
        pass

    # we have hit an exception
    # and are in ignore mode
    # redo as object

    oresult = np.empty(n, dtype=object)
    for i in range(n):
        val = values[i]

        if checknull_with_nat(val):
            oresult[i] = NaT
        elif is_integer_object(val) or is_float_object(val):

            if val != val or val == NPY_NAT:
                oresult[i] = NaT
            else:
                try:
                    oresult[i] = Timestamp(cast_from_unit(val, unit))
                except Exception:
                    # keep the raw value when it cannot be boxed
                    oresult[i] = val

        elif is_string_object(val):
            if len(val) == 0 or val in nat_strings:
                oresult[i] = NaT

            else:
                oresult[i] = val

    return oresult
cdef inline object create_datetime_from_ts(
        int64_t value, pandas_datetimestruct dts,
        object tz, object freq):
    """ build a datetime.datetime out of the struct fields plus tz """
    return datetime(dts.year, dts.month, dts.day, dts.hour,
                    dts.min, dts.sec, dts.us, tz)


cdef inline object create_date_from_ts(
        int64_t value, pandas_datetimestruct dts,
        object tz, object freq):
    """ build a datetime.date out of the struct's year/month/day """
    return date(dts.year, dts.month, dts.day)


cdef inline object create_time_from_ts(
        int64_t value, pandas_datetimestruct dts,
        object tz, object freq):
    """ build a datetime.time out of the struct's hour/min/sec/us """
    return time(dts.hour, dts.min, dts.sec, dts.us)


def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None,
                       box="datetime"):
    """
    Box every i8 value of ``arr`` as a datetime, date, time or Timestamp.

    Parameters
    ----------
    arr : array of i8
    tz : timezone object, default None
        convert to this timezone
    freq : str/Offset, default None
        freq to convert; a string is turned into an offset when
        box == 'timestamp'
    box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime'
        'datetime' -> datetime.datetime
        'date' -> datetime.date (tz must be None)
        'time' -> datetime.time
        'timestamp' -> pandas.Timestamp

    Returns
    -------
    result : object ndarray; NPY_NAT entries are returned as NaT
    """
    cdef:
        Py_ssize_t i, n = len(arr)
        ndarray[int64_t] trans, deltas
        pandas_datetimestruct dts
        object dt
        int64_t value
        ndarray[object] result = np.empty(n, dtype=object)
        object (*func_create)(int64_t, pandas_datetimestruct, object, object)

    # pick the boxing routine once, up front
    if box == "datetime":
        func_create = create_datetime_from_ts
    elif box == "date":
        assert tz is None, "tz should be None when converting to date"
        func_create = create_date_from_ts
    elif box == "time":
        func_create = create_time_from_ts
    elif box == "timestamp":
        func_create = create_timestamp_from_ts
        if is_string_object(freq):
            from pandas.tseries.frequencies import to_offset
            freq = to_offset(freq)
    else:
        raise ValueError("box must be one of 'datetime', 'date', 'time' or"
                         " 'timestamp'")

    if tz is None:
        # naive: the struct is computed straight from the stored value
        for i in range(n):
            value = arr[i]
            if value == NPY_NAT:
                result[i] = NaT
                continue
            dt64_to_dtstruct(value, &dts)
            result[i] = func_create(value, dts, None, freq)
        return result

    if is_utc(tz):
        for i in range(n):
            value = arr[i]
            if value == NPY_NAT:
                result[i] = NaT
                continue
            dt64_to_dtstruct(value, &dts)
            result[i] = func_create(value, dts, tz, freq)
        return result

    if is_tzlocal(tz) or is_fixed_offset(tz):
        for i in range(n):
            value = arr[i]
            if value == NPY_NAT:
                result[i] = NaT
                continue
            # Python datetime objects do not support nanosecond
            # resolution (yet, PEP 564). Need to compute new value
            # using the i8 representation.
            shifted = tz_convert_utc_to_tzlocal(value, tz)
            dt64_to_dtstruct(shifted, &dts)
            result[i] = func_create(value, dts, tz, freq)
        return result

    # remaining timezones: look up the offset in force for each value
    trans, deltas, tz_kind = get_dst_info(tz)
    for i in range(n):
        value = arr[i]
        if value == NPY_NAT:
            result[i] = NaT
            continue

        # Adjust datetime64 timestamp, recompute datetimestruct
        idx = trans.searchsorted(value, side='right') - 1
        if treat_tz_as_pytz(tz):
            # find right representation of dst etc in pytz timezone
            zone = tz._tzinfos[tz._transition_info[idx]]
        else:
            # no zone-name change for dateutil tzs - dst etc
            # represented in single object.
            zone = tz

        dt64_to_dtstruct(value + deltas[idx], &dts)
        result[i] = func_create(value, dts, zone, freq)

    return result


def ints_to_pytimedelta(ndarray[int64_t] arr, box=False):
    """
    Convert an i8 repr to an object ndarray of datetime.timedelta, or of
    Timedelta when ``box`` is truthy; NPY_NAT entries become NaT.
    """
    cdef:
        Py_ssize_t i, n = len(arr)
        int64_t value
        ndarray[object] result = np.empty(n, dtype=object)

    for i in range(n):
        value = arr[i]
        if value == NPY_NAT:
            result[i] = NaT
            continue
        if box:
            result[i] = Timedelta(value)
        else:
            result[i] = timedelta(microseconds=int(value) / 1000)

    return result
def format_array_from_datetime(ndarray[int64_t] values, object tz=None,
                               object format=None, object na_rep=None):
    """
    Return a numpy object array of the string formatted values

    Parameters
    ----------
    values : a 1-d i8 array
    tz : the timezone (or None)
    format : optional, default is None
        a strftime capable string
    na_rep : optional, default is None
        a nat format; 'NaT' is used when None

    Returns
    -------
    result : object ndarray of str, same length as ``values``
    """
    cdef:
        Py_ssize_t i
        int64_t val, ns, N = len(values)
        ndarray[int64_t] consider_values
        bint show_ms = 0, show_us = 0, show_ns = 0, basic_format = 0
        ndarray[object] result = np.empty(N, dtype=object)
        object ts, res
        pandas_datetimestruct dts

    if na_rep is None:
        na_rep = 'NaT'

    # if we don't have a format nor tz, then choose
    # a format based on precision
    basic_format = format is None and tz is None
    if basic_format:
        # boolean indexing yields a fresh array, so the in-place floor
        # divisions below never touch ``values``
        consider_values = values[values != NPY_NAT]
        show_ns = (consider_values % 1000).any()

        if not show_ns:
            consider_values //= 1000
            show_us = (consider_values % 1000).any()

            # only look for milliseconds when microseconds are not
            # already required (the guard used to test show_ms, which is
            # always 0 at this point)
            if not show_us:
                consider_values //= 1000
                show_ms = (consider_values % 1000).any()

    for i in range(N):
        val = values[i]

        if val == NPY_NAT:
            result[i] = na_rep
        elif basic_format:

            dt64_to_dtstruct(val, &dts)
            res = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year,
                                                   dts.month,
                                                   dts.day,
                                                   dts.hour,
                                                   dts.min,
                                                   dts.sec)

            # floor division keeps the sub-second fields integral
            # regardless of the Cython language level
            if show_ns:
                ns = dts.ps // 1000
                res += '.%.9d' % (ns + 1000 * dts.us)
            elif show_us:
                res += '.%.6d' % dts.us
            elif show_ms:
                res += '.%.3d' % (dts.us // 1000)

            result[i] = res

        else:
            ts = Timestamp(val, tz=tz)
            if format is None:
                result[i] = str(ts)
            else:
                # invalid format string
                # requires dates > 1900
                try:
                    result[i] = ts.strftime(format)
                except ValueError:
                    result[i] = str(ts)

    return result
'depends': tseries_depends, + 'sources': np_datetime_sources}, '_libs.tslibs.ccalendar': { 'pyxfile': '_libs/tslibs/ccalendar'}, '_libs.tslibs.conversion': {