diff --git a/RELEASE.rst b/RELEASE.rst index 161047c478d88..0d94337ffea78 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -82,6 +82,9 @@ pandas 0.11.1 - Series and DataFrame hist methods now take a ``figsize`` argument (GH3834_) - DatetimeIndexes no longer try to convert mixed-integer indexes during join operations (GH3877_) + - Add ``unit`` keyword to ``Timestamp`` and ``to_datetime`` to enable passing of + integers or floats that are in an epoch unit of ``s, ms, us, ns`` + (e.g. unix timestamps or epoch ``s``, with fractional seconds allowed) (GH3540_) **API Changes** @@ -264,6 +267,7 @@ pandas 0.11.1 .. _GH3499: https://github.com/pydata/pandas/issues/3499 .. _GH3495: https://github.com/pydata/pandas/issues/3495 .. _GH3492: https://github.com/pydata/pandas/issues/3492 +.. _GH3540: https://github.com/pydata/pandas/issues/3540 .. _GH3552: https://github.com/pydata/pandas/issues/3552 .. _GH3562: https://github.com/pydata/pandas/issues/3562 .. _GH3586: https://github.com/pydata/pandas/issues/3586 diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 5343819b9fbfe..270fb01a42033 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -471,7 +471,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, seen_float = 1 elif util.is_datetime64_object(val): if convert_datetime: - idatetimes[i] = convert_to_tsobject(val, None).value + idatetimes[i] = convert_to_tsobject(val, None, None).value seen_datetime = 1 else: seen_object = 1 @@ -493,7 +493,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, elif PyDateTime_Check(val) or util.is_datetime64_object(val): if convert_datetime: seen_datetime = 1 - idatetimes[i] = convert_to_tsobject(val, None).value + idatetimes[i] = convert_to_tsobject(val, None, None).value else: seen_object = 1 break diff --git a/pandas/src/offsets.pyx b/pandas/src/offsets.pyx index 5868ca5210e33..1823edeb0a4d9 100644 --- a/pandas/src/offsets.pyx +++ b/pandas/src/offsets.pyx @@ -76,7 +76,7 @@ cdef class _Offset: cpdef anchor(self, object start=None): if start is not None: self.start = start - self.ts = convert_to_tsobject(self.start) + self.ts = convert_to_tsobject(self.start, None, None) self._setup() cdef _setup(self): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 51e657d1723b2..1cb986ee6cd7c 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1204,6 +1204,9 @@ def slice_indexer(self, start=None, end=None, step=None): if isinstance(start, time) or isinstance(end, time): raise KeyError('Cannot mix time and non-time slice keys') + if isinstance(start, float) or isinstance(end, float): + raise TypeError('Cannot index datetime64 with float keys') + return Index.slice_indexer(self, start, end, step) def slice_locs(self, start=None, end=None): diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index f5415a195db77..ac02dee335afc 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -38,6 +38,7 @@ import pandas.util.py3compat as py3compat from pandas.core.datetools import BDay import pandas.core.common as com +from pandas import concat from numpy.testing.decorators import slow @@ -171,7 +172,6 @@ def test_indexing_over_size_cutoff(self): def test_indexing_unordered(self): # GH 2437 - from pandas import concat rng = date_range(start='2011-01-01', end='2011-01-15') ts = Series(randn(len(rng)), index=rng) ts2 =
concat([ts[0:4],ts[-4:],ts[4:-4]]) @@ -593,6 +593,34 @@ def test_frame_add_datetime64_col_other_units(self): self.assert_((tmp['dates'].values == ex_vals).all()) + def test_to_datetime_unit(self): + + epoch = 1370745748 + s = Series([ epoch + t for t in range(20) ]) + result = to_datetime(s,unit='s') + expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ]) + assert_series_equal(result,expected) + + s = Series([ epoch + t for t in range(20) ]).astype(float) + result = to_datetime(s,unit='s') + expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ]) + assert_series_equal(result,expected) + + s = Series([ epoch + t for t in range(20) ] + [iNaT]) + result = to_datetime(s,unit='s') + expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT]) + assert_series_equal(result,expected) + + s = Series([ epoch + t for t in range(20) ] + [iNaT]).astype(float) + result = to_datetime(s,unit='s') + expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT]) + assert_series_equal(result,expected) + + s = concat([Series([ epoch + t for t in range(20) ]).astype(float),Series([np.nan])],ignore_index=True) + result = to_datetime(s,unit='s') + expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT]) + assert_series_equal(result,expected) + def test_series_ctor_datetime64(self): rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') @@ -2691,6 +2719,61 @@ def test_basics_nanos(self): self.assert_(stamp.microsecond == 0) self.assert_(stamp.nanosecond == 500) + def test_unit(self): + def check(val,unit=None,s=1,us=0): + stamp = Timestamp(val, unit=unit) + self.assert_(stamp.year == 2000) + self.assert_(stamp.month == 1) + self.assert_(stamp.day == 1) + self.assert_(stamp.hour == 1) + self.assert_(stamp.minute == 1) + self.assert_(stamp.second == s) + self.assert_(stamp.microsecond == us) + self.assert_(stamp.nanosecond == 0) + + val = Timestamp('20000101 01:01:01').value + + check(val) + check(val/1000L,unit='us') + check(val/1000000L,unit='ms') + check(val/1000000000L,unit='s') + + # using truediv, so these are like floats + if py3compat.PY3: + check((val+500000)/1000000000L,unit='s',us=500) + check((val+500000000)/1000000000L,unit='s',us=500000) + check((val+500000)/1000000L,unit='ms',us=500) + + # get chopped in py2 + else: + check((val+500000)/1000000000L,unit='s') + check((val+500000000)/1000000000L,unit='s') + check((val+500000)/1000000L,unit='ms') + + # ok + check((val+500000)/1000L,unit='us',us=500) + check((val+500000000)/1000000L,unit='ms',us=500000) + + # floats + check(val/1000.0 + 5,unit='us',us=5) + check(val/1000.0 + 5000,unit='us',us=5000) + check(val/1000000.0 + 0.5,unit='ms',us=500) + check(val/1000000.0 + 0.005,unit='ms',us=5) + check(val/1000000000.0 + 0.5,unit='s',us=500000) + + # nan + result = Timestamp(np.nan) + self.assert_(result is NaT) + + result = Timestamp(None) + self.assert_(result is NaT) + + result = Timestamp(iNaT) + self.assert_(result is NaT) + + result = Timestamp(NaT) + self.assert_(result is NaT) + def test_comparison(self): # 5-18-2012 00:00:00.000 stamp = 1337299200000000000L diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 62ee19da6b845..90bc0beb8eb84 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -50,7 +50,7 @@ def _maybe_get_tz(tz): def to_datetime(arg, errors='ignore', dayfirst=False, 
utc=None, box=True, - format=None, coerce=False): + format=None, coerce=False, unit='ns'): """ Convert argument to datetime @@ -69,6 +69,8 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True, format : string, default None strftime to parse time, eg "%d/%m/%Y" coerce : force errors to NaT (False by default) + unit : unit of the arg (s, ms, us, ns); denotes the epoch unit of an + integer/float arg (e.g. a unix timestamp is in seconds) Returns ------- @@ -86,7 +88,7 @@ def _convert_f(arg): else: result = tslib.array_to_datetime(arg, raise_=errors == 'raise', utc=utc, dayfirst=dayfirst, - coerce=coerce) + coerce=coerce, unit=unit) if com.is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz='utc' if utc else None) return result diff --git a/pandas/tslib.pxd b/pandas/tslib.pxd index 3e7a6ef615e00..a70f9883c5bb1 100644 --- a/pandas/tslib.pxd +++ b/pandas/tslib.pxd @@ -1,3 +1,3 @@ from numpy cimport ndarray, int64_t -cdef convert_to_tsobject(object, object) +cdef convert_to_tsobject(object, object, object) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index abec45b52a363..ec11de7392680 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -131,21 +131,17 @@ class Timestamp(_Timestamp): note: by definition there cannot be any tz info on the ordinal itself """ return cls(datetime.fromordinal(ordinal),offset=offset,tz=tz) - def __new__(cls, object ts_input, object offset=None, tz=None): + def __new__(cls, object ts_input, object offset=None, tz=None, unit=None): cdef _TSObject ts cdef _Timestamp ts_base - if PyFloat_Check(ts_input): - # to do, do we want to support this, ie with fractional seconds? - raise TypeError("Cannot convert a float to datetime") - if util.is_string_object(ts_input): try: ts_input = parse_date(ts_input) except Exception: pass - ts = convert_to_tsobject(ts_input, tz) + ts = convert_to_tsobject(ts_input, tz, unit) if ts.value == NPY_NAT: return NaT @@ -311,7 +307,7 @@ class Timestamp(_Timestamp): if self.nanosecond != 0 and warn: print 'Warning: discarding nonzero nanoseconds' - ts = convert_to_tsobject(self, self.tzinfo) + ts = convert_to_tsobject(self, self.tzinfo, None) return datetime(ts.dts.year, ts.dts.month, ts.dts.day, ts.dts.hour, ts.dts.min, ts.dts.sec, @@ -530,7 +526,7 @@ cdef class _Timestamp(datetime): cdef: pandas_datetimestruct dts _TSObject ts - ts = convert_to_tsobject(self, self.tzinfo) + ts = convert_to_tsobject(self, self.tzinfo, None) dts = ts.dts return datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, @@ -623,12 +619,13 @@ cpdef _get_utcoffset(tzinfo, obj): return tzinfo.utcoffset(obj) # helper to extract datetime and int64 from several different possibilities -cdef convert_to_tsobject(object ts, object tz): +cdef convert_to_tsobject(object ts, object tz, object unit): """ Extract datetime and int64 from any of: - - np.int64 + - np.int64 (with unit providing a possible modifier) - np.datetime64 - - python int or long object + - a float (with unit providing a possible modifier) + - python int or long object (with unit providing a possible modifier) - iso8601 string object - python datetime object - another timestamp object @@ -643,12 +640,25 @@ cdef convert_to_tsobject(object ts, object tz): obj = _TSObject() - if is_datetime64_object(ts): + if ts is None or ts is NaT: + obj.value = NPY_NAT + elif is_datetime64_object(ts): obj.value = _get_datetime64_nanos(ts) pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) elif is_integer_object(ts): - obj.value = ts -
pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts) + if ts == NPY_NAT: + obj.value = NPY_NAT + else: + ts = ts * cast_from_unit(unit,None) + obj.value = ts + pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts) + elif util.is_float_object(ts): + if ts != ts or ts == NPY_NAT: + obj.value = NPY_NAT + else: + ts = cast_from_unit(unit,ts) + obj.value = ts + pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts) elif util.is_string_object(ts): if ts in _nat_strings: obj.value = NPY_NAT @@ -699,7 +709,7 @@ cdef convert_to_tsobject(object ts, object tz): elif PyDate_Check(ts): # Keep the converter same as PyDateTime's ts = datetime.combine(ts, datetime_time()) - return convert_to_tsobject(ts, tz) + return convert_to_tsobject(ts, tz, None) else: raise ValueError("Could not construct Timestamp from argument %s" % type(ts)) @@ -804,7 +814,7 @@ def datetime_to_datetime64(ndarray[object] values): else: inferred_tz = _get_zone(val.tzinfo) - _ts = convert_to_tsobject(val, None) + _ts = convert_to_tsobject(val, None, None) iresult[i] = _ts.value _check_dts_bounds(iresult[i], &_ts.dts) else: @@ -819,7 +829,7 @@ def datetime_to_datetime64(ndarray[object] values): def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, - format=None, utc=None, coerce=False): + format=None, utc=None, coerce=False, unit=None): cdef: Py_ssize_t i, n = len(values) object val @@ -828,6 +838,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, pandas_datetimestruct dts bint utc_convert = bool(utc) _TSObject _ts + int64_t m = cast_from_unit(unit,None) from dateutil.parser import parse @@ -841,7 +852,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, elif PyDateTime_Check(val): if val.tzinfo is not None: if utc_convert: - _ts = convert_to_tsobject(val, None) + _ts = convert_to_tsobject(val, None, unit) iresult[i] = _ts.value _check_dts_bounds(iresult[i], &_ts.dts) else: @@ -861,7 +872,15 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, # if we are coercing, dont' allow integers elif util.is_integer_object(val) and not coerce: - iresult[i] = val + if val == iNaT: + iresult[i] = iNaT + else: + iresult[i] = val*m + elif util.is_float_object(val) and not coerce: + if val != val or val == iNaT: + iresult[i] = iNaT + else: + iresult[i] = cast_from_unit(unit,val) else: try: if len(val) == 0: @@ -1246,6 +1265,31 @@ cdef inline _get_datetime64_nanos(object val): else: return ival +cdef inline int64_t cast_from_unit(object unit, object ts): + """ return a casting of the unit represented to nanoseconds + round the fractional part of a float to our precision, p """ + if unit == 's': + m = 1000000000L + p = 6 + elif unit == 'ms': + m = 1000000L + p = 3 + elif unit == 'us': + m = 1000L + p = 0 + else: + m = 1L + p = 0 + + # just give me the unit back + if ts is None: + return m + + # cast the unit, multiply base/frac separately + # to avoid precision issues from float -> int + base = <int64_t> ts + frac = ts-base + return <int64_t> (base*m) + <int64_t> (round(frac,p)*m) def cast_to_nanoseconds(ndarray arr): cdef: @@ -1286,7 +1330,7 @@ def pydt_to_i8(object pydt): cdef: _TSObject ts - ts = convert_to_tsobject(pydt, None) + ts = convert_to_tsobject(pydt, None, None) return ts.value @@ -1784,7 +1828,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): for i in range(count): if dtindex[i] == NPY_NAT: out[i] = -1; continue - ts = convert_to_tsobject(dtindex[i], None) + ts = convert_to_tsobject(dtindex[i], None, None) out[i] =
ts_dayofweek(ts) return out @@ -1793,7 +1837,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): if dtindex[i] == NPY_NAT: out[i] = -1; continue pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None) + ts = convert_to_tsobject(dtindex[i], None, None) isleap = is_leapyear(dts.year) isleap_prev = is_leapyear(dts.year - 1) mo_off = _month_offset[isleap, dts.month - 1] @@ -1831,7 +1875,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field): cdef inline int m8_weekday(int64_t val): - ts = convert_to_tsobject(val, None) + ts = convert_to_tsobject(val, None, None) return ts_dayofweek(ts) cdef int64_t DAY_NS = 86400000000000LL
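
The following is a minimal usage sketch, not part of the patch itself, of the behaviour the changes above are intended to enable; it mirrors test_to_datetime_unit and test_unit (the epoch value 1370745748 corresponds to Timestamp('2013-06-09 02:42:28'), as asserted in those tests):

    import numpy as np
    from pandas import Series, Timestamp, to_datetime

    # an integer epoch value (a unix timestamp) is interpreted via the new ``unit`` keyword
    stamp = Timestamp(1370745748, unit='s')        # == Timestamp('2013-06-09 02:42:28')

    # floats may carry fractional seconds; cast_from_unit rounds them to the unit's precision
    stamp = Timestamp(1370745748.5, unit='s')      # stamp.microsecond == 500000

    # to_datetime accepts the same keyword for Series/array input;
    # NaN floats and the iNaT sentinel convert to NaT
    s = Series([1370745748 + t for t in range(3)] + [np.nan])
    converted = to_datetime(s, unit='s')           # last element is NaT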