Skip to content

Commit 73d58a9

Browse files
committed
ENH: Add unit keyword to Timestamp and to_datetime to enable passing of
integers or floats that are in an epoch unit of s, ms, us, ns (e.g. unix timestamps or epoch s, with fractional seconds allowed) (GH 3540)
1 parent 89e62c0 commit 73d58a9

File tree

7 files changed

+105
-26
lines changed

7 files changed

+105
-26
lines changed

RELEASE.rst

+4
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ pandas 0.11.1
8282
- Series and DataFrame hist methods now take a ``figsize`` argument (GH3834_)
8383
- DatetimeIndexes no longer try to convert mixed-integer indexes during join
8484
operations (GH3877_)
85+
- Add ``unit`` keyword to ``Timestamp`` and ``to_datetime`` to enable passing of
86+
integers or floats that are in an epoch unit of ``s, ms, us, ns``
87+
(e.g. unix timestamps or epoch ``s``, with fractional seconds allowed) (GH3540_)
8588

8689
**API Changes**
8790

@@ -264,6 +267,7 @@ pandas 0.11.1
264267
.. _GH3499: https://github.com/pydata/pandas/issues/3499
265268
.. _GH3495: https://github.com/pydata/pandas/issues/3495
266269
.. _GH3492: https://github.com/pydata/pandas/issues/3492
270+
.. _GH3540: https://github.com/pydata/pandas/issues/3540
267271
.. _GH3552: https://github.com/pydata/pandas/issues/3552
268272
.. _GH3562: https://github.com/pydata/pandas/issues/3562
269273
.. _GH3586: https://github.com/pydata/pandas/issues/3586

pandas/src/inference.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
471471
seen_float = 1
472472
elif util.is_datetime64_object(val):
473473
if convert_datetime:
474-
idatetimes[i] = convert_to_tsobject(val, None).value
474+
idatetimes[i] = convert_to_tsobject(val, None, None).value
475475
seen_datetime = 1
476476
else:
477477
seen_object = 1
@@ -493,7 +493,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
493493
elif PyDateTime_Check(val) or util.is_datetime64_object(val):
494494
if convert_datetime:
495495
seen_datetime = 1
496-
idatetimes[i] = convert_to_tsobject(val, None).value
496+
idatetimes[i] = convert_to_tsobject(val, None, None).value
497497
else:
498498
seen_object = 1
499499
break

pandas/src/offsets.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ cdef class _Offset:
7676
cpdef anchor(self, object start=None):
7777
if start is not None:
7878
self.start = start
79-
self.ts = convert_to_tsobject(self.start)
79+
self.ts = convert_to_tsobject(self.start, None, None)
8080
self._setup()
8181

8282
cdef _setup(self):

pandas/tseries/tests/test_timeseries.py

+43
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,14 @@ def test_frame_add_datetime64_col_other_units(self):
593593

594594
self.assert_((tmp['dates'].values == ex_vals).all())
595595

596+
def test_to_datetime_unit(self):
597+
598+
epoch = 1370745748
599+
s = Series([ epoch + t for t in range(20) ])
600+
result = to_datetime(s,unit='s')
601+
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ])
602+
assert_series_equal(result,expected)
603+
596604
def test_series_ctor_datetime64(self):
597605
rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50',
598606
freq='10s')
@@ -2691,6 +2699,41 @@ def test_basics_nanos(self):
26912699
self.assert_(stamp.microsecond == 0)
26922700
self.assert_(stamp.nanosecond == 500)
26932701

2702+
def test_unit(self):
2703+
def check(val,unit=None,s=1,us=0):
2704+
stamp = Timestamp(val, unit=unit)
2705+
self.assert_(stamp.year == 2000)
2706+
self.assert_(stamp.month == 1)
2707+
self.assert_(stamp.day == 1)
2708+
self.assert_(stamp.hour == 1)
2709+
self.assert_(stamp.minute == 1)
2710+
self.assert_(stamp.second == s)
2711+
self.assert_(stamp.microsecond == us)
2712+
self.assert_(stamp.nanosecond == 0)
2713+
2714+
val = Timestamp('20000101 01:01:01').value
2715+
2716+
check(val)
2717+
check(val/1000L,unit='us')
2718+
check(val/1000000L,unit='ms')
2719+
check(val/1000000000L,unit='s')
2720+
2721+
# get chopped
2722+
check((val+500000)/1000000000L,unit='s')
2723+
check((val+500000000)/1000000000L,unit='s')
2724+
check((val+500000)/1000000L,unit='ms')
2725+
2726+
# ok
2727+
check((val+500000)/1000L,unit='us',us=500)
2728+
check((val+500000000)/1000000L,unit='ms',us=500000)
2729+
2730+
# floats
2731+
check(val/1000.0 + 5,unit='us',us=5)
2732+
check(val/1000.0 + 5000,unit='us',us=5000)
2733+
check(val/1000000.0 + 0.5,unit='ms',us=500)
2734+
check(val/1000000.0 + 0.005,unit='ms',us=5)
2735+
check(val/1000000000.0 + 0.5,unit='s',us=500000)
2736+
26942737
def test_comparison(self):
26952738
# 5-18-2012 00:00:00.000
26962739
stamp = 1337299200000000000L

pandas/tseries/tools.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def _maybe_get_tz(tz):
5050

5151

5252
def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
53-
format=None, coerce=False):
53+
format=None, coerce=False, unit=None):
5454
"""
5555
Convert argument to datetime
5656
@@ -69,6 +69,8 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
6969
format : string, default None
7070
strftime to parse time, eg "%d/%m/%Y"
7171
coerce : force errors to NaT (False by default)
72+
unit : unit of the arg (s,ms,us,ns); denotes the unit of the epoch value
73+
(e.g. a unix timestamp)
7274
7375
Returns
7476
-------
@@ -86,7 +88,7 @@ def _convert_f(arg):
8688
else:
8789
result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
8890
utc=utc, dayfirst=dayfirst,
89-
coerce=coerce)
91+
coerce=coerce, unit=unit)
9092
if com.is_datetime64_dtype(result) and box:
9193
result = DatetimeIndex(result, tz='utc' if utc else None)
9294
return result

pandas/tslib.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from numpy cimport ndarray, int64_t
22

3-
cdef convert_to_tsobject(object, object)
3+
cdef convert_to_tsobject(object, object, object)

pandas/tslib.pyx

+50-20
Original file line numberDiff line numberDiff line change
@@ -131,21 +131,17 @@ class Timestamp(_Timestamp):
131131
note: by definition there cannot be any tz info on the ordinal itself """
132132
return cls(datetime.fromordinal(ordinal),offset=offset,tz=tz)
133133

134-
def __new__(cls, object ts_input, object offset=None, tz=None):
134+
def __new__(cls, object ts_input, object offset=None, tz=None, unit=None):
135135
cdef _TSObject ts
136136
cdef _Timestamp ts_base
137137

138-
if PyFloat_Check(ts_input):
139-
# to do, do we want to support this, ie with fractional seconds?
140-
raise TypeError("Cannot convert a float to datetime")
141-
142138
if util.is_string_object(ts_input):
143139
try:
144140
ts_input = parse_date(ts_input)
145141
except Exception:
146142
pass
147143

148-
ts = convert_to_tsobject(ts_input, tz)
144+
ts = convert_to_tsobject(ts_input, tz, unit)
149145

150146
if ts.value == NPY_NAT:
151147
return NaT
@@ -311,7 +307,7 @@ class Timestamp(_Timestamp):
311307

312308
if self.nanosecond != 0 and warn:
313309
print 'Warning: discarding nonzero nanoseconds'
314-
ts = convert_to_tsobject(self, self.tzinfo)
310+
ts = convert_to_tsobject(self, self.tzinfo, None)
315311

316312
return datetime(ts.dts.year, ts.dts.month, ts.dts.day,
317313
ts.dts.hour, ts.dts.min, ts.dts.sec,
@@ -530,7 +526,7 @@ cdef class _Timestamp(datetime):
530526
cdef:
531527
pandas_datetimestruct dts
532528
_TSObject ts
533-
ts = convert_to_tsobject(self, self.tzinfo)
529+
ts = convert_to_tsobject(self, self.tzinfo, None)
534530
dts = ts.dts
535531
return datetime(dts.year, dts.month, dts.day,
536532
dts.hour, dts.min, dts.sec,
@@ -623,12 +619,13 @@ cpdef _get_utcoffset(tzinfo, obj):
623619
return tzinfo.utcoffset(obj)
624620

625621
# helper to extract datetime and int64 from several different possibilities
626-
cdef convert_to_tsobject(object ts, object tz):
622+
cdef convert_to_tsobject(object ts, object tz, object unit):
627623
"""
628624
Extract datetime and int64 from any of:
629-
- np.int64
625+
- np.int64 (with unit providing a possible modifier)
630626
- np.datetime64
631-
- python int or long object
627+
- a float (with unit providing a possible modifier)
628+
- python int or long object (with unit providing a possible modifier)
632629
- iso8601 string object
633630
- python datetime object
634631
- another timestamp object
@@ -647,6 +644,11 @@ cdef convert_to_tsobject(object ts, object tz):
647644
obj.value = _get_datetime64_nanos(ts)
648645
pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts)
649646
elif is_integer_object(ts):
647+
ts = ts * cast_from_unit(unit,None)
648+
obj.value = ts
649+
pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts)
650+
elif util.is_float_object(ts):
651+
ts = cast_from_unit(unit,ts)
650652
obj.value = ts
651653
pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts)
652654
elif util.is_string_object(ts):
@@ -699,7 +701,7 @@ cdef convert_to_tsobject(object ts, object tz):
699701
elif PyDate_Check(ts):
700702
# Keep the converter same as PyDateTime's
701703
ts = datetime.combine(ts, datetime_time())
702-
return convert_to_tsobject(ts, tz)
704+
return convert_to_tsobject(ts, tz, None)
703705
else:
704706
raise ValueError("Could not construct Timestamp from argument %s" %
705707
type(ts))
@@ -804,7 +806,7 @@ def datetime_to_datetime64(ndarray[object] values):
804806
else:
805807
inferred_tz = _get_zone(val.tzinfo)
806808

807-
_ts = convert_to_tsobject(val, None)
809+
_ts = convert_to_tsobject(val, None, None)
808810
iresult[i] = _ts.value
809811
_check_dts_bounds(iresult[i], &_ts.dts)
810812
else:
@@ -819,7 +821,7 @@ def datetime_to_datetime64(ndarray[object] values):
819821

820822

821823
def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
822-
format=None, utc=None, coerce=False):
824+
format=None, utc=None, coerce=False, unit=None):
823825
cdef:
824826
Py_ssize_t i, n = len(values)
825827
object val
@@ -828,6 +830,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
828830
pandas_datetimestruct dts
829831
bint utc_convert = bool(utc)
830832
_TSObject _ts
833+
int64_t m = cast_from_unit(unit,None)
831834

832835
from dateutil.parser import parse
833836

@@ -841,7 +844,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
841844
elif PyDateTime_Check(val):
842845
if val.tzinfo is not None:
843846
if utc_convert:
844-
_ts = convert_to_tsobject(val, None)
847+
_ts = convert_to_tsobject(val, None, unit)
845848
iresult[i] = _ts.value
846849
_check_dts_bounds(iresult[i], &_ts.dts)
847850
else:
@@ -861,7 +864,9 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
861864

862865
# if we are coercing, don't allow integers
863866
elif util.is_integer_object(val) and not coerce:
864-
iresult[i] = val
867+
iresult[i] = val*m
868+
elif util.is_float_object(val) and not coerce:
869+
iresult[i] = cast_from_unit(unit,val)
865870
else:
866871
try:
867872
if len(val) == 0:
@@ -1246,6 +1251,31 @@ cdef inline _get_datetime64_nanos(object val):
12461251
else:
12471252
return ival
12481253

1254+
cdef inline int64_t cast_from_unit(object unit, object ts):
1255+
""" return a casting of the unit represented to nanoseconds
1256+
round the fractional part of a float to our precision, p """
1257+
p = 0
1258+
if unit == 's':
1259+
m = 1000000000L
1260+
p = 6
1261+
elif unit == 'ms':
1262+
m = 1000000L
1263+
p = 3
1264+
elif unit == 'us':
1265+
m = 1000L
1266+
p = 0
1267+
else:
1268+
m = 1L
1269+
1270+
# just give me the unit back
1271+
if ts is None:
1272+
return m
1273+
1274+
# cast the unit, multiply base/frac separately
1275+
# to avoid precision issues from float -> int
1276+
base = <int64_t> ts
1277+
frac = ts-base
1278+
return <int64_t> (base*m) + <int64_t> (round(frac,p)*m)
12491279

12501280
def cast_to_nanoseconds(ndarray arr):
12511281
cdef:
@@ -1286,7 +1316,7 @@ def pydt_to_i8(object pydt):
12861316
cdef:
12871317
_TSObject ts
12881318

1289-
ts = convert_to_tsobject(pydt, None)
1319+
ts = convert_to_tsobject(pydt, None, None)
12901320

12911321
return ts.value
12921322

@@ -1784,7 +1814,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
17841814
for i in range(count):
17851815
if dtindex[i] == NPY_NAT: out[i] = -1; continue
17861816

1787-
ts = convert_to_tsobject(dtindex[i], None)
1817+
ts = convert_to_tsobject(dtindex[i], None, None)
17881818
out[i] = ts_dayofweek(ts)
17891819
return out
17901820

@@ -1793,7 +1823,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
17931823
if dtindex[i] == NPY_NAT: out[i] = -1; continue
17941824

17951825
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
1796-
ts = convert_to_tsobject(dtindex[i], None)
1826+
ts = convert_to_tsobject(dtindex[i], None, None)
17971827
isleap = is_leapyear(dts.year)
17981828
isleap_prev = is_leapyear(dts.year - 1)
17991829
mo_off = _month_offset[isleap, dts.month - 1]
@@ -1831,7 +1861,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
18311861

18321862

18331863
cdef inline int m8_weekday(int64_t val):
1834-
ts = convert_to_tsobject(val, None)
1864+
ts = convert_to_tsobject(val, None, None)
18351865
return ts_dayofweek(ts)
18361866

18371867
cdef int64_t DAY_NS = 86400000000000LL

0 commit comments

Comments
 (0)