Skip to content

Commit cf0db6f

Browse files
committed
Merge pull request #3818 from jreback/epoch
ENH: Add unit keyword to Timestamp and to_datetime
2 parents 7327d69 + fbcd5ab commit cf0db6f

File tree

8 files changed

+166
-30
lines changed

8 files changed

+166
-30
lines changed

RELEASE.rst

+4
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ pandas 0.11.1
8484
- Series and DataFrame hist methods now take a ``figsize`` argument (GH3834_)
8585
- DatetimeIndexes no longer try to convert mixed-integer indexes during join
8686
operations (GH3877_)
87+
- Add ``unit`` keyword to ``Timestamp`` and ``to_datetime`` to enable passing of
88+
integers or floats that are in an epoch unit of ``s, ms, us, ns``
89+
(e.g. unix timestamps or epoch ``s``, with fractional seconds allowed) (GH3540_)
8790

8891
**API Changes**
8992

@@ -267,6 +270,7 @@ pandas 0.11.1
267270
.. _GH3499: https://github.com/pydata/pandas/issues/3499
268271
.. _GH3495: https://github.com/pydata/pandas/issues/3495
269272
.. _GH3492: https://github.com/pydata/pandas/issues/3492
273+
.. _GH3540: https://github.com/pydata/pandas/issues/3540
270274
.. _GH3552: https://github.com/pydata/pandas/issues/3552
271275
.. _GH3562: https://github.com/pydata/pandas/issues/3562
272276
.. _GH3586: https://github.com/pydata/pandas/issues/3586

pandas/src/inference.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -471,7 +471,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
471471
seen_float = 1
472472
elif util.is_datetime64_object(val):
473473
if convert_datetime:
474-
idatetimes[i] = convert_to_tsobject(val, None).value
474+
idatetimes[i] = convert_to_tsobject(val, None, None).value
475475
seen_datetime = 1
476476
else:
477477
seen_object = 1
@@ -493,7 +493,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
493493
elif PyDateTime_Check(val) or util.is_datetime64_object(val):
494494
if convert_datetime:
495495
seen_datetime = 1
496-
idatetimes[i] = convert_to_tsobject(val, None).value
496+
idatetimes[i] = convert_to_tsobject(val, None, None).value
497497
else:
498498
seen_object = 1
499499
break

pandas/src/offsets.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ cdef class _Offset:
7676
cpdef anchor(self, object start=None):
7777
if start is not None:
7878
self.start = start
79-
self.ts = convert_to_tsobject(self.start)
79+
self.ts = convert_to_tsobject(self.start, None, None)
8080
self._setup()
8181

8282
cdef _setup(self):

pandas/tseries/index.py

+3
Original file line numberDiff line numberDiff line change
@@ -1204,6 +1204,9 @@ def slice_indexer(self, start=None, end=None, step=None):
12041204
if isinstance(start, time) or isinstance(end, time):
12051205
raise KeyError('Cannot mix time and non-time slice keys')
12061206

1207+
if isinstance(start, float) or isinstance(end, float):
1208+
raise TypeError('Cannot index datetime64 with float keys')
1209+
12071210
return Index.slice_indexer(self, start, end, step)
12081211

12091212
def slice_locs(self, start=None, end=None):

pandas/tseries/tests/test_timeseries.py

+84-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import pandas.util.py3compat as py3compat
3939
from pandas.core.datetools import BDay
4040
import pandas.core.common as com
41+
from pandas import concat
4142

4243
from numpy.testing.decorators import slow
4344

@@ -171,7 +172,6 @@ def test_indexing_over_size_cutoff(self):
171172
def test_indexing_unordered(self):
172173

173174
# GH 2437
174-
from pandas import concat
175175
rng = date_range(start='2011-01-01', end='2011-01-15')
176176
ts = Series(randn(len(rng)), index=rng)
177177
ts2 = concat([ts[0:4],ts[-4:],ts[4:-4]])
@@ -593,6 +593,34 @@ def test_frame_add_datetime64_col_other_units(self):
593593

594594
self.assert_((tmp['dates'].values == ex_vals).all())
595595

596+
def test_to_datetime_unit(self):
597+
598+
epoch = 1370745748
599+
s = Series([ epoch + t for t in range(20) ])
600+
result = to_datetime(s,unit='s')
601+
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ])
602+
assert_series_equal(result,expected)
603+
604+
s = Series([ epoch + t for t in range(20) ]).astype(float)
605+
result = to_datetime(s,unit='s')
606+
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ])
607+
assert_series_equal(result,expected)
608+
609+
s = Series([ epoch + t for t in range(20) ] + [iNaT])
610+
result = to_datetime(s,unit='s')
611+
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT])
612+
assert_series_equal(result,expected)
613+
614+
s = Series([ epoch + t for t in range(20) ] + [iNaT]).astype(float)
615+
result = to_datetime(s,unit='s')
616+
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT])
617+
assert_series_equal(result,expected)
618+
619+
s = concat([Series([ epoch + t for t in range(20) ]).astype(float),Series([np.nan])],ignore_index=True)
620+
result = to_datetime(s,unit='s')
621+
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT])
622+
assert_series_equal(result,expected)
623+
596624
def test_series_ctor_datetime64(self):
597625
rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50',
598626
freq='10s')
@@ -2691,6 +2719,61 @@ def test_basics_nanos(self):
26912719
self.assert_(stamp.microsecond == 0)
26922720
self.assert_(stamp.nanosecond == 500)
26932721

2722+
def test_unit(self):
2723+
def check(val,unit=None,s=1,us=0):
2724+
stamp = Timestamp(val, unit=unit)
2725+
self.assert_(stamp.year == 2000)
2726+
self.assert_(stamp.month == 1)
2727+
self.assert_(stamp.day == 1)
2728+
self.assert_(stamp.hour == 1)
2729+
self.assert_(stamp.minute == 1)
2730+
self.assert_(stamp.second == s)
2731+
self.assert_(stamp.microsecond == us)
2732+
self.assert_(stamp.nanosecond == 0)
2733+
2734+
val = Timestamp('20000101 01:01:01').value
2735+
2736+
check(val)
2737+
check(val/1000L,unit='us')
2738+
check(val/1000000L,unit='ms')
2739+
check(val/1000000000L,unit='s')
2740+
2741+
# using truediv, so these are like floats
2742+
if py3compat.PY3:
2743+
check((val+500000)/1000000000L,unit='s',us=500)
2744+
check((val+500000000)/1000000000L,unit='s',us=500000)
2745+
check((val+500000)/1000000L,unit='ms',us=500)
2746+
2747+
# get chopped in py2
2748+
else:
2749+
check((val+500000)/1000000000L,unit='s')
2750+
check((val+500000000)/1000000000L,unit='s')
2751+
check((val+500000)/1000000L,unit='ms')
2752+
2753+
# ok
2754+
check((val+500000)/1000L,unit='us',us=500)
2755+
check((val+500000000)/1000000L,unit='ms',us=500000)
2756+
2757+
# floats
2758+
check(val/1000.0 + 5,unit='us',us=5)
2759+
check(val/1000.0 + 5000,unit='us',us=5000)
2760+
check(val/1000000.0 + 0.5,unit='ms',us=500)
2761+
check(val/1000000.0 + 0.005,unit='ms',us=5)
2762+
check(val/1000000000.0 + 0.5,unit='s',us=500000)
2763+
2764+
# nan
2765+
result = Timestamp(np.nan)
2766+
self.assert_(result is NaT)
2767+
2768+
result = Timestamp(None)
2769+
self.assert_(result is NaT)
2770+
2771+
result = Timestamp(iNaT)
2772+
self.assert_(result is NaT)
2773+
2774+
result = Timestamp(NaT)
2775+
self.assert_(result is NaT)
2776+
26942777
def test_comparison(self):
26952778
# 5-18-2012 00:00:00.000
26962779
stamp = 1337299200000000000L

pandas/tseries/tools.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def _maybe_get_tz(tz):
5050

5151

5252
def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
53-
format=None, coerce=False):
53+
format=None, coerce=False, unit='ns'):
5454
"""
5555
Convert argument to datetime
5656
@@ -69,6 +69,8 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
6969
format : string, default None
7070
strftime to parse time, eg "%d/%m/%Y"
7171
coerce : force errors to NaT (False by default)
72+
unit : unit of the arg (s,ms,us,ns), denoting the epoch unit when arg
73+
is an integer/float number (e.g. a unix timestamp); default 'ns'
7274
7375
Returns
7476
-------
@@ -86,7 +88,7 @@ def _convert_f(arg):
8688
else:
8789
result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
8890
utc=utc, dayfirst=dayfirst,
89-
coerce=coerce)
91+
coerce=coerce, unit=unit)
9092
if com.is_datetime64_dtype(result) and box:
9193
result = DatetimeIndex(result, tz='utc' if utc else None)
9294
return result

pandas/tslib.pxd

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from numpy cimport ndarray, int64_t
22

3-
cdef convert_to_tsobject(object, object)
3+
cdef convert_to_tsobject(object, object, object)

0 commit comments

Comments
 (0)