ENH: Add unit keyword to Timestamp and to_datetime #3818

Merged
merged 3 commits into from Jun 13, 2013
4 changes: 4 additions & 0 deletions RELEASE.rst
@@ -82,6 +82,9 @@ pandas 0.11.1
- Series and DataFrame hist methods now take a ``figsize`` argument (GH3834_)
- DatetimeIndexes no longer try to convert mixed-integer indexes during join
operations (GH3877_)
- Add ``unit`` keyword to ``Timestamp`` and ``to_datetime`` to enable passing of
integers or floats that are in an epoch unit of ``s, ms, us, ns``
(e.g. unix timestamps or epoch ``s``, with fractional seconds allowed) (GH3540_)

**API Changes**

@@ -264,6 +267,7 @@ pandas 0.11.1
.. _GH3499: https://github.com/pydata/pandas/issues/3499
.. _GH3495: https://github.com/pydata/pandas/issues/3495
.. _GH3492: https://github.com/pydata/pandas/issues/3492
.. _GH3540: https://github.com/pydata/pandas/issues/3540
.. _GH3552: https://github.com/pydata/pandas/issues/3552
.. _GH3562: https://github.com/pydata/pandas/issues/3562
.. _GH3586: https://github.com/pydata/pandas/issues/3586
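The release note above is easiest to see with a small example. The sketch below is illustrative only (not part of the diff); the epoch value and expected results are taken from the tests added later in this PR.

from pandas import Series, Timestamp, to_datetime

# epoch seconds -> Timestamp via the new ``unit`` keyword
Timestamp(1370745748, unit='s')       # Timestamp('2013-06-09 02:42:28')

# floats keep their fractional seconds
Timestamp(1370745748.5, unit='s')     # Timestamp('2013-06-09 02:42:28.500000')

# a Series of epoch seconds converts the same way
s = Series([1370745748 + t for t in range(3)])
to_datetime(s, unit='s')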
4 changes: 2 additions & 2 deletions pandas/src/inference.pyx
@@ -471,7 +471,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
seen_float = 1
elif util.is_datetime64_object(val):
if convert_datetime:
idatetimes[i] = convert_to_tsobject(val, None).value
idatetimes[i] = convert_to_tsobject(val, None, None).value
seen_datetime = 1
else:
seen_object = 1
@@ -493,7 +493,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
elif PyDateTime_Check(val) or util.is_datetime64_object(val):
if convert_datetime:
seen_datetime = 1
idatetimes[i] = convert_to_tsobject(val, None).value
idatetimes[i] = convert_to_tsobject(val, None, None).value
else:
seen_object = 1
break
2 changes: 1 addition & 1 deletion pandas/src/offsets.pyx
@@ -76,7 +76,7 @@ cdef class _Offset:
cpdef anchor(self, object start=None):
if start is not None:
self.start = start
self.ts = convert_to_tsobject(self.start)
self.ts = convert_to_tsobject(self.start, None, None)
self._setup()

cdef _setup(self):
3 changes: 3 additions & 0 deletions pandas/tseries/index.py
@@ -1204,6 +1204,9 @@ def slice_indexer(self, start=None, end=None, step=None):
if isinstance(start, time) or isinstance(end, time):
raise KeyError('Cannot mix time and non-time slice keys')

if isinstance(start, float) or isinstance(end, float):
raise TypeError('Cannot index datetime64 with float keys')

return Index.slice_indexer(self, start, end, step)

def slice_locs(self, start=None, end=None):
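A quick sketch of the check added above (illustration only, not part of the diff): float slice keys against a DatetimeIndex are now rejected with an explicit TypeError rather than being interpreted as something else.

from pandas import date_range

rng = date_range('2013-01-01', periods=10)

# float keys are now rejected up front
try:
    rng.slice_indexer(1.5, 3.5)
except TypeError as e:
    print(e)   # Cannot index datetime64 with float keys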
85 changes: 84 additions & 1 deletion pandas/tseries/tests/test_timeseries.py
@@ -38,6 +38,7 @@
import pandas.util.py3compat as py3compat
from pandas.core.datetools import BDay
import pandas.core.common as com
from pandas import concat

from numpy.testing.decorators import slow

@@ -171,7 +172,6 @@ def test_indexing_over_size_cutoff(self):
def test_indexing_unordered(self):

# GH 2437
from pandas import concat
rng = date_range(start='2011-01-01', end='2011-01-15')
ts = Series(randn(len(rng)), index=rng)
ts2 = concat([ts[0:4],ts[-4:],ts[4:-4]])
@@ -593,6 +593,34 @@ def test_frame_add_datetime64_col_other_units(self):

self.assert_((tmp['dates'].values == ex_vals).all())

def test_to_datetime_unit(self):

epoch = 1370745748
s = Series([ epoch + t for t in range(20) ])
result = to_datetime(s,unit='s')
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ])
assert_series_equal(result,expected)

s = Series([ epoch + t for t in range(20) ]).astype(float)
result = to_datetime(s,unit='s')
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ])
assert_series_equal(result,expected)

s = Series([ epoch + t for t in range(20) ] + [iNaT])
result = to_datetime(s,unit='s')
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT])
assert_series_equal(result,expected)

s = Series([ epoch + t for t in range(20) ] + [iNaT]).astype(float)
result = to_datetime(s,unit='s')
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT])
assert_series_equal(result,expected)

s = concat([Series([ epoch + t for t in range(20) ]).astype(float),Series([np.nan])],ignore_index=True)
result = to_datetime(s,unit='s')
expected = Series([ Timestamp('2013-06-09 02:42:28') + timedelta(seconds=t) for t in range(20) ] + [NaT])
assert_series_equal(result,expected)

def test_series_ctor_datetime64(self):
rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50',
freq='10s')
@@ -2691,6 +2719,61 @@ def test_basics_nanos(self):
self.assert_(stamp.microsecond == 0)
self.assert_(stamp.nanosecond == 500)

def test_unit(self):
def check(val,unit=None,s=1,us=0):
stamp = Timestamp(val, unit=unit)
self.assert_(stamp.year == 2000)
self.assert_(stamp.month == 1)
self.assert_(stamp.day == 1)
self.assert_(stamp.hour == 1)
self.assert_(stamp.minute == 1)
self.assert_(stamp.second == s)
self.assert_(stamp.microsecond == us)
self.assert_(stamp.nanosecond == 0)

val = Timestamp('20000101 01:01:01').value

check(val)
check(val/1000L,unit='us')
check(val/1000000L,unit='ms')
check(val/1000000000L,unit='s')

# using truediv, so these are like floats
if py3compat.PY3:
check((val+500000)/1000000000L,unit='s',us=500)
check((val+500000000)/1000000000L,unit='s',us=500000)
check((val+500000)/1000000L,unit='ms',us=500)

# get chopped in py2
else:
check((val+500000)/1000000000L,unit='s')
check((val+500000000)/1000000000L,unit='s')
check((val+500000)/1000000L,unit='ms')

# ok
check((val+500000)/1000L,unit='us',us=500)
check((val+500000000)/1000000L,unit='ms',us=500000)

# floats
check(val/1000.0 + 5,unit='us',us=5)
check(val/1000.0 + 5000,unit='us',us=5000)
check(val/1000000.0 + 0.5,unit='ms',us=500)
check(val/1000000.0 + 0.005,unit='ms',us=5)
check(val/1000000000.0 + 0.5,unit='s',us=500000)

# nan
result = Timestamp(np.nan)
self.assert_(result is NaT)

result = Timestamp(None)
self.assert_(result is NaT)

result = Timestamp(iNaT)
self.assert_(result is NaT)

result = Timestamp(NaT)
self.assert_(result is NaT)

def test_comparison(self):
# 5-18-2012 00:00:00.000
stamp = 1337299200000000000L
6 changes: 4 additions & 2 deletions pandas/tseries/tools.py
@@ -50,7 +50,7 @@ def _maybe_get_tz(tz):


def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
format=None, coerce=False):
format=None, coerce=False, unit='ns'):
"""
Convert argument to datetime

@@ -69,6 +69,8 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
format : string, default None
strftime to parse time, eg "%d/%m/%Y"
coerce : force errors to NaT (False by default)
unit : unit of the arg (s,ms,us,ns) denote the unit in epoch
Contributor:

Think maybe this should say "unit of the arg (s,ms,us,ns) if passed an epoch"; will do that in that PR.

Contributor Author:

go ahead... just merged

(e.g. a unix timestamp), which is an integer/float number

Returns
-------
@@ -86,7 +88,7 @@ def _convert_f(arg):
else:
result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
utc=utc, dayfirst=dayfirst,
coerce=coerce)
coerce=coerce, unit=unit)
if com.is_datetime64_dtype(result) and box:
result = DatetimeIndex(result, tz='utc' if utc else None)
return result
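A minimal sketch of the new keyword in use (not part of the diff; it mirrors test_to_datetime_unit above, including the NaN-to-NaT handling):

import numpy as np
from pandas import Series, to_datetime

epoch = 1370745748                      # 2013-06-09 02:42:28 UTC, per the tests
s = Series([epoch, epoch + 1, np.nan])

# floats in epoch seconds; missing values come back as NaT
to_datetime(s, unit='s')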
2 changes: 1 addition & 1 deletion pandas/tslib.pxd
@@ -1,3 +1,3 @@
from numpy cimport ndarray, int64_t

cdef convert_to_tsobject(object, object)
cdef convert_to_tsobject(object, object, object)