Skip to content

Commit 618bc17

Browse files
sumitbinnani authored and jreback committed
ENH: Added parameter origin to to_datetime
closes pandas-dev#11276 closes pandas-dev#11745 closes pandas-dev#11470
1 parent 05e734a commit 618bc17

File tree

3 files changed

+83
-5
lines changed

3 files changed

+83
-5
lines changed

doc/source/whatsnew/v0.18.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ Other Enhancements
221221
- Added ``.weekday_name`` property as a component to ``DatetimeIndex`` and the ``.dt`` accessor. (:issue:`11128`)
222222

223223
- ``Index.take`` now handles ``allow_fill`` and ``fill_value`` consistently (:issue:`12631`)
224+
- Added ``weekday_name`` as a component to ``DatetimeIndex`` and ``.dt`` accessor. (:issue:`11128`)
225+
- ``pd.to_datetime`` has a new parameter, ``origin``, to define the reference date from which numeric input is counted when it is converted to datetimes (:issue:`11276`, :issue:`11745`)
224226

225227
.. ipython:: python
226228

pandas/tseries/tests/test_timeseries.py

+42
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,48 @@ def test_to_datetime_unit(self):
752752
seconds=t) for t in range(20)] + [NaT])
753753
assert_series_equal(result, expected)
754754

755+
def test_to_datetime_origin(self):
756+
units = ['D', 's', 'ms', 'us', 'ns']
757+
# Addresses Issue Number 11276, 11745
758+
# for origin as julian
759+
julian_dates = pd.date_range(
760+
'2014-1-1', periods=10).to_julian_date().values
761+
result = Series(pd.to_datetime(
762+
julian_dates, unit='D', origin='julian'))
763+
expected = Series(pd.to_datetime(
764+
julian_dates - pd.Timestamp(0).to_julian_date(), unit='D'))
765+
assert_series_equal(result, expected)
766+
767+
# checking for invalid combination of origin='julian' and unit != D
768+
for unit in units:
769+
if unit == 'D':
770+
continue
771+
with self.assertRaises(ValueError):
772+
pd.to_datetime(julian_dates, unit=unit, origin='julian')
773+
774+
# for origin as 1960-01-01
775+
epoch_1960 = pd.Timestamp('1960-01-01')
776+
epoch_timestamp_convertible = [epoch_1960, epoch_1960.to_datetime(
777+
), epoch_1960.to_datetime64(), str(epoch_1960)]
778+
units = ['D', 's', 'ms', 'us', 'ns']
779+
invalid_origins = ['random_string', '13-24-1990']
780+
units_from_epoch = range(5)
781+
782+
for unit in units:
783+
for epoch in epoch_timestamp_convertible:
784+
expected = Series(
785+
[pd.Timedelta(x, unit=unit) +
786+
epoch_1960 for x in units_from_epoch])
787+
result = Series(pd.to_datetime(
788+
units_from_epoch, unit=unit, origin=epoch))
789+
assert_series_equal(result, expected)
790+
791+
# check for invalid origins
792+
for origin in invalid_origins:
793+
with self.assertRaises(ValueError):
794+
pd.to_datetime(units_from_epoch, unit=unit,
795+
origin=origin)
796+
755797
def test_series_ctor_datetime64(self):
756798
rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s')
757799
dates = np.asarray(rng)

pandas/tseries/tools.py

+39-5
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def _guess_datetime_format_for_array(arr, **kwargs):
170170
mapping={True: 'coerce', False: 'raise'})
171171
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
172172
utc=None, box=True, format=None, exact=True, coerce=None,
173-
unit=None, infer_datetime_format=False):
173+
unit=None, infer_datetime_format=False, origin='epoch'):
174174
"""
175175
Convert argument to datetime.
176176
@@ -228,6 +228,17 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
228228
datetime strings, and if it can be inferred, switch to a faster
229229
method of parsing them. In some cases this can increase the parsing
230230
speed by ~5-10x.
231+
origin : scalar convertible to Timestamp / string ('julian', 'epoch'),
232+
default 'epoch'.
233+
Define the reference date. Numeric input is interpreted as the number of
units (given by ``unit``) elapsed since this reference date.
234+
235+
- If 'epoch', offset is set to 1970-01-01 (the Unix epoch).
236+
- If 'julian', unit must be 'D', and offset is set to the start of the
237+
Julian day count (noon on January 1, 4713 BC).
238+
- If Timestamp convertible, offset is set to Timestamp identified by
239+
origin.
240+
241+
.. versionadded:: 0.18.1
231242
232243
Returns
233244
-------
@@ -260,6 +271,14 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
260271
If a date that does not meet timestamp limitations, passing errors='coerce'
261272
will force to NaT. Furthermore this will force non-dates to NaT as well.
262273
274+
>>> pd.to_datetime(range(100), unit='D', origin=pd.Timestamp('1960-01-01'))
275+
0 1960-01-01
276+
1 1960-01-02
277+
...
278+
98 1960-04-08
279+
99 1960-04-09
280+
281+
263282
>>> pd.to_datetime('13000101', format='%Y%m%d')
264283
datetime.datetime(1300, 1, 1, 0, 0)
265284
>>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
@@ -285,10 +304,25 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
285304
1 loop, best of 3: 471 ms per loop
286305
287306
"""
288-
return _to_datetime(arg, errors=errors, dayfirst=dayfirst,
289-
yearfirst=yearfirst,
290-
utc=utc, box=box, format=format, exact=exact,
291-
unit=unit, infer_datetime_format=infer_datetime_format)
307+
# variable to set offset as per origin parameter
308+
offset = None
309+
310+
if origin == 'julian':
311+
if unit != 'D':
312+
raise ValueError("unit must be 'D' for origin='julian'")
313+
arg = arg - tslib.Timestamp(0).to_julian_date()
314+
elif origin != 'epoch':
315+
offset = tslib.Timestamp(origin) - tslib.Timestamp(0)
316+
317+
result = _to_datetime(arg, errors=errors, dayfirst=dayfirst,
318+
yearfirst=yearfirst, utc=utc, box=box, format=format,
319+
exact=exact, unit=unit,
320+
infer_datetime_format=infer_datetime_format)
321+
322+
if offset is not None:
323+
result = result + offset
324+
325+
return result
292326

293327

294328
def _to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,

0 commit comments

Comments
 (0)