-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Adding origin parameter in pd.to_datetime #11470
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
8651285
05a9973
ff3828e
21f5c43
1fe275d
759d5a7
b475ec6
96c5bd8
529a051
978f0d2
4396d81
4b9a3f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,16 @@ New features | |
|
||
.. _whatsnew_0190.dev_api: | ||
|
||
to_datetime has gained an origin parameter | ||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
``pd.to_datetime`` has a new parameter, ``origin``, to define a reference date (a starting offset) from which numeric values are counted when computing the resulting ``DatetimeIndex``. | |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. say its a starting offset There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
.. ipython:: python | ||
|
||
pd.to_datetime([1,2,3], unit='D', origin=pd.Timestamp('1960-01-01')) | ||
|
||
The above code returns the dates obtained by counting the given number of days forward from the reference date set by ``origin``. | ||
|
||
pandas development API | ||
^^^^^^^^^^^^^^^^^^^^^^ | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,6 @@ New features | |
|
||
|
||
|
||
|
||
.. _whatsnew_0200.enhancements.other: | ||
|
||
Other enhancements | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -772,6 +772,48 @@ def test_to_datetime_unit(self): | |
result = to_datetime([1, 2, 111111111], unit='D', errors='coerce') | ||
tm.assert_index_equal(result, expected) | ||
|
||
def test_to_datetime_origin(self): | ||
units = ['D', 's', 'ms', 'us', 'ns'] | ||
# Addresses Issue Number 11276, 11745 | ||
# for origin as julian | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add the issue number as a comment |
||
julian_dates = pd.date_range( | ||
'2014-1-1', periods=10).to_julian_date().values | ||
result = Series(pd.to_datetime( | ||
julian_dates, unit='D', origin='julian')) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can omit the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. using assert_index_equal raises AssertionError.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah, yes, sorry. It then probably returns a numpy array and not an Index. Then you can leave it as is. |
||
expected = Series(pd.to_datetime( | ||
julian_dates - pd.Timestamp(0).to_julian_date(), unit='D')) | ||
assert_series_equal(result, expected) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add tests for invalid when |
||
|
||
# checking for invalid combination of origin='julian' and unit != D | ||
for unit in units: | ||
if unit == 'D': | ||
continue | ||
with self.assertRaises(ValueError): | ||
pd.to_datetime(julian_dates, unit=unit, origin='julian') | ||
|
||
# for origin as 1960-01-01 | ||
epoch_1960 = pd.Timestamp('1960-01-01') | ||
epoch_timestamp_convertible = [epoch_1960, epoch_1960.to_datetime(), | ||
epoch_1960.to_datetime64(), | ||
str(epoch_1960)] | ||
invalid_origins = ['random_string', '13-24-1990'] | ||
units_from_epoch = [0, 1, 2, 3, 4] | ||
|
||
for unit in units: | ||
for epoch in epoch_timestamp_convertible: | ||
expected = Series( | ||
[pd.Timedelta(x, unit=unit) + | ||
epoch_1960 for x in units_from_epoch]) | ||
result = Series(pd.to_datetime( | ||
units_from_epoch, unit=unit, origin=epoch)) | ||
assert_series_equal(result, expected) | ||
|
||
# check for invalid origins | ||
for origin in invalid_origins: | ||
with self.assertRaises(ValueError): | ||
pd.to_datetime(units_from_epoch, unit=unit, | ||
origin=origin) | ||
|
||
def test_series_ctor_datetime64(self): | ||
rng = date_range('1/1/2000 00:00:00', '1/1/2000 1:59:50', freq='10s') | ||
dates = np.asarray(rng) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -179,7 +179,7 @@ def _guess_datetime_format_for_array(arr, **kwargs): | |
mapping={True: 'coerce', False: 'raise'}) | ||
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, | ||
utc=None, box=True, format=None, exact=True, coerce=None, | ||
unit=None, infer_datetime_format=False): | ||
unit=None, infer_datetime_format=False, origin='epoch'): | ||
""" | ||
Convert argument to datetime. | ||
|
||
|
@@ -238,6 +238,19 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, | |
datetime strings, and if it can be inferred, switch to a faster | ||
method of parsing them. In some cases this can increase the parsing | ||
speed by ~5-10x. | ||
origin : scalar convertible to Timestamp / string ('julian', 'epoch'), | ||
default 'epoch'. | ||
Define the reference date. Numeric values are parsed as the number | ||
of units (defined by `unit`) since this reference date. | ||
|
||
- If 'epoch', origin is set to 1970-01-01. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could consider other standard offsets: |
||
- If 'julian', unit must be 'D', and origin is set to beginning of | ||
Julian Calendar. Julian day number 0 is assigned to the day starting | ||
at noon on January 1, 4713 BC. | ||
- If Timestamp convertible, origin is set to Timestamp identified by | ||
origin. | ||
|
||
.. versionadded:: 0.19.0 | ||
|
||
Returns | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. set what origin means if its not 'julian' There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rectified. |
||
------- | ||
|
@@ -294,8 +307,14 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, | |
>>> %timeit pd.to_datetime(s,infer_datetime_format=False) | ||
1 loop, best of 3: 471 ms per loop | ||
|
||
""" | ||
Using non-epoch origins to parse date | ||
|
||
>>> pd.to_datetime([1,2,3], unit='D', origin=pd.Timestamp('1960-01-01')) | ||
DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None) | ||
|
||
""" | ||
from pandas.tseries.index import DatetimeIndex | ||
|
||
tz = 'utc' if utc else None | ||
|
@@ -406,22 +425,39 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): | |
except (ValueError, TypeError): | ||
raise e | ||
|
||
if arg is None: | ||
return arg | ||
elif isinstance(arg, tslib.Timestamp): | ||
return arg | ||
elif isinstance(arg, ABCSeries): | ||
from pandas import Series | ||
values = _convert_listlike(arg._values, False, format) | ||
return Series(values, index=arg.index, name=arg.name) | ||
elif isinstance(arg, (ABCDataFrame, MutableMapping)): | ||
return _assemble_from_unit_mappings(arg, errors=errors) | ||
elif isinstance(arg, ABCIndexClass): | ||
return _convert_listlike(arg, box, format, name=arg.name) | ||
elif is_list_like(arg): | ||
return _convert_listlike(arg, box, format) | ||
def result_without_offset(arg): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
if origin == 'julian': | ||
if unit != 'D': | ||
raise ValueError("unit must be 'D' for origin='julian'") | ||
arg = arg - tslib.Timestamp(0).to_julian_date() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure how to proceed with the aforementioned check. |
||
if arg is None: | ||
return arg | ||
elif isinstance(arg, tslib.Timestamp): | ||
return arg | ||
elif isinstance(arg, ABCSeries): | ||
from pandas import Series | ||
values = _convert_listlike(arg._values, False, format) | ||
return Series(values, index=arg.index, name=arg.name) | ||
elif isinstance(arg, (ABCDataFrame, MutableMapping)): | ||
return _assemble_from_unit_mappings(arg, errors=errors) | ||
elif isinstance(arg, ABCIndexClass): | ||
return _convert_listlike(arg, box, format, name=arg.name) | ||
elif is_list_like(arg): | ||
return _convert_listlike(arg, box, format) | ||
return _convert_listlike(np.array([arg]), box, format)[0] | ||
|
||
result = result_without_offset(arg) | ||
|
||
offset = None | ||
if origin != 'epoch' and origin != 'julian': | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
try: | ||
offset = tslib.Timestamp(origin) - tslib.Timestamp(0) | ||
except ValueError: | ||
raise ValueError("Invalid 'origin' or 'origin' Out of Bound") | ||
|
||
return _convert_listlike(np.array([arg]), box, format)[0] | ||
if offset is not None: | ||
result = result + offset | ||
return result | ||
|
||
# mappings for assembling units | ||
_unit_map = {'year': 'year', | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to_datetime has gained an origin kwarg.
Don't call this Offset, which has a very specific meaning.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
in the doc-string this is a 'reference date', so I would use that here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.