From 4b80346d8be3b7726e8f197cb77a2f115531d177 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 26 Apr 2016 11:44:52 -0400 Subject: [PATCH] BUG/DOC: restrict possibilities for assembly of dates using a DataFrame xref #12967 --- pandas/tseries/tests/test_timeseries.py | 38 +++++------- pandas/tseries/tools.py | 81 ++++++------------------- 2 files changed, 33 insertions(+), 86 deletions(-) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 6fbe7ef488d6c..104324cea8aca 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -2318,24 +2318,18 @@ def test_dataframe(self): assert_series_equal(result, expected2) # unit mappings - units = [{'year': 'year', + units = [{'year': 'years', + 'month': 'months', + 'day': 'days', + 'hour': 'hours', + 'minute': 'minutes', + 'second': 'seconds'}, + {'year': 'year', 'month': 'month', 'day': 'day', - 'hour': 'HH', - 'minute': 'MM', - 'second': 'SS'}, - {'year': '%Y', - 'month': '%m', - 'day': '%d', - 'hour': '%H', - 'minute': '%M', - 'second': '%S'}, - {'year': 'y', - 'month': 'month', - 'day': 'd', - 'hour': 'h', - 'minute': 'm', - 'second': 's'}, + 'hour': 'hour', + 'minute': 'min', + 'second': 'sec'}, ] for d in units: @@ -2344,12 +2338,12 @@ def test_dataframe(self): Timestamp('20160305 07:59:11')]) assert_series_equal(result, expected) - d = {'year': 'y', + d = {'year': 'year', 'month': 'month', - 'day': 'd', - 'hour': 'h', - 'minute': 'm', - 'second': 's', + 'day': 'day', + 'hour': 'hour', + 'minute': 'minute', + 'second': 'second', 'ms': 'ms', 'us': 'us', 'ns': 'ns'} @@ -2360,7 +2354,7 @@ def test_dataframe(self): assert_series_equal(result, expected) # coerce back to int - result = to_datetime(df.astype(str), unit=d) + result = to_datetime(df.astype(str)) assert_series_equal(result, expected) # passing coerce diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index adad34bb32169..86cf361e038af 100644 --- 
a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -244,53 +244,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, Examples -------- - Take separate series and convert to datetime - - >>> import pandas as pd - >>> i = pd.date_range('20000101',periods=100) - >>> df = pd.DataFrame(dict(year = i.year, month = i.month, day = i.day)) - >>> pd.to_datetime(df.year*10000 + df.month*100 + df.day, format='%Y%m%d') - 0 2000-01-01 - 1 2000-01-02 - ... - 98 2000-04-08 - 99 2000-04-09 - Length: 100, dtype: datetime64[ns] - - Or from strings - - >>> dfs = df.astype(str) - >>> pd.to_datetime(dfs.day + dfs.month + dfs.year, format="%d%m%Y") - 0 2000-01-01 - 1 2000-01-02 - ... - 98 2000-04-08 - 99 2000-04-09 - Length: 100, dtype: datetime64[ns] - - Infer the format from the first entry - - >>> pd.to_datetime(dfs.month + '/' + dfs.day + '/' + dfs.year, - infer_datetime_format=True) - 0 2000-01-01 - 1 2000-01-02 - ... - 98 2000-04-08 - 99 2000-04-09 - - This gives the same results as omitting the `infer_datetime_format=True`, - but is much faster. - - Date that does not meet timestamp limitations: - - >>> pd.to_datetime('13000101', format='%Y%m%d') - datetime.datetime(1300, 1, 1, 0, 0) - >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') - NaT - Assembling a datetime from multiple columns of a DataFrame. 
The keys can be - strptime-like (%Y, %m) or common abbreviations like ('year', 'month') + common abbreviations like ['year', 'month', 'day', 'hour', 'minute', + 'second', 'ms', 'us', 'ns'] or plurals of the same >>> df = pd.DataFrame({'year': [2015, 2016], 'month': [2, 3], @@ -300,6 +257,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, 1 2016-03-05 dtype: datetime64[ns] + Date that does not meet timestamp limitations: + + >>> pd.to_datetime('13000101', format='%Y%m%d') + datetime.datetime(1300, 1, 1, 0, 0) + >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') + NaT """ return _to_datetime(arg, errors=errors, dayfirst=dayfirst, yearfirst=yearfirst, @@ -439,31 +402,21 @@ def _convert_listlike(arg, box, format, name=None): # mappings for assembling units _unit_map = {'year': 'year', - 'y': 'year', - '%Y': 'year', + 'years': 'year', 'month': 'month', - 'M': 'month', - '%m': 'month', + 'months': 'month', 'day': 'day', 'days': 'day', - 'd': 'day', - '%d': 'day', - 'h': 'h', 'hour': 'h', - 'hh': 'h', - '%H': 'h', + 'hours': 'h', 'minute': 'm', - 't': 'm', + 'minutes': 'm', 'min': 'm', - '%M': 'm', - 'mm': 'm', - 'MM': 'm', - '%M': 'm', - 's': 's', - 'seconds': 's', + 'mins': 'm', 'second': 's', - '%S': 's', - 'ss': 's', + 'seconds': 's', + 'sec': 's', + 'secs': 's', 'ms': 'ms', 'millisecond': 'ms', 'milliseconds': 'ms', @@ -505,7 +458,7 @@ def f(value): return _unit_map[value] # m is case significant - if value.lower() in _unit_map and not value.startswith('m'): + if value.lower() in _unit_map: return _unit_map[value.lower()] return value