Skip to content

Commit db6d009

Browse files
committed
BUG/DOC: restrict possibilities for assembly of dates using a DataFrame
xref #12967 closes #12996
1 parent 37a7e69 commit db6d009

File tree

2 files changed

+33
-86
lines changed

2 files changed

+33
-86
lines changed

pandas/tseries/tests/test_timeseries.py

+16-22
Original file line numberDiff line numberDiff line change
@@ -2318,24 +2318,18 @@ def test_dataframe(self):
23182318
assert_series_equal(result, expected2)
23192319

23202320
# unit mappings
2321-
units = [{'year': 'year',
2321+
units = [{'year': 'years',
2322+
'month': 'months',
2323+
'day': 'days',
2324+
'hour': 'hours',
2325+
'minute': 'minutes',
2326+
'second': 'seconds'},
2327+
{'year': 'year',
23222328
'month': 'month',
23232329
'day': 'day',
2324-
'hour': 'HH',
2325-
'minute': 'MM',
2326-
'second': 'SS'},
2327-
{'year': '%Y',
2328-
'month': '%m',
2329-
'day': '%d',
2330-
'hour': '%H',
2331-
'minute': '%M',
2332-
'second': '%S'},
2333-
{'year': 'y',
2334-
'month': 'month',
2335-
'day': 'd',
2336-
'hour': 'h',
2337-
'minute': 'm',
2338-
'second': 's'},
2330+
'hour': 'hour',
2331+
'minute': 'min',
2332+
'second': 'sec'},
23392333
]
23402334

23412335
for d in units:
@@ -2344,12 +2338,12 @@ def test_dataframe(self):
23442338
Timestamp('20160305 07:59:11')])
23452339
assert_series_equal(result, expected)
23462340

2347-
d = {'year': 'y',
2341+
d = {'year': 'year',
23482342
'month': 'month',
2349-
'day': 'd',
2350-
'hour': 'h',
2351-
'minute': 'm',
2352-
'second': 's',
2343+
'day': 'day',
2344+
'hour': 'hour',
2345+
'minute': 'minute',
2346+
'second': 'second',
23532347
'ms': 'ms',
23542348
'us': 'us',
23552349
'ns': 'ns'}
@@ -2360,7 +2354,7 @@ def test_dataframe(self):
23602354
assert_series_equal(result, expected)
23612355

23622356
# coerce back to int
2363-
result = to_datetime(df.astype(str), unit=d)
2357+
result = to_datetime(df.astype(str))
23642358
assert_series_equal(result, expected)
23652359

23662360
# passing coerce

pandas/tseries/tools.py

+17-64
Original file line numberDiff line numberDiff line change
@@ -244,53 +244,10 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
244244
245245
Examples
246246
--------
247-
Take separate series and convert to datetime
248-
249-
>>> import pandas as pd
250-
>>> i = pd.date_range('20000101',periods=100)
251-
>>> df = pd.DataFrame(dict(year = i.year, month = i.month, day = i.day))
252-
>>> pd.to_datetime(df.year*10000 + df.month*100 + df.day, format='%Y%m%d')
253-
0 2000-01-01
254-
1 2000-01-02
255-
...
256-
98 2000-04-08
257-
99 2000-04-09
258-
Length: 100, dtype: datetime64[ns]
259-
260-
Or from strings
261-
262-
>>> dfs = df.astype(str)
263-
>>> pd.to_datetime(dfs.day + dfs.month + dfs.year, format="%d%m%Y")
264-
0 2000-01-01
265-
1 2000-01-02
266-
...
267-
98 2000-04-08
268-
99 2000-04-09
269-
Length: 100, dtype: datetime64[ns]
270-
271-
Infer the format from the first entry
272-
273-
>>> pd.to_datetime(dfs.month + '/' + dfs.day + '/' + dfs.year,
274-
infer_datetime_format=True)
275-
0 2000-01-01
276-
1 2000-01-02
277-
...
278-
98 2000-04-08
279-
99 2000-04-09
280-
281-
This gives the same results as omitting the `infer_datetime_format=True`,
282-
but is much faster.
283-
284-
Date that does not meet timestamp limitations:
285-
286-
>>> pd.to_datetime('13000101', format='%Y%m%d')
287-
datetime.datetime(1300, 1, 1, 0, 0)
288-
>>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
289-
NaT
290-
291247
292248
Assembling a datetime from multiple columns of a DataFrame. The keys can be
293-
strptime-like (%Y, %m) or common abbreviations like ('year', 'month')
249+
common abbreviations like ['year', 'month', 'day', 'hour', 'minute',
250+
'second', 'ms', 'us', 'ns'] or plurals of the same.
294251
295252
>>> df = pd.DataFrame({'year': [2015, 2016],
296253
'month': [2, 3],
@@ -300,6 +257,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
300257
1 2016-03-05
301258
dtype: datetime64[ns]
302259
260+
Date that does not meet timestamp limitations:
261+
262+
>>> pd.to_datetime('13000101', format='%Y%m%d')
263+
datetime.datetime(1300, 1, 1, 0, 0)
264+
>>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
265+
NaT
303266
"""
304267
return _to_datetime(arg, errors=errors, dayfirst=dayfirst,
305268
yearfirst=yearfirst,
@@ -439,31 +402,21 @@ def _convert_listlike(arg, box, format, name=None):
439402

440403
# mappings for assembling units
441404
_unit_map = {'year': 'year',
442-
'y': 'year',
443-
'%Y': 'year',
405+
'years': 'year',
444406
'month': 'month',
445-
'M': 'month',
446-
'%m': 'month',
407+
'months': 'month',
447408
'day': 'day',
448409
'days': 'day',
449-
'd': 'day',
450-
'%d': 'day',
451-
'h': 'h',
452410
'hour': 'h',
453-
'hh': 'h',
454-
'%H': 'h',
411+
'hours': 'h',
455412
'minute': 'm',
456-
't': 'm',
413+
'minutes': 'm',
457414
'min': 'm',
458-
'%M': 'm',
459-
'mm': 'm',
460-
'MM': 'm',
461-
'%M': 'm',
462-
's': 's',
463-
'seconds': 's',
415+
'mins': 'm',
464416
'second': 's',
465-
'%S': 's',
466-
'ss': 's',
417+
'seconds': 's',
418+
'sec': 's',
419+
'secs': 's',
467420
'ms': 'ms',
468421
'millisecond': 'ms',
469422
'milliseconds': 'ms',
@@ -505,7 +458,7 @@ def f(value):
505458
return _unit_map[value]
506459

507460
# otherwise fall back to a case-insensitive match
508-
if value.lower() in _unit_map and not value.startswith('m'):
461+
if value.lower() in _unit_map:
509462
return _unit_map[value.lower()]
510463

511464
return value

0 commit comments

Comments
 (0)