From f73e38d7afb9dc8f39b43031743dec30b7f8e94b Mon Sep 17 00:00:00 2001
From: jreback
Date: Mon, 8 Jul 2013 19:49:50 -0400
Subject: [PATCH] TST: to_datetime format fixes

CLN/TST: to_datetime cleanup

TST: tests for dayfirst=True
---
 pandas/tseries/tests/test_timeseries.py | 43 +++++++++++++++++-----
 pandas/tseries/tools.py                 | 49 +++++++++++--------------
 2 files changed, 55 insertions(+), 37 deletions(-)

diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
index f952483f54a9a..07780b575fa95 100644
--- a/pandas/tseries/tests/test_timeseries.py
+++ b/pandas/tseries/tests/test_timeseries.py
@@ -793,17 +793,40 @@ def test_to_datetime_default(self):
         xp = datetime(2001, 1, 1)
         self.assert_(rs, xp)
 
+    def test_dayfirst(self):
+
+        # GH 3341
+        result = to_datetime('13-01-2012', dayfirst=True)
+        expected = Timestamp('20120113')
+        self.assert_(result == expected)
+
+        #### dayfirst is essentially broken
+        #### to_datetime('01-13-2012', dayfirst=True)
+        #### self.assertRaises(ValueError, to_datetime('01-13-2012', dayfirst=True))
+
     def test_to_datetime_format(self):
         values = ['1/1/2000', '1/2/2000', '1/3/2000']
 
-        def _parse_format(fmt, values):
-            return to_datetime([datetime.strptime(x, fmt)
-                                for x in values])
-
-        for fmt in ['%d/%m/%Y', '%m/%d/%Y']:
-            result = to_datetime(values, format=fmt)
-            expected = _parse_format(fmt, values)
-            self.assert_(result.equals(expected))
+        results1 = [ Timestamp('20000101'), Timestamp('20000201'),
+                     Timestamp('20000301') ]
+        results2 = [ Timestamp('20000101'), Timestamp('20000102'),
+                     Timestamp('20000103') ]
+        for vals, expecteds in [ (values, (Index(results1), Index(results2))),
+                                 (Series(values),(Series(results1), Series(results2))),
+                                 (values[0], (results1[0], results2[0])),
+                                 (values[1], (results1[1], results2[1])),
+                                 (values[2], (results1[2], results2[2])) ]:
+
+            for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']):
+                result = to_datetime(vals, format=fmt)
+                expected = expecteds[i]
+
+                if isinstance(expected, Series):
+                    assert_series_equal(result, Series(expected))
+                elif isinstance(expected, Timestamp):
+                    self.assert_(result == expected)
+                else:
+                    self.assert_(result.equals(expected))
 
     def test_to_datetime_format_microsecond(self):
         val = '01-Apr-2011 00:00:01.978'
@@ -2812,10 +2835,10 @@ def check(val,unit=None,h=1,s=1,us=0):
         # nan
         result = Timestamp(np.nan)
         self.assert_(result is NaT)
-        
+
         result = Timestamp(None)
         self.assert_(result is NaT)
-        
+
         result = Timestamp(iNaT)
         self.assert_(result is NaT)
 
diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py
index c39f65f95d99f..6dc6b91073f19 100644
--- a/pandas/tseries/tools.py
+++ b/pandas/tseries/tools.py
@@ -76,12 +76,26 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
     -------
     ret : datetime if parsing succeeded
     """
+    from pandas import Timestamp
     from pandas.core.series import Series
     from pandas.tseries.index import DatetimeIndex
 
-    def _convert_f(arg):
-        arg = com._ensure_object(arg)
+    def _convert_listlike(arg, box):
+
+        if isinstance(arg, (list,tuple)):
+            arg = np.array(arg, dtype='O')
 
+        if com.is_datetime64_dtype(arg):
+            if box and not isinstance(arg, DatetimeIndex):
+                try:
+                    return DatetimeIndex(arg, tz='utc' if utc else None)
+                except ValueError, e:
+                    values, tz = tslib.datetime_to_datetime64(arg)
+                    return DatetimeIndex._simple_new(values, None, tz=tz)
+
+            return arg
+
+        arg = com._ensure_object(arg)
         try:
             if format is not None:
                 result = tslib.array_strptime(arg, format)
@@ -92,6 +106,7 @@ def _convert_f(arg):
             if com.is_datetime64_dtype(result) and box:
                 result = DatetimeIndex(result, tz='utc' if utc else None)
             return result
+
         except ValueError, e:
             try:
                 values, tz = tslib.datetime_to_datetime64(arg)
@@ -99,37 +114,17 @@ def _convert_f(arg):
             except (ValueError, TypeError):
                 raise e
 
-    def _convert_listlike(arg):
-        if isinstance(arg, list):
-            arg = np.array(arg, dtype='O')
-
-        if com.is_datetime64_dtype(arg):
-            if box and not isinstance(arg, DatetimeIndex):
-                try:
-                    return DatetimeIndex(arg, tz='utc' if utc else None)
-                except ValueError, e:
-                    try:
-                        values, tz = tslib.datetime_to_datetime64(arg)
-                        return DatetimeIndex._simple_new(values, None, tz=tz)
-                    except (ValueError, TypeError):
-                        raise e
-            return arg
-
-        return _convert_f(arg)
-
     if arg is None:
         return arg
-    elif isinstance(arg, datetime):
+    elif isinstance(arg, Timestamp):
         return arg
     elif isinstance(arg, Series):
-        values = arg.values
-        if not com.is_datetime64_dtype(values):
-            values = _convert_f(values)
+        values = _convert_listlike(arg.values, box=False)
         return Series(values, index=arg.index, name=arg.name)
-    elif isinstance(arg, (np.ndarray, list)):
-        return _convert_listlike(arg)
+    elif com.is_list_like(arg):
+        return _convert_listlike(arg, box=box)
 
-    return _convert_listlike(np.array([ arg ], dtype='O'))[0]
+    return _convert_listlike(np.array([ arg ]), box=box)[0]
 
 class DateParseError(ValueError):
     pass