From ba874d5546a0ddde7f34ebaea5f4b5d2847757b7 Mon Sep 17 00:00:00 2001 From: David Bew Date: Fri, 6 Jun 2014 15:58:06 +0100 Subject: [PATCH 1/3] TST7337: Disallow use of dateutil tzfile objects whose filename contains '.tar.gz'. --- pandas/tslib.pyx | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 9f1db62a54bf3..62e3b120c9d64 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1029,6 +1029,10 @@ cdef inline object _get_zone(object tz): return 'UTC' else: if _treat_tz_as_dateutil(tz): + if '.tar.gz' in tz._filename: + raise ValueError('Bad tz filename. Dateutil on python 3 on windows has a bug which causes tzfile._filename to be the same for all ' + 'timezone files. Please construct dateutil timezones implicitly by passing a string like "dateutil/Europe/London" ' + 'when you construct your pandas objects instead of passing a timezone object. See https://github.com/pydata/pandas/pull/7362') return 'dateutil/' + tz._filename else: # tz is a pytz timezone or unknown. @@ -1048,7 +1052,11 @@ cpdef inline object maybe_get_tz(object tz): ''' if isinstance(tz, string_types): if tz.startswith('dateutil/'): + zone = tz[9:] tz = _dateutil_gettz(tz[9:]) + # On Python 3 on Windows, the filename is not always set correctly. + if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename: + tz._filename = zone else: tz = pytz.timezone(tz) return tz @@ -1965,6 +1973,10 @@ cdef inline object _tz_cache_key(object tz): if isinstance(tz, _pytz_BaseTzInfo): return tz.zone elif isinstance(tz, _dateutil_tzfile): + if '.tar.gz' in tz._filename: + raise ValueError('Bad tz filename. Dateutil on python 3 on windows has a bug which causes tzfile._filename to be the same for all ' + 'timezone files. Please construct dateutil timezones implicitly by passing a string like "dateutil/Europe/London" ' + 'when you construct your pandas objects instead of passing a timezone object. See https://github.com/pydata/pandas/pull/7362') return tz._filename else: return None From 7b6a65060818ac9f169a00131da32b48ad079ed8 Mon Sep 17 00:00:00 2001 From: David Bew Date: Thu, 5 Jun 2014 14:53:35 +0100 Subject: [PATCH 2/3] TST7337: Fix test failures on windows. Avoid constructing dateutil timezones explicitly because of filename issue. Use dateutil.tz.tzutc() to construct UTC timezone instead of dateutil.tz.gettz('UTC') Update docs to reflect this. Tidy up examples in the timezones section of the docs. --- doc/source/timeseries.rst | 52 ++++++++++++++---------- doc/source/v0.14.1.txt | 8 ++-- pandas/tests/test_format.py | 2 +- pandas/tests/test_series.py | 3 +- pandas/tseries/tests/test_daterange.py | 18 ++++++--- pandas/tseries/tests/test_period.py | 3 +- pandas/tseries/tests/test_timeseries.py | 54 +++++++++++++++++-------- pandas/tseries/tests/test_timezones.py | 8 +++- 8 files changed, 94 insertions(+), 54 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index aed54ab0f5040..0303b41e42e55 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -12,6 +12,8 @@ randint = np.random.randint np.set_printoptions(precision=4, suppress=True) options.display.max_rows=15 + import dateutil + import pytz from dateutil.relativedelta import relativedelta from pandas.tseries.api import * from pandas.tseries.offsets import * @@ -1266,32 +1268,37 @@ common zones, the names are the same as ``pytz``. .. ipython:: python # pytz - rng_utc = date_range('3/6/2012 00:00', periods=10, freq='D', tz='UTC') - rng_utc.tz + rng_pytz = date_range('3/6/2012 00:00', periods=10, freq='D', + tz='Europe/London') + rng_pytz.tz # dateutil - rng_utc_dateutil = date_range('3/6/2012 00:00', periods=10, freq='D', - tz='dateutil/UTC') - rng_utc_dateutil.tz + rng_dateutil = date_range('3/6/2012 00:00', periods=10, freq='D', + tz='dateutil/Europe/London') + rng_dateutil.tz -You can also construct the timezone explicitly first, which gives you more control over which -time zone is used: + # dateutil - utc special case + rng_utc = date_range('3/6/2012 00:00', periods=10, freq='D', + tz=dateutil.tz.tzutc()) + rng_utc.tz + +Note that the ``UTC`` timezone is a special case in ``dateutil`` and should be constructed explicitly +as an instance of ``dateutil.tz.tzutc``. You can also construct other timezones explicitly first, +which gives you more control over which time zone is used: .. ipython:: python # pytz - import pytz - tz_pytz = pytz.timezone('UTC') - rng_utc = date_range('3/6/2012 00:00', periods=10, freq='D', tz=tz_pytz) - rng_utc.tz + tz_pytz = pytz.timezone('Europe/London') + rng_pytz = date_range('3/6/2012 00:00', periods=10, freq='D', + tz=tz_pytz) + rng_pytz.tz == tz_pytz # dateutil - import dateutil - tz_dateutil = dateutil.tz.gettz('UTC') - rng_utc_dateutil = date_range('3/6/2012 00:00', periods=10, freq='D', - tz=tz_dateutil) - rng_utc_dateutil.tz - + tz_dateutil = dateutil.tz.gettz('Europe/London') + rng_dateutil = date_range('3/6/2012 00:00', periods=10, freq='D', + tz=tz_dateutil) + rng_dateutil.tz == tz_dateutil Timestamps, like Python's ``datetime.datetime`` object can be either time zone naive or time zone aware. Naive time series and DatetimeIndex objects can be @@ -1313,9 +1320,10 @@ tz-aware data to another time zone: ts_utc.tz_convert('US/Eastern') .. warning:: - Be very wary of conversions between libraries as ``pytz`` and ``dateutil`` - may have different definitions of the time zones. This is more of a problem for - unusual timezones than for 'standard' zones like ``US/Eastern``. + + Be wary of conversions between libraries. For some zones ``pytz`` and ``dateutil`` have different + definitions of the zone. This is more of a problem for unusual timezones than for + 'standard' zones like ``US/Eastern``. Under the hood, all timestamps are stored in UTC. Scalar values from a ``DatetimeIndex`` with a time zone will have their fields (day, hour, minute) @@ -1359,8 +1367,6 @@ TimeSeries, aligning the data on the UTC timestamps: result result.index -.. _timeseries.timedeltas: - In some cases, localize cannot determine the DST and non-DST hours when there are duplicates. This often happens when reading files that simply duplicate the hours. The infer_dst argument in tz_localize will attempt @@ -1376,6 +1382,8 @@ to determine the right offset. rng_hourly_eastern = rng_hourly.tz_localize('US/Eastern', infer_dst=True) rng_hourly_eastern.values +.. _timeseries.timedeltas: + Time Deltas ----------- diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 586e47ff4f303..d38565008640f 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -104,11 +104,9 @@ Enhancements .. ipython:: python - rng_utc_dateutil = date_range('3/6/2012 00:00', - periods=10, - freq='D', - tz='dateutil/UTC') - rng_utc_dateutil.tz + rng = date_range('3/6/2012 00:00', periods=10, freq='D', + tz='dateutil/Europe/London') + rng.tz See :ref:`the docs `. diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 302b8ca9983e0..456d331156011 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -2946,7 +2946,7 @@ def test_tz_pytz(self): def test_tz_dateutil(self): _skip_if_no_dateutil() import dateutil - utc = dateutil.tz.gettz('UTC') + utc = dateutil.tz.tzutc() dt_date = datetime(2013, 1, 2, tzinfo=utc) self.assertEqual(str(dt_date), str(Timestamp(dt_date))) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 31e5363dd5abe..3881ed5277b85 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -4630,7 +4630,8 @@ def test_getitem_setitem_datetime_tz_pytz(self): def test_getitem_setitem_datetime_tz_dateutil(self): _skip_if_no_dateutil(); - from dateutil.tz import gettz as tz + from dateutil.tz import gettz, tzutc + tz = lambda x: tzutc() if x == 'UTC' else gettz(x) # handle special case for utc in dateutil from pandas import date_range N = 50 diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index dd84ee27caf0e..0a732ac7bc7e8 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -2,7 +2,7 @@ from pandas.compat import range import pickle import nose - +import sys import numpy as np from pandas.core.index import Index @@ -36,6 +36,11 @@ def _skip_if_no_cday(): raise nose.SkipTest("CustomBusinessDay not available.") +def _skip_if_windows_python_3(): + if sys.version_info > (3,) and sys.platform == 'win32': + raise nose.SkipTest("not used on python 3/win32") + + def eq_gen_range(kwargs, expected): rng = generate_range(**kwargs) assert(np.array_equal(list(rng), expected)) @@ -300,7 +305,7 @@ def test_summary_pytz(self): def test_summary_dateutil(self): _skip_if_no_dateutil() import dateutil - bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.gettz('UTC')).summary() + bdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() def test_misc(self): end = datetime(2009, 5, 13) @@ -391,8 +396,10 @@ def test_range_tz_pytz(self): def test_range_tz_dateutil(self): # GH 2906 _skip_if_no_dateutil() - from dateutil.tz import gettz as tz - + # Use maybe_get_tz to fix filename in tz under dateutil. + from pandas.tslib import maybe_get_tz + tz = lambda x: maybe_get_tz('dateutil/' + x) + start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) end = datetime(2011, 1, 3, tzinfo=tz('US/Eastern')) @@ -428,6 +435,7 @@ def test_month_range_union_tz_pytz(self): early_dr.union(late_dr) def test_month_range_union_tz_dateutil(self): + _skip_if_windows_python_3() _skip_if_no_dateutil() from dateutil.tz import gettz as timezone tz = timezone('US/Eastern') @@ -633,7 +641,7 @@ def test_summary_pytz(self): def test_summary_dateutil(self): _skip_if_no_dateutil() import dateutil - cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.gettz('UTC')).summary() + cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() def test_misc(self): end = datetime(2009, 5, 13) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 81387c3736481..38887ede2faca 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -85,7 +85,8 @@ def test_timestamp_tz_arg(self): def test_timestamp_tz_arg_dateutil(self): import dateutil - p = Period('1/1/2005', freq='M').to_timestamp(tz=dateutil.tz.gettz('Europe/Brussels')) + from pandas.tslib import maybe_get_tz + p = Period('1/1/2005', freq='M').to_timestamp(tz=maybe_get_tz('dateutil/Europe/Brussels')) self.assertEqual(p.tz, dateutil.tz.gettz('Europe/Brussels')) def test_timestamp_tz_arg_dateutil_from_string(self): diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 83cc5dcc7485f..04210b4f0c88f 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -58,6 +58,15 @@ def _skip_if_has_locale(): lang, _ = locale.getlocale() if lang is not None: raise nose.SkipTest("Specific locale is set {0}".format(lang)) + +def _skip_if_windows_python_3(): + if sys.version_info > (3,) and sys.platform == 'win32': + raise nose.SkipTest("not used on python 3/win32") + +def _skip_if_not_windows_python_3(): + if sys.version_info < (3,) or sys.platform != 'win32': + raise nose.SkipTest("only run on python 3/win32") + class TestTimeSeriesDuplicates(tm.TestCase): _multiprocess_can_split_ = True @@ -406,6 +415,16 @@ def test_timestamp_to_datetime(self): self.assertEqual(stamp, dtval) self.assertEqual(stamp.tzinfo, dtval.tzinfo) + def test_timestamp_to_datetime_dateutil(self): + _skip_if_no_pytz() + rng = date_range('20090415', '20090519', + tz='dateutil/US/Eastern') + + stamp = rng[0] + dtval = stamp.to_pydatetime() + self.assertEqual(stamp, dtval) + self.assertEqual(stamp.tzinfo, dtval.tzinfo) + def test_timestamp_to_datetime_explicit_pytz(self): _skip_if_no_pytz() import pytz @@ -418,6 +437,7 @@ def test_timestamp_to_datetime_explicit_pytz(self): self.assertEquals(stamp.tzinfo, dtval.tzinfo) def test_timestamp_to_datetime_explicit_dateutil(self): + _skip_if_windows_python_3() _skip_if_no_dateutil() import dateutil rng = date_range('20090415', '20090519', @@ -467,7 +487,7 @@ def _check_rng(rng): _check_rng(rng_eastern) _check_rng(rng_utc) - def test_index_convert_to_datetime_array_explicit_dateutil(self): + def test_index_convert_to_datetime_array_dateutil(self): _skip_if_no_dateutil() import dateutil @@ -480,8 +500,8 @@ def _check_rng(rng): self.assertEquals(x.tzinfo, stamp.tzinfo) rng = date_range('20090415', '20090519') - rng_eastern = date_range('20090415', '20090519', tz=dateutil.tz.gettz('US/Eastern')) - rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.gettz('UTC')) + rng_eastern = date_range('20090415', '20090519', tz='dateutil/US/Eastern') + rng_utc = date_range('20090415', '20090519', tz=dateutil.tz.tzutc()) _check_rng(rng) _check_rng(rng_eastern) @@ -1560,14 +1580,14 @@ def test_to_period_tz_explicit_pytz(self): self.assert_(result == expected) self.assert_(ts.to_period().equals(xp)) - def test_to_period_tz_explicit_dateutil(self): + def test_to_period_tz_dateutil(self): _skip_if_no_dateutil() import dateutil from dateutil.tz import tzlocal xp = date_range('1/1/2000', '4/1/2000').to_period() - ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.gettz('US/Eastern')) + ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') result = ts.to_period()[0] expected = ts[0].to_period() @@ -1575,7 +1595,7 @@ def test_to_period_tz_explicit_dateutil(self): self.assert_(result == expected) self.assert_(ts.to_period().equals(xp)) - ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.gettz('UTC')) + ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) result = ts.to_period()[0] expected = ts[0].to_period() @@ -1793,17 +1813,17 @@ def test_append_concat_tz_explicit_pytz(self): appended = rng.append(rng2) self.assert_(appended.equals(rng3)) - def test_append_concat_tz_explicit_dateutil(self): + def test_append_concat_tz_dateutil(self): # GH 2938 _skip_if_no_dateutil() from dateutil.tz import gettz as timezone rng = date_range('5/8/2012 1:45', periods=10, freq='5T', - tz=timezone('US/Eastern')) + tz='dateutil/US/Eastern') rng2 = date_range('5/8/2012 2:35', periods=10, freq='5T', - tz=timezone('US/Eastern')) + tz='dateutil/US/Eastern') rng3 = date_range('5/8/2012 1:45', periods=20, freq='5T', - tz=timezone('US/Eastern')) + tz='dateutil/US/Eastern') ts = Series(np.random.randn(len(rng)), rng) df = DataFrame(np.random.randn(len(rng), 4), index=rng) ts2 = Series(np.random.randn(len(rng2)), rng2) @@ -2021,11 +2041,11 @@ def test_period_resample_with_local_timezone_dateutil(self): _skip_if_no_dateutil() import dateutil - local_timezone = dateutil.tz.gettz('America/Los_Angeles') + local_timezone = 'dateutil/America/Los_Angeles' - start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.gettz('UTC')) + start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()) # 1 day later - end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.gettz('UTC')) + end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()) index = pd.date_range(start, end, freq='H') @@ -2990,13 +3010,13 @@ def compare(x, y): def test_class_ops_dateutil(self): _skip_if_no_dateutil() - from dateutil.tz import gettz as timezone + from dateutil.tz import tzutc def compare(x,y): self.assertEqual(int(np.round(Timestamp(x).value/1e9)), int(np.round(Timestamp(y).value/1e9))) compare(Timestamp.now(),datetime.now()) - compare(Timestamp.now('UTC'), datetime.now(timezone('UTC'))) + compare(Timestamp.now('UTC'), datetime.now(tzutc())) compare(Timestamp.utcnow(),datetime.utcnow()) compare(Timestamp.today(),datetime.today()) @@ -3149,8 +3169,8 @@ def test_cant_compare_tz_naive_w_aware_explicit_pytz(self): def test_cant_compare_tz_naive_w_aware_dateutil(self): _skip_if_no_dateutil() - from dateutil.tz import gettz - utc = gettz('UTC') + from dateutil.tz import tzutc + utc = tzutc() # #1404 a = Timestamp('3/12/2012') b = Timestamp('3/12/2012', tz=utc) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 5fb1f9db620ae..40c3a044a5acf 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -770,8 +770,12 @@ def setUp(self): _skip_if_no_dateutil() def tz(self, tz): - ''' Construct a timezone object from a string. Overridden in subclass to parameterize tests. ''' - return dateutil.tz.gettz(tz) + ''' + Construct a dateutil timezone. + Use tslib.maybe_get_tz so that we get the filename on the tz right + on windows. See #7337. + ''' + return tslib.maybe_get_tz('dateutil/' + tz) def tzstr(self, tz): ''' Construct a timezone string from a string. Overridden in subclass to parameterize tests. ''' From 703934a5ae69a38de446eaef8b82e3f6f0d7742a Mon Sep 17 00:00:00 2001 From: David Bew Date: Mon, 9 Jun 2014 16:31:12 +0100 Subject: [PATCH 3/3] Add test for conversion between dateutil.tz.tzutc() and dateutil.tz.gettz('UTC') --- pandas/tseries/tests/test_timezones.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 40c3a044a5acf..51c533df863e6 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -788,6 +788,19 @@ def cmptz(self, tz1, tz2): def localize(self, tz, x): return x.replace(tzinfo=tz) + def test_utc_with_system_utc(self): + from pandas.tslib import maybe_get_tz + + # from system utc to real utc + ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) + # check that the time hasn't changed. + self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc())) + + # from system utc to real utc + ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) + # check that the time hasn't changed. + self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc())) + class TestTimeZones(tm.TestCase): _multiprocess_can_split_ = True