From ff4c2907ac229cf3f1015d08126c1dfd7d94ab4a Mon Sep 17 00:00:00 2001 From: sinhrks Date: Fri, 8 Aug 2014 16:53:38 +0900 Subject: [PATCH 1/2] ENH: Add duplicated/drop_duplicates to Index --- doc/source/v0.15.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index b2581a4f2aab3..e841c60c47515 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -344,7 +344,6 @@ API changes - ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of ``DataFrame.to_csv()`` (:issue:`8215`). - .. _whatsnew_0150.index_set_ops: - The Index set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain index types. ``+`` can be replace by ``.union()`` or ``|``, and ``-`` by ``.difference()``. Further the method name ``Index.diff()`` is deprecated and can be replaced by ``Index.difference()`` (:issue:`8226`) From 5d32eab2c4617ab50e1ef03e5086939e50c17fb7 Mon Sep 17 00:00:00 2001 From: rockg Date: Fri, 8 Aug 2014 07:57:38 -0400 Subject: [PATCH 2/2] New "ambiguous" argument in tz_localize to enhance support for ways of resolving transition times ('infer', dot indicators, 'NaT', and 'raise') --- doc/source/timeseries.rst | 50 +++++--- doc/source/v0.15.0.txt | 9 +- pandas/core/generic.py | 29 +++-- pandas/tseries/index.py | 45 ++++++-- pandas/tseries/tests/test_timezones.py | 85 ++++++++++++-- pandas/tseries/tests/test_tslib.py | 11 ++ pandas/tslib.pyx | 152 +++++++++++++++---------- 7 files changed, 274 insertions(+), 107 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 8f96ec98df6f2..a23d067cefa4f 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1357,6 +1357,9 @@ Pandas provides rich support for working with timestamps in different time zones ``dateutil`` support is new [in 0.14.1] and currently only supported for fixed offset and tzfile zones. The default library is ``pytz``. 
Support for ``dateutil`` is provided for compatibility with other applications e.g. if you use ``dateutil`` in other python packages. +Working with Time Zones +~~~~~~~~~~~~~~~~~~~~~~~ + By default, pandas objects are time zone unaware: .. ipython:: python @@ -1488,10 +1491,29 @@ TimeSeries, aligning the data on the UTC timestamps: result result.index +To remove timezone from tz-aware ``DatetimeIndex``, use ``tz_localize(None)`` or ``tz_convert(None)``. +``tz_localize(None)`` will remove timezone holding local time representations. +``tz_convert(None)`` will remove timezone after converting to UTC time. + +.. ipython:: python + + didx = DatetimeIndex(start='2014-08-01 09:00', freq='H', periods=10, tz='US/Eastern') + didx + didx.tz_localize(None) + didx.tz_convert(None) + + # tz_convert(None) is identical with tz_convert('UTC').tz_localize(None) + didx.tz_convert('UCT').tz_localize(None) + +.. _timeseries.timezone_ambiguous: + +Ambiguous Times when Localizing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + In some cases, localize cannot determine the DST and non-DST hours when there are -duplicates. This often happens when reading files that simply duplicate the hours. -The infer_dst argument in tz_localize will attempt -to determine the right offset. +duplicates. This often happens when reading files or database records that simply +duplicate the hours. Passing ``ambiguous='infer'`` (``infer_dst`` argument in prior +releases) into ``tz_localize`` will attempt to determine the right offset. .. ipython:: python :okexcept: @@ -1500,21 +1522,23 @@ to determine the right offset. '11/06/2011 01:00', '11/06/2011 02:00', '11/06/2011 03:00']) rng_hourly.tz_localize('US/Eastern') - rng_hourly_eastern = rng_hourly.tz_localize('US/Eastern', infer_dst=True) + rng_hourly_eastern = rng_hourly.tz_localize('US/Eastern', ambiguous='infer') rng_hourly_eastern.values - -To remove timezone from tz-aware ``DatetimeIndex``, use ``tz_localize(None)`` or ``tz_convert(None)``. 
``tz_localize(None)`` will remove timezone holding local time representations. ``tz_convert(None)`` will remove timezone after converting to UTC time. +In addition to 'infer', there are several other arguments supported. Passing +an array-like of bools or 0s/1s where True represents a DST hour and False a +non-DST hour, allows for distinguishing more than one DST +transition (e.g., if you have multiple records in a database each with their +own DST transition). Or passing 'NaT' will fill in transition times +with not-a-time values. These methods are available in the ``DatetimeIndex`` +constructor as well as ``tz_localize``. .. ipython:: python + + rng_hourly_dst = np.array([1, 1, 0, 0, 0]) + rng_hourly.tz_localize('US/Eastern', ambiguous=rng_hourly_dst).values + rng_hourly.tz_localize('US/Eastern', ambiguous='NaT').values - didx = DatetimeIndex(start='2014-08-01 09:00', freq='H', periods=10, tz='US/Eastern') - didx - didx.tz_localize(None) - didx.tz_convert(None) - - # tz_convert(None) is identical with tz_convert('UTC').tz_localize(None) - didx.tz_convert('UCT').tz_localize(None) .. _timeseries.timedeltas: diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index e841c60c47515..21ba7ebbc940c 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -465,6 +465,10 @@ Deprecations - The ``convert_dummies`` method has been deprecated in favor of ``get_dummies`` (:issue:`8140`) +- The ``infer_dst`` argument in ``tz_localize`` will be deprecated in favor of + ``ambiguous`` to allow for more flexibility in dealing with DST transitions. + Replace ``infer_dst=True`` with ``ambiguous='infer'`` for the same behavior (:issue:`7943`). + See :ref:`the docs <timeseries.timezone_ambiguous>` for more details. ..
_whatsnew_0150.knownissues: @@ -543,7 +547,10 @@ Enhancements - +- ``tz_localize`` now accepts the ``ambiguous`` keyword which allows for passing an array of bools + indicating whether the date belongs in DST or not, 'NaT' for setting transition times to NaT, + 'infer' for inferring DST/non-DST, and 'raise' (default) for an AmbiguousTimeError to be raised (:issue:`7943`). + See :ref:`the docs <timeseries.timezone_ambiguous>` for more details. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3a75f145587c0..dddfa3bf7d56a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -23,7 +23,7 @@ _maybe_box_datetimelike, ABCSeries, SettingWithCopyError, SettingWithCopyWarning) import pandas.core.nanops as nanops -from pandas.util.decorators import Appender, Substitution +from pandas.util.decorators import Appender, Substitution, deprecate_kwarg from pandas.core import config # goal is to be able to define the docs close to function, while still being @@ -3558,8 +3558,11 @@ def _tz_convert(ax, tz): result = self._constructor(self._data, copy=copy) result.set_axis(axis,ax) return result.__finalize__(self) - - def tz_localize(self, tz, axis=0, level=None, copy=True, infer_dst=False): + + @deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous', + mapping={True: 'infer', False: 'raise'}) + def tz_localize(self, tz, axis=0, level=None, copy=True, + ambiguous='raise'): """ Localize tz-naive TimeSeries to target time zone @@ -3572,16 +3575,22 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, infer_dst=False): must be None copy : boolean, default True Also make a copy of the underlying data - infer_dst : boolean, default False - Attempt to infer fall dst-transition times based on order - + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + - 'infer' will attempt to infer fall dst-transition hours based on order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for ambiguous times)
+ - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous times + infer_dst : boolean, default False (DEPRECATED) + Attempt to infer fall dst-transition hours based on order + Returns ------- """ axis = self._get_axis_number(axis) ax = self._get_axis(axis) - def _tz_localize(ax, tz, infer_dst): + def _tz_localize(ax, tz, ambiguous): if not hasattr(ax, 'tz_localize'): if len(ax) > 0: ax_name = self._get_axis_name(axis) @@ -3590,19 +3599,19 @@ def _tz_localize(ax, tz, infer_dst): else: ax = DatetimeIndex([],tz=tz) else: - ax = ax.tz_localize(tz, infer_dst=infer_dst) + ax = ax.tz_localize(tz, ambiguous=ambiguous) return ax # if a level is given it must be a MultiIndex level or # equivalent to the axis name if isinstance(ax, MultiIndex): level = ax._get_level_number(level) - new_level = _tz_localize(ax.levels[level], tz, infer_dst) + new_level = _tz_localize(ax.levels[level], tz, ambiguous) ax = ax.set_levels(new_level, level=level) else: if level not in (None, 0, ax.name): raise ValueError("The level {0} is not valid".format(level)) - ax = _tz_localize(ax, tz, infer_dst) + ax = _tz_localize(ax, tz, ambiguous) result = self._constructor(self._data, copy=copy) result.set_axis(axis,ax) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 2acdcfffb7d9a..e2cb8216bb270 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -6,6 +6,8 @@ import numpy as np +import warnings + from pandas.core.common import (_NS_DTYPE, _INT64_DTYPE, _values_from_object, _maybe_box, ABCSeries) @@ -18,7 +20,7 @@ from pandas.core.base import DatetimeIndexOpsMixin from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay from pandas.tseries.tools import parse_time_string, normalize_date -from pandas.util.decorators import cache_readonly +from pandas.util.decorators import cache_readonly, deprecate_kwarg import pandas.core.common as com import pandas.tseries.offsets as offsets import 
pandas.tseries.tools as tools @@ -145,6 +147,15 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index): closed : string or None, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) + tz : pytz.timezone or dateutil.tz.tzfile + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + - 'infer' will attempt to infer fall dst-transition hours based on order + - bool-ndarray where True signifies a DST time, False signifies + a non-DST time (note that this flag is only applicable for ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous times + infer_dst : boolean, default False (DEPRECATED) + Attempt to infer fall dst-transition hours based on order name : object Name to be stored in the index """ @@ -180,15 +191,17 @@ class DatetimeIndex(DatetimeIndexOpsMixin, Int64Index): 'is_quarter_start','is_quarter_end','is_year_start','is_year_end'] _is_numeric_dtype = False + + @deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous', + mapping={True: 'infer', False: 'raise'}) def __new__(cls, data=None, freq=None, start=None, end=None, periods=None, copy=False, name=None, tz=None, verify_integrity=True, normalize=False, - closed=None, **kwargs): + closed=None, ambiguous='raise', **kwargs): dayfirst = kwargs.pop('dayfirst', None) yearfirst = kwargs.pop('yearfirst', None) - infer_dst = kwargs.pop('infer_dst', False) freq_infer = False if not isinstance(freq, DateOffset): @@ -214,7 +227,7 @@ def __new__(cls, data=None, if data is None: return cls._generate(start, end, periods, name, freq, tz=tz, normalize=normalize, closed=closed, - infer_dst=infer_dst) + ambiguous=ambiguous) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if np.isscalar(data): @@ -240,7 +253,7 @@ def __new__(cls, data=None, data.name = name if tz is not None: - return data.tz_localize(tz, infer_dst=infer_dst) + return 
data.tz_localize(tz, ambiguous=ambiguous) return data @@ -309,7 +322,7 @@ def __new__(cls, data=None, # Convert tz-naive to UTC ints = subarr.view('i8') subarr = tslib.tz_localize_to_utc(ints, tz, - infer_dst=infer_dst) + ambiguous=ambiguous) subarr = subarr.view(_NS_DTYPE) @@ -333,7 +346,7 @@ def __new__(cls, data=None, @classmethod def _generate(cls, start, end, periods, name, offset, - tz=None, normalize=False, infer_dst=False, closed=None): + tz=None, normalize=False, ambiguous='raise', closed=None): if com._count_not_none(start, end, periods) != 2: raise ValueError('Must specify two of start, end, or periods') @@ -447,7 +460,7 @@ def _generate(cls, start, end, periods, name, offset, if tz is not None and getattr(index, 'tz', None) is None: index = tslib.tz_localize_to_utc(com._ensure_int64(index), tz, - infer_dst=infer_dst) + ambiguous=ambiguous) index = index.view(_NS_DTYPE) index = cls._simple_new(index, name=name, freq=offset, tz=tz) @@ -1645,7 +1658,9 @@ def tz_convert(self, tz): # No conversion since timestamps are all UTC to begin with return self._shallow_copy(tz=tz) - def tz_localize(self, tz, infer_dst=False): + @deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous', + mapping={True: 'infer', False: 'raise'}) + def tz_localize(self, tz, ambiguous='raise'): """ Localize tz-naive DatetimeIndex to given time zone (using pytz/dateutil), or remove timezone from tz-aware DatetimeIndex @@ -1656,7 +1671,13 @@ def tz_localize(self, tz, infer_dst=False): Time zone for time. Corresponding timestamps would be converted to time zone of the TimeSeries. None will remove timezone holding local time. 
- infer_dst : boolean, default False + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + - 'infer' will attempt to infer fall dst-transition hours based on order + - bool-ndarray where True signifies a DST time, False signifies + a non-DST time (note that this flag is only applicable for ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous times + infer_dst : boolean, default False (DEPRECATED) Attempt to infer fall dst-transition hours based on order Returns @@ -1671,7 +1692,9 @@ def tz_localize(self, tz, infer_dst=False): else: tz = tslib.maybe_get_tz(tz) # Convert to UTC - new_dates = tslib.tz_localize_to_utc(self.asi8, tz, infer_dst=infer_dst) + + new_dates = tslib.tz_localize_to_utc(self.asi8, tz, + ambiguous=ambiguous) new_dates = new_dates.view(_NS_DTYPE) return self._shallow_copy(new_dates, tz=tz) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 5635bb75dd9ce..9fbdb714d8cfa 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -423,31 +423,98 @@ def test_with_tz_ambiguous_times(self): dr = date_range(datetime(2011, 3, 13), periods=48, freq=datetools.Minute(30), tz=pytz.utc) - def test_infer_dst(self): + def test_ambiguous_infer(self): # November 6, 2011, fall back, repeat 2 AM hour # With no repeated hours, we cannot infer the transition tz = self.tz('US/Eastern') dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=datetools.Hour()) - self.assertRaises(pytz.AmbiguousTimeError, dr.tz_localize, - tz, infer_dst=True) + self.assertRaises(pytz.AmbiguousTimeError, dr.tz_localize, tz) # With repeated hours, we can infer the transition dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=datetools.Hour(), tz=tz) - di = DatetimeIndex(['11/06/2011 00:00', '11/06/2011 01:00', - '11/06/2011 01:00', '11/06/2011 02:00', - '11/06/2011 03:00']) - localized = 
di.tz_localize(tz, infer_dst=True) + times = ['11/06/2011 00:00', '11/06/2011 01:00', + '11/06/2011 01:00', '11/06/2011 02:00', + '11/06/2011 03:00'] + di = DatetimeIndex(times) + localized = di.tz_localize(tz, ambiguous='infer') self.assert_numpy_array_equal(dr, localized) - + localized_old = di.tz_localize(tz, infer_dst=True) + self.assert_numpy_array_equal(dr, localized_old) + self.assert_numpy_array_equal(dr, DatetimeIndex(times, tz=tz, ambiguous='infer')) + # When there is no dst transition, nothing special happens dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=datetools.Hour()) localized = dr.tz_localize(tz) - localized_infer = dr.tz_localize(tz, infer_dst=True) + localized_infer = dr.tz_localize(tz, ambiguous='infer') self.assert_numpy_array_equal(localized, localized_infer) + localized_infer_old = dr.tz_localize(tz, infer_dst=True) + self.assert_numpy_array_equal(localized, localized_infer_old) + + def test_ambiguous_flags(self): + # November 6, 2011, fall back, repeat 2 AM hour + tz = self.tz('US/Eastern') + + # Pass in flags to determine right dst transition + dr = date_range(datetime(2011, 11, 6, 0), periods=5, + freq=datetools.Hour(), tz=tz) + times = ['11/06/2011 00:00', '11/06/2011 01:00', + '11/06/2011 01:00', '11/06/2011 02:00', + '11/06/2011 03:00'] + + # Test tz_localize + di = DatetimeIndex(times) + is_dst = [1, 1, 0, 0, 0] + localized = di.tz_localize(tz, ambiguous=is_dst) + self.assert_numpy_array_equal(dr, localized) + self.assert_numpy_array_equal(dr, DatetimeIndex(times, tz=tz, ambiguous=is_dst)) + + localized = di.tz_localize(tz, ambiguous=np.array(is_dst)) + self.assert_numpy_array_equal(dr, localized) + + localized = di.tz_localize(tz, ambiguous=np.array(is_dst).astype('bool')) + self.assert_numpy_array_equal(dr, localized) + + # Test constructor + localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst) + self.assert_numpy_array_equal(dr, localized) + + # Test duplicate times where infer_dst fails + times += times + di = 
DatetimeIndex(times) + + # When the sizes are incompatible, make sure error is raised + self.assertRaises(Exception, di.tz_localize, tz, ambiguous=is_dst) + + # When sizes are compatible and there are repeats ('infer' won't work) + is_dst = np.hstack((is_dst, is_dst)) + localized = di.tz_localize(tz, ambiguous=is_dst) + dr = dr.append(dr) + self.assert_numpy_array_equal(dr, localized) + # When there is no dst transition, nothing special happens + dr = date_range(datetime(2011, 6, 1, 0), periods=10, + freq=datetools.Hour()) + is_dst = np.array([1] * 10) + localized = dr.tz_localize(tz) + localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst) + self.assert_numpy_array_equal(localized, localized_is_dst) + + def test_ambiguous_nat(self): + tz = self.tz('US/Eastern') + times = ['11/06/2011 00:00', '11/06/2011 01:00', + '11/06/2011 01:00', '11/06/2011 02:00', + '11/06/2011 03:00'] + di = DatetimeIndex(times) + localized = di.tz_localize(tz, ambiguous='NaT') + + times = ['11/06/2011 00:00', np.NaN, + np.NaN, '11/06/2011 02:00', + '11/06/2011 03:00'] + di_test = DatetimeIndex(times, tz='US/Eastern') + self.assert_numpy_array_equal(di_test, localized) # test utility methods def test_infer_tz(self): diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 61fc3652fb8a4..9adcbb4ea4a41 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -232,6 +232,17 @@ def test_tz(self): conv = local.tz_convert('US/Eastern') self.assertEqual(conv.nanosecond, 5) self.assertEqual(conv.hour, 19) + + def test_tz_localize_ambiguous(self): + + ts = Timestamp('2014-11-02 01:00') + ts_dst = ts.tz_localize('US/Eastern', ambiguous=True) + ts_no_dst = ts.tz_localize('US/Eastern', ambiguous=False) + + rng = date_range('2014-11-02', periods=3, freq='H', tz='US/Eastern') + self.assertEqual(rng[1], ts_dst) + self.assertEqual(rng[2], ts_no_dst) + self.assertRaises(ValueError, ts.tz_localize, 'US/Eastern', ambiguous='infer') # GH 8025 
with tm.assertRaisesRegexp(TypeError, 'Cannot localize tz-aware Timestamp, use ' diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 36c40f8ca39af..c05d85a39441e 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -100,9 +100,9 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): offset = to_offset(offset) if box: - func_create = create_timestamp_from_ts + func_create = create_timestamp_from_ts else: - func_create = create_datetime_from_ts + func_create = create_datetime_from_ts if tz is not None: if _is_utc(tz): @@ -359,7 +359,7 @@ class Timestamp(_Timestamp): def is_year_end(self): return self._get_start_end_field('is_year_end') - def tz_localize(self, tz, infer_dst=False): + def tz_localize(self, tz, ambiguous='raise'): """ Convert naive Timestamp to local time zone, or remove timezone from tz-aware Timestamp. @@ -369,18 +369,26 @@ class Timestamp(_Timestamp): tz : string, pytz.timezone, dateutil.tz.tzfile or None Time zone for time which Timestamp will be converted to. None will remove timezone holding local time. 
- infer_dst : boolean, default False - Attempt to infer fall dst-transition hours based on order - + ambiguous : bool, 'NaT', default 'raise' + - bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates) + - 'NaT' will return NaT for an ambiguous time + - 'raise' will raise an AmbiguousTimeError for an ambiguous time + Returns ------- localized : Timestamp """ + if ambiguous == 'infer': + raise ValueError('Cannot infer offset with only one time.') + if self.tzinfo is None: # tz naive, localize tz = maybe_get_tz(tz) + if not isinstance(ambiguous, basestring): + ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value]), tz, - infer_dst=infer_dst)[0] + ambiguous=ambiguous)[0] return Timestamp(value, tz=tz) else: if tz is None: @@ -1330,12 +1338,12 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, else: try: if len(val) == 0: - iresult[i] = iNaT - continue + iresult[i] = iNaT + continue elif val in _nat_strings: - iresult[i] = iNaT - continue + iresult[i] = iNaT + continue _string_to_dts(val, &dts, &out_local, &out_tzoffset) value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) @@ -1349,8 +1357,8 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, py_dt = parse_datetime_string(val, dayfirst=dayfirst) except Exception: if coerce: - iresult[i] = iNaT - continue + iresult[i] = iNaT + continue raise TypeError try: @@ -1491,7 +1499,7 @@ cdef inline convert_to_timedelta64(object ts, object unit, object coerce): return ts.astype('timedelta64[ns]') def repr_timedelta64(object value, format=None): - """ + """ provide repr for timedelta64 Parameters @@ -1503,60 +1511,60 @@ def repr_timedelta64(object value, format=None): ------- converted : Timestamp - """ - cdef object ivalue + """ + cdef object ivalue - ivalue = value.view('i8') + ivalue = value.view('i8') - # put frac in seconds - frac = float(ivalue)/1e9 - sign = np.sign(frac) - 
frac = np.abs(frac) + # put frac in seconds + frac = float(ivalue)/1e9 + sign = np.sign(frac) + frac = np.abs(frac) - if frac >= 86400: - days = int(frac / 86400) - frac -= days * 86400 - else: - days = 0 + if frac >= 86400: + days = int(frac / 86400) + frac -= days * 86400 + else: + days = 0 - if frac >= 3600: - hours = int(frac / 3600) - frac -= hours * 3600 - else: - hours = 0 + if frac >= 3600: + hours = int(frac / 3600) + frac -= hours * 3600 + else: + hours = 0 - if frac >= 60: - minutes = int(frac / 60) - frac -= minutes * 60 - else: - minutes = 0 + if frac >= 60: + minutes = int(frac / 60) + frac -= minutes * 60 + else: + minutes = 0 - if frac >= 1: - seconds = int(frac) - frac -= seconds - else: - seconds = 0 + if frac >= 1: + seconds = int(frac) + frac -= seconds + else: + seconds = 0 - if frac == int(frac): - seconds_pretty = "%02d" % seconds - else: - sp = abs(round(1e6*frac)) - seconds_pretty = "%02d.%06d" % (seconds, sp) + if frac == int(frac): + seconds_pretty = "%02d" % seconds + else: + sp = abs(round(1e6*frac)) + seconds_pretty = "%02d.%06d" % (seconds, sp) - if sign < 0: - sign_pretty = "-" - else: - sign_pretty = "" + if sign < 0: + sign_pretty = "-" + else: + sign_pretty = "" - if days or format == 'long': - if (hours or minutes or seconds or frac) or format != 'short': - return "%s%d days, %02d:%02d:%s" % (sign_pretty, days, hours, minutes, - seconds_pretty) - else: - return "%s%d days" % (sign_pretty, days) + if days or format == 'long': + if (hours or minutes or seconds or frac) or format != 'short': + return "%s%d days, %02d:%02d:%s" % (sign_pretty, days, hours, minutes, + seconds_pretty) + else: + return "%s%d days" % (sign_pretty, days) - return "%s%02d:%02d:%s" % (sign_pretty, hours, minutes, seconds_pretty) + return "%s%02d:%02d:%s" % (sign_pretty, hours, minutes, seconds_pretty) def array_strptime(ndarray[object] values, object fmt, coerce=False): @@ -1765,8 +1773,8 @@ def array_strptime(ndarray[object] values, object fmt, 
coerce=False): # Need to add 1 to result since first day of the year is 1, not 0. julian = datetime_date(year, month, day).toordinal() - \ datetime_date(year, 1, 1).toordinal() + 1 - else: # Assume that if they bothered to include Julian day it will - # be accurate. + else: # Assume that if they bothered to include Julian day it will + # be accurate. datetime_result = datetime_date.fromordinal( (julian - 1) + datetime_date(year, 1, 1).toordinal()) year = datetime_result.year @@ -1850,7 +1858,7 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except -1: base = ts frac = ts-base if p: - frac = round(frac,p) + frac = round(frac,p) return (base*m) + (frac*m) def cast_to_nanoseconds(ndarray arr): @@ -2183,7 +2191,7 @@ cpdef ndarray _unbox_utcoffsets(object transinfo): @cython.boundscheck(False) @cython.wraparound(False) -def tz_localize_to_utc(ndarray[int64_t] vals, object tz, bint infer_dst=False): +def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None): """ Localize tzinfo-naive DateRange to given time zone (using pytz). If there are ambiguities in the values, raise AmbiguousTimeError. 
@@ -2199,6 +2207,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, bint infer_dst=False): int64_t v, left, right ndarray[int64_t] result, result_a, result_b, dst_hours pandas_datetimestruct dts + bint infer_dst = False, is_dst = False, fill = False # Vectorized version of DstTzInfo.localize @@ -2220,6 +2229,16 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, bint infer_dst=False): result[i] = v - delta return result + if isinstance(ambiguous, string_types): + if ambiguous == 'infer': + infer_dst = True + elif ambiguous == 'NaT': + fill = True + elif hasattr(ambiguous, '__iter__'): + is_dst = True + if len(ambiguous) != len(vals): + raise ValueError("Length of ambiguous bool-array must be the same size as vals") + trans = _get_transitions(tz) # transition dates deltas = _get_deltas(tz) # utc offsets @@ -2307,10 +2326,17 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, bint infer_dst=False): else: if infer_dst and dst_hours[i] != NPY_NAT: result[i] = dst_hours[i] + elif is_dst: + if ambiguous[i]: + result[i] = left + else: + result[i] = right + elif fill: + result[i] = NPY_NAT else: stamp = Timestamp(vals[i]) raise pytz.AmbiguousTimeError("Cannot infer dst time from %r, "\ - "try using the 'infer_dst' argument" + "try using the 'ambiguous' argument" % stamp) elif left != NPY_NAT: result[i] = left @@ -3328,7 +3354,7 @@ cdef object _period_strftime(int64_t value, int freq, object fmt): result = result.replace(str_extra_fmts[i], repl) if PY2: - result = result.decode('utf-8', 'ignore') + result = result.decode('utf-8', 'ignore') return result