Skip to content

ENH: New level argument for DataFrame.tz_localize and DataFrame.tz_convert (GH7846) #7915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 7, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ previously results in ``Exception`` or ``TypeError`` (:issue:`7812`)
didx
didx.tz_localize(None)

- ``DataFrame.tz_localize`` and ``DataFrame.tz_convert`` now accept an optional ``level`` argument
for localizing or converting a specific level of a MultiIndex (:issue:`7846`)

.. _whatsnew_0150.refactoring:

Internal Refactoring
Expand Down
66 changes: 50 additions & 16 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3467,14 +3467,18 @@ def truncate(self, before=None, after=None, axis=None, copy=True):

return result

def tz_convert(self, tz, axis=0, copy=True):
def tz_convert(self, tz, axis=0, level=None, copy=True):
"""
Convert the axis to target time zone. If it is time zone naive, it
will be localized to the passed time zone.

Parameters
----------
tz : string or pytz.timezone object
axis : the axis to convert
level : int, str, default None
If axis is a MultiIndex, convert a specific level. Otherwise
must be None
copy : boolean, default True
Also make a copy of the underlying data

Expand All @@ -3484,27 +3488,44 @@ def tz_convert(self, tz, axis=0, copy=True):
axis = self._get_axis_number(axis)
ax = self._get_axis(axis)

if not hasattr(ax, 'tz_convert'):
if len(ax) > 0:
ax_name = self._get_axis_name(axis)
raise TypeError('%s is not a valid DatetimeIndex or PeriodIndex' %
ax_name)
def _tz_convert(ax, tz):
if not hasattr(ax, 'tz_convert'):
if len(ax) > 0:
ax_name = self._get_axis_name(axis)
raise TypeError('%s is not a valid DatetimeIndex or PeriodIndex' %
ax_name)
else:
ax = DatetimeIndex([],tz=tz)
else:
ax = DatetimeIndex([],tz=tz)
ax = ax.tz_convert(tz)
return ax

# if a level is given it must be a MultiIndex level or
# equivalent to the axis name
if isinstance(ax, MultiIndex):
level = ax._get_level_number(level)
new_level = _tz_convert(ax.levels[level], tz)
ax = ax.set_levels(new_level, level=level)
else:
ax = ax.tz_convert(tz)
if level not in (None, 0, ax.name):
raise ValueError("The level {0} is not valid".format(level))
ax = _tz_convert(ax, tz)

result = self._constructor(self._data, copy=copy)
result.set_axis(axis,ax)
return result.__finalize__(self)

def tz_localize(self, tz, axis=0, copy=True, infer_dst=False):
def tz_localize(self, tz, axis=0, level=None, copy=True, infer_dst=False):
"""
Localize tz-naive TimeSeries to target time zone

Parameters
----------
tz : string or pytz.timezone object
axis : the axis to localize
level : int, str, default None
If axis is a MultiIndex, localize a specific level. Otherwise
must be None
copy : boolean, default True
Also make a copy of the underlying data
infer_dst : boolean, default False
Expand All @@ -3516,15 +3537,28 @@ def tz_localize(self, tz, axis=0, copy=True, infer_dst=False):
axis = self._get_axis_number(axis)
ax = self._get_axis(axis)

if not hasattr(ax, 'tz_localize'):
if len(ax) > 0:
ax_name = self._get_axis_name(axis)
raise TypeError('%s is not a valid DatetimeIndex or PeriodIndex' %
ax_name)
def _tz_localize(ax, tz, infer_dst):
if not hasattr(ax, 'tz_localize'):
if len(ax) > 0:
ax_name = self._get_axis_name(axis)
raise TypeError('%s is not a valid DatetimeIndex or PeriodIndex' %
ax_name)
else:
ax = DatetimeIndex([],tz=tz)
else:
ax = DatetimeIndex([],tz=tz)
ax = ax.tz_localize(tz, infer_dst=infer_dst)
return ax

# if a level is given it must be a MultiIndex level or
# equivalent to the axis name
if isinstance(ax, MultiIndex):
level = ax._get_level_number(level)
new_level = _tz_localize(ax.levels[level], tz, infer_dst)
ax = ax.set_levels(new_level, level=level)
else:
ax = ax.tz_localize(tz, infer_dst=infer_dst)
if level not in (None, 0, ax.name):
raise ValueError("The level {0} is not valid".format(level))
ax = _tz_localize(ax, tz, infer_dst)

result = self._constructor(self._data, copy=copy)
result.set_axis(axis,ax)
Expand Down
76 changes: 75 additions & 1 deletion pandas/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd

from pandas import (Index, Series, DataFrame, Panel,
isnull, notnull,date_range)
isnull, notnull, date_range, period_range)
from pandas.core.index import Index, MultiIndex

import pandas.core.common as com
Expand Down Expand Up @@ -1102,6 +1102,80 @@ def finalize(self, other, method=None, **kwargs):
DataFrame._metadata = _metadata
DataFrame.__finalize__ = _finalize

def test_tz_convert_and_localize(self):
l0 = date_range('20140701', periods=5, freq='D')

# TODO: l1 should be a PeriodIndex for testing
# after GH2106 is addressed
with tm.assertRaises(NotImplementedError):
period_range('20140701', periods=1).tz_convert('UTC')
with tm.assertRaises(NotImplementedError):
period_range('20140701', periods=1).tz_localize('UTC')
# l1 = period_range('20140701', periods=5, freq='D')
l1 = date_range('20140701', periods=5, freq='D')
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have made the test such that it tests for raises when trying to call tz_convert or tz_localize on PeriodIndex so that it will break when #2106 is fixed. In the meantime I am testing against a DatetimeIndex instead of a PeriodIndex.


int_idx = Index(range(5))

for fn in ['tz_localize', 'tz_convert']:

if fn == 'tz_convert':
l0 = l0.tz_localize('UTC')
l1 = l1.tz_localize('UTC')

for idx in [l0, l1]:

l0_expected = getattr(idx, fn)('US/Pacific')
l1_expected = getattr(idx, fn)('US/Pacific')

df1 = DataFrame(np.ones(5), index=l0)
df1 = getattr(df1, fn)('US/Pacific')
self.assertTrue(df1.index.equals(l0_expected))

# MultiIndex
# GH7846
df2 = DataFrame(np.ones(5),
MultiIndex.from_arrays([l0, l1]))

df3 = getattr(df2, fn)('US/Pacific', level=0)
self.assertFalse(df3.index.levels[0].equals(l0))
self.assertTrue(df3.index.levels[0].equals(l0_expected))
self.assertTrue(df3.index.levels[1].equals(l1))
self.assertFalse(df3.index.levels[1].equals(l1_expected))

df3 = getattr(df2, fn)('US/Pacific', level=1)
self.assertTrue(df3.index.levels[0].equals(l0))
self.assertFalse(df3.index.levels[0].equals(l0_expected))
self.assertTrue(df3.index.levels[1].equals(l1_expected))
self.assertFalse(df3.index.levels[1].equals(l1))

df4 = DataFrame(np.ones(5),
MultiIndex.from_arrays([int_idx, l0]))

df5 = getattr(df4, fn)('US/Pacific', level=1)
self.assertTrue(df3.index.levels[0].equals(l0))
self.assertFalse(df3.index.levels[0].equals(l0_expected))
self.assertTrue(df3.index.levels[1].equals(l1_expected))
self.assertFalse(df3.index.levels[1].equals(l1))

# Bad Inputs
for fn in ['tz_localize', 'tz_convert']:
# Not DatetimeIndex / PeriodIndex
with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'):
df = DataFrame(index=int_idx)
df = getattr(df, fn)('US/Pacific')

# Not DatetimeIndex / PeriodIndex
with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'):
df = DataFrame(np.ones(5),
MultiIndex.from_arrays([int_idx, l0]))
df = getattr(df, fn)('US/Pacific', level=0)

# Invalid level
with tm.assertRaisesRegexp(ValueError, 'not valid'):
df = DataFrame(index=l0)
df = getattr(df, fn)('US/Pacific', level=1)


class TestPanel(tm.TestCase, Generic):
_typ = Panel
_comparator = lambda self, x, y: assert_panel_equal(x, y)
Expand Down
46 changes: 46 additions & 0 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,6 +1174,52 @@ def __setstate__(self, state):
else:
raise Exception("invalid pickle state")
_unpickle_compat = __setstate__

def tz_convert(self, tz):
    """
    Time zone conversion is not yet implemented for PeriodIndex.

    Calling this method always raises NotImplementedError; no conversion
    is performed and nothing is returned.

    Parameters
    ----------
    tz : string, pytz.timezone, dateutil.tz.tzfile or None
        Target time zone. Accepted but currently unused.

    Raises
    ------
    NotImplementedError
        Always, with the message "Not yet implemented for PeriodIndex".
    """
    raise NotImplementedError("Not yet implemented for PeriodIndex")

def tz_localize(self, tz, infer_dst=False):
    """
    Time zone localization is not yet implemented for PeriodIndex.

    Calling this method always raises NotImplementedError; no localization
    is performed and nothing is returned.

    Parameters
    ----------
    tz : string, pytz.timezone, dateutil.tz.tzfile or None
        Time zone to localize to. Accepted but currently unused.
    infer_dst : boolean, default False
        Accepted but currently unused.

    Raises
    ------
    NotImplementedError
        Always, with the message "Not yet implemented for PeriodIndex".
    """
    raise NotImplementedError("Not yet implemented for PeriodIndex")

PeriodIndex._add_numeric_methods_disabled()

def _get_ordinal_range(start, end, periods, freq):
Expand Down