Skip to content

BUG: Bug in passing a DatetimeIndex with a timezone that was not being retained in Frame construction (GH7822) #7823

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 23, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,27 @@ API changes

rolling_min(s, window=10, min_periods=5)

- Bug in ``DataFrame`` construction from a dict where the timezone of a passed ``DatetimeIndex`` was not being retained (:issue:`7822`)

In prior versions this would drop the timezone.

.. ipython:: python

i = date_range('1/1/2011', periods=3, freq='10s', tz = 'US/Eastern')
i
df = DataFrame( {'a' : i } )
df
df.dtypes

Assigning the index to a column of an existing frame already retained the timezone; this behavior is unchanged.

.. ipython:: python

df = DataFrame( )
df['a'] = i
df
df.dtypes

.. _whatsnew_0150.cat:

Categoricals in Series/DataFrame
Expand Down
10 changes: 2 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2146,19 +2146,13 @@ def reindexer(value):
value = value.copy()

elif (isinstance(value, Index) or _is_sequence(value)):
if len(value) != len(self.index):
raise ValueError('Length of values does not match length of '
'index')

from pandas.core.series import _sanitize_index
value = _sanitize_index(value, self.index, copy=False)
if not isinstance(value, (np.ndarray, Index)):
if isinstance(value, list) and len(value) > 0:
value = com._possibly_convert_platform(value)
else:
value = com._asarray_tuplesafe(value)
elif isinstance(value, PeriodIndex):
value = value.asobject
elif isinstance(value, DatetimeIndex):
value = value._to_embed(keep_tz=True).copy()
elif value.ndim == 2:
value = value.copy().T
else:
Expand Down
24 changes: 22 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2431,8 +2431,26 @@ def remove_na(series):
return series[notnull(_values_from_object(series))]


def _sanitize_index(data, index, copy=False):
""" sanitize an index type to return an ndarray of the underlying, pass thru a non-Index """

if len(data) != len(index):
raise ValueError('Length of values does not match length of '
'index')

if isinstance(data, PeriodIndex):
data = data.asobject
elif isinstance(data, DatetimeIndex):
data = data._to_embed(keep_tz=True)
if copy:
data = data.copy()

return data

def _sanitize_array(data, index, dtype=None, copy=False,
raise_cast_failure=False):
""" sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified """

if dtype is not None:
dtype = np.dtype(dtype)

Expand Down Expand Up @@ -2482,11 +2500,13 @@ def _try_cast(arr, take_fast_path):
raise TypeError('Cannot cast datetime64 to %s' % dtype)
else:
subarr = _try_cast(data, True)
else:
elif isinstance(data, Index):
# don't coerce Index types
# e.g. indexes can have different conversions (so don't fast path them)
# GH 6140
subarr = _try_cast(data, not isinstance(data, Index))
subarr = _sanitize_index(data, index, copy=True)
else:
subarr = _try_cast(data, True)

if copy:
subarr = data.copy()
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3599,6 +3599,23 @@ def test_constructor_with_datetimes(self):
self.assertEqual(df.iat[0,0],dt)
assert_series_equal(df.dtypes,Series({'End Date' : np.dtype('object') }))

# GH 7822
# preserve an index with a tz on dict construction
i = date_range('1/1/2011', periods=5, freq='10s', tz = 'US/Eastern')

expected = DataFrame( {'a' : i.to_series(keep_tz=True).reset_index(drop=True) })
df = DataFrame()
df['a'] = i
assert_frame_equal(df, expected)
df = DataFrame( {'a' : i } )
assert_frame_equal(df, expected)

# multiples
i_no_tz = date_range('1/1/2011', periods=5, freq='10s')
df = DataFrame( {'a' : i, 'b' : i_no_tz } )
expected = DataFrame( {'a' : i.to_series(keep_tz=True).reset_index(drop=True), 'b': i_no_tz })
assert_frame_equal(df, expected)

def test_constructor_for_list_with_dtypes(self):
intname = np.dtype(np.int_).name
floatname = np.dtype(np.float_).name
Expand Down
7 changes: 5 additions & 2 deletions pandas/tseries/tests/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,10 +598,13 @@ def test_to_datetime_tzlocal(self):

def test_frame_no_datetime64_dtype(self):

# after 7822
# these retain the timezones on dict construction

dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
dr_tz = dr.tz_localize(self.tzstr('US/Eastern'))
e = DataFrame({'A': 'foo', 'B': dr_tz}, index=dr)
self.assertEqual(e['B'].dtype, 'M8[ns]')
self.assertEqual(e['B'].dtype, 'O')

# GH 2810 (with timezones)
datetimes_naive = [ ts.to_pydatetime() for ts in dr ]
Expand All @@ -610,7 +613,7 @@ def test_frame_no_datetime64_dtype(self):
'datetimes_naive': datetimes_naive,
'datetimes_with_tz' : datetimes_with_tz })
result = df.get_dtype_counts()
expected = Series({ 'datetime64[ns]' : 3, 'object' : 1 })
expected = Series({ 'datetime64[ns]' : 2, 'object' : 2 })
tm.assert_series_equal(result, expected)

def test_hongkong_tz_convert(self):
Expand Down