Skip to content

Commit 3657a51

Browse files
committed
Merge pull request #7823 from jreback/tz
BUG: Bug in passing a DatetimeIndex with a timezone that was not being retained in Frame construction (GH7822)
2 parents 2d3216d + dba9363 commit 3657a51

File tree

5 files changed

+67
-12
lines changed

5 files changed

+67
-12
lines changed

doc/source/v0.15.0.txt

+21
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,27 @@ API changes
5858

5959
rolling_min(s, window=10, min_periods=5)
6060

61+
- Bug where the timezone of a ``DatetimeIndex`` was not being retained when constructing a DataFrame from a dict (:issue:`7822`)
62+
63+
In prior versions this would drop the timezone.
64+
65+
.. ipython:: python
66+
67+
i = date_range('1/1/2011', periods=3, freq='10s', tz = 'US/Eastern')
68+
i
69+
df = DataFrame( {'a' : i } )
70+
df
71+
df.dtypes
72+
73+
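Assigning a timezone-aware ``DatetimeIndex`` to a column of an existing DataFrame already retained the timezone; this behavior is unchanged.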
74+
75+
.. ipython:: python
76+
77+
df = DataFrame( )
78+
df['a'] = i
79+
df
80+
df.dtypes
81+
6182
.. _whatsnew_0150.cat:
6283

6384
Categoricals in Series/DataFrame

pandas/core/frame.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -2146,19 +2146,13 @@ def reindexer(value):
21462146
value = value.copy()
21472147

21482148
elif (isinstance(value, Index) or _is_sequence(value)):
2149-
if len(value) != len(self.index):
2150-
raise ValueError('Length of values does not match length of '
2151-
'index')
2152-
2149+
from pandas.core.series import _sanitize_index
2150+
value = _sanitize_index(value, self.index, copy=False)
21532151
if not isinstance(value, (np.ndarray, Index)):
21542152
if isinstance(value, list) and len(value) > 0:
21552153
value = com._possibly_convert_platform(value)
21562154
else:
21572155
value = com._asarray_tuplesafe(value)
2158-
elif isinstance(value, PeriodIndex):
2159-
value = value.asobject
2160-
elif isinstance(value, DatetimeIndex):
2161-
value = value._to_embed(keep_tz=True).copy()
21622156
elif value.ndim == 2:
21632157
value = value.copy().T
21642158
else:

pandas/core/series.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -2431,8 +2431,26 @@ def remove_na(series):
24312431
return series[notnull(_values_from_object(series))]
24322432

24332433

2434+
def _sanitize_index(data, index, copy=False):
2435+
""" sanitize an index type to return an ndarray of the underlying, pass thru a non-Index """
2436+
2437+
if len(data) != len(index):
2438+
raise ValueError('Length of values does not match length of '
2439+
'index')
2440+
2441+
if isinstance(data, PeriodIndex):
2442+
data = data.asobject
2443+
elif isinstance(data, DatetimeIndex):
2444+
data = data._to_embed(keep_tz=True)
2445+
if copy:
2446+
data = data.copy()
2447+
2448+
return data
2449+
24342450
def _sanitize_array(data, index, dtype=None, copy=False,
24352451
raise_cast_failure=False):
2452+
""" sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified """
2453+
24362454
if dtype is not None:
24372455
dtype = np.dtype(dtype)
24382456

@@ -2482,11 +2500,13 @@ def _try_cast(arr, take_fast_path):
24822500
raise TypeError('Cannot cast datetime64 to %s' % dtype)
24832501
else:
24842502
subarr = _try_cast(data, True)
2485-
else:
2503+
elif isinstance(data, Index):
24862504
# don't coerce Index types
24872505
# e.g. indexes can have different conversions (so don't fast path them)
24882506
# GH 6140
2489-
subarr = _try_cast(data, not isinstance(data, Index))
2507+
subarr = _sanitize_index(data, index, copy=True)
2508+
else:
2509+
subarr = _try_cast(data, True)
24902510

24912511
if copy:
24922512
subarr = data.copy()

pandas/tests/test_frame.py

+17
Original file line numberDiff line numberDiff line change
@@ -3599,6 +3599,23 @@ def test_constructor_with_datetimes(self):
35993599
self.assertEqual(df.iat[0,0],dt)
36003600
assert_series_equal(df.dtypes,Series({'End Date' : np.dtype('object') }))
36013601

3602+
# GH 7822
3603+
# preserve an index with a tz on dict construction
3604+
i = date_range('1/1/2011', periods=5, freq='10s', tz = 'US/Eastern')
3605+
3606+
expected = DataFrame( {'a' : i.to_series(keep_tz=True).reset_index(drop=True) })
3607+
df = DataFrame()
3608+
df['a'] = i
3609+
assert_frame_equal(df, expected)
3610+
df = DataFrame( {'a' : i } )
3611+
assert_frame_equal(df, expected)
3612+
3613+
# multiples
3614+
i_no_tz = date_range('1/1/2011', periods=5, freq='10s')
3615+
df = DataFrame( {'a' : i, 'b' : i_no_tz } )
3616+
expected = DataFrame( {'a' : i.to_series(keep_tz=True).reset_index(drop=True), 'b': i_no_tz })
3617+
assert_frame_equal(df, expected)
3618+
36023619
def test_constructor_for_list_with_dtypes(self):
36033620
intname = np.dtype(np.int_).name
36043621
floatname = np.dtype(np.float_).name

pandas/tseries/tests/test_timezones.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -598,10 +598,13 @@ def test_to_datetime_tzlocal(self):
598598

599599
def test_frame_no_datetime64_dtype(self):
600600

601+
# after 7822
602+
# these retain the timezones on dict construction
603+
601604
dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI')
602605
dr_tz = dr.tz_localize(self.tzstr('US/Eastern'))
603606
e = DataFrame({'A': 'foo', 'B': dr_tz}, index=dr)
604-
self.assertEqual(e['B'].dtype, 'M8[ns]')
607+
self.assertEqual(e['B'].dtype, 'O')
605608

606609
# GH 2810 (with timezones)
607610
datetimes_naive = [ ts.to_pydatetime() for ts in dr ]
@@ -610,7 +613,7 @@ def test_frame_no_datetime64_dtype(self):
610613
'datetimes_naive': datetimes_naive,
611614
'datetimes_with_tz' : datetimes_with_tz })
612615
result = df.get_dtype_counts()
613-
expected = Series({ 'datetime64[ns]' : 3, 'object' : 1 })
616+
expected = Series({ 'datetime64[ns]' : 2, 'object' : 2 })
614617
tm.assert_series_equal(result, expected)
615618

616619
def test_hongkong_tz_convert(self):

0 commit comments

Comments
 (0)