diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 5e0af498557f2..e8daf41764a70 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -58,6 +58,27 @@ API changes rolling_min(s, window=10, min_periods=5) +- Bug in passing a ``DatetimeIndex`` with a timezone that was not being retained in DataFrame construction from a dict (:issue:`7822`) + + In prior versions this would drop the timezone. + + .. ipython:: python + + i = date_range('1/1/2011', periods=3, freq='10s', tz = 'US/Eastern') + i + df = DataFrame( {'a' : i } ) + df + df.dtypes + + This behavior is unchanged. + + .. ipython:: python + + df = DataFrame( ) + df['a'] = i + df + df.dtypes + .. _whatsnew_0150.cat: Categoricals in Series/DataFrame diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7b005867a404f..636dedfbeb7b7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2146,19 +2146,13 @@ def reindexer(value): value = value.copy() elif (isinstance(value, Index) or _is_sequence(value)): - if len(value) != len(self.index): - raise ValueError('Length of values does not match length of ' - 'index') - + from pandas.core.series import _sanitize_index + value = _sanitize_index(value, self.index, copy=False) if not isinstance(value, (np.ndarray, Index)): if isinstance(value, list) and len(value) > 0: value = com._possibly_convert_platform(value) else: value = com._asarray_tuplesafe(value) - elif isinstance(value, PeriodIndex): - value = value.asobject - elif isinstance(value, DatetimeIndex): - value = value._to_embed(keep_tz=True).copy() elif value.ndim == 2: value = value.copy().T else: diff --git a/pandas/core/series.py b/pandas/core/series.py index 9abc8f22009b3..502c01ce6d1d1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2431,8 +2431,26 @@ def remove_na(series): return series[notnull(_values_from_object(series))] +def _sanitize_index(data, index, copy=False): + """ sanitize an index type to return an ndarray of the underlying, pass thru 
a non-Index """ + + if len(data) != len(index): + raise ValueError('Length of values does not match length of ' + 'index') + + if isinstance(data, PeriodIndex): + data = data.asobject + elif isinstance(data, DatetimeIndex): + data = data._to_embed(keep_tz=True) + if copy: + data = data.copy() + + return data + def _sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False): + """ sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified """ + if dtype is not None: dtype = np.dtype(dtype) @@ -2482,11 +2500,13 @@ def _try_cast(arr, take_fast_path): raise TypeError('Cannot cast datetime64 to %s' % dtype) else: subarr = _try_cast(data, True) - else: + elif isinstance(data, Index): # don't coerce Index types # e.g. indexes can have different conversions (so don't fast path them) # GH 6140 - subarr = _try_cast(data, not isinstance(data, Index)) + subarr = _sanitize_index(data, index, copy=True) + else: + subarr = _try_cast(data, True) if copy: subarr = data.copy() diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index c4783bc49f0ce..0dd729d58f174 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3599,6 +3599,23 @@ def test_constructor_with_datetimes(self): self.assertEqual(df.iat[0,0],dt) assert_series_equal(df.dtypes,Series({'End Date' : np.dtype('object') })) + # GH 7822 + # preserve an index with a tz on dict construction + i = date_range('1/1/2011', periods=5, freq='10s', tz = 'US/Eastern') + + expected = DataFrame( {'a' : i.to_series(keep_tz=True).reset_index(drop=True) }) + df = DataFrame() + df['a'] = i + assert_frame_equal(df, expected) + df = DataFrame( {'a' : i } ) + assert_frame_equal(df, expected) + + # multiples + i_no_tz = date_range('1/1/2011', periods=5, freq='10s') + df = DataFrame( {'a' : i, 'b' : i_no_tz } ) + expected = DataFrame( {'a' : i.to_series(keep_tz=True).reset_index(drop=True), 'b': i_no_tz }) + assert_frame_equal(df, expected) + def
test_constructor_for_list_with_dtypes(self): intname = np.dtype(np.int_).name floatname = np.dtype(np.float_).name diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 531724cdb6837..21f915cb50e21 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -598,10 +598,13 @@ def test_to_datetime_tzlocal(self): def test_frame_no_datetime64_dtype(self): + # after 7822 + # these retain the timezones on dict construction + dr = date_range('2011/1/1', '2012/1/1', freq='W-FRI') dr_tz = dr.tz_localize(self.tzstr('US/Eastern')) e = DataFrame({'A': 'foo', 'B': dr_tz}, index=dr) - self.assertEqual(e['B'].dtype, 'M8[ns]') + self.assertEqual(e['B'].dtype, 'O') # GH 2810 (with timezones) datetimes_naive = [ ts.to_pydatetime() for ts in dr ] @@ -610,7 +613,7 @@ def test_frame_no_datetime64_dtype(self): 'datetimes_naive': datetimes_naive, 'datetimes_with_tz' : datetimes_with_tz }) result = df.get_dtype_counts() - expected = Series({ 'datetime64[ns]' : 3, 'object' : 1 }) + expected = Series({ 'datetime64[ns]' : 2, 'object' : 2 }) tm.assert_series_equal(result, expected) def test_hongkong_tz_convert(self):