From 2d8fbd4dd5f884fcd564b7e1e426c94bca8e1491 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 25 May 2016 17:11:58 -0400 Subject: [PATCH] TST: reorg tests for datetime_with_tz construction --- pandas/tests/frame/test_constructors.py | 219 ++++-------------------- pandas/tests/frame/test_dtypes.py | 116 +++++++++++++ pandas/tests/frame/test_indexing.py | 61 +++++++ pandas/tools/tests/test_merge.py | 9 + 4 files changed, 223 insertions(+), 182 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1d043297aa1fa..6913df765862d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -17,7 +17,7 @@ from pandas.compat import (lmap, long, zip, range, lrange, lzip, OrderedDict, is_platform_little_endian) from pandas import compat -from pandas import (DataFrame, Index, Series, notnull, isnull, +from pandas import (DataFrame, Index, Series, isnull, MultiIndex, Timedelta, Timestamp, date_range) from pandas.core.common import PandasError @@ -25,8 +25,6 @@ import pandas.core.common as com import pandas.lib as lib -from pandas.types.api import DatetimeTZDtype - from pandas.util.testing import (assert_numpy_array_equal, assert_series_equal, assert_frame_equal, @@ -1329,185 +1327,6 @@ def test_constructor_with_datetimes(self): .reset_index(drop=True), 'b': i_no_tz}) assert_frame_equal(df, expected) - def test_constructor_with_datetime_tz(self): - - # 8260 - # support datetime64 with tz - - idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), - name='foo') - dr = date_range('20130110', periods=3) - - # construction - df = DataFrame({'A': idx, 'B': dr}) - self.assertTrue(df['A'].dtype, 'M8[ns, US/Eastern') - self.assertTrue(df['A'].name == 'A') - assert_series_equal(df['A'], Series(idx, name='A')) - assert_series_equal(df['B'], Series(dr, name='B')) - - # construction from dict - df2 = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), - B=Timestamp('20130603', tz='CET')), - index=range(5)) - assert_series_equal(df2.dtypes, Series(['datetime64[ns, US/Eastern]', - 'datetime64[ns, CET]'], - index=['A', 'B'])) - - # dtypes - tzframe = DataFrame({'A': date_range('20130101', periods=3), - 'B': date_range('20130101', periods=3, - tz='US/Eastern'), - 'C': date_range('20130101', periods=3, tz='CET')}) - tzframe.iloc[1, 1] = pd.NaT - tzframe.iloc[1, 2] = pd.NaT - result = tzframe.dtypes.sort_index() - expected = Series([np.dtype('datetime64[ns]'), - DatetimeTZDtype('datetime64[ns, US/Eastern]'), - DatetimeTZDtype('datetime64[ns, CET]')], - ['A', 'B', 'C']) - - # concat - df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) - assert_frame_equal(df2, df3) - - # select_dtypes - result = df3.select_dtypes(include=['datetime64[ns]']) - expected = df3.reindex(columns=[]) - assert_frame_equal(result, expected) - - # this will select based on issubclass, and these are the same class - result = df3.select_dtypes(include=['datetime64[ns, CET]']) - expected = df3 - assert_frame_equal(result, expected) - - # from index - idx2 = date_range('20130101', periods=3, tz='US/Eastern', name='foo') - df2 = DataFrame(idx2) - assert_series_equal(df2['foo'], Series(idx2, name='foo')) - df2 = DataFrame(Series(idx2)) - assert_series_equal(df2['foo'], Series(idx2, name='foo')) - - idx2 = date_range('20130101', periods=3, tz='US/Eastern') - df2 = DataFrame(idx2) - assert_series_equal(df2[0], Series(idx2, name=0)) - df2 = DataFrame(Series(idx2)) - assert_series_equal(df2[0], Series(idx2, name=0)) - - # interleave with object - result = self.tzframe.assign(D='foo').values - expected = np.array([[Timestamp('2013-01-01 00:00:00'), - Timestamp('2013-01-02 00:00:00'), - Timestamp('2013-01-03 00:00:00')], - [Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern'), - pd.NaT, - Timestamp('2013-01-03 00:00:00-0500', - tz='US/Eastern')], - [Timestamp('2013-01-01 00:00:00+0100', tz='CET'), - pd.NaT, - Timestamp('2013-01-03 00:00:00+0100', tz='CET')], - ['foo', 'foo', 'foo']], dtype=object).T - self.assert_numpy_array_equal(result, expected) - - # interleave with only datetime64[ns] - result = self.tzframe.values - expected = np.array([[Timestamp('2013-01-01 00:00:00'), - Timestamp('2013-01-02 00:00:00'), - Timestamp('2013-01-03 00:00:00')], - [Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern'), - pd.NaT, - Timestamp('2013-01-03 00:00:00-0500', - tz='US/Eastern')], - [Timestamp('2013-01-01 00:00:00+0100', tz='CET'), - pd.NaT, - Timestamp('2013-01-03 00:00:00+0100', - tz='CET')]], dtype=object).T - self.assert_numpy_array_equal(result, expected) - - # astype - expected = np.array([[Timestamp('2013-01-01 00:00:00'), - Timestamp('2013-01-02 00:00:00'), - Timestamp('2013-01-03 00:00:00')], - [Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern'), - pd.NaT, - Timestamp('2013-01-03 00:00:00-0500', - tz='US/Eastern')], - [Timestamp('2013-01-01 00:00:00+0100', tz='CET'), - pd.NaT, - Timestamp('2013-01-03 00:00:00+0100', - tz='CET')]], - dtype=object).T - result = self.tzframe.astype(object) - assert_frame_equal(result, DataFrame( - expected, index=self.tzframe.index, columns=self.tzframe.columns)) - - result = self.tzframe.astype('datetime64[ns]') - expected = DataFrame({'A': date_range('20130101', periods=3), - 'B': (date_range('20130101', periods=3, - tz='US/Eastern') - .tz_convert('UTC') - .tz_localize(None)), - 'C': (date_range('20130101', periods=3, - tz='CET') - .tz_convert('UTC') - .tz_localize(None))}) - expected.iloc[1, 1] = pd.NaT - expected.iloc[1, 2] = pd.NaT - assert_frame_equal(result, expected) - - # str formatting - result = self.tzframe.astype(str) - expected = np.array([['2013-01-01', '2013-01-01 00:00:00-05:00', - '2013-01-01 00:00:00+01:00'], - ['2013-01-02', 'NaT', 'NaT'], - ['2013-01-03', '2013-01-03 00:00:00-05:00', - '2013-01-03 00:00:00+01:00']], dtype=object) - self.assert_numpy_array_equal(result, expected) - - result = str(self.tzframe) - self.assertTrue('0 2013-01-01 2013-01-01 00:00:00-05:00 ' - '2013-01-01 00:00:00+01:00' in result) - self.assertTrue('1 2013-01-02 ' - 'NaT NaT' in result) - self.assertTrue('2 2013-01-03 2013-01-03 00:00:00-05:00 ' - '2013-01-03 00:00:00+01:00' in result) - - # setitem - df['C'] = idx - assert_series_equal(df['C'], Series(idx, name='C')) - - df['D'] = 'foo' - df['D'] = idx - assert_series_equal(df['D'], Series(idx, name='D')) - del df['D'] - - # assert that A & C are not sharing the same base (e.g. they - # are copies) - b1 = df._data.blocks[1] - b2 = df._data.blocks[2] - self.assertTrue(b1.values.equals(b2.values)) - self.assertFalse(id(b1.values.values.base) == - id(b2.values.values.base)) - - # with nan - df2 = df.copy() - df2.iloc[1, 1] = pd.NaT - df2.iloc[1, 2] = pd.NaT - result = df2['B'] - assert_series_equal(notnull(result), Series( - [True, False, True], name='B')) - assert_series_equal(df2.dtypes, df.dtypes) - - # set/reset - df = DataFrame({'A': [0, 1, 2]}, index=idx) - result = df.reset_index() - self.assertTrue(result['foo'].dtype, 'M8[ns, US/Eastern') - - result = result.set_index('foo') - tm.assert_index_equal(df.index, idx) - def test_constructor_for_list_with_dtypes(self): # TODO(wesm): unused intname = np.dtype(np.int_).name # noqa @@ -2018,3 +1837,39 @@ def test_from_records_len0_with_columns(self): self.assertTrue(np.array_equal(result.columns, ['bar'])) self.assertEqual(len(result), 0) self.assertEqual(result.index.name, 'foo') + + +class TestDataFrameConstructorWithDatetimeTZ(tm.TestCase, TestData): + + _multiprocess_can_split_ = True + + def test_from_dict(self): + + # 8260 + # support datetime64 with tz + + idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), + name='foo') + dr = date_range('20130110', periods=3) + + # construction + df = DataFrame({'A': idx, 'B': dr}) + self.assertTrue(df['A'].dtype, 'M8[ns, US/Eastern') + self.assertTrue(df['A'].name == 'A') + assert_series_equal(df['A'], Series(idx, name='A')) + assert_series_equal(df['B'], Series(dr, name='B')) + + def test_from_index(self): + + # from index + idx2 = date_range('20130101', periods=3, tz='US/Eastern', name='foo') + df2 = DataFrame(idx2) + assert_series_equal(df2['foo'], Series(idx2, name='foo')) + df2 = DataFrame(Series(idx2)) + assert_series_equal(df2['foo'], Series(idx2, name='foo')) + + idx2 = date_range('20130101', periods=3, tz='US/Eastern') + df2 = DataFrame(idx2) + assert_series_equal(df2[0], Series(idx2, name=0)) + df2 = DataFrame(Series(idx2)) + assert_series_equal(df2[0], Series(idx2, name=0)) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 97ca8238b78f9..064230bde791a 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -9,6 +9,7 @@ from pandas import (DataFrame, Series, date_range, Timedelta, Timestamp, compat, option_context) from pandas.compat import u +from pandas.core import common as com from pandas.tests.frame.common import TestData from pandas.util.testing import (assert_series_equal, assert_frame_equal, @@ -74,6 +75,21 @@ def test_empty_frame_dtypes_ftypes(self): assert_series_equal(df[:0].dtypes, ex_dtypes) assert_series_equal(df[:0].ftypes, ex_ftypes) + def test_datetime_with_tz_dtypes(self): + tzframe = DataFrame({'A': date_range('20130101', periods=3), + 'B': date_range('20130101', periods=3, + tz='US/Eastern'), + 'C': date_range('20130101', periods=3, tz='CET')}) + tzframe.iloc[1, 1] = pd.NaT + tzframe.iloc[1, 2] = pd.NaT + result = tzframe.dtypes.sort_index() + expected = Series([np.dtype('datetime64[ns]'), + com.DatetimeTZDtype('datetime64[ns, US/Eastern]'), + com.DatetimeTZDtype('datetime64[ns, CET]')], + ['A', 'B', 'C']) + + assert_series_equal(result, expected) + def test_dtypes_are_correct_after_column_slice(self): # GH6525 df = pd.DataFrame(index=range(5), columns=list("abc"), dtype=np.float_) @@ -178,6 +194,16 @@ def test_select_dtypes_bad_datetime64(self): with tm.assertRaisesRegexp(ValueError, '.+ is too specific'): df.select_dtypes(exclude=['datetime64[as]']) + def test_select_dtypes_datetime_with_tz(self): + + df2 = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), + B=Timestamp('20130603', tz='CET')), + index=range(5)) + df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) + result = df3.select_dtypes(include=['datetime64[ns]']) + expected = df3.reindex(columns=[]) + assert_frame_equal(result, expected) + def test_select_dtypes_str_raises(self): df = DataFrame({'a': list('abc'), 'g': list(u('abc')), @@ -394,3 +420,93 @@ def test_timedeltas(self): 'int64': 1}).sort_values() result = df.get_dtype_counts().sort_values() assert_series_equal(result, expected) + + +class TestDataFrameDatetimeWithTZ(tm.TestCase, TestData): + + _multiprocess_can_split_ = True + + def test_interleave(self): + + # interleave with object + result = self.tzframe.assign(D='foo').values + expected = np.array([[Timestamp('2013-01-01 00:00:00'), + Timestamp('2013-01-02 00:00:00'), + Timestamp('2013-01-03 00:00:00')], + [Timestamp('2013-01-01 00:00:00-0500', + tz='US/Eastern'), + pd.NaT, + Timestamp('2013-01-03 00:00:00-0500', + tz='US/Eastern')], + [Timestamp('2013-01-01 00:00:00+0100', tz='CET'), + pd.NaT, + Timestamp('2013-01-03 00:00:00+0100', tz='CET')], + ['foo', 'foo', 'foo']], dtype=object).T + self.assert_numpy_array_equal(result, expected) + + # interleave with only datetime64[ns] + result = self.tzframe.values + expected = np.array([[Timestamp('2013-01-01 00:00:00'), + Timestamp('2013-01-02 00:00:00'), + Timestamp('2013-01-03 00:00:00')], + [Timestamp('2013-01-01 00:00:00-0500', + tz='US/Eastern'), + pd.NaT, + Timestamp('2013-01-03 00:00:00-0500', + tz='US/Eastern')], + [Timestamp('2013-01-01 00:00:00+0100', tz='CET'), + pd.NaT, + Timestamp('2013-01-03 00:00:00+0100', + tz='CET')]], dtype=object).T + self.assert_numpy_array_equal(result, expected) + + def test_astype(self): + # astype + expected = np.array([[Timestamp('2013-01-01 00:00:00'), + Timestamp('2013-01-02 00:00:00'), + Timestamp('2013-01-03 00:00:00')], + [Timestamp('2013-01-01 00:00:00-0500', + tz='US/Eastern'), + pd.NaT, + Timestamp('2013-01-03 00:00:00-0500', + tz='US/Eastern')], + [Timestamp('2013-01-01 00:00:00+0100', tz='CET'), + pd.NaT, + Timestamp('2013-01-03 00:00:00+0100', + tz='CET')]], + dtype=object).T + result = self.tzframe.astype(object) + assert_frame_equal(result, DataFrame( + expected, index=self.tzframe.index, columns=self.tzframe.columns)) + + result = self.tzframe.astype('datetime64[ns]') + expected = DataFrame({'A': date_range('20130101', periods=3), + 'B': (date_range('20130101', periods=3, + tz='US/Eastern') + .tz_convert('UTC') + .tz_localize(None)), + 'C': (date_range('20130101', periods=3, + tz='CET') + .tz_convert('UTC') + .tz_localize(None))}) + expected.iloc[1, 1] = pd.NaT + expected.iloc[1, 2] = pd.NaT + assert_frame_equal(result, expected) + + def test_astype_str(self): + # str formatting + result = self.tzframe.astype(str) + expected = np.array([['2013-01-01', '2013-01-01 00:00:00-05:00', + '2013-01-01 00:00:00+01:00'], + ['2013-01-02', 'NaT', 'NaT'], + ['2013-01-03', '2013-01-03 00:00:00-05:00', + '2013-01-03 00:00:00+01:00']], dtype=object) + self.assert_numpy_array_equal(result, expected) + + result = str(self.tzframe) + self.assertTrue('0 2013-01-01 2013-01-01 00:00:00-05:00 ' + '2013-01-01 00:00:00+01:00' in result) + self.assertTrue('1 2013-01-02 ' + 'NaT NaT' in result) + self.assertTrue('2 2013-01-03 2013-01-03 00:00:00-05:00 ' + '2013-01-03 00:00:00+01:00' in result) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index ca1ebe477e903..fc8456cb59840 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2699,3 +2699,64 @@ def test_type_error_multiindex(self): result = dg['x', 0] assert_series_equal(result, expected) + + +class TestDataFrameIndexingDatetimeWithTZ(tm.TestCase, TestData): + + _multiprocess_can_split_ = True + + def setUp(self): + self.idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), + name='foo') + self.dr = date_range('20130110', periods=3) + self.df = DataFrame({'A': self.idx, 'B': self.dr}) + + def test_setitem(self): + + df = self.df + idx = self.idx + + # setitem + df['C'] = idx + assert_series_equal(df['C'], Series(idx, name='C')) + + df['D'] = 'foo' + df['D'] = idx + assert_series_equal(df['D'], Series(idx, name='D')) + del df['D'] + + # assert that A & C are not sharing the same base (e.g. they + # are copies) + b1 = df._data.blocks[1] + b2 = df._data.blocks[2] + self.assertTrue(b1.values.equals(b2.values)) + self.assertFalse(id(b1.values.values.base) == + id(b2.values.values.base)) + + # with nan + df2 = df.copy() + df2.iloc[1, 1] = pd.NaT + df2.iloc[1, 2] = pd.NaT + result = df2['B'] + assert_series_equal(notnull(result), Series( + [True, False, True], name='B')) + assert_series_equal(df2.dtypes, df.dtypes) + + def test_set_reset(self): + + idx = self.idx + + # set/reset + df = DataFrame({'A': [0, 1, 2]}, index=idx) + result = df.reset_index() + self.assertTrue(result['foo'].dtype, 'M8[ns, US/Eastern') + + result = result.set_index('foo') + tm.assert_index_equal(df.index, idx) + + def test_transpose(self): + + result = self.df.T + expected = DataFrame(self.df.values.T) + expected.index = ['A', 'B'] + assert_frame_equal(result, expected) diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 474ce0f899217..9430975d76475 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -1136,6 +1136,15 @@ def test_concat_NaT_series(self): result = pd.concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) + def test_concat_tz_frame(self): + df2 = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), + B=Timestamp('20130603', tz='CET')), + index=range(5)) + + # concat + df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) + assert_frame_equal(df2, df3) + def test_concat_tz_series(self): # GH 11755 # tz and no tz