diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 76ba4a5f723fa..3eb8acdd300b5 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -106,6 +106,7 @@ Other enhancements - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) +- ``.select_dtypes()`` now allows ``datetimetz`` to generically select datetimes with tz (:issue:`14910`) .. _whatsnew_0200.api_breaking: @@ -249,5 +250,4 @@ Bug Fixes - - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 78d0f47d473c8..7305df0f57736 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2257,7 +2257,12 @@ def select_dtypes(self, include=None, exclude=None): this will return *all* object dtype columns * See the `numpy dtype hierarchy `__ + * To select datetimes, use np.datetime64, 'datetime' or 'datetime64' + * To select timedeltas, use np.timedelta64, 'timedelta' or + 'timedelta64' * To select Pandas categorical dtypes, use 'category' + * To select Pandas datetimetz dtypes, use 'datetimetz' (new in 0.20.0), + or a 'datetime64[ns, tz]' string Examples -------- diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 61030c262a44b..43a108e9acc80 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -109,15 +109,48 @@ def test_select_dtypes_include(self): 'c': np.arange(3, 6).astype('u1'), 'd': np.arange(4.0, 7.0, dtype='float64'), 'e': [True, False, True], - 'f': pd.Categorical(list('abc'))}) + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', periods=3, + tz='US/Eastern'), + 'i': 
pd.date_range('20130101', periods=3, + tz='CET'), + 'j': pd.period_range('2013-01', periods=3, + freq='M'), + 'k': pd.timedelta_range('1 day', periods=3)}) + ri = df.select_dtypes(include=[np.number]) + ei = df[['b', 'c', 'd', 'k']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=[np.number], exclude=['timedelta']) ei = df[['b', 'c', 'd']] assert_frame_equal(ri, ei) - ri = df.select_dtypes(include=[np.number, 'category']) + ri = df.select_dtypes(include=[np.number, 'category'], + exclude=['timedelta']) ei = df[['b', 'c', 'd', 'f']] assert_frame_equal(ri, ei) + ri = df.select_dtypes(include=['datetime']) + ei = df[['g']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=['datetime64']) + ei = df[['g']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=['datetimetz']) + ei = df[['h', 'i']] + assert_frame_equal(ri, ei) + + ri = df.select_dtypes(include=['timedelta']) + ei = df[['k']] + assert_frame_equal(ri, ei) + + self.assertRaises(NotImplementedError, + lambda: df.select_dtypes(include=['period'])) + def test_select_dtypes_exclude(self): df = DataFrame({'a': list('abc'), 'b': list(range(1, 4)), diff --git a/pandas/types/common.py b/pandas/types/common.py index b9d4c112c00d6..a7ba96f95e31b 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -400,6 +400,11 @@ def _get_dtype_from_object(dtype): pass return dtype.type elif isinstance(dtype, string_types): + if dtype in ['datetimetz', 'datetime64tz']: + return DatetimeTZDtype.type + elif dtype in ['period']: + raise NotImplementedError + if dtype == 'datetime' or dtype == 'timedelta': dtype += '64'