diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 9f989b2cf0ea9..d0f7af2275812 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -30,6 +30,7 @@ Enhancements df = DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C']) df.drop(['A', 'X'], axis=1, errors='ignore') +- Allow conversion of values with dtype ``datetime64`` or ``timedelta64`` to strings using ``astype(str)`` (:issue:`9757`) .. _whatsnew_0161.api: diff --git a/pandas/core/common.py b/pandas/core/common.py index ec805aba34d48..0fb35c2fb02fc 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2637,7 +2637,12 @@ def _astype_nansafe(arr, dtype, copy=True): if not isinstance(dtype, np.dtype): dtype = _coerce_to_dtype(dtype) - if is_datetime64_dtype(arr): + if issubclass(dtype.type, compat.text_type): + # in Py3 that's str, in Py2 that's unicode + return lib.astype_unicode(arr.ravel()).reshape(arr.shape) + elif issubclass(dtype.type, compat.string_types): + return lib.astype_str(arr.ravel()).reshape(arr.shape) + elif is_datetime64_dtype(arr): if dtype == object: return tslib.ints_to_pydatetime(arr.view(np.int64)) elif dtype == np.int64: @@ -2675,11 +2680,6 @@ def _astype_nansafe(arr, dtype, copy=True): elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer): # work around NumPy brokenness, #1987 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) - elif issubclass(dtype.type, compat.text_type): - # in Py3 that's str, in Py2 that's unicode - return lib.astype_unicode(arr.ravel()).reshape(arr.shape) - elif issubclass(dtype.type, compat.string_types): - return lib.astype_str(arr.ravel()).reshape(arr.shape) if copy: return arr.astype(dtype) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index b8bdd2d4e3b40..6ea76710b4de7 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4192,6 +4192,30 @@ def test_astype_cast_nan_int(self): df = DataFrame(data={"Values": [1.0, 2.0, 3.0, np.nan]}) self.assertRaises(ValueError, df.astype, np.int64) + def test_astype_str(self): + # GH9757 + a = Series(date_range('2010-01-04', periods=5)) + b = Series(date_range('3/6/2012 00:00', periods=5, tz='US/Eastern')) + c = Series([Timedelta(x, unit='d') for x in range(5)]) + d = Series(range(5)) + e = Series([0.0, 0.2, 0.4, 0.6, 0.8]) + + df = DataFrame({'a' : a, 'b' : b, 'c' : c, 'd' : d, 'e' : e}) + + # Test str and unicode on python 2.x and just str on python 3.x + for tt in set([str, compat.text_type]): + result = df.astype(tt) + + expected = DataFrame({ + 'a' : list(map(tt, a.values)), + 'b' : list(map(tt, b.values)), + 'c' : list(map(tt, c.values)), + 'd' : list(map(tt, d.values)), + 'e' : list(map(tt, e.values)), + }) + + assert_frame_equal(result, expected) + def test_array_interface(self): result = np.sqrt(self.frame) tm.assert_isinstance(result, type(self.frame)) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index f044fe540ea24..fec98a37b5017 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5511,6 +5511,24 @@ def test_astype_str(self): expec = s.map(compat.text_type) assert_series_equal(res, expec) + # GH9757 + # Test str and unicode on python 2.x and just str on python 3.x + for tt in set([str, compat.text_type]): + ts = Series([Timestamp('2010-01-04 00:00:00')]) + s = ts.astype(tt) + expected = Series([tt(ts.values[0])]) + assert_series_equal(s, expected) + + ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) + s = ts.astype(tt) + expected = Series([tt(ts.values[0])]) + assert_series_equal(s, expected) + + td = Series([Timedelta(1, unit='d')]) + s = td.astype(tt) + expected = Series([tt(td.values[0])]) + assert_series_equal(s, expected) + def test_astype_unicode(self): # GH7758