Skip to content

Commit ebc0c09

Browse files
gfyoung authored and jreback committed
DEPR: Deprecate generic timestamp dtypes (#15987)
* DEPR: Deprecate generic timestamp dtypes

  We only use the nanosecond frequency, and numpy doesn't even handle generic timestamp dtypes well. xref gh-15524 (comment).

* TST: Use pytest idioms in series/test_dtypes.py
1 parent cd35d22 commit ebc0c09

File tree

4 files changed

+195
-96
lines changed

4 files changed

+195
-96
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1204,6 +1204,7 @@ Deprecations
12041204
- ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`)
12051205
- ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`)
12061206
- ``Series.repeat()`` has deprecated the ``reps`` parameter in favor of ``repeats`` (:issue:`12662`)
1207+
- The ``Series`` constructor and ``.astype`` method have deprecated accepting timestamp dtypes without a frequency (e.g. ``np.datetime64``) for the ``dtype`` parameter (:issue:`15524`)
12071208
- ``Index.repeat()`` and ``MultiIndex.repeat()`` have deprecated the ``n`` parameter in favor of ``repeats`` (:issue:`12662`)
12081209
- ``Categorical.searchsorted()`` and ``Series.searchsorted()`` have deprecated the ``v`` parameter in favor of ``value`` (:issue:`12662`)
12091210
- ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`)

pandas/tests/series/test_constructors.py

+27
Original file line numberDiff line numberDiff line change
@@ -839,3 +839,30 @@ def test_constructor_cast_object(self):
839839
s = Series(date_range('1/1/2000', periods=10), dtype=object)
840840
exp = Series(date_range('1/1/2000', periods=10))
841841
tm.assert_series_equal(s, exp)
842+
843+
def test_constructor_generic_timestamp_deprecated(self):
844+
# see gh-15524
845+
846+
with tm.assert_produces_warning(FutureWarning):
847+
dtype = np.timedelta64
848+
s = Series([], dtype=dtype)
849+
850+
assert s.empty
851+
assert s.dtype == 'm8[ns]'
852+
853+
with tm.assert_produces_warning(FutureWarning):
854+
dtype = np.datetime64
855+
s = Series([], dtype=dtype)
856+
857+
assert s.empty
858+
assert s.dtype == 'M8[ns]'
859+
860+
# These timestamps have the wrong frequencies,
861+
# so an Exception should be raised now.
862+
msg = "cannot convert timedeltalike"
863+
with tm.assertRaisesRegexp(TypeError, msg):
864+
Series([], dtype='m8[ps]')
865+
866+
msg = "cannot convert datetimelike"
867+
with tm.assertRaisesRegexp(TypeError, msg):
868+
Series([], dtype='M8[ps]')

pandas/tests/series/test_dtypes.py

+144 -94
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
# coding=utf-8
22
# pylint: disable-msg=E1101,W0612
33

4-
import sys
4+
import pytest
5+
56
from datetime import datetime
7+
8+
import sys
69
import string
10+
import warnings
711

812
from numpy import nan
913
import numpy as np
@@ -12,152 +16,199 @@
1216

1317
from pandas.compat import lrange, range, u
1418
from pandas import compat
15-
from pandas.util.testing import assert_series_equal
1619
import pandas.util.testing as tm
1720

1821
from .common import TestData
1922

2023

21-
class TestSeriesDtypes(TestData, tm.TestCase):
24+
class TestSeriesDtypes(TestData):
2225

23-
def test_astype(self):
26+
@pytest.mark.parametrize("dtype", ["float32", "float64",
27+
"int64", "int32"])
28+
def test_astype(self, dtype):
2429
s = Series(np.random.randn(5), name='foo')
30+
as_typed = s.astype(dtype)
2531

26-
for dtype in ['float32', 'float64', 'int64', 'int32']:
27-
astyped = s.astype(dtype)
28-
self.assertEqual(astyped.dtype, dtype)
29-
self.assertEqual(astyped.name, s.name)
32+
assert as_typed.dtype == dtype
33+
assert as_typed.name == s.name
3034

3135
def test_dtype(self):
3236

33-
self.assertEqual(self.ts.dtype, np.dtype('float64'))
34-
self.assertEqual(self.ts.dtypes, np.dtype('float64'))
35-
self.assertEqual(self.ts.ftype, 'float64:dense')
36-
self.assertEqual(self.ts.ftypes, 'float64:dense')
37-
assert_series_equal(self.ts.get_dtype_counts(), Series(1, ['float64']))
38-
assert_series_equal(self.ts.get_ftype_counts(), Series(
39-
1, ['float64:dense']))
40-
41-
def test_astype_cast_nan_inf_int(self):
42-
# GH14265, check nan and inf raise error when converting to int
43-
types = [np.int32, np.int64]
44-
values = [np.nan, np.inf]
37+
assert self.ts.dtype == np.dtype('float64')
38+
assert self.ts.dtypes == np.dtype('float64')
39+
assert self.ts.ftype == 'float64:dense'
40+
assert self.ts.ftypes == 'float64:dense'
41+
tm.assert_series_equal(self.ts.get_dtype_counts(),
42+
Series(1, ['float64']))
43+
tm.assert_series_equal(self.ts.get_ftype_counts(),
44+
Series(1, ['float64:dense']))
45+
46+
@pytest.mark.parametrize("value", [np.nan, np.inf])
47+
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
48+
def test_astype_cast_nan_inf_int(self, dtype, value):
49+
# gh-14265: check NaN and inf raise error when converting to int
4550
msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer'
51+
s = Series([value])
4652

47-
for this_type in types:
48-
for this_val in values:
49-
s = Series([this_val])
50-
with self.assertRaisesRegexp(ValueError, msg):
51-
s.astype(this_type)
53+
with tm.assertRaisesRegexp(ValueError, msg):
54+
s.astype(dtype)
5255

53-
def test_astype_cast_object_int(self):
56+
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
57+
def test_astype_cast_object_int_fail(self, dtype):
5458
arr = Series(["car", "house", "tree", "1"])
59+
with pytest.raises(ValueError):
60+
arr.astype(dtype)
5561

56-
self.assertRaises(ValueError, arr.astype, int)
57-
self.assertRaises(ValueError, arr.astype, np.int64)
58-
self.assertRaises(ValueError, arr.astype, np.int8)
59-
62+
def test_astype_cast_object_int(self):
6063
arr = Series(['1', '2', '3', '4'], dtype=object)
6164
result = arr.astype(int)
62-
self.assert_series_equal(result, Series(np.arange(1, 5)))
65+
66+
tm.assert_series_equal(result, Series(np.arange(1, 5)))
6367

6468
def test_astype_datetimes(self):
6569
import pandas._libs.tslib as tslib
66-
6770
s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5))
71+
6872
s = s.astype('O')
69-
self.assertEqual(s.dtype, np.object_)
73+
assert s.dtype == np.object_
7074

7175
s = Series([datetime(2001, 1, 2, 0, 0)])
76+
7277
s = s.astype('O')
73-
self.assertEqual(s.dtype, np.object_)
78+
assert s.dtype == np.object_
7479

7580
s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])
81+
7682
s[1] = np.nan
77-
self.assertEqual(s.dtype, 'M8[ns]')
78-
s = s.astype('O')
79-
self.assertEqual(s.dtype, np.object_)
83+
assert s.dtype == 'M8[ns]'
8084

81-
def test_astype_str(self):
82-
# GH4405
83-
digits = string.digits
84-
s1 = Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)])
85-
s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0])
86-
types = (compat.text_type, np.str_)
87-
for typ in types:
88-
for s in (s1, s2):
89-
res = s.astype(typ)
90-
expec = s.map(compat.text_type)
91-
assert_series_equal(res, expec)
92-
93-
# GH9757
94-
# Test str and unicode on python 2.x and just str on python 3.x
95-
for tt in set([str, compat.text_type]):
96-
ts = Series([Timestamp('2010-01-04 00:00:00')])
97-
s = ts.astype(tt)
98-
expected = Series([tt('2010-01-04')])
99-
assert_series_equal(s, expected)
100-
101-
ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')])
102-
s = ts.astype(tt)
103-
expected = Series([tt('2010-01-04 00:00:00-05:00')])
104-
assert_series_equal(s, expected)
105-
106-
td = Series([Timedelta(1, unit='d')])
107-
s = td.astype(tt)
108-
expected = Series([tt('1 days 00:00:00.000000000')])
109-
assert_series_equal(s, expected)
85+
s = s.astype('O')
86+
assert s.dtype == np.object_
87+
88+
@pytest.mark.parametrize("dtype", [compat.text_type, np.str_])
89+
@pytest.mark.parametrize("series", [Series([string.digits * 10,
90+
tm.rands(63),
91+
tm.rands(64),
92+
tm.rands(1000)]),
93+
Series([string.digits * 10,
94+
tm.rands(63),
95+
tm.rands(64), nan, 1.0])])
96+
def test_astype_str_map(self, dtype, series):
97+
# see gh-4405
98+
result = series.astype(dtype)
99+
expected = series.map(compat.text_type)
100+
tm.assert_series_equal(result, expected)
101+
102+
@pytest.mark.parametrize("dtype", [str, compat.text_type])
103+
def test_astype_str_cast(self, dtype):
104+
# see gh-9757: test str and unicode on python 2.x
105+
# and just str on python 3.x
106+
ts = Series([Timestamp('2010-01-04 00:00:00')])
107+
s = ts.astype(dtype)
108+
109+
expected = Series([dtype('2010-01-04')])
110+
tm.assert_series_equal(s, expected)
111+
112+
ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')])
113+
s = ts.astype(dtype)
114+
115+
expected = Series([dtype('2010-01-04 00:00:00-05:00')])
116+
tm.assert_series_equal(s, expected)
117+
118+
td = Series([Timedelta(1, unit='d')])
119+
s = td.astype(dtype)
120+
121+
expected = Series([dtype('1 days 00:00:00.000000000')])
122+
tm.assert_series_equal(s, expected)
110123

111124
def test_astype_unicode(self):
112-
113-
# GH7758
114-
# a bit of magic is required to set default encoding encoding to utf-8
125+
# see gh-7758: A bit of magic is required to set
126+
# default encoding to utf-8
115127
digits = string.digits
116128
test_series = [
117129
Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
118130
Series([u('データーサイエンス、お前はもう死んでいる')]),
119-
120131
]
121132

122133
former_encoding = None
134+
123135
if not compat.PY3:
124-
# in python we can force the default encoding for this test
136+
# In Python, we can force the default encoding for this test
125137
former_encoding = sys.getdefaultencoding()
126138
reload(sys) # noqa
139+
127140
sys.setdefaultencoding("utf-8")
128141
if sys.getdefaultencoding() == "utf-8":
129142
test_series.append(Series([u('野菜食べないとやばい')
130143
.encode("utf-8")]))
144+
131145
for s in test_series:
132146
res = s.astype("unicode")
133147
expec = s.map(compat.text_type)
134-
assert_series_equal(res, expec)
135-
# restore the former encoding
148+
tm.assert_series_equal(res, expec)
149+
150+
# Restore the former encoding
136151
if former_encoding is not None and former_encoding != "utf-8":
137152
reload(sys) # noqa
138153
sys.setdefaultencoding(former_encoding)
139154

140155
def test_astype_dict(self):
141-
# GH7271
156+
# see gh-7271
142157
s = Series(range(0, 10, 2), name='abc')
143158

144159
result = s.astype({'abc': str})
145160
expected = Series(['0', '2', '4', '6', '8'], name='abc')
146-
assert_series_equal(result, expected)
161+
tm.assert_series_equal(result, expected)
147162

148163
result = s.astype({'abc': 'float64'})
149164
expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64',
150165
name='abc')
151-
assert_series_equal(result, expected)
152-
153-
self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str})
154-
self.assertRaises(KeyError, s.astype, {0: str})
155-
156-
def test_complexx(self):
157-
# GH4819
158-
# complex access for ndarray compat
166+
tm.assert_series_equal(result, expected)
167+
168+
with pytest.raises(KeyError):
169+
s.astype({'abc': str, 'def': str})
170+
171+
with pytest.raises(KeyError):
172+
s.astype({0: str})
173+
174+
def test_astype_generic_timestamp_deprecated(self):
175+
# see gh-15524
176+
data = [1]
177+
178+
with tm.assert_produces_warning(FutureWarning,
179+
check_stacklevel=False):
180+
s = Series(data)
181+
dtype = np.datetime64
182+
result = s.astype(dtype)
183+
expected = Series(data, dtype=dtype)
184+
tm.assert_series_equal(result, expected)
185+
186+
with tm.assert_produces_warning(FutureWarning,
187+
check_stacklevel=False):
188+
s = Series(data)
189+
dtype = np.timedelta64
190+
result = s.astype(dtype)
191+
expected = Series(data, dtype=dtype)
192+
tm.assert_series_equal(result, expected)
193+
194+
@pytest.mark.parametrize("dtype", np.typecodes['All'])
195+
def test_astype_empty_constructor_equality(self, dtype):
196+
# see gh-15524
197+
198+
if dtype not in ('S', 'V'): # poor support (if any) currently
199+
with warnings.catch_warnings(record=True):
200+
# Generic timestamp dtypes ('M' and 'm') are deprecated,
201+
# but we test that already in series/test_constructors.py
202+
203+
init_empty = Series([], dtype=dtype)
204+
as_type_empty = Series([]).astype(dtype)
205+
tm.assert_series_equal(init_empty, as_type_empty)
206+
207+
def test_complex(self):
208+
# see gh-4819: complex access for ndarray compat
159209
a = np.arange(5, dtype=np.float64)
160210
b = Series(a + 4j * a)
211+
161212
tm.assert_numpy_array_equal(a, b.real)
162213
tm.assert_numpy_array_equal(4 * a, b.imag)
163214

@@ -166,23 +217,22 @@ def test_complexx(self):
166217
tm.assert_numpy_array_equal(4 * a, b.imag)
167218

168219
def test_arg_for_errors_in_astype(self):
169-
# issue #14878
170-
171-
sr = Series([1, 2, 3])
220+
# see gh-14878
221+
s = Series([1, 2, 3])
172222

173-
with self.assertRaises(ValueError):
174-
sr.astype(np.float64, errors=False)
223+
with pytest.raises(ValueError):
224+
s.astype(np.float64, errors=False)
175225

176226
with tm.assert_produces_warning(FutureWarning):
177-
sr.astype(np.int8, raise_on_error=True)
227+
s.astype(np.int8, raise_on_error=True)
178228

179-
sr.astype(np.int8, errors='raise')
229+
s.astype(np.int8, errors='raise')
180230

181231
def test_intercept_astype_object(self):
182232
series = Series(date_range('1/1/2000', periods=10))
183233

184-
# this test no longer makes sense as series is by default already
185-
# M8[ns]
234+
# This test no longer makes sense, as
235+
# Series is by default already M8[ns].
186236
expected = series.astype('object')
187237

188238
df = DataFrame({'a': series,
@@ -192,9 +242,9 @@ def test_intercept_astype_object(self):
192242
tm.assert_series_equal(df.dtypes, exp_dtypes)
193243

194244
result = df.values.squeeze()
195-
self.assertTrue((result[:, 0] == expected.values).all())
245+
assert (result[:, 0] == expected.values).all()
196246

197247
df = DataFrame({'a': series, 'b': ['foo'] * len(series)})
198248

199249
result = df.values.squeeze()
200-
self.assertTrue((result[:, 0] == expected.values).all())
250+
assert (result[:, 0] == expected.values).all()

0 commit comments

Comments
 (0)