Skip to content

Commit 80670d2

Browse files
jbrockmendel authored and Pingviinituutti committed
dtype validation from 24024 (pandas-dev#24478)
1 parent c6bb077 commit 80670d2

File tree

6 files changed

+97
-11
lines changed

6 files changed

+97
-11
lines changed

pandas/core/arrays/datetimes.py

+35 -1
Original file line number | Diff line number | Diff line change
@@ -1570,6 +1570,8 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
15701570

15711571
inferred_freq = None
15721572

1573+
dtype = _validate_dt64_dtype(dtype)
1574+
15731575
if not hasattr(data, "dtype"):
15741576
# e.g. list, tuple
15751577
if np.ndim(data) == 0:
@@ -1754,7 +1756,7 @@ def maybe_convert_dtype(data, copy):
17541756
data = data.view(_NS_DTYPE)
17551757

17561758
elif is_period_dtype(data):
1757-
# Note: without explicitly raising here, PeriondIndex
1759+
# Note: without explicitly raising here, PeriodIndex
17581760
# test_setops.test_join_does_not_recur fails
17591761
raise TypeError("Passing PeriodDtype data is invalid. "
17601762
"Use `data.to_timestamp()` instead")
@@ -1807,6 +1809,38 @@ def maybe_infer_tz(tz, inferred_tz):
18071809
return tz
18081810

18091811

1812+
def _validate_dt64_dtype(dtype):
1813+
"""
1814+
Check that a dtype, if passed, represents either a numpy datetime64[ns]
1815+
dtype or a pandas DatetimeTZDtype.
1816+
1817+
Parameters
1818+
----------
1819+
dtype : object
1820+
1821+
Returns
1822+
-------
1823+
dtype : None, numpy.dtype, or DatetimeTZDtype
1824+
1825+
Raises
1826+
------
1827+
ValueError : invalid dtype
1828+
1829+
Notes
1830+
-----
1831+
Unlike validate_tz_from_dtype, this does _not_ allow non-existent
1832+
tz errors to go through
1833+
"""
1834+
if dtype is not None:
1835+
dtype = pandas_dtype(dtype)
1836+
if ((isinstance(dtype, np.dtype) and dtype != _NS_DTYPE)
1837+
or not isinstance(dtype, (np.dtype, DatetimeTZDtype))):
1838+
raise ValueError("Unexpected value for 'dtype': '{dtype}'. "
1839+
"Must be 'datetime64[ns]' or DatetimeTZDtype'."
1840+
.format(dtype=dtype))
1841+
return dtype
1842+
1843+
18101844
def validate_tz_from_dtype(dtype, tz):
18111845
"""
18121846
If the given dtype is a DatetimeTZDtype, extract the implied

pandas/core/reshape/tile.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -390,10 +390,10 @@ def _coerce_to_type(x):
390390
dtype = x.dtype
391391
elif is_datetime64_dtype(x):
392392
x = to_datetime(x)
393-
dtype = np.datetime64
393+
dtype = np.dtype('datetime64[ns]')
394394
elif is_timedelta64_dtype(x):
395395
x = to_timedelta(x)
396-
dtype = np.timedelta64
396+
dtype = np.dtype('timedelta64[ns]')
397397

398398
if dtype is not None:
399399
# GH 19768: force NaT to NaN during integer conversion

pandas/tests/arrays/test_datetimelike.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ class SharedTests(object):
6464
def test_compare_len1_raises(self):
6565
# make sure we raise when comparing with different lengths, specific
6666
# to the case where one has length-1, which numpy would broadcast
67-
data = np.arange(10, dtype='i8')
67+
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
6868

6969
idx = self.index_cls._simple_new(data, freq='D')
7070
arr = self.array_cls(idx)
@@ -77,7 +77,7 @@ def test_compare_len1_raises(self):
7777
idx <= idx[[0]]
7878

7979
def test_take(self):
80-
data = np.arange(100, dtype='i8')
80+
data = np.arange(100, dtype='i8') * 24 * 3600 * 10**9
8181
np.random.shuffle(data)
8282

8383
idx = self.index_cls._simple_new(data, freq='D')
@@ -96,7 +96,7 @@ def test_take(self):
9696
tm.assert_index_equal(self.index_cls(result), expected)
9797

9898
def test_take_fill(self):
99-
data = np.arange(10, dtype='i8')
99+
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
100100

101101
idx = self.index_cls._simple_new(data, freq='D')
102102
arr = self.array_cls(idx)
@@ -121,7 +121,7 @@ def test_take_fill(self):
121121
fill_value=pd.Timestamp.now().time)
122122

123123
def test_concat_same_type(self):
124-
data = np.arange(10, dtype='i8')
124+
data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
125125

126126
idx = self.index_cls._simple_new(data, freq='D').insert(0, pd.NaT)
127127
arr = self.array_cls(idx)

pandas/tests/arrays/test_datetimes.py

+36
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,31 @@
1111

1212
import pandas as pd
1313
from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray
14+
from pandas.core.arrays.datetimes import sequence_to_dt64ns
1415
import pandas.util.testing as tm
1516

1617

18+
class TestDatetimeArrayConstructor(object):
19+
def test_mismatched_timezone_raises(self):
20+
arr = DatetimeArray(np.array(['2000-01-01T06:00:00'], dtype='M8[ns]'),
21+
dtype=DatetimeTZDtype(tz='US/Central'))
22+
dtype = DatetimeTZDtype(tz='US/Eastern')
23+
with pytest.raises(TypeError, match='data is already tz-aware'):
24+
DatetimeArray(arr, dtype=dtype)
25+
26+
def test_incorrect_dtype_raises(self):
27+
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
28+
DatetimeArray(np.array([1, 2, 3], dtype='i8'), dtype='category')
29+
30+
def test_copy(self):
31+
data = np.array([1, 2, 3], dtype='M8[ns]')
32+
arr = DatetimeArray(data, copy=False)
33+
assert arr._data is data
34+
35+
arr = DatetimeArray(data, copy=True)
36+
assert arr._data is not data
37+
38+
1739
class TestDatetimeArrayComparisons(object):
1840
# TODO: merge this into tests/arithmetic/test_datetime64 once it is
1941
# sufficiently robust
@@ -90,3 +112,17 @@ def test_setitem_clears_freq(self):
90112
tz='US/Central'))
91113
a[0] = pd.Timestamp("2000", tz="US/Central")
92114
assert a.freq is None
115+
116+
117+
class TestSequenceToDT64NS(object):
118+
119+
def test_tz_dtype_mismatch_raises(self):
120+
arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
121+
with pytest.raises(TypeError, match='data is already tz-aware'):
122+
sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC"))
123+
124+
def test_tz_dtype_matches(self):
125+
arr = DatetimeArray._from_sequence(['2000'], tz='US/Central')
126+
result, _, _ = sequence_to_dt64ns(
127+
arr, dtype=DatetimeTZDtype(tz="US/Central"))
128+
tm.assert_numpy_array_equal(arr._data, result)

pandas/tests/arrays/test_timedeltas.py

+16
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,22 @@
99

1010

1111
class TestTimedeltaArrayConstructor(object):
12+
def test_other_type_raises(self):
13+
with pytest.raises(TypeError,
14+
match="dtype bool cannot be converted"):
15+
TimedeltaArray(np.array([1, 2, 3], dtype='bool'))
16+
17+
def test_incorrect_dtype_raises(self):
18+
# TODO: why TypeError for 'category' but ValueError for i8?
19+
with pytest.raises(TypeError,
20+
match='data type "category" not understood'):
21+
TimedeltaArray(np.array([1, 2, 3], dtype='i8'), dtype='category')
22+
23+
with pytest.raises(ValueError,
24+
match=r"Only timedelta64\[ns\] dtype is valid"):
25+
TimedeltaArray(np.array([1, 2, 3], dtype='i8'),
26+
dtype=np.dtype(int))
27+
1228
def test_copy(self):
1329
data = np.array([1, 2, 3], dtype='m8[ns]')
1430
arr = TimedeltaArray(data, copy=False)

pandas/tests/dtypes/test_common.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -340,8 +340,8 @@ def test_is_datetime64_any_dtype():
340340
assert com.is_datetime64_any_dtype(np.datetime64)
341341
assert com.is_datetime64_any_dtype(np.array([], dtype=np.datetime64))
342342
assert com.is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern"))
343-
assert com.is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3],
344-
dtype=np.datetime64))
343+
assert com.is_datetime64_any_dtype(
344+
pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
345345

346346

347347
def test_is_datetime64_ns_dtype():
@@ -356,8 +356,8 @@ def test_is_datetime64_ns_dtype():
356356
assert not com.is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]"))
357357

358358
assert com.is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern"))
359-
assert com.is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3],
360-
dtype=np.datetime64))
359+
assert com.is_datetime64_ns_dtype(
360+
pd.DatetimeIndex([1, 2, 3], dtype=np.dtype('datetime64[ns]')))
361361

362362

363363
def test_is_timedelta64_ns_dtype():

0 commit comments

Comments
 (0)