Skip to content

Commit 61ca022

Browse files
analyticalmonk authored and jreback committed
BUG: raise for invalid dtypes per issue #15520
closes #15520 Author: Akash Tandon <[email protected]> Author: root <[email protected]> Author: analyticalmonk <[email protected]> Author: Akash Tandon <[email protected]> Closes #16047 from analyticalmonk/patch_for_15520 and squashes the following commits: 3646eb6 [analyticalmonk] TST: check for invalid dtype for Series constructor per GH15520 73d980a [Akash Tandon] Merge branch 'master' into patch_for_15520 b3c2fbb [root] BUG: Added 'O' to pandas_dtype's valid list c3699fb [root] DOC: added whatsnew entry for PR#16047 addressing GH15520 fbed5a6 [Akash Tandon] TST: Added list to invalid dtype ad9f345 [Akash Tandon] CLN: refactored code related to issue GH15520 a358181 [Akash Tandon] BUG: Added numpy.dtype_ to valid pandas_dtype() type list 3eaa432 [Akash Tandon] TST: Added numpy.object_ dtype to valid pandas_dtype list f858726 [Akash Tandon] style fix d4971cd [Akash Tandon] BUG: pandas_dtype() to raise error for invalid dtype per GH15520 ee0030f [Akash Tandon] TST: added more test-cases for pandas_dtype() test 3700259 [Akash Tandon] CLN: Replace _coerce_to_dtype() with pandas_dtype() c10e1d4 [Akash Tandon] TST: maintain list containing dtypes in TestPandasDtype fecba12 [Akash Tandon] BUG: Raise when invalid dtype passed to pandas_dtype 99fb660 [Akash Tandon] TST: wrote test representing bug fix result for #15520
1 parent b8d9861 commit 61ca022

File tree

8 files changed

+57
-18
lines changed

8 files changed

+57
-18
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1605,6 +1605,7 @@ Conversion
16051605
- Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`)
16061606
- Bug in ``Index.to_series()`` where the index was not copied (and so mutating later would change the original), (:issue:`15949`)
16071607
- Bug in indexing with partial string indexing with a len-1 DataFrame (:issue:`16071`)
1608+
- Bug in ``Series`` construction where passing an invalid dtype didn't raise an error. (:issue:`15520`)
16081609

16091610
Indexing
16101611
^^^^^^^^

pandas/core/dtypes/cast.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
is_datetime_or_timedelta_dtype,
2020
is_bool_dtype, is_scalar,
2121
_string_dtypes,
22-
_coerce_to_dtype,
22+
pandas_dtype,
2323
_ensure_int8, _ensure_int16,
2424
_ensure_int32, _ensure_int64,
2525
_NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
@@ -576,7 +576,7 @@ def astype_nansafe(arr, dtype, copy=True):
576576
""" return a view if copy is False, but
577577
need to be very careful as the result shape could change! """
578578
if not isinstance(dtype, np.dtype):
579-
dtype = _coerce_to_dtype(dtype)
579+
dtype = pandas_dtype(dtype)
580580

581581
if issubclass(dtype.type, text_type):
582582
# in Py3 that's str, in Py2 that's unicode

pandas/core/dtypes/common.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -788,4 +788,19 @@ def pandas_dtype(dtype):
788788
elif isinstance(dtype, ExtensionDtype):
789789
return dtype
790790

791-
return np.dtype(dtype)
791+
try:
792+
npdtype = np.dtype(dtype)
793+
except (TypeError, ValueError):
794+
raise
795+
796+
# Any invalid dtype (such as pd.Timestamp) should raise an error.
797+
# np.dtype(invalid_type).kind == 'O' for such objects. However, this will
798+
# also catch some valid dtypes such as object, np.object_ and 'object'
799+
# which we safeguard against by catching them earlier and returning
800+
# np.dtype(valid_dtype) before this condition is evaluated.
801+
if dtype in [object, np.object_, 'object', 'O']:
802+
return npdtype
803+
elif npdtype.kind == 'O':
804+
raise TypeError('dtype {0} not understood'.format(dtype))
805+
806+
return npdtype

pandas/core/generic.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
from pandas._libs import tslib, lib
1313
from pandas.core.dtypes.common import (
14-
_coerce_to_dtype,
1514
_ensure_int64,
1615
needs_i8_conversion,
1716
is_scalar,
@@ -23,7 +22,8 @@
2322
is_datetime64tz_dtype,
2423
is_list_like,
2524
is_dict_like,
26-
is_re_compilable)
25+
is_re_compilable,
26+
pandas_dtype)
2727
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
2828
from pandas.core.dtypes.missing import isnull, notnull
2929
from pandas.core.dtypes.generic import ABCSeries, ABCPanel
@@ -170,13 +170,14 @@ def _validate_dtype(self, dtype):
170170
""" validate the passed dtype """
171171

172172
if dtype is not None:
173-
dtype = _coerce_to_dtype(dtype)
173+
dtype = pandas_dtype(dtype)
174174

175175
# a compound dtype
176176
if dtype.kind == 'V':
177177
raise NotImplementedError("compound dtypes are not implemented"
178178
"in the {0} constructor"
179179
.format(self.__class__.__name__))
180+
180181
return dtype
181182

182183
def _init_mgr(self, mgr, axes=None, dtype=None, copy=False):

pandas/core/series.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import numpy.ma as ma
1515

1616
from pandas.core.dtypes.common import (
17-
_coerce_to_dtype, is_categorical_dtype,
17+
is_categorical_dtype,
1818
is_bool,
1919
is_integer, is_integer_dtype,
2020
is_float_dtype,
@@ -28,7 +28,8 @@
2828
is_dict_like,
2929
is_scalar,
3030
_is_unorderable_exception,
31-
_ensure_platform_int)
31+
_ensure_platform_int,
32+
pandas_dtype)
3233
from pandas.core.dtypes.generic import ABCSparseArray, ABCDataFrame
3334
from pandas.core.dtypes.cast import (
3435
maybe_upcast, infer_dtype_from_scalar,
@@ -2872,7 +2873,7 @@ def _sanitize_array(data, index, dtype=None, copy=False,
28722873
"""
28732874

28742875
if dtype is not None:
2875-
dtype = _coerce_to_dtype(dtype)
2876+
dtype = pandas_dtype(dtype)
28762877

28772878
if isinstance(data, ma.MaskedArray):
28782879
mask = ma.getmaskarray(data)

pandas/tests/dtypes/test_common.py

+15
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import pytest
44
import numpy as np
5+
import pandas as pd
56

67
from pandas.core.dtypes.dtypes import (
78
DatetimeTZDtype, PeriodDtype, CategoricalDtype)
@@ -13,6 +14,20 @@
1314

1415
class TestPandasDtype(tm.TestCase):
1516

17+
# Passing an invalid dtype, either as a string or as an object, must raise TypeError
18+
# Per issue GH15520
19+
def test_invalid_dtype_error(self):
20+
msg = 'not understood'
21+
invalid_list = [pd.Timestamp, 'pd.Timestamp', list]
22+
for dtype in invalid_list:
23+
with tm.assertRaisesRegexp(TypeError, msg):
24+
pandas_dtype(dtype)
25+
26+
valid_list = [object, 'float64', np.object_, np.dtype('object'), 'O',
27+
np.float64, float, np.dtype('float64')]
28+
for dtype in valid_list:
29+
pandas_dtype(dtype)
30+
1631
def test_numpy_dtype(self):
1732
for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']:
1833
self.assertEqual(pandas_dtype(dtype), np.dtype(dtype))

pandas/tests/series/test_constructors.py

+8
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@
3030

3131
class TestSeriesConstructors(TestData, tm.TestCase):
3232

33+
def test_invalid_dtype(self):
34+
# GH15520
35+
msg = 'not understood'
36+
invalid_list = [pd.Timestamp, 'pd.Timestamp', list]
37+
for dtype in invalid_list:
38+
with tm.assertRaisesRegexp(TypeError, msg):
39+
Series([], name='time', dtype=dtype)
40+
3341
def test_scalar_conversion(self):
3442

3543
# Pass in scalar is disabled

pandas/tests/test_strings.py

+7-9
Original file line numberDiff line numberDiff line change
@@ -1208,10 +1208,9 @@ def test_extractall_same_as_extract_subject_index(self):
12081208
tm.assert_frame_equal(extract_one_noname, no_match_index)
12091209

12101210
def test_empty_str_methods(self):
1211-
empty_str = empty = Series(dtype=str)
1211+
empty_str = empty = Series(dtype=object)
12121212
empty_int = Series(dtype=int)
12131213
empty_bool = Series(dtype=bool)
1214-
empty_list = Series(dtype=list)
12151214
empty_bytes = Series(dtype=object)
12161215

12171216
# GH7241
@@ -1242,25 +1241,24 @@ def test_empty_str_methods(self):
12421241
DataFrame(columns=[0, 1], dtype=str),
12431242
empty.str.extract('()()', expand=False))
12441243
tm.assert_frame_equal(DataFrame(dtype=str), empty.str.get_dummies())
1245-
tm.assert_series_equal(empty_str, empty_list.str.join(''))
1244+
tm.assert_series_equal(empty_str, empty_str.str.join(''))
12461245
tm.assert_series_equal(empty_int, empty.str.len())
1247-
tm.assert_series_equal(empty_list, empty_list.str.findall('a'))
1246+
tm.assert_series_equal(empty_str, empty_str.str.findall('a'))
12481247
tm.assert_series_equal(empty_int, empty.str.find('a'))
12491248
tm.assert_series_equal(empty_int, empty.str.rfind('a'))
12501249
tm.assert_series_equal(empty_str, empty.str.pad(42))
12511250
tm.assert_series_equal(empty_str, empty.str.center(42))
1252-
tm.assert_series_equal(empty_list, empty.str.split('a'))
1253-
tm.assert_series_equal(empty_list, empty.str.rsplit('a'))
1254-
tm.assert_series_equal(empty_list,
1251+
tm.assert_series_equal(empty_str, empty.str.split('a'))
1252+
tm.assert_series_equal(empty_str, empty.str.rsplit('a'))
1253+
tm.assert_series_equal(empty_str,
12551254
empty.str.partition('a', expand=False))
1256-
tm.assert_series_equal(empty_list,
1255+
tm.assert_series_equal(empty_str,
12571256
empty.str.rpartition('a', expand=False))
12581257
tm.assert_series_equal(empty_str, empty.str.slice(stop=1))
12591258
tm.assert_series_equal(empty_str, empty.str.slice(step=1))
12601259
tm.assert_series_equal(empty_str, empty.str.strip())
12611260
tm.assert_series_equal(empty_str, empty.str.lstrip())
12621261
tm.assert_series_equal(empty_str, empty.str.rstrip())
1263-
tm.assert_series_equal(empty_str, empty.str.rstrip())
12641262
tm.assert_series_equal(empty_str, empty.str.wrap(42))
12651263
tm.assert_series_equal(empty_str, empty.str.get(0))
12661264
tm.assert_series_equal(empty_str, empty_bytes.str.decode('ascii'))

0 commit comments

Comments
 (0)