Skip to content

Commit e895c12

Browse files
committed
API/BUG: Raise when int-dtype coercions fail
Related to the Index and Series constructors. Closes gh-15832.
1 parent ab668b0 commit e895c12

File tree

8 files changed

+133
-20
lines changed

8 files changed

+133
-20
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ Datetimelike API Changes
3636
Other API Changes
3737
^^^^^^^^^^^^^^^^^
3838

39+
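- :class:`Series` and :class:`Index` constructors now raise when the data cannot be losslessly coerced to the specified integer dtype, e.g. float values with a fractional part or negative values for an unsigned dtype (:issue:`15832`)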
3940
-
4041
-
4142
-

pandas/core/dtypes/cast.py

+62
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
is_dtype_equal,
2121
is_float_dtype, is_complex_dtype,
2222
is_integer_dtype,
23+
is_unsigned_integer_dtype,
2324
is_datetime_or_timedelta_dtype,
2425
is_bool_dtype, is_scalar,
2526
is_string_dtype, _string_dtypes,
@@ -1269,3 +1270,64 @@ def construct_1d_ndarray_preserving_na(values, dtype=None, copy=False):
12691270
subarr = subarr2
12701271

12711272
return subarr
1273+
1274+
1275+
def maybe_cast_to_integer_array(arr, dtype, copy=False):
    """
    Cast an array to ``dtype``, raising when the data is incompatible
    with an integer / unsigned integer target dtype.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    arr : ndarray
        The array to cast. It must provide ``astype`` and support
        elementwise ``==`` / ``<`` comparisons.
    dtype : str, np.dtype
        The integer dtype to cast the array to.
    copy : boolean, default False
        Whether to make a copy of the array before returning.

    Returns
    -------
    int_arr : ndarray
        The result of ``arr.astype(dtype)``. It is returned when every
        element compares equal to the original after casting, and also
        when ``dtype`` is not an integer dtype (no integer validation is
        performed in that case). ``None`` is never returned.

    Raises
    ------
    OverflowError
        If the values do not fit in the target integer dtype: negative
        values for an unsigned dtype, or integer values that change when
        casted to ``dtype``.
    ValueError
        If float or object data would change when casted to an integer
        dtype.

    Examples
    --------
    If you try to coerce negative values to unsigned integers, it raises:

    >>> Series([-1], dtype="uint64")
    Traceback (most recent call last):
        ...
    OverflowError: Trying to coerce negative values to unsigned integers

    Also, if you try to coerce float values to integers, it raises:

    >>> Series([1, 2, 3.5], dtype="int64")
    Traceback (most recent call last):
        ...
    ValueError: Trying to coerce float values to integers
    """

    overflow_msg = ("The elements provided in the data cannot all be "
                    "casted to the dtype {dtype}".format(dtype=dtype))

    try:
        casted = arr.astype(dtype, copy=copy)
    except OverflowError:
        # Re-raise with a more user-friendly message.
        raise OverflowError(overflow_msg)

    # Wrapping in np.array guards against the comparison collapsing
    # to a scalar boolean.
    if np.array(arr == casted).all():
        return casted

    if not is_integer_dtype(dtype):
        # Only integer targets are validated here; hand back the plain
        # cast instead of implicitly returning None.
        return casted

    if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
        raise OverflowError("Trying to coerce negative values "
                            "to unsigned integers")

    if is_float_dtype(arr) or is_object_dtype(arr):
        raise ValueError("Trying to coerce float values to integers")

    if is_integer_dtype(arr):
        # astype wraps around silently for integer -> integer casts, so
        # the values changed without numpy raising. Report it the same
        # way as an overflow raised by numpy itself.
        raise OverflowError(overflow_msg)

    # Any other source dtype: keep the lenient behaviour, but return the
    # casted array explicitly.
    return casted

pandas/core/indexes/base.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
ABCPeriodIndex, ABCTimedeltaIndex,
2222
ABCDateOffset)
2323
from pandas.core.dtypes.missing import isna, array_equivalent
24+
from pandas.core.dtypes.cast import maybe_cast_to_integer_array
2425
from pandas.core.dtypes.common import (
2526
_ensure_int64,
2627
_ensure_object,
@@ -309,19 +310,16 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
309310
if is_integer_dtype(dtype):
310311
inferred = lib.infer_dtype(data)
311312
if inferred == 'integer':
312-
try:
313-
data = np.array(data, copy=copy, dtype=dtype)
314-
except OverflowError:
315-
# gh-15823: a more user-friendly error message
316-
raise OverflowError(
317-
"the elements provided in the data cannot "
318-
"all be casted to the dtype {dtype}"
319-
.format(dtype=dtype))
313+
data = maybe_cast_to_integer_array(data, dtype,
314+
copy=copy)
320315
elif inferred in ['floating', 'mixed-integer-float']:
321316
if isna(data).any():
322317
raise ValueError('cannot convert float '
323318
'NaN to integer')
324319

320+
if inferred == "mixed-integer-float":
321+
maybe_cast_to_integer_array(data, dtype)
322+
325323
# If we are actually all equal to integers,
326324
# then coerce to integer.
327325
try:
@@ -350,7 +348,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
350348

351349
except (TypeError, ValueError) as e:
352350
msg = str(e)
353-
if 'cannot convert float' in msg:
351+
if ("cannot convert float" in msg or
352+
"Trying to coerce float values to integer" in msg):
354353
raise
355354

356355
# maybe coerce to a sub-class

pandas/core/series.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@
4141
maybe_cast_to_datetime, maybe_castable,
4242
construct_1d_arraylike_from_scalar,
4343
construct_1d_ndarray_preserving_na,
44-
construct_1d_object_array_from_listlike)
44+
construct_1d_object_array_from_listlike,
45+
maybe_cast_to_integer_array)
4546
from pandas.core.dtypes.missing import (
4647
isna,
4748
notna,
@@ -4067,6 +4068,9 @@ def _try_cast(arr, take_fast_path):
40674068
return arr
40684069

40694070
try:
4071+
if is_float_dtype(dtype) or is_integer_dtype(dtype):
4072+
subarr = maybe_cast_to_integer_array(np.asarray(arr), dtype)
4073+
40704074
subarr = maybe_cast_to_datetime(arr, dtype)
40714075
# Take care in creating object arrays (but iterators are not
40724076
# supported):

pandas/tests/generic/test_generic.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -199,11 +199,11 @@ def test_downcast(self):
199199
self._compare(result, expected)
200200

201201
def test_constructor_compound_dtypes(self):
202-
# GH 5191
203-
# compound dtypes should raise not-implementederror
202+
# see gh-5191
203+
# Compound dtypes should raise NotImplementedError.
204204

205205
def f(dtype):
206-
return self._construct(shape=3, dtype=dtype)
206+
return self._construct(shape=3, value=1, dtype=dtype)
207207

208208
pytest.raises(NotImplementedError, f, [("A", "datetime64[h]"),
209209
("B", "str"),
@@ -534,14 +534,14 @@ def test_truncate_out_of_bounds(self):
534534

535535
# small
536536
shape = [int(2e3)] + ([1] * (self._ndim - 1))
537-
small = self._construct(shape, dtype='int8')
537+
small = self._construct(shape, dtype='int8', value=1)
538538
self._compare(small.truncate(), small)
539539
self._compare(small.truncate(before=0, after=3e3), small)
540540
self._compare(small.truncate(before=-1, after=2e3), small)
541541

542542
# big
543543
shape = [int(2e6)] + ([1] * (self._ndim - 1))
544-
big = self._construct(shape, dtype='int8')
544+
big = self._construct(shape, dtype='int8', value=1)
545545
self._compare(big.truncate(), big)
546546
self._compare(big.truncate(before=0, after=3e6), big)
547547
self._compare(big.truncate(before=-1, after=2e6), big)

pandas/tests/indexes/test_base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,7 @@ def test_constructor_nonhashable_name(self, indices):
476476

477477
def test_constructor_overflow_int64(self):
478478
# see gh-15832
479-
msg = ("the elements provided in the data cannot "
479+
msg = ("The elements provided in the data cannot "
480480
"all be casted to the dtype int64")
481481
with tm.assert_raises_regex(OverflowError, msg):
482482
Index([np.iinfo(np.uint64).max - 1], dtype="int64")

pandas/tests/indexes/test_numeric.py

+24
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,20 @@ def test_astype(self):
451451
i = Float64Index([0, 1.1, np.NAN])
452452
pytest.raises(ValueError, lambda: i.astype(dtype))
453453

454+
@pytest.mark.parametrize("int_dtype", ["uint8", "uint16", "uint32",
455+
"uint64", "int32", "int64",
456+
"int16", "int8"])
457+
@pytest.mark.parametrize("float_dtype", ["float16", "float32"])
458+
def test_type_coercion(self, int_dtype, float_dtype):
459+
460+
# see gh-15832
461+
msg = "Trying to coerce float values to integers"
462+
with tm.assert_raises_regex(ValueError, msg):
463+
Index([1, 2, 3.5], dtype=int_dtype)
464+
465+
i = Index([1, 2, 3.5], dtype=float_dtype)
466+
tm.assert_index_equal(i, Index([1, 2, 3.5]))
467+
454468
def test_equals_numeric(self):
455469

456470
i = Float64Index([1.0, 2.0])
@@ -862,6 +876,16 @@ def test_constructor_corner(self):
862876
with tm.assert_raises_regex(TypeError, 'casting'):
863877
Int64Index(arr_with_floats)
864878

879+
@pytest.mark.parametrize("uint_dtype", ["uint8", "uint16",
880+
"uint32", "uint64"])
881+
def test_constructor_coercion_signed_to_unsigned(self, uint_dtype):
882+
883+
# see gh-15832
884+
msg = "Trying to coerce negative values to unsigned integers"
885+
886+
with tm.assert_raises_regex(OverflowError, msg):
887+
Index([-1], dtype=uint_dtype)
888+
865889
def test_coerce_list(self):
866890
# coerce things
867891
arr = Index([1, 2, 3, 4])

pandas/tests/series/test_constructors.py

+27-4
Original file line numberDiff line numberDiff line change
@@ -542,12 +542,35 @@ def test_constructor_pass_nan_nat(self):
542542
tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp)
543543

544544
def test_constructor_cast(self):
545-
pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float)
545+
msg = "could not convert string to float"
546+
with tm.assert_raises_regex(ValueError, msg):
547+
Series(["a", "b", "c"], dtype=float)
548+
549+
@pytest.mark.parametrize("uint_dtype", ["uint8", "uint16",
550+
"uint32", "uint64"])
551+
def test_constructor_unsigned_dtype_overflow(self, uint_dtype):
552+
# see gh-15832
553+
msg = 'Trying to coerce negative values to unsigned integers'
554+
with tm.assert_raises_regex(OverflowError, msg):
555+
Series([-1], dtype=uint_dtype)
556+
557+
@pytest.mark.parametrize("int_dtype", ["uint8", "uint16", "uint32",
558+
"uint64", "int32", "int64",
559+
"int16", "int8"])
560+
@pytest.mark.parametrize("float_dtype", ["float16", "float32"])
561+
def test_constructor_coerce_float_fail(self, int_dtype, float_dtype):
562+
# see gh-15832
563+
msg = "Trying to coerce float values to integers"
564+
with tm.assert_raises_regex(ValueError, msg):
565+
Series([1, 2, 3.5], dtype=int_dtype)
566+
567+
s = Series([1, 2, 3.5], dtype=float_dtype)
568+
expected = Series([1, 2, 3.5]).astype(float_dtype)
569+
assert_series_equal(s, expected)
546570

547-
def test_constructor_dtype_nocast(self):
548-
# 1572
571+
def test_constructor_dtype_no_cast(self):
572+
# see gh-1572
549573
s = Series([1, 2, 3])
550-
551574
s2 = Series(s, dtype=np.int64)
552575

553576
s2[1] = 5

0 commit comments

Comments
 (0)