Skip to content

Commit b85efa7

Browse files
committed
API/BUG: Raise when int-dtype coercions fail
* Related to the Index and Series constructors. Closes gh-15832. * Add integer dtype fixtures to conftest.py. Can be used for subsequent refactoring.
1 parent bf1c3dc commit b85efa7

File tree

10 files changed

+194
-21
lines changed

10 files changed

+194
-21
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ Other API Changes
7272
^^^^^^^^^^^^^^^^^
7373

7474
- :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`)
75+
- ``Series`` and ``Index`` constructors now raise when the data is incompatible with a passed ``dtype=`` (:issue:`15832`)
7576
-
7677
-
7778

pandas/conftest.py

+63
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,66 @@ def string_dtype(request):
170170
* 'U'
171171
"""
172172
return request.param
173+
174+
175+
@pytest.fixture(params=["float32", "float64"])
176+
def float_dtype(request):
177+
"""
178+
Parameterized fixture for float dtypes.
179+
180+
* float32
181+
* float64
182+
"""
183+
184+
return request.param
185+
186+
187+
UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
188+
SIGNED_INT_DTYPES = ["int8", "int16", "int32", "int64"]
189+
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
190+
191+
192+
@pytest.fixture(params=SIGNED_INT_DTYPES)
193+
def sint_dtype(request):
194+
"""
195+
Parameterized fixture for signed integer dtypes.
196+
197+
* int8
198+
* int16
199+
* int32
200+
* int64
201+
"""
202+
203+
return request.param
204+
205+
206+
@pytest.fixture(params=UNSIGNED_INT_DTYPES)
207+
def uint_dtype(request):
208+
"""
209+
Parameterized fixture for unsigned integer dtypes.
210+
211+
* uint8
212+
* uint16
213+
* uint32
214+
* uint64
215+
"""
216+
217+
return request.param
218+
219+
220+
@pytest.fixture(params=ALL_INT_DTYPES)
221+
def any_int_dtype(request):
222+
"""
223+
Parameterized fixture for any integer dtypes.
224+
225+
* int8
226+
* uint8
227+
* int16
228+
* uint16
229+
* int32
230+
* uint32
231+
* int64
232+
* uint64
233+
"""
234+
235+
return request.param

pandas/core/dtypes/cast.py

+62
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
is_dtype_equal,
2121
is_float_dtype, is_complex_dtype,
2222
is_integer_dtype,
23+
is_unsigned_integer_dtype,
2324
is_datetime_or_timedelta_dtype,
2425
is_bool_dtype, is_scalar,
2526
is_string_dtype, _string_dtypes,
@@ -1269,3 +1270,64 @@ def construct_1d_ndarray_preserving_na(values, dtype=None, copy=False):
12691270
subarr = subarr2
12701271

12711272
return subarr
1273+
1274+
1275+
def maybe_cast_to_integer_array(arr, dtype, copy=False):
1276+
"""
1277+
Takes an array of any dtype and returns the casted version, raising when data is
1278+
incompatible with integer/unsigned integer dtypes.
1279+
1280+
.. versionadded:: 0.24.0
1281+
1282+
Parameters
1283+
----------
1284+
arr : ndarray
1285+
The array to cast.
1286+
dtype : str, np.dtype
1287+
The integer dtype to cast the array to.
1288+
copy : bool, default False
1289+
Whether to make a copy of the array before returning.
1290+
1291+
Returns
1292+
-------
1293+
int_arr : ndarray
1294+
An array of integer or unsigned integer dtype
1295+
1296+
Raises
1297+
------
1298+
OverflowError : the dtype is incompatible with the data
1299+
ValueError : loss of precision has occurred during casting
1300+
1301+
Examples
1302+
--------
1303+
If you try to coerce negative values to unsigned integers, it raises:
1304+
1305+
>>> Series([-1], dtype="uint64")
1306+
Traceback (most recent call last):
1307+
...
1308+
OverflowError: Trying to coerce negative values to unsigned integers
1309+
1310+
Also, if you try to coerce float values to integers, it raises:
1311+
1312+
>>> Series([1, 2, 3.5], dtype="int64")
1313+
Traceback (most recent call last):
1314+
...
1315+
ValueError: Trying to coerce float values to integers
1316+
"""
1317+
1318+
try:
1319+
casted = arr.astype(dtype, copy=copy)
1320+
except OverflowError:
1321+
raise OverflowError("The elements provided in the data cannot all be "
1322+
"casted to the dtype {dtype}".format(dtype=dtype))
1323+
1324+
if np.array_equal(arr, casted):
1325+
return casted
1326+
1327+
if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
1328+
raise OverflowError("Trying to coerce negative values "
1329+
"to unsigned integers")
1330+
1331+
if is_integer_dtype(dtype) and (is_float_dtype(arr) or
1332+
is_object_dtype(arr)):
1333+
raise ValueError("Trying to coerce float values to integers")

pandas/core/indexes/base.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
ABCPeriodIndex, ABCTimedeltaIndex,
2222
ABCDateOffset)
2323
from pandas.core.dtypes.missing import isna, array_equivalent
24+
from pandas.core.dtypes.cast import maybe_cast_to_integer_array
2425
from pandas.core.dtypes.common import (
2526
_ensure_int64,
2627
_ensure_object,
@@ -311,19 +312,16 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
311312
if is_integer_dtype(dtype):
312313
inferred = lib.infer_dtype(data)
313314
if inferred == 'integer':
314-
try:
315-
data = np.array(data, copy=copy, dtype=dtype)
316-
except OverflowError:
317-
# gh-15823: a more user-friendly error message
318-
raise OverflowError(
319-
"the elements provided in the data cannot "
320-
"all be casted to the dtype {dtype}"
321-
.format(dtype=dtype))
315+
data = maybe_cast_to_integer_array(data, dtype,
316+
copy=copy)
322317
elif inferred in ['floating', 'mixed-integer-float']:
323318
if isna(data).any():
324319
raise ValueError('cannot convert float '
325320
'NaN to integer')
326321

322+
if inferred == "mixed-integer-float":
323+
maybe_cast_to_integer_array(data, dtype)
324+
327325
# If we are actually all equal to integers,
328326
# then coerce to integer.
329327
try:
@@ -352,7 +350,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
352350

353351
except (TypeError, ValueError) as e:
354352
msg = str(e)
355-
if 'cannot convert float' in msg:
353+
if ("cannot convert float" in msg or
354+
"Trying to coerce float values to integer" in msg):
356355
raise
357356

358357
# maybe coerce to a sub-class

pandas/core/series.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@
4141
maybe_cast_to_datetime, maybe_castable,
4242
construct_1d_arraylike_from_scalar,
4343
construct_1d_ndarray_preserving_na,
44-
construct_1d_object_array_from_listlike)
44+
construct_1d_object_array_from_listlike,
45+
maybe_cast_to_integer_array)
4546
from pandas.core.dtypes.missing import (
4647
isna,
4748
notna,
@@ -4067,6 +4068,9 @@ def _try_cast(arr, take_fast_path):
40674068
return arr
40684069

40694070
try:
4071+
if is_float_dtype(dtype) or is_integer_dtype(dtype):
4072+
subarr = maybe_cast_to_integer_array(np.asarray(arr), dtype)
4073+
40704074
subarr = maybe_cast_to_datetime(arr, dtype)
40714075
# Take care in creating object arrays (but iterators are not
40724076
# supported):

pandas/tests/generic/test_generic.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -199,11 +199,11 @@ def test_downcast(self):
199199
self._compare(result, expected)
200200

201201
def test_constructor_compound_dtypes(self):
202-
# GH 5191
203-
# compound dtypes should raise not-implementederror
202+
# see gh-5191
203+
# Compound dtypes should raise NotImplementedError.
204204

205205
def f(dtype):
206-
return self._construct(shape=3, dtype=dtype)
206+
return self._construct(shape=3, value=1, dtype=dtype)
207207

208208
pytest.raises(NotImplementedError, f, [("A", "datetime64[h]"),
209209
("B", "str"),
@@ -534,14 +534,14 @@ def test_truncate_out_of_bounds(self):
534534

535535
# small
536536
shape = [int(2e3)] + ([1] * (self._ndim - 1))
537-
small = self._construct(shape, dtype='int8')
537+
small = self._construct(shape, dtype='int8', value=1)
538538
self._compare(small.truncate(), small)
539539
self._compare(small.truncate(before=0, after=3e3), small)
540540
self._compare(small.truncate(before=-1, after=2e3), small)
541541

542542
# big
543543
shape = [int(2e6)] + ([1] * (self._ndim - 1))
544-
big = self._construct(shape, dtype='int8')
544+
big = self._construct(shape, dtype='int8', value=1)
545545
self._compare(big.truncate(), big)
546546
self._compare(big.truncate(before=0, after=3e6), big)
547547
self._compare(big.truncate(before=-1, after=2e6), big)

pandas/tests/indexes/test_base.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -483,11 +483,17 @@ def test_constructor_nonhashable_name(self, indices):
483483

484484
def test_constructor_overflow_int64(self):
485485
# see gh-15832
486-
msg = ("the elements provided in the data cannot "
486+
msg = ("The elements provided in the data cannot "
487487
"all be casted to the dtype int64")
488488
with tm.assert_raises_regex(OverflowError, msg):
489489
Index([np.iinfo(np.uint64).max - 1], dtype="int64")
490490

491+
@pytest.mark.xfail("see gh-21311: Index doesn't enforce dtype argument")
492+
def test_constructor_cast(self):
493+
msg = "could not convert string to float"
494+
with tm.assert_raises_regex(ValueError, msg):
495+
Index(["a", "b", "c"], dtype=float)
496+
491497
def test_view_with_args(self):
492498

493499
restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex',

pandas/tests/indexes/test_numeric.py

+20
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,18 @@ def test_astype(self):
451451
i = Float64Index([0, 1.1, np.NAN])
452452
pytest.raises(ValueError, lambda: i.astype(dtype))
453453

454+
def test_type_coercion_fail(self, any_int_dtype):
455+
# see gh-15832
456+
msg = "Trying to coerce float values to integers"
457+
with tm.assert_raises_regex(ValueError, msg):
458+
Index([1, 2, 3.5], dtype=any_int_dtype)
459+
460+
def test_type_coercion_valid(self, float_dtype):
461+
# There is no Float32Index, so we always
462+
# generate Float64Index.
463+
i = Index([1, 2, 3.5], dtype=float_dtype)
464+
tm.assert_index_equal(i, Index([1, 2, 3.5]))
465+
454466
def test_equals_numeric(self):
455467

456468
i = Float64Index([1.0, 2.0])
@@ -862,6 +874,14 @@ def test_constructor_corner(self):
862874
with tm.assert_raises_regex(TypeError, 'casting'):
863875
Int64Index(arr_with_floats)
864876

877+
def test_constructor_coercion_signed_to_unsigned(self, uint_dtype):
878+
879+
# see gh-15832
880+
msg = "Trying to coerce negative values to unsigned integers"
881+
882+
with tm.assert_raises_regex(OverflowError, msg):
883+
Index([-1], dtype=uint_dtype)
884+
865885
def test_coerce_list(self):
866886
# coerce things
867887
arr = Index([1, 2, 3, 4])

pandas/tests/io/test_pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2047,7 +2047,7 @@ def test_table_values_dtypes_roundtrip(self):
20472047
assert df1.dtypes[0] == 'float32'
20482048

20492049
# check with mixed dtypes
2050-
df1 = DataFrame(dict((c, Series(np.random.randn(5), dtype=c))
2050+
df1 = DataFrame(dict((c, Series(np.random.randint(5), dtype=c))
20512051
for c in ['float32', 'float64', 'int32',
20522052
'int64', 'int16', 'int8']))
20532053
df1['string'] = 'foo'

pandas/tests/series/test_constructors.py

+22-4
Original file line numberDiff line numberDiff line change
@@ -542,12 +542,30 @@ def test_constructor_pass_nan_nat(self):
542542
tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp)
543543

544544
def test_constructor_cast(self):
545-
pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float)
545+
msg = "could not convert string to float"
546+
with tm.assert_raises_regex(ValueError, msg):
547+
Series(["a", "b", "c"], dtype=float)
548+
549+
def test_constructor_unsigned_dtype_overflow(self, uint_dtype):
550+
# see gh-15832
551+
msg = 'Trying to coerce negative values to unsigned integers'
552+
with tm.assert_raises_regex(OverflowError, msg):
553+
Series([-1], dtype=uint_dtype)
554+
555+
def test_constructor_coerce_float_fail(self, any_int_dtype):
556+
# see gh-15832
557+
msg = "Trying to coerce float values to integers"
558+
with tm.assert_raises_regex(ValueError, msg):
559+
Series([1, 2, 3.5], dtype=any_int_dtype)
560+
561+
def test_constructor_coerce_float_valid(self, float_dtype):
562+
s = Series([1, 2, 3.5], dtype=float_dtype)
563+
expected = Series([1, 2, 3.5]).astype(float_dtype)
564+
assert_series_equal(s, expected)
546565

547-
def test_constructor_dtype_nocast(self):
548-
# 1572
566+
def test_constructor_dtype_no_cast(self):
567+
# see gh-1572
549568
s = Series([1, 2, 3])
550-
551569
s2 = Series(s, dtype=np.int64)
552570

553571
s2[1] = 5

0 commit comments

Comments
 (0)