pandas-dev · ucals · Mar 20, 2017 · Mar 24, 2017 · Mar 26, 2017 · Mar 26, 2017
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -54,7 +54,7 @@ Backwards incompatible API changes
 
 Other API Changes
 ^^^^^^^^^^^^^^^^^
-
+- ``Series`` and ``Index`` constructors now raise when the data is incompatible with a passed ``dtype=`` kwarg (:issue:`15832`)
 - Moved definition of ``MergeError`` to the ``pandas.errors`` module.
 
 

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -16,6 +16,7 @@
                      is_timedelta64_dtype, is_dtype_equal,
                      is_float_dtype, is_complex_dtype,
                      is_integer_dtype,
+                     is_unsigned_integer_dtype,
                      is_datetime_or_timedelta_dtype,
                      is_bool_dtype, is_scalar,
                      _string_dtypes,
@@ -1026,3 +1027,56 @@ def find_common_type(types):
             return np.object
 
     return np.find_common_type(types, [])
+
+
+def maybe_cast_to_integer_array(arr, dtype, copy=False):
+    """
+    Cast ``arr`` to ``dtype``, raising when the data is incompatible with
+    the requested integer/unsigned integer dtype.
+
+    .. versionadded:: 0.21.0
+
+    Parameters
+    ----------
+    arr : ndarray
+    dtype : np.dtype
+    copy : boolean, default False
+
+    Returns
+    -------
+    ndarray of the requested ``dtype`` (never None)
+
+    Raises
+    ------
+    OverflowError
+        * If negative values are coerced to an unsigned integer dtype
+    ValueError
+        * If coercion from float/object to integer changes the values
+
+    Examples
+    --------
+    If you try to coerce negative values to unsigned integers, it raises:
+
+    >>> Series([-1], dtype='uint64')
+    Traceback (most recent call last):
+        ...
+    OverflowError: Trying to coerce negative values to unsigned integers
+
+    Also, if you try to coerce float values to integers, it raises:
+
+    >>> Series([1, 2, 3.5], dtype='int64')
+    Traceback (most recent call last):
+        ...
+    ValueError: Trying to coerce float values to integers
+    """
+    casted = arr.astype(dtype, copy=copy)
+    if np.array(arr == casted).all():
+        return casted
+
+    if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
+        raise OverflowError("Trying to coerce negative values to unsigned "
+                            "integers")
+    if is_integer_dtype(dtype) and (is_float_dtype(arr) or
+                                    is_object_dtype(arr)):
+        raise ValueError("Trying to coerce float values to integers")
+    return casted  # lossy but not rejected above; never return None
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -15,6 +15,7 @@
 
 from pandas.core.dtypes.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex
 from pandas.core.dtypes.missing import isnull, array_equivalent
+from pandas.core.dtypes.cast import maybe_cast_to_integer_array
 from pandas.core.dtypes.common import (
     _ensure_int64,
     _ensure_object,
@@ -212,11 +213,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
                     if is_integer_dtype(dtype):
                         inferred = lib.infer_dtype(data)
                         if inferred == 'integer':
-                            data = np.array(data, copy=copy, dtype=dtype)
+                            data = maybe_cast_to_integer_array(data, dtype,
+                                                               copy=copy)
                         elif inferred in ['floating', 'mixed-integer-float']:
                             if isnull(data).any():
                                 raise ValueError('cannot convert float '
                                                  'NaN to integer')
+                            if inferred == 'mixed-integer-float':
+                                maybe_cast_to_integer_array(data, dtype)
 
                             # If we are actually all equal to integers,
                             # then coerce to integer.
@@ -246,7 +250,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
 
                 except (TypeError, ValueError) as e:
                     msg = str(e)
-                    if 'cannot convert float' in msg:
+                    if ('cannot convert float' in msg or
+                            'Trying to coerce float values to integer' in msg):
                         raise
 
             # maybe coerce to a sub-class

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -35,7 +35,8 @@
 from pandas.core.dtypes.cast import (
     maybe_upcast, infer_dtype_from_scalar,
     maybe_convert_platform,
-    maybe_cast_to_datetime, maybe_castable)
+    maybe_cast_to_datetime, maybe_castable,
+    maybe_cast_to_integer_array)
 from pandas.core.dtypes.missing import isnull, notnull
 
 from pandas.core.common import (is_bool_indexer,
@@ -2941,9 +2942,13 @@ def _try_cast(arr, take_fast_path):
                 return arr
 
         try:
+            if is_float_dtype(dtype) or is_integer_dtype(dtype):
+                subarr = maybe_cast_to_integer_array(np.asarray(arr), dtype)
+
             subarr = maybe_cast_to_datetime(arr, dtype)
             if not is_extension_type(subarr):
                 subarr = np.array(subarr, dtype=dtype, copy=copy)
+
         except (ValueError, TypeError):
             if is_categorical_dtype(dtype):
                 subarr = Categorical(arr)

diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
@@ -304,6 +304,20 @@ def test_astype(self):
             i = Float64Index([0, 1.1, np.NAN])
             pytest.raises(ValueError, lambda: i.astype(dtype))
 
+    @pytest.mark.parametrize("int_dtype", ['uint8', 'uint16', 'uint32',
+                                          'uint64', 'int32', 'int64', 'int16',
+                                          'int8'])
+    @pytest.mark.parametrize("float_dtype", ['float16', 'float32'])
+    def test_type_coercion(self, int_dtype, float_dtype):
+        # GH 15832: an integer dtype must reject data that holds a
+        # non-integral float (3.5) with a ValueError.
+        msg = 'Trying to coerce float values to integers'
+        with tm.assert_raises_regex(ValueError, msg):
+            Index([1, 2, 3.5], dtype=int_dtype)
+        # The same data is accepted when a float dtype is requested.
+        i = Index([1, 2, 3.5], dtype=float_dtype)
+        tm.assert_index_equal(i, Index([1, 2, 3.5]))
+
     def test_equals_numeric(self):
 
         i = Float64Index([1.0, 2.0])
@@ -678,6 +692,13 @@ def test_constructor_corner(self):
         with tm.assert_raises_regex(TypeError, 'casting'):
             Int64Index(arr_with_floats)
 
+    @pytest.mark.parametrize("uints", ['uint8', 'uint16', 'uint32', 'uint64'])
+    def test_constructor_overflow_coercion_signed_to_unsigned(self, uints):
+        # GH 15832: negative data with an unsigned dtype raises OverflowError
+        msg = 'Trying to coerce negative values to unsigned integers'
+        with tm.assert_raises_regex(OverflowError, msg):
+            Index([-1], dtype=uints)
+
     def test_coerce_list(self):
         # coerce things
         arr = Index([1, 2, 3, 4])

diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
@@ -2080,7 +2080,7 @@ def test_table_values_dtypes_roundtrip(self):
             assert df1.dtypes[0] == 'float32'
 
             # check with mixed dtypes
-            df1 = DataFrame(dict([(c, Series(np.random.randn(5), dtype=c))
+            df1 = DataFrame(dict([(c, Series(np.random.randn(5).astype(c)))
                                   for c in ['float32', 'float64', 'int32',
                                             'int64', 'int16', 'int8']]))
             df1['string'] = 'foo'
@@ -2094,7 +2094,8 @@ def test_table_values_dtypes_roundtrip(self):
             result = store.select('df_mixed_dtypes1').get_dtype_counts()
             expected = Series({'float32': 2, 'float64': 1, 'int32': 1,
                                'bool': 1, 'int16': 1, 'int8': 1,
-                               'int64': 1, 'object': 1, 'datetime64[ns]': 2})
+                               'int64': 1, 'object': 1,
+                               'datetime64[ns]': 2})
             result = result.sort_index()
             result = expected.sort_index()
             tm.assert_series_equal(result, expected)

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -1,30 +1,26 @@
 # coding=utf-8
 # pylint: disable-msg=E1101,W0612
 
-import pytest
-
 from datetime import datetime, timedelta
 
-from numpy import nan
 import numpy as np
 import numpy.ma as ma
 import pandas as pd
-
-from pandas.core.dtypes.common import (
-    is_categorical_dtype,
-    is_datetime64tz_dtype)
+import pytest
+from numpy import nan
 from pandas import (Index, Series, isnull, date_range,
                     NaT, period_range, MultiIndex, IntervalIndex)
-from pandas.core.indexes.datetimes import Timestamp, DatetimeIndex
+from pandas import compat
+from pandas.compat import lrange, range, zip, OrderedDict, long
 
+import pandas.util.testing as tm
 from pandas._libs import lib
 from pandas._libs.tslib import iNaT
-
-from pandas.compat import lrange, range, zip, OrderedDict, long
-from pandas import compat
+from pandas.core.dtypes.common import (
+    is_categorical_dtype,
+    is_datetime64tz_dtype)
+from pandas.core.indexes.datetimes import Timestamp, DatetimeIndex
 from pandas.util.testing import assert_series_equal
-import pandas.util.testing as tm
-
 from .common import TestData
 
 
@@ -301,12 +297,35 @@ def test_constructor_pass_nan_nat(self):
         tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp)
 
     def test_constructor_cast(self):
-        pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float)
+        msg = "could not convert string to float"
+        with tm.assert_raises_regex(ValueError, msg):
+            Series(['a', 'b', 'c'], dtype=float)  # non-numeric strings
+
+    @pytest.mark.parametrize("unsigned_integers", ['uint8', 'uint16', 'uint32',
+                                                   'uint64'])
+    def test_constructor_unsigned_dtype_overflow(self, unsigned_integers):
+        # GH 15832: negative data with an unsigned dtype raises OverflowError
+        msg = 'Trying to coerce negative values to unsigned integers'
+        with tm.assert_raises_regex(OverflowError, msg):
+            Series([-1], dtype=unsigned_integers)
+
+    @pytest.mark.parametrize("integers", ['uint8', 'uint16', 'uint32',
+                                          'uint64', 'int32', 'int64', 'int16',
+                                          'int8'])
+    @pytest.mark.parametrize("floats", ['float16', 'float32'])
+    def test_constructor_coerce_float_fail(self, integers, floats):
+        # GH 15832: an integer dtype rejects a non-integral float (3.5)
+        msg = 'Trying to coerce float values to integers'
+        with tm.assert_raises_regex(ValueError, msg):
+            Series([1, 2, 3.5], dtype=integers)
+        # The same data is accepted when a float dtype is requested.
+        s = Series([1, 2, 3.5], dtype=floats)
+        expected = Series([1, 2, 3.5]).astype(floats)
+        assert_series_equal(s, expected)
 
     def test_constructor_dtype_nocast(self):
         # 1572
         s = Series([1, 2, 3])
-
         s2 = Series(s, dtype=np.int64)
 
         s2[1] = 5