pandas-dev · jreback · Aug 20, 2018 · Jul 26, 2018 · Jul 28, 2018 · Jul 29, 2018
diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
@@ -7,4 +7,4 @@
 from .period import PeriodArrayMixin  # noqa
 from .timedeltas import TimedeltaArrayMixin  # noqa
 from .integer import (  # noqa
-    IntegerArray, to_integer_array)
+    IntegerArray, integer_array)
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -11,6 +11,7 @@
 from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
 from pandas.core.dtypes.common import (
     is_integer, is_scalar, is_float,
+    is_bool_dtype,
     is_float_dtype,
     is_integer_dtype,
     is_object_dtype,
@@ -76,7 +77,7 @@ def construct_from_string(cls, string):
                         "'{}'".format(cls, string))
 
 
-def to_integer_array(values, dtype=None):
+def integer_array(values, dtype=None, copy=False):
     """
     Infer and return an integer array of the values.
 
@@ -85,6 +86,7 @@ def to_integer_array(values, dtype=None):
     values : 1D list-like
     dtype : dtype, optional
         dtype to coerce
+    copy : boolean, default False
 
     Returns
     -------
@@ -94,7 +96,8 @@ def to_integer_array(values, dtype=None):
     ------
     TypeError if incompatible types
     """
-    return IntegerArray(values, dtype=dtype, copy=False)
+    values, mask = coerce_to_array(values, dtype=dtype, copy=copy)
+    return IntegerArray(values, mask)
 
 
 def safe_cast(values, dtype, copy):
@@ -133,6 +136,11 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
     -------
     tuple of (values, mask)
     """
+    # if values is an integer numpy array, preserve its dtype
+    if dtype is None and hasattr(values, 'dtype'):
+        if is_integer_dtype(values.dtype):
+            dtype = values.dtype
+
     if dtype is not None:
         if not issubclass(type(dtype), _IntegerDtype):
             try:
@@ -174,10 +182,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
 
     # infer dtype if needed
     if dtype is None:
-        if is_integer_dtype(values):
-            dtype = values.dtype
-        else:
-            dtype = np.dtype('int64')
+        dtype = np.dtype('int64')
     else:
         dtype = dtype.type
 
@@ -197,47 +202,62 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
 
 class IntegerArray(ExtensionArray, ExtensionOpsMixin):
     """
-    We represent an IntegerArray with 2 numpy arrays
+    Array of integer (optionally missing) values.
+
+    We represent an IntegerArray with 2 numpy arrays:
+
     - data: contains a numpy integer array of the appropriate dtype
-    - mask: a boolean array holding a mask on the data, False is missing
+    - mask: a boolean array holding a mask on the data, True is missing
+
+    To construct an IntegerArray from generic array-like input, use the
+    ``integer_array`` function instead.
+
+    Parameters
+    ----------
+    values : integer 1D numpy array
+    mask : boolean 1D numpy array
+    copy : bool, default False
+
+    Returns
+    -------
+    IntegerArray
+
     """
 
     @cache_readonly
     def dtype(self):
         return _dtypes[str(self._data.dtype)]
 
-    def __init__(self, values, mask=None, dtype=None, copy=False):
-        """
-        Parameters
-        ----------
-        values : 1D list-like / IntegerArray
-        mask : 1D list-like, optional
-        dtype : subclass of _IntegerDtype, optional
-        copy : bool, default False
+    def __init__(self, values, mask, copy=False):
+        if not (isinstance(values, np.ndarray)
+                and is_integer_dtype(values.dtype)):
+            raise TypeError("values should be integer numpy array. Use "
+                            "the 'integer_array' function instead")
+        if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)):
+            raise TypeError("mask should be boolean numpy array. Use "
+                            "the 'integer_array' function instead")
 
-        Returns
-        -------
-        IntegerArray
-        """
-        self._data, self._mask = coerce_to_array(
-            values, dtype=dtype, mask=mask, copy=copy)
+        if copy:
+            values = values.copy()
+            mask = mask.copy()
+
+        self._data = values
+        self._mask = mask
 
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):
-        return cls(scalars, dtype=dtype, copy=copy)
+        return integer_array(scalars, dtype=dtype, copy=copy)
 
     @classmethod
     def _from_factorized(cls, values, original):
-        return cls(values, dtype=original.dtype)
+        return integer_array(values, dtype=original.dtype)
 
     def __getitem__(self, item):
         if is_integer(item):
             if self._mask[item]:
                 return self.dtype.na_value
             return self._data[item]
-        return type(self)(self._data[item],
-                          mask=self._mask[item],
-                          dtype=self.dtype)
+        return type(self)(self._data[item], self._mask[item])
 
     def _coerce_to_ndarray(self):
         """
@@ -294,7 +314,7 @@ def take(self, indexer, allow_fill=False, fill_value=None):
             result[fill_mask] = fill_value
             mask = mask ^ fill_mask
 
-        return type(self)(result, mask=mask, dtype=self.dtype, copy=False)
+        return type(self)(result, mask, copy=False)
 
     def copy(self, deep=False):
         data, mask = self._data, self._mask
@@ -304,7 +324,7 @@ def copy(self, deep=False):
         else:
             data = data.copy()
             mask = mask.copy()
-        return type(self)(data, mask, dtype=self.dtype, copy=False)
+        return type(self)(data, mask, copy=False)
 
     def __setitem__(self, key, value):
         _is_scalar = is_scalar(value)
@@ -356,7 +376,7 @@ def _na_value(self):
     def _concat_same_type(cls, to_concat):
         data = np.concatenate([x._data for x in to_concat])
         mask = np.concatenate([x._mask for x in to_concat])
-        return cls(data, mask=mask, dtype=to_concat[0].dtype)
+        return cls(data, mask)
 
     def astype(self, dtype, copy=True):
         """Cast to a NumPy array or IntegerArray with 'dtype'.
@@ -386,8 +406,7 @@ def astype(self, dtype, copy=True):
         if isinstance(dtype, _IntegerDtype):
             result = self._data.astype(dtype.numpy_dtype,
                                        casting='same_kind', copy=False)
-            return type(self)(result, mask=self._mask,
-                              dtype=dtype, copy=False)
+            return type(self)(result, mask=self._mask, copy=False)
 
         # coerce
         data = self._coerce_to_ndarray()
@@ -523,7 +542,7 @@ def _maybe_mask_result(self, result, mask, other, op_name):
             result[mask] = np.nan
             return result
 
-        return type(self)(result, mask=mask, dtype=self.dtype, copy=False)
+        return type(self)(result, mask, copy=False)
 
     @classmethod
     def _create_arithmetic_method(cls, op):

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -300,7 +300,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
             if not (dtype is None or is_object_dtype(dtype)):
 
                 # coerce to the provided dtype
-                data = dtype.construct_array_type()(
+                data = dtype.construct_array_type()._from_sequence(
                     data, dtype=dtype, copy=False)
 
             # coerce to the object dtype

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -4111,7 +4111,7 @@ def _try_cast(arr, take_fast_path):
                                      ordered=dtype.ordered)
             elif is_extension_array_dtype(dtype):
                 # create an extension array from its dtype
-                array_type = dtype.construct_array_type()
+                array_type = dtype.construct_array_type()._from_sequence
                 subarr = array_type(subarr, dtype=dtype, copy=copy)
 
             elif dtype is not None and raise_cast_failure:

diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
@@ -213,7 +213,7 @@ def test_take_series(self, data):
         s = pd.Series(data)
         result = s.take([0, -1])
         expected = pd.Series(
-            data._from_sequence([data[0], data[len(data) - 1]]),
+            data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype),
             index=[0, len(data) - 1])
         self.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
@@ -77,8 +77,8 @@ def test_fillna_series(self, data_missing):
         ser = pd.Series(data_missing)
 
         result = ser.fillna(fill_value)
-        expected = pd.Series(
-            data_missing._from_sequence([fill_value, fill_value]))
+        expected = pd.Series(data_missing._from_sequence(
+            [fill_value, fill_value], dtype=data_missing.dtype))
         self.assert_series_equal(result, expected)
 
         # Fill with a series
@@ -94,11 +94,11 @@ def test_fillna_series_method(self, data_missing, method):
         fill_value = data_missing[1]
 
         if method == 'ffill':
-            data_missing = type(data_missing)(data_missing[::-1])
+            data_missing = data_missing[::-1]
 
         result = pd.Series(data_missing).fillna(method=method)
-        expected = pd.Series(
-            data_missing._from_sequence([fill_value, fill_value]))
+        expected = pd.Series(data_missing._from_sequence(
+            [fill_value, fill_value], dtype=data_missing.dtype))
 
         self.assert_series_equal(result, expected)
 
@@ -111,7 +111,8 @@ def test_fillna_frame(self, data_missing):
         }).fillna(fill_value)
 
         expected = pd.DataFrame({
-            "A": data_missing._from_sequence([fill_value, fill_value]),
+            "A": data_missing._from_sequence([fill_value, fill_value],
+                                             dtype=data_missing.dtype),
             "B": [1, 2],
         })