Address merge comments

kprestel · kprestel · commit e8db3d203275 · 2018-12-09T15:47:28.000-05:00
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -26,8 +26,8 @@ New features
 - :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`)
 - :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing the user to specify which decimal separator should be used in the output. (:issue:`23614`)
 - :func:`pandas.read_feather` now accepts ``columns`` as an argument, allowing the user to specify which columns should be read. (:issue:`24025`)
-- :func:`pandas.read_csv` now supports ``EA`` types as an argument to ``dtype``,
-  allowing the user to use ``EA`` types when reading CSVs. (:issue:`23228`)
+- :func:`pandas.read_csv` now supports pandas extension types as an argument to ``dtype``,
+  allowing the user to use pandas extension types when reading CSVs. (:issue:`23228`)
 
 .. _whatsnew_0240.values_api:
 
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -56,10 +56,8 @@ from pandas.core.arrays import Categorical
 from pandas.core.dtypes.concat import union_categoricals
 import pandas.io.common as icom
 
-from pandas.errors import (
-    ParserError, DtypeWarning,
-    EmptyDataError, ParserWarning, AbstractMethodError,
-)
+from pandas.errors import ( ParserError, DtypeWarning,
+                            EmptyDataError, ParserWarning )
 
 # Import CParserError as alias of ParserError for backwards compatibility.
 # Ultimately, we want to remove this import. See gh-12665 and gh-14479.
@@ -1217,6 +1215,18 @@ cdef class TextReader:
                 cats, codes, dtype, true_values=true_values)
             return cat, na_count
 
+        elif is_extension_array_dtype(dtype):
+            result, na_count = self._string_convert(i, start, end, na_filter,
+                                                    na_hashset)
+            try:
+                # use _from_sequence_of_strings if the class defines it
+                result = dtype.construct_array_type() \
+                              ._from_sequence_of_strings(result, dtype=dtype)
+            except NotImplementedError:
+                result = dtype.construct_array_type() \
+                              ._from_sequence(result, dtype=dtype)
+            return result, na_count
+
         elif is_integer_dtype(dtype):
             try:
                 result, na_count = _try_int64(self.parser, i, start,
@@ -1231,20 +1241,7 @@ cdef class TextReader:
                 na_count = 0
 
             if result is not None and dtype != 'int64':
-                if is_extension_array_dtype(dtype):
-                    try:
-                        array_type = dtype.construct_array_type()
-                    except AttributeError:
-                        dtype = pandas_dtype(dtype)
-                        array_type = dtype.construct_array_type()
-                    try:
-                        # use _from_sequence_of_strings if the class defines it
-                        result = array_type._from_sequence_of_strings(result,
-                                                                      dtype=dtype) # noqa
-                    except AbstractMethodError:
-                        result = array_type._from_sequence(result, dtype=dtype)
-                else:
-                    result = result.astype(dtype)
+                result = result.astype(dtype)
 
             return result, na_count
 
@@ -1253,20 +1250,7 @@ cdef class TextReader:
                                            na_filter, na_hashset, na_flist)
 
             if result is not None and dtype != 'float64':
-                if is_extension_array_dtype(dtype):
-                    try:
-                        array_type = dtype.construct_array_type()
-                    except AttributeError:
-                        dtype = pandas_dtype(dtype)
-                        array_type = dtype.construct_array_type()
-                    try:
-                        # use _from_sequence_of_strings if the class defines it
-                        result = array_type._from_sequence_of_strings(result,
-                                                                    dtype=dtype) # noqa
-                    except AbstractMethodError:
-                        result = array_type._from_sequence(result, dtype=dtype)
-                else:
-                    result = result.astype(dtype)
+                result = result.astype(dtype)
             return result, na_count
         elif is_bool_dtype(dtype):
             result, na_count = _try_bool_flex(self.parser, i, start, end,
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -127,6 +127,8 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
     def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
         """Construct a new ExtensionArray from a sequence of strings.
 
+        .. versionadded:: 0.24.0
+
         Parameters
         ----------
         strings : Sequence
@@ -141,6 +143,7 @@ def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
         Returns
         -------
         ExtensionArray
+
         """
         raise AbstractMethodError(cls)
 
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -19,6 +19,7 @@
 
 from pandas.core import nanops
 from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
+from pandas.core.tools.numeric import to_numeric
 
 
 class _IntegerDtype(ExtensionDtype):
@@ -157,7 +158,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
                 try:
                     dtype = _dtypes[str(np.dtype(dtype.name.lower()))]
                 except AttributeError:
-                    dtype = _dtypes[str(np.dtype(dtype.lower()))]
+                    dtype = _dtypes[str(np.dtype(dtype))]
             except KeyError:
                 raise ValueError("invalid dtype specified {}".format(dtype))
 
@@ -266,7 +267,8 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
 
     @classmethod
     def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
-        return cls._from_sequence([int(x) for x in strings], dtype, copy)
+        scalars = to_numeric(strings, errors='raise')
+        return cls._from_sequence(scalars, dtype, copy)
 
     @classmethod
     def _from_factorized(cls, values, original):
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -616,27 +616,18 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False):
 
     # dispatch on extension dtype if needed
     if is_extension_array_dtype(dtype):
-        if is_object_dtype(arr):
-            try:
-                array_type = dtype.construct_array_type()
-            except AttributeError:
-                dtype = pandas_dtype(dtype)
-                array_type = dtype.construct_array_type()
-            try:
-                # use _from_sequence_of_strings if the class defines it
-                return array_type._from_sequence_of_strings(arr,
-                                                            dtype=dtype,
-                                                            copy=copy)
-            except AbstractMethodError:
-                return array_type._from_sequence(arr, dtype=dtype, copy=copy)
-        else:
-            try:
-                return dtype.construct_array_type()._from_sequence(
-                    arr, dtype=dtype, copy=copy)
-            except AttributeError:
-                dtype = pandas_dtype(dtype)
-                return dtype.construct_array_type()._from_sequence(
-                    arr, dtype=dtype, copy=copy)
+        try:
+            array_type = dtype.construct_array_type()
+        except AttributeError:
+            dtype = pandas_dtype(dtype)
+            array_type = dtype.construct_array_type()
+        try:
+            # use _from_sequence_of_strings if the class defines it
+            return array_type._from_sequence_of_strings(arr,
+                                                        dtype=dtype,
+                                                        copy=copy)
+        except NotImplementedError:
+            return array_type._from_sequence(arr, dtype=dtype, copy=copy)
 
     if not isinstance(dtype, np.dtype):
         dtype = pandas_dtype(dtype)
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -1795,10 +1795,7 @@ def _get_dtype(arr_or_dtype):
     if isinstance(arr_or_dtype, np.dtype):
         return arr_or_dtype
     elif isinstance(arr_or_dtype, type):
-        try:
-            return pandas_dtype(arr_or_dtype)
-        except TypeError:
-            return np.dtype(arr_or_dtype)
+        return np.dtype(arr_or_dtype)
     elif isinstance(arr_or_dtype, ExtensionDtype):
         return arr_or_dtype
     elif isinstance(arr_or_dtype, DatetimeTZDtype):
@@ -1816,11 +1813,6 @@ def _get_dtype(arr_or_dtype):
             return PeriodDtype.construct_from_string(arr_or_dtype)
         elif is_interval_dtype(arr_or_dtype):
             return IntervalDtype.construct_from_string(arr_or_dtype)
-        else:
-            try:
-                return pandas_dtype(arr_or_dtype)
-            except TypeError:
-                pass
     elif isinstance(arr_or_dtype, (ABCCategorical, ABCCategoricalIndex,
                                    ABCSparseArray, ABCSparseSeries)):
         return arr_or_dtype.dtype
@@ -1851,15 +1843,7 @@ def _get_dtype_type(arr_or_dtype):
     if isinstance(arr_or_dtype, np.dtype):
         return arr_or_dtype.type
     elif isinstance(arr_or_dtype, type):
-        try:
-            dtype = pandas_dtype(arr_or_dtype)
-            try:
-                return dtype.type
-            except AttributeError:
-                raise TypeError
-        except TypeError:
-            return np.dtype(arr_or_dtype).type
-
+        return np.dtype(arr_or_dtype).type
     elif isinstance(arr_or_dtype, CategoricalDtype):
         return CategoricalDtypeType
     elif isinstance(arr_or_dtype, DatetimeTZDtype):
@@ -1888,10 +1872,7 @@ def _get_dtype_type(arr_or_dtype):
     try:
         return arr_or_dtype.dtype.type
     except AttributeError:
-        try:
-            return arr_or_dtype.numpy_dtype.type
-        except AttributeError:
-            return type(None)
+        return type(None)
 
 
 def _get_dtype_from_object(dtype):
diff --git a/pandas/tests/extension/base/io.py b/pandas/tests/extension/base/io.py
@@ -28,17 +28,11 @@ def data(dtype):
 class ExtensionParsingTests(BaseExtensionTests):
 
     @pytest.mark.parametrize('engine', ['c', 'python'])
-    def test_EA_types(self, engine):
-        df = pd.DataFrame({'Int': pd.Series([1, 2, 3], dtype='Int64'),
-                           'A': [1, 2, 1]})
+    def test_EA_types(self, engine, data):
+        df = pd.DataFrame({'Int': pd.Series(data, dtype=str(data.dtype)),
+                           'A': data})
         data = df.to_csv(index=False)
-        result = pd.read_csv(StringIO(data), dtype={'Int': Int64Dtype},
+        result = pd.read_csv(StringIO(data), dtype={'Int': str(df.Int.dtype)},
                              engine=engine)
         assert result is not None
 
-        df = pd.DataFrame({'Int': pd.Series([1, 2, 3], dtype='Int8'),
-                           'A': [1, 2, 1]})
-        data = df.to_csv(index=False)
-        result = pd.read_csv(StringIO(data), dtype={'Int': 'Int8'},
-                             engine=engine)
-        assert result is not None