|
28 | 28 | from pandas.core.dtypes.cast import astype_nansafe
|
29 | 29 | from pandas.core.dtypes.common import (
|
30 | 30 | ensure_object, is_bool_dtype, is_categorical_dtype, is_dtype_equal,
|
31 |
| - is_float, is_integer, is_integer_dtype, is_list_like, is_object_dtype, |
32 |
| - is_scalar, is_string_dtype) |
| 31 | + is_extension_array_dtype, is_float, is_integer, is_integer_dtype, |
| 32 | + is_list_like, is_object_dtype, is_scalar, is_string_dtype, pandas_dtype) |
33 | 33 | from pandas.core.dtypes.dtypes import CategoricalDtype
|
34 | 34 | from pandas.core.dtypes.missing import isna
|
35 | 35 |
|
|
134 | 134 | 'X'...'X'. Passing in False will cause data to be overwritten if there
|
135 | 135 | are duplicate names in the columns.
|
136 | 136 | dtype : Type name or dict of column -> type, optional
|
137 |
| - Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32}} |
| 137 | + Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32, |
| 138 | + 'c': 'Int64'}} |
138 | 139 | Use `str` or `object` together with suitable `na_values` settings
|
139 | 140 | to preserve and not interpret dtype.
|
140 | 141 | If converters are specified, they will be applied INSTEAD
|
@@ -1659,16 +1660,20 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
|
1659 | 1660 | values, set(col_na_values) | col_na_fvalues,
|
1660 | 1661 | try_num_bool=False)
|
1661 | 1662 | else:
|
| 1663 | + is_str_or_ea_dtype = (is_string_dtype(cast_type) |
| 1664 | + or is_extension_array_dtype(cast_type)) |
1662 | 1665 | # skip inference if specified dtype is object
|
1663 |
| - try_num_bool = not (cast_type and is_string_dtype(cast_type)) |
| 1666 | + # or casting to an EA |
| 1667 | + try_num_bool = not (cast_type and is_str_or_ea_dtype) |
1664 | 1668 |
|
1665 | 1669 | # general type inference and conversion
|
1666 | 1670 | cvals, na_count = self._infer_types(
|
1667 | 1671 | values, set(col_na_values) | col_na_fvalues,
|
1668 | 1672 | try_num_bool)
|
1669 | 1673 |
|
1670 |
| - # type specified in dtype param |
1671 |
| - if cast_type and not is_dtype_equal(cvals, cast_type): |
| 1674 | + # type specified in dtype param or cast_type is an EA |
| 1675 | + if cast_type and (not is_dtype_equal(cvals, cast_type) |
| 1676 | + or is_extension_array_dtype(cast_type)): |
1672 | 1677 | try:
|
1673 | 1678 | if (is_bool_dtype(cast_type) and
|
1674 | 1679 | not is_categorical_dtype(cast_type)
|
@@ -1765,6 +1770,20 @@ def _cast_types(self, values, cast_type, column):
|
1765 | 1770 | cats, cats.get_indexer(values), cast_type,
|
1766 | 1771 | true_values=self.true_values)
|
1767 | 1772 |
|
| 1773 | + # use the EA's implementation of casting |
| 1774 | + elif is_extension_array_dtype(cast_type): |
| 1775 | + # ensure cast_type is an actual dtype and not a string |
| 1776 | + cast_type = pandas_dtype(cast_type) |
| 1777 | + array_type = cast_type.construct_array_type() |
| 1778 | + try: |
| 1779 | + return array_type._from_sequence_of_strings(values, |
| 1780 | + dtype=cast_type) |
| 1781 | + except NotImplementedError: |
| 1782 | + raise NotImplementedError( |
| 1783 | + "Extension Array: {ea} must implement " |
| 1784 | + "_from_sequence_of_strings in order " |
| 1785 | + "to be used in parser methods".format(ea=array_type)) |
| 1786 | + |
1768 | 1787 | else:
|
1769 | 1788 | try:
|
1770 | 1789 | values = astype_nansafe(values, cast_type,
|
@@ -2174,8 +2193,8 @@ def __init__(self, f, **kwds):
|
2174 | 2193 |
|
2175 | 2194 | self.verbose = kwds['verbose']
|
2176 | 2195 | self.converters = kwds['converters']
|
2177 |
| - self.dtype = kwds['dtype'] |
2178 | 2196 |
|
| 2197 | + self.dtype = kwds['dtype'] |
2179 | 2198 | self.thousands = kwds['thousands']
|
2180 | 2199 | self.decimal = kwds['decimal']
|
2181 | 2200 |
|
|
0 commit comments