36
36
from pandas ._typing import (
37
37
ArrayLike ,
38
38
DtypeArg ,
39
+ DtypeObj ,
39
40
Scalar ,
40
41
)
41
42
from pandas .errors import (
61
62
is_string_dtype ,
62
63
pandas_dtype ,
63
64
)
64
- from pandas .core .dtypes .dtypes import CategoricalDtype
65
+ from pandas .core .dtypes .dtypes import (
66
+ CategoricalDtype ,
67
+ ExtensionDtype ,
68
+ )
65
69
from pandas .core .dtypes .missing import isna
66
70
67
71
from pandas import StringDtype
68
72
from pandas .core import algorithms
69
73
from pandas .core .arrays import (
70
74
BooleanArray ,
71
75
Categorical ,
76
+ ExtensionArray ,
72
77
FloatingArray ,
73
78
IntegerArray ,
74
79
)
@@ -599,14 +604,8 @@ def _convert_to_ndarrays(
599
604
# type specified in dtype param or cast_type is an EA
600
605
if cast_type and (not is_dtype_equal (cvals , cast_type ) or is_ea ):
601
606
if not is_ea and na_count > 0 :
602
- try :
603
- if is_bool_dtype (cast_type ):
604
- raise ValueError (
605
- f"Bool column has NA values in column { c } "
606
- )
607
- except (AttributeError , TypeError ):
608
- # invalid input to is_bool_dtype
609
- pass
607
+ if is_bool_dtype (cast_type ):
608
+ raise ValueError (f"Bool column has NA values in column { c } " )
610
609
cast_type = pandas_dtype (cast_type )
611
610
cvals = self ._cast_types (cvals , cast_type , c )
612
611
@@ -686,7 +685,7 @@ def _set(x) -> int:
686
685
687
686
def _infer_types (
688
687
self , values , na_values , no_dtype_specified , try_num_bool : bool = True
689
- ):
688
+ ) -> tuple [ ArrayLike , int ] :
690
689
"""
691
690
Infer types of values, possibly casting
692
691
@@ -700,7 +699,7 @@ def _infer_types(
700
699
701
700
Returns
702
701
-------
703
- converted : ndarray
702
+ converted : ndarray or ExtensionArray
704
703
na_count : int
705
704
"""
706
705
na_count = 0
@@ -777,48 +776,50 @@ def _infer_types(
777
776
778
777
return result , na_count
779
778
780
- def _cast_types (self , values , cast_type , column ):
779
+ def _cast_types (self , values : ArrayLike , cast_type : DtypeObj , column ) -> ArrayLike :
781
780
"""
782
781
Cast values to specified type
783
782
784
783
Parameters
785
784
----------
786
- values : ndarray
787
- cast_type : string or np.dtype
785
+ values : ndarray or ExtensionArray
786
+ cast_type : np.dtype or ExtensionDtype
788
787
dtype to cast values to
789
788
column : string
790
789
column name - used only for error reporting
791
790
792
791
Returns
793
792
-------
794
- converted : ndarray
793
+ converted : ndarray or ExtensionArray
795
794
"""
796
795
if is_categorical_dtype (cast_type ):
797
796
known_cats = (
798
797
isinstance (cast_type , CategoricalDtype )
799
798
and cast_type .categories is not None
800
799
)
801
800
802
- if not is_object_dtype (values ) and not known_cats :
801
+ if not is_object_dtype (values . dtype ) and not known_cats :
803
802
# TODO: this is for consistency with
804
803
# c-parser which parses all categories
805
804
# as strings
806
805
807
- values = astype_nansafe (values , np .dtype (str ))
806
+ values = lib .ensure_string_array (
807
+ values , skipna = False , convert_na_value = False
808
+ )
808
809
809
810
cats = Index (values ).unique ().dropna ()
810
811
values = Categorical ._from_inferred_categories (
811
812
cats , cats .get_indexer (values ), cast_type , true_values = self .true_values
812
813
)
813
814
814
815
# use the EA's implementation of casting
815
- elif is_extension_array_dtype (cast_type ):
816
- # ensure cast_type is an actual dtype and not a string
817
- cast_type = pandas_dtype (cast_type )
816
+ elif isinstance (cast_type , ExtensionDtype ):
818
817
array_type = cast_type .construct_array_type ()
819
818
try :
820
819
if is_bool_dtype (cast_type ):
821
- return array_type ._from_sequence_of_strings (
820
+ # error: Unexpected keyword argument "true_values" for
821
+ # "_from_sequence_of_strings" of "ExtensionArray"
822
+ return array_type ._from_sequence_of_strings ( # type: ignore[call-arg] # noqa:E501
822
823
values ,
823
824
dtype = cast_type ,
824
825
true_values = self .true_values ,
@@ -832,6 +833,8 @@ def _cast_types(self, values, cast_type, column):
832
833
"_from_sequence_of_strings in order to be used in parser methods"
833
834
) from err
834
835
836
+ elif isinstance (values , ExtensionArray ):
837
+ values = values .astype (cast_type , copy = False )
835
838
else :
836
839
try :
837
840
values = astype_nansafe (values , cast_type , copy = True , skipna = True )
0 commit comments