63
63
# Type variable bound to ArrowExtensionArray. The bound is written as a string
# because the class itself is only defined further down in this module.
ArrowExtensionArrayT = TypeVar ("ArrowExtensionArrayT" , bound = "ArrowExtensionArray" )
64
64
65
65
66
def to_pyarrow_type(
    dtype: ArrowDtype | pa.DataType | Dtype | None,
) -> pa.DataType | None:
    """
    Resolve ``dtype`` to the pyarrow type it stands for.

    Parameters
    ----------
    dtype : ArrowDtype, pa.DataType, Dtype or None
        The dtype specification to translate.

    Returns
    -------
    pa.DataType or None
        * ``dtype.pyarrow_dtype`` when ``dtype`` is an ``ArrowDtype``.
        * ``dtype`` itself when it already is a ``pa.DataType``.
        * ``pa.from_numpy_dtype(dtype)`` for any other truthy value.
        * ``None`` when ``dtype`` is falsy (for example ``None``).
    """
    # The order of the checks is significant: the wrapper type is unwrapped
    # first, then native pyarrow types pass through unchanged.
    if isinstance(dtype, ArrowDtype):
        return dtype.pyarrow_dtype
    if isinstance(dtype, pa.DataType):
        return dtype
    if not dtype:
        # Nothing was requested; callers receive None.
        return None
    # Whatever remains is handed to pyarrow's numpy-dtype conversion.
    return pa.from_numpy_dtype(dtype)
81
+
82
+
66
83
class ArrowExtensionArray (OpsMixin , ExtensionArray ):
67
84
"""
68
85
Base class for ExtensionArray backed by Arrow ChunkedArray.
@@ -89,13 +106,7 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
89
106
"""
90
107
Construct a new ExtensionArray from a sequence of scalars.
91
108
"""
92
- if isinstance (dtype , ArrowDtype ):
93
- pa_dtype = dtype .pyarrow_dtype
94
- elif dtype :
95
- pa_dtype = pa .from_numpy_dtype (dtype )
96
- else :
97
- pa_dtype = None
98
-
109
+ pa_dtype = to_pyarrow_type (dtype )
99
110
if isinstance (scalars , cls ):
100
111
data = scalars ._data
101
112
if pa_dtype :
@@ -113,7 +124,40 @@ def _from_sequence_of_strings(
113
124
"""
114
125
Construct a new ExtensionArray from a sequence of strings.
115
126
"""
116
- return cls ._from_sequence (strings , dtype = dtype , copy = copy )
127
+ pa_type = to_pyarrow_type (dtype )
128
+ if pa .types .is_timestamp (pa_type ):
129
+ from pandas .core .tools .datetimes import to_datetime
130
+
131
+ scalars = to_datetime (strings , errors = "raise" )
132
+ elif pa .types .is_date (pa_type ):
133
+ from pandas .core .tools .datetimes import to_datetime
134
+
135
+ scalars = to_datetime (strings , errors = "raise" ).date
136
+ elif pa .types .is_duration (pa_type ):
137
+ from pandas .core .tools .timedeltas import to_timedelta
138
+
139
+ scalars = to_timedelta (strings , errors = "raise" )
140
+ elif pa .types .is_time (pa_type ):
141
+ from pandas .core .tools .times import to_time
142
+
143
+ # "coerce" to allow "null times" (None) to not raise
144
+ scalars = to_time (strings , errors = "coerce" )
145
+ elif pa .types .is_boolean (pa_type ):
146
+ from pandas .core .arrays import BooleanArray
147
+
148
+ scalars = BooleanArray ._from_sequence_of_strings (strings ).to_numpy ()
149
+ elif (
150
+ pa .types .is_integer (pa_type )
151
+ or pa .types .is_floating (pa_type )
152
+ or pa .types .is_decimal (pa_type )
153
+ ):
154
+ from pandas .core .tools .numeric import to_numeric
155
+
156
+ scalars = to_numeric (strings , errors = "raise" )
157
+ else :
158
+ # Let pyarrow try to infer or raise
159
+ scalars = strings
160
+ return cls ._from_sequence (scalars , dtype = pa_type , copy = copy )
117
161
118
162
def __getitem__ (self , item : PositionalIndexer ):
119
163
"""Select a subset of self.
0 commit comments