1
+ from datetime import datetime , timedelta
2
+ from importlib import reload
3
+ import string
4
+ import sys
5
+
1
6
import numpy as np
2
7
import pytest
3
8
4
- from pandas import NA , Interval , Series , Timestamp , date_range
9
+ from pandas ._libs .tslibs import iNaT
10
+
11
+ from pandas import (
12
+ NA ,
13
+ Categorical ,
14
+ CategoricalDtype ,
15
+ Index ,
16
+ Interval ,
17
+ Series ,
18
+ Timedelta ,
19
+ Timestamp ,
20
+ date_range ,
21
+ )
5
22
import pandas ._testing as tm
6
23
7
24
25
class TestAstypeAPI:
    """Checks on the keyword arguments and dtype mappings of ``Series.astype``."""

    def test_arg_for_errors_in_astype(self):
        """An ``errors`` value other than 'raise'/'ignore' raises ``ValueError``."""
        # see GH#14878
        ints = Series([1, 2, 3])

        expected_msg = (
            r"Expected value of kwarg 'errors' to be one of \['raise', "
            r"'ignore'\]\. Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=expected_msg):
            ints.astype(np.float64, errors=False)

        # a supported value must go through without raising
        ints.astype(np.int8, errors="raise")

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        """A dict/Series dtype mapping works only when keyed by the Series name."""
        # see GH#7271
        name = "abc"
        ser = Series(range(0, 10, 2), name=name)

        as_str = ser.astype(dtype_class({name: str}))
        tm.assert_series_equal(as_str, Series(["0", "2", "4", "6", "8"], name=name))

        as_float = ser.astype(dtype_class({name: "float64"}))
        tm.assert_series_equal(
            as_float, Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name=name)
        )

        msg = (
            "Only the Series name can be used for the key in Series dtype "
            r"mappings\."
        )

        # GH#16717
        # if dtypes provided is empty, it should error
        if dtype_class is Series:
            empty_mapping = dtype_class({}, dtype=object)
        else:
            empty_mapping = dtype_class({})

        rejected_mappings = [
            dtype_class({"abc": str, "def": str}),  # extra key
            dtype_class({0: str}),  # key is not the Series name
            empty_mapping,
        ]
        for mapping in rejected_mappings:
            with pytest.raises(KeyError, match=msg):
                ser.astype(mapping)
75
+
76
+
8
77
class TestAstype :
78
@pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
def test_astype_generic_timestamp_no_frequency(self, dtype, request):
    """Casting to a unit-less ``datetime64``/``timedelta64`` raises ``ValueError``.

    ``dtype`` is the generic NumPy scalar type under test; ``request`` is the
    pytest request fixture, used to attach an xfail marker dynamically.
    """
    # see GH#15524, GH#15987
    ser = Series([1])

    if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
        mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
        request.node.add_marker(mark)

    # The interpolated name must sit directly against the closing quote and
    # the escaped "[ns]"; any whitespace in between makes the pattern
    # unmatchable. The trailing dot is escaped so it is matched literally.
    type_name = dtype.__name__
    msg = (
        fr"The '{type_name}' dtype has no unit\. "
        fr"Please pass in '{type_name}\[ns\]' instead\."
    )
    with pytest.raises(ValueError, match=msg):
        ser.astype(dtype)
94
+
9
95
def test_astype_dt64_to_str (self ):
10
96
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
11
97
dti = date_range ("2012-01-01" , periods = 3 )
@@ -27,6 +113,87 @@ def test_astype_dt64tz_to_str(self):
27
113
)
28
114
tm .assert_series_equal (result , expected )
29
115
116
+ def test_astype_datetime (self ):
117
+ s = Series (iNaT , dtype = "M8[ns]" , index = range (5 ))
118
+
119
+ s = s .astype ("O" )
120
+ assert s .dtype == np .object_
121
+
122
+ s = Series ([datetime (2001 , 1 , 2 , 0 , 0 )])
123
+
124
+ s = s .astype ("O" )
125
+ assert s .dtype == np .object_
126
+
127
+ s = Series ([datetime (2001 , 1 , 2 , 0 , 0 ) for i in range (3 )])
128
+
129
+ s [1 ] = np .nan
130
+ assert s .dtype == "M8[ns]"
131
+
132
+ s = s .astype ("O" )
133
+ assert s .dtype == np .object_
134
+
135
def test_astype_datetime64tz(self):
    """Round trips between tz-aware datetime64, object dtype and other zones."""
    aware = Series(date_range("20130101", periods=3, tz="US/Eastern"))

    # astype(object) agrees with an explicit object-dtype construction
    as_object = aware.astype(object)
    tm.assert_series_equal(as_object, Series(aware.astype(object), dtype=object))

    # localize the raw values as UTC, then convert back to the original zone
    restored = Series(aware.values).dt.tz_localize("UTC").dt.tz_convert(aware.dt.tz)
    tm.assert_series_equal(restored, aware)

    # astype - object, preserves on construction
    rewrapped = Series(aware.astype(object))
    tm.assert_series_equal(rewrapped, aware.astype(object))

    # astype - datetime64[ns, tz], given as a string and as the dtype object
    for target in ("datetime64[ns, US/Eastern]", aware.dtype):
        tm.assert_series_equal(Series(aware.values).astype(target), aware)

    # casting to a different zone
    in_cet = aware.astype("datetime64[ns, CET]")
    tm.assert_series_equal(
        in_cet, Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
    )
161
+
162
+ def test_astype_str_cast_dt64 (self ):
163
+ # see GH#9757
164
+ ts = Series ([Timestamp ("2010-01-04 00:00:00" )])
165
+ s = ts .astype (str )
166
+
167
+ expected = Series (["2010-01-04" ])
168
+ tm .assert_series_equal (s , expected )
169
+
170
+ ts = Series ([Timestamp ("2010-01-04 00:00:00" , tz = "US/Eastern" )])
171
+ s = ts .astype (str )
172
+
173
+ expected = Series (["2010-01-04 00:00:00-05:00" ])
174
+ tm .assert_series_equal (s , expected )
175
+
176
+ def test_astype_str_cast_td64 (self ):
177
+ # see GH#9757
178
+
179
+ td = Series ([Timedelta (1 , unit = "d" )])
180
+ ser = td .astype (str )
181
+
182
+ expected = Series (["1 days" ])
183
+ tm .assert_series_equal (ser , expected )
184
+
185
+ def test_dt64_series_astype_object (self ):
186
+ dt64ser = Series (date_range ("20130101" , periods = 3 ))
187
+ result = dt64ser .astype (object )
188
+ assert isinstance (result .iloc [0 ], datetime )
189
+ assert result .dtype == np .object_
190
+
191
+ def test_td64_series_astype_object (self ):
192
+ tdser = Series (["59 Days" , "59 Days" , "NaT" ], dtype = "timedelta64[ns]" )
193
+ result = tdser .astype (object )
194
+ assert isinstance (result .iloc [0 ], timedelta )
195
+ assert result .dtype == np .object_
196
+
30
197
@pytest .mark .parametrize (
31
198
"values" ,
32
199
[
@@ -70,3 +237,122 @@ def test_astype_to_str_preserves_na(self, value, string_value):
70
237
result = s .astype (str )
71
238
expected = Series (["a" , "b" , string_value ], dtype = object )
72
239
tm .assert_series_equal (result , expected )
240
+
241
@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
def test_astype(self, dtype):
    """The requested numeric dtype is applied and the Series name is kept."""
    original = Series(np.random.randn(5), name="foo")
    converted = original.astype(dtype)

    assert converted.dtype == dtype
    assert converted.name == original.name
248
+
249
@pytest.mark.parametrize("value", [np.nan, np.inf])
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
def test_astype_cast_nan_inf_int(self, dtype, value):
    """NaN and inf cannot be cast to an integer dtype and raise ``ValueError``."""
    # gh-14265: check NaN and inf raise error when converting to int
    # The backslashes must sit directly in front of the parentheses. With a
    # space in between, the regex escapes the space and opens a capture
    # group, so the pattern could never match the literal "(NA or inf)".
    msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
    ser = Series([value])

    with pytest.raises(ValueError, match=msg):
        ser.astype(dtype)
258
+
259
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
def test_astype_cast_object_int_fail(self, dtype):
    """Strings that are not integer literals cannot be cast to an integer dtype."""
    words = Series(["car", "house", "tree", "1"])
    expected_error = r"invalid literal for int\(\) with base 10: 'car'"

    with pytest.raises(ValueError, match=expected_error):
        words.astype(dtype)
265
+
266
+ def test_astype_cast_object_int (self ):
267
+ arr = Series (["1" , "2" , "3" , "4" ], dtype = object )
268
+ result = arr .astype (int )
269
+
270
+ tm .assert_series_equal (result , Series (np .arange (1 , 5 )))
271
+
272
def test_astype_unicode(self):
    """``astype("unicode")`` matches mapping ``str`` over the values."""
    # see GH#7758
    digits = string.digits
    test_series = [
        Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
        Series(["データーサイエンス、お前はもう死んでいる"]),
    ]

    # The bytes case is only added when the interpreter default is utf-8.
    if sys.getdefaultencoding() == "utf-8":
        test_series.append(Series(["野菜食べないとやばい".encode()]))

    for ser in test_series:
        result = ser.astype("unicode")
        expected = ser.map(str)
        tm.assert_series_equal(result, expected)

    # The former "restore the default encoding" step was removed: its guard
    # variable was always None, so the reload(sys)/setdefaultencoding branch
    # could never run.
295
+
296
+
297
class TestAstypeCategorical:
    """``Series.astype`` conversions that target categorical dtypes."""

    def test_astype_categoricaldtype(self):
        """Casting with a ``CategoricalDtype`` applies its categories and order."""
        ser = Series(["a", "b", "a"])
        values = ["a", "b", "a"]

        for ordered in (True, False):
            result = ser.astype(CategoricalDtype(["a", "b"], ordered=ordered))
            expected = Series(Categorical(values, ordered=ordered))
            tm.assert_series_equal(result, expected)

        # a category that is absent from the data is still part of the result
        wider = ["a", "b", "c"]
        result = ser.astype(CategoricalDtype(wider, ordered=False))
        expected = Series(Categorical(values, categories=wider, ordered=False))
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.cat.categories, Index(wider))

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        """Categorical-to-categorical casts for each name/ordered combination."""
        # GH#10696, GH#18593
        data = list("abcaacbab")
        source_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        ser = Series(data, dtype=source_dtype, name=name)

        # unspecified categories
        result = ser.astype(CategoricalDtype(ordered=dtype_ordered))
        expected = Series(
            data,
            name=name,
            dtype=CategoricalDtype(source_dtype.categories, dtype_ordered),
        )
        tm.assert_series_equal(result, expected)

        # different categories
        target_dtype = CategoricalDtype(list("adc"), dtype_ordered)
        result = ser.astype(target_dtype)
        expected = Series(data, name=name, dtype=target_dtype)
        tm.assert_series_equal(result, expected)

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            tm.assert_series_equal(ser.astype("category"), ser)

    def test_astype_bool_missing_to_categorical(self):
        """Booleans with a missing value cast to a categorical of True/False."""
        # GH-19182
        ser = Series([True, False, np.nan])
        assert ser.dtypes == np.object_

        categories = [True, False]
        result = ser.astype(CategoricalDtype(categories=categories))
        expected = Series(Categorical([True, False, np.nan], categories=categories))
        tm.assert_series_equal(result, expected)

    def test_astype_categories_raises(self):
        """Passing ``categories``/``ordered`` as keywords raises ``TypeError``."""
        # deprecated GH#17636, removed in GH#27141
        ser = Series(["a", "b", "a"])
        with pytest.raises(TypeError, match="got an unexpected"):
            ser.astype("category", categories=["a", "b"], ordered=True)
0 commit comments