1
1
import numpy as np
2
+ import pytest
2
3
3
4
from pandas import (
4
5
Categorical ,
5
6
CategoricalDtype ,
7
+ NaT ,
8
+ Timestamp ,
6
9
array ,
10
+ to_datetime ,
7
11
)
8
12
import pandas ._testing as tm
9
13
@@ -12,8 +16,74 @@ class TestAstype:
12
16
def test_astype_str_int_categories_to_nullable_int (self ):
13
17
# GH#39616
14
18
dtype = CategoricalDtype ([str (i ) for i in range (5 )])
15
- arr = Categorical .from_codes (np .random .randint (5 , size = 20 ), dtype = dtype )
19
+ codes = np .random .randint (5 , size = 20 )
20
+ arr = Categorical .from_codes (codes , dtype = dtype )
16
21
17
22
res = arr .astype ("Int64" )
18
- expected = array (arr . astype ( "int64" ) , dtype = "Int64" )
23
+ expected = array (codes , dtype = "Int64" )
19
24
tm .assert_extension_array_equal (res , expected )
25
+
26
@pytest.mark.parametrize("ordered", [True, False])
def test_astype(self, ordered):
    """Check ``Categorical.astype`` to object, int and float for both orderings.

    Parameters
    ----------
    ordered : bool
        Passed through as the ``ordered`` flag of the Categoricals under test.
    """
    # string
    cat = Categorical(list("abbaaccc"), ordered=ordered)
    result = cat.astype(object)
    expected = np.array(cat)
    tm.assert_numpy_array_equal(result, expected)

    # The message embeds the categories' dtype name: ``object`` classically,
    # ``str`` when pandas infers its dedicated string dtype, so accept both.
    msg = r"Cannot cast (object|str) dtype to float64"
    with pytest.raises(ValueError, match=msg):
        cat.astype(float)

    # numeric
    cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered)
    result = cat.astype(object)
    expected = np.array(cat, dtype=object)
    tm.assert_numpy_array_equal(result, expected)

    result = cat.astype(int)
    expected = np.array(cat, dtype="int")
    tm.assert_numpy_array_equal(result, expected)

    result = cat.astype(float)
    expected = np.array(cat, dtype=float)
    tm.assert_numpy_array_equal(result, expected)
51
+
52
@pytest.mark.parametrize("dtype_ordered", [True, False])
@pytest.mark.parametrize("cat_ordered", [True, False])
def test_astype_category(self, dtype_ordered, cat_ordered):
    """Check ``Categorical.astype`` when the target is itself categorical.

    Parameters
    ----------
    dtype_ordered : bool
        ``ordered`` flag of the target ``CategoricalDtype``.
    cat_ordered : bool
        ``ordered`` flag of the source Categorical.
    """
    # GH#10696/GH#18593
    data = list("abcaacbab")
    cat = Categorical(data, categories=list("bac"), ordered=cat_ordered)

    # standard categories: the target dtype names no categories, so the
    # expected result keeps the source's categories and takes the new flag
    dtype = CategoricalDtype(ordered=dtype_ordered)
    result = cat.astype(dtype)
    expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered)
    tm.assert_categorical_equal(result, expected)

    # non-standard categories: must match building from the raw data directly
    dtype = CategoricalDtype(list("adc"), dtype_ordered)
    result = cat.astype(dtype)
    expected = Categorical(data, dtype=dtype)
    tm.assert_categorical_equal(result, expected)

    if dtype_ordered is False:
        # dtype='category' can't specify ordered, so only test once
        result = cat.astype("category")
        expected = cat
        tm.assert_categorical_equal(result, expected)
76
+
77
+ def test_astype_object_datetime_categories (self ):
78
+ # GH#40754
79
+ cat = Categorical (to_datetime (["2021-03-27" , NaT ]))
80
+ result = cat .astype (object )
81
+ expected = np .array ([Timestamp ("2021-03-27 00:00:00" ), NaT ], dtype = "object" )
82
+ tm .assert_numpy_array_equal (result , expected )
83
+
84
+ def test_astype_object_timestamp_categories (self ):
85
+ # GH#18024
86
+ cat = Categorical ([Timestamp ("2014-01-01" )])
87
+ result = cat .astype (object )
88
+ expected = np .array ([Timestamp ("2014-01-01 00:00:00" )], dtype = "object" )
89
+ tm .assert_numpy_array_equal (result , expected )
0 commit comments