Skip to content

Commit 17dc6b0

Browse files
authored
REF: implement test_astype (#33734)
1 parent c4ebf21 commit 17dc6b0

File tree

8 files changed

+754
-703
lines changed

8 files changed

+754
-703
lines changed

pandas/tests/frame/methods/test_astype.py

+562
Large diffs are not rendered by default.

pandas/tests/frame/test_dtypes.py

+2-544
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas import Categorical, CategoricalDtype, CategoricalIndex, Index, IntervalIndex
5+
import pandas._testing as tm
6+
7+
8+
class TestAstype:
    """Tests for ``CategoricalIndex.astype``."""

    def test_astype(self):
        """Cast a CategoricalIndex to object dtype and to interval dtype."""
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = ci.astype(object)
        # Pin dtype=object so the expected Index does not rely on dtype inference.
        tm.assert_index_equal(result, Index(np.array(ci), dtype=object))

        # this IS equal, but not the same class
        assert result.equals(ci)
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

        # interval
        ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right")

        # The code -1 marks the last entry of the categorical as missing.
        ci = CategoricalIndex(
            Categorical.from_codes([0, 1, -1], categories=ii, ordered=True)
        )

        result = ci.astype("interval")
        # Request NA-filling explicitly so that -1 is treated as "missing"
        # rather than depending on the default fill behaviour of ``take``.
        expected = ii.take([0, 1, -1], allow_fill=True, fill_value=np.nan)
        tm.assert_index_equal(result, expected)

        # Rebuilding an IntervalIndex from the result's values compares equal too.
        result = IntervalIndex(result.values)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("index_ordered", [True, False])
    def test_astype_category(self, name, dtype_ordered, index_ordered):
        """Cast a CategoricalIndex to other categorical dtypes.

        Parameters
        ----------
        name : str or None
            Name given to the index under test (left unnamed when None).
        dtype_ordered : bool
            ``ordered`` flag of the target CategoricalDtype.
        index_ordered : bool
            ``ordered`` flag of the source CategoricalIndex.
        """
        # GH#18630
        index = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=index_ordered
        )
        if name is not None:
            index = index.rename(name)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(
            index.tolist(),
            name=name,
            categories=index.categories,
            ordered=dtype_ordered,
        )
        tm.assert_index_equal(result, expected)

        # non-standard categories (the last unique value is dropped)
        dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype)
        tm.assert_index_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = index.astype("category")
            expected = index
            tm.assert_index_equal(result, expected)

pandas/tests/indexes/categorical/test_category.py

+1-60
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,8 @@
33

44
from pandas._libs import index as libindex
55

6-
from pandas.core.dtypes.dtypes import CategoricalDtype
7-
86
import pandas as pd
9-
from pandas import Categorical, IntervalIndex
7+
from pandas import Categorical
108
import pandas._testing as tm
119
from pandas.core.indexes.api import CategoricalIndex, Index
1210

@@ -196,63 +194,6 @@ def test_delete(self):
196194
# Either depending on NumPy version
197195
ci.delete(10)
198196

199-
def test_astype(self):
200-
201-
ci = self.create_index()
202-
result = ci.astype(object)
203-
tm.assert_index_equal(result, Index(np.array(ci)))
204-
205-
# this IS equal, but not the same class
206-
assert result.equals(ci)
207-
assert isinstance(result, Index)
208-
assert not isinstance(result, CategoricalIndex)
209-
210-
# interval
211-
ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right")
212-
213-
ci = CategoricalIndex(
214-
Categorical.from_codes([0, 1, -1], categories=ii, ordered=True)
215-
)
216-
217-
result = ci.astype("interval")
218-
expected = ii.take([0, 1, -1])
219-
tm.assert_index_equal(result, expected)
220-
221-
result = IntervalIndex(result.values)
222-
tm.assert_index_equal(result, expected)
223-
224-
@pytest.mark.parametrize("name", [None, "foo"])
225-
@pytest.mark.parametrize("dtype_ordered", [True, False])
226-
@pytest.mark.parametrize("index_ordered", [True, False])
227-
def test_astype_category(self, name, dtype_ordered, index_ordered):
228-
# GH 18630
229-
index = self.create_index(ordered=index_ordered)
230-
if name:
231-
index = index.rename(name)
232-
233-
# standard categories
234-
dtype = CategoricalDtype(ordered=dtype_ordered)
235-
result = index.astype(dtype)
236-
expected = CategoricalIndex(
237-
index.tolist(),
238-
name=name,
239-
categories=index.categories,
240-
ordered=dtype_ordered,
241-
)
242-
tm.assert_index_equal(result, expected)
243-
244-
# non-standard categories
245-
dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
246-
result = index.astype(dtype)
247-
expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype)
248-
tm.assert_index_equal(result, expected)
249-
250-
if dtype_ordered is False:
251-
# dtype='category' can't specify ordered, so only test once
252-
result = index.astype("category")
253-
expected = index
254-
tm.assert_index_equal(result, expected)
255-
256197
@pytest.mark.parametrize(
257198
"data, non_lexsorted_data",
258199
[[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],

pandas/tests/indexes/datetimes/test_astype.py

+15-29
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
Int64Index,
1313
NaT,
1414
PeriodIndex,
15-
Series,
1615
Timestamp,
1716
date_range,
1817
)
@@ -65,38 +64,22 @@ def test_astype_with_tz(self):
6564
)
6665
tm.assert_index_equal(result, expected)
6766

68-
# BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex
69-
result = pd.Series(pd.date_range("2012-01-01", periods=3)).astype(str)
70-
expected = pd.Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
71-
tm.assert_series_equal(result, expected)
72-
73-
result = Series(pd.date_range("2012-01-01", periods=3, tz="US/Eastern")).astype(
74-
str
75-
)
76-
expected = Series(
77-
[
78-
"2012-01-01 00:00:00-05:00",
79-
"2012-01-02 00:00:00-05:00",
80-
"2012-01-03 00:00:00-05:00",
81-
],
82-
dtype=object,
83-
)
84-
tm.assert_series_equal(result, expected)
85-
67+
def test_astype_tzaware_to_tzaware(self):
8668
# GH 18951: tz-aware to tz-aware
8769
idx = date_range("20170101", periods=4, tz="US/Pacific")
8870
result = idx.astype("datetime64[ns, US/Eastern]")
8971
expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern")
9072
tm.assert_index_equal(result, expected)
9173
assert result.freq == expected.freq
9274

75+
def test_astype_tznaive_to_tzaware(self):
9376
# GH 18951: tz-naive to tz-aware
9477
idx = date_range("20170101", periods=4)
9578
result = idx.astype("datetime64[ns, US/Eastern]")
9679
expected = date_range("20170101", periods=4, tz="US/Eastern")
9780
tm.assert_index_equal(result, expected)
9881

99-
def test_astype_str_compat(self):
82+
def test_astype_str_nat(self):
10083
# GH 13149, GH 13209
10184
# verify that we are returning NaT as a string (and not unicode)
10285

@@ -107,18 +90,19 @@ def test_astype_str_compat(self):
10790

10891
def test_astype_str(self):
10992
# test astype string - #10442
110-
result = date_range("2012-01-01", periods=4, name="test_name").astype(str)
93+
dti = date_range("2012-01-01", periods=4, name="test_name")
94+
result = dti.astype(str)
11195
expected = Index(
11296
["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"],
11397
name="test_name",
11498
dtype=object,
11599
)
116100
tm.assert_index_equal(result, expected)
117101

102+
def test_astype_str_tz_and_name(self):
118103
# test astype string with tz and name
119-
result = date_range(
120-
"2012-01-01", periods=3, name="test_name", tz="US/Eastern"
121-
).astype(str)
104+
dti = date_range("2012-01-01", periods=3, name="test_name", tz="US/Eastern")
105+
result = dti.astype(str)
122106
expected = Index(
123107
[
124108
"2012-01-01 00:00:00-05:00",
@@ -130,21 +114,23 @@ def test_astype_str(self):
130114
)
131115
tm.assert_index_equal(result, expected)
132116

117+
def test_astype_str_freq_and_name(self):
133118
# test astype string with freqH and name
134-
result = date_range("1/1/2011", periods=3, freq="H", name="test_name").astype(
135-
str
136-
)
119+
dti = date_range("1/1/2011", periods=3, freq="H", name="test_name")
120+
result = dti.astype(str)
137121
expected = Index(
138122
["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"],
139123
name="test_name",
140124
dtype=object,
141125
)
142126
tm.assert_index_equal(result, expected)
143127

128+
def test_astype_str_freq_and_tz(self):
144129
# test astype string with freqH and timezone
145-
result = date_range(
130+
dti = date_range(
146131
"3/6/2012 00:00", periods=2, freq="H", tz="Europe/London", name="test_name"
147-
).astype(str)
132+
)
133+
result = dti.astype(str)
148134
expected = Index(
149135
["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"],
150136
dtype=object,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import re
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas.core.dtypes.common import pandas_dtype
7+
8+
from pandas import Float64Index, Index, Int64Index
9+
import pandas._testing as tm
10+
11+
12+
class TestAstype:
13+
def test_astype_float64_to_object(self):
14+
float_index = Float64Index([0.0, 2.5, 5.0, 7.5, 10.0])
15+
result = float_index.astype(object)
16+
assert result.equals(float_index)
17+
assert float_index.equals(result)
18+
assert isinstance(result, Index) and not isinstance(result, Float64Index)
19+
20+
def test_astype_float64_mixed_to_object(self):
21+
# mixed int-float
22+
idx = Float64Index([1.5, 2, 3, 4, 5])
23+
idx.name = "foo"
24+
result = idx.astype(object)
25+
assert result.equals(idx)
26+
assert idx.equals(result)
27+
assert isinstance(result, Index) and not isinstance(result, Float64Index)
28+
29+
@pytest.mark.parametrize("dtype", ["int16", "int32", "int64"])
30+
def test_astype_float64_to_int_dtype(self, dtype):
31+
# GH#12881
32+
# a float astype int
33+
idx = Float64Index([0, 1, 2])
34+
result = idx.astype(dtype)
35+
expected = Int64Index([0, 1, 2])
36+
tm.assert_index_equal(result, expected)
37+
38+
idx = Float64Index([0, 1.1, 2])
39+
result = idx.astype(dtype)
40+
expected = Int64Index([0, 1, 2])
41+
tm.assert_index_equal(result, expected)
42+
43+
@pytest.mark.parametrize("dtype", ["float32", "float64"])
44+
def test_astype_float64_to_float_dtype(self, dtype):
45+
# GH#12881
46+
# a float astype int
47+
idx = Float64Index([0, 1, 2])
48+
result = idx.astype(dtype)
49+
expected = idx
50+
tm.assert_index_equal(result, expected)
51+
52+
idx = Float64Index([0, 1.1, 2])
53+
result = idx.astype(dtype)
54+
expected = Index(idx.values.astype(dtype))
55+
tm.assert_index_equal(result, expected)
56+
57+
@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
58+
def test_cannot_cast_to_datetimelike(self, dtype):
59+
idx = Float64Index([0, 1.1, 2])
60+
61+
msg = (
62+
f"Cannot convert Float64Index to dtype {pandas_dtype(dtype)}; "
63+
f"integer values are required for conversion"
64+
)
65+
with pytest.raises(TypeError, match=re.escape(msg)):
66+
idx.astype(dtype)
67+
68+
@pytest.mark.parametrize("dtype", [int, "int16", "int32", "int64"])
69+
@pytest.mark.parametrize("non_finite", [np.inf, np.nan])
70+
def test_cannot_cast_inf_to_int(self, non_finite, dtype):
71+
# GH#13149
72+
idx = Float64Index([1, 2, non_finite])
73+
74+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
75+
with pytest.raises(ValueError, match=msg):
76+
idx.astype(dtype)
77+
78+
def test_astype_from_object(self):
79+
index = Index([1.0, np.nan, 0.2], dtype="object")
80+
result = index.astype(float)
81+
expected = Float64Index([1.0, np.nan, 0.2])
82+
assert result.dtype == expected.dtype
83+
tm.assert_index_equal(result, expected)

0 commit comments

Comments
 (0)