Skip to content

Commit 6ac3765

Browse files
authored
REF/TST: collect astype tests (pandas-dev#37282)
1 parent 4a08c02 commit 6ac3765

File tree

5 files changed

+339
-318
lines changed

5 files changed

+339
-318
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
import pandas._testing as tm
6+
7+
8+
class TestConvertDtypes:
    """Smoke test for ``DataFrame.convert_dtypes``."""

    @pytest.mark.parametrize(
        "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
    )
    def test_convert_dtypes(self, convert_integer, expected):
        """Check the dtype of the integer column with ``convert_integer`` on/off.

        Parameters
        ----------
        convert_integer : bool
            Forwarded to ``DataFrame.convert_dtypes``.
        expected : numpy.dtype or str
            Dtype that column "a" must have in the converted frame.
        """
        # Specific types are tested in tests/series/test_dtypes.py
        # Just check that it works for DataFrame here
        df = pd.DataFrame(
            {
                "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
                "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
            }
        )
        # Keyword arguments make the four boolean switches self-describing.
        result = df.convert_dtypes(
            infer_objects=True,
            convert_string=True,
            convert_integer=convert_integer,
            convert_boolean=False,
        )
        # Use a separate name so the parametrized dtype is not shadowed.
        expected_frame = pd.DataFrame(
            {
                "a": pd.Series([1, 2, 3], dtype=expected),
                "b": pd.Series(["x", "y", "z"], dtype="string"),
            }
        )
        tm.assert_frame_equal(result, expected_frame)

pandas/tests/frame/test_dtypes.py

-21
Original file line number | Diff line number | Diff line change
@@ -245,27 +245,6 @@ def test_str_to_small_float_conversion_type(self):
245245
expected = DataFrame(col_data, columns=["A"], dtype=float)
246246
tm.assert_frame_equal(result, expected)
247247

248-
@pytest.mark.parametrize(
    "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
)
def test_convert_dtypes(self, convert_integer, expected):
    """Check the dtype of the integer column with ``convert_integer`` on/off.

    ``expected`` is the dtype column "a" must have in the converted frame.
    """
    # Specific types are tested in tests/series/test_dtypes.py
    # Just check that it works for DataFrame here
    df = DataFrame(
        {
            "a": Series([1, 2, 3], dtype=np.dtype("int32")),
            "b": Series(["x", "y", "z"], dtype=np.dtype("O")),
        }
    )
    # Keyword arguments make the four boolean switches self-describing.
    result = df.convert_dtypes(
        infer_objects=True,
        convert_string=True,
        convert_integer=convert_integer,
        convert_boolean=False,
    )
    # Use a separate name so the parametrized dtype is not shadowed.
    expected_frame = DataFrame(
        {
            "a": Series([1, 2, 3], dtype=expected),
            "b": Series(["x", "y", "z"], dtype="string"),
        }
    )
    tm.assert_frame_equal(result, expected_frame)
268-
269248

270249
class TestDataFrameDatetimeWithTZ:
271250
def test_interleave(self, timezone_frame):

pandas/tests/series/methods/test_astype.py

+287-1
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,97 @@
1+
from datetime import datetime, timedelta
2+
from importlib import reload
3+
import string
4+
import sys
5+
16
import numpy as np
27
import pytest
38

4-
from pandas import NA, Interval, Series, Timestamp, date_range
9+
from pandas._libs.tslibs import iNaT
10+
11+
from pandas import (
12+
NA,
13+
Categorical,
14+
CategoricalDtype,
15+
Index,
16+
Interval,
17+
Series,
18+
Timedelta,
19+
Timestamp,
20+
date_range,
21+
)
522
import pandas._testing as tm
623

724

25+
class TestAstypeAPI:
    """Argument handling of ``Series.astype``: ``errors`` and dict-like dtypes."""

    def test_arg_for_errors_in_astype(self):
        """A non-string ``errors`` value raises; ``errors="raise"`` is accepted."""
        # see GH#14878
        numbers = Series([1, 2, 3])

        pattern = (
            r"Expected value of kwarg 'errors' to be one of "
            r"\['raise', 'ignore'\]\. "
            r"Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=pattern):
            numbers.astype(np.float64, errors=False)

        # A valid value must go through without raising.
        numbers.astype(np.int8, errors="raise")

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        """A mapping keyed by the Series name is honoured; other keys raise."""
        # see GH#7271
        ser = Series(range(0, 10, 2), name="abc")

        as_text = ser.astype(dtype_class({"abc": str}))
        tm.assert_series_equal(as_text, Series(["0", "2", "4", "6", "8"], name="abc"))

        as_float = ser.astype(dtype_class({"abc": "float64"}))
        tm.assert_series_equal(
            as_float, Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
        )

        msg = r"Only the Series name can be used for the key in Series dtype mappings\."

        # GH#16717
        # if dtypes provided is empty, it should error
        if dtype_class is Series:
            empty_mapping = dtype_class({}, dtype=object)
        else:
            empty_mapping = dtype_class({})

        rejected = [
            dtype_class({"abc": str, "def": str}),  # extra key
            dtype_class({0: str}),  # key is not the Series name
            empty_mapping,
        ]
        for mapping in rejected:
            with pytest.raises(KeyError, match=msg):
                ser.astype(mapping)
75+
76+
877
class TestAstype:
78+
@pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
def test_astype_generic_timestamp_no_frequency(self, dtype, request):
    """Casting to a unit-less datetime64/timedelta64 dtype raises ValueError."""
    # see GH#15524, GH#15987
    ser = Series([1])

    if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
        # The dtype name is not the bare generic one, so mark this case xfail.
        mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
        request.node.add_marker(mark)

    # Both sentence-ending periods are escaped so they match literally.
    msg = (
        fr"The '{dtype.__name__}' dtype has no unit\. "
        fr"Please pass in '{dtype.__name__}\[ns\]' instead\."
    )
    with pytest.raises(ValueError, match=msg):
        ser.astype(dtype)
94+
995
def test_astype_dt64_to_str(self):
1096
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
1197
dti = date_range("2012-01-01", periods=3)
@@ -27,6 +113,87 @@ def test_astype_dt64tz_to_str(self):
27113
)
28114
tm.assert_series_equal(result, expected)
29115

116+
def test_astype_datetime(self):
117+
s = Series(iNaT, dtype="M8[ns]", index=range(5))
118+
119+
s = s.astype("O")
120+
assert s.dtype == np.object_
121+
122+
s = Series([datetime(2001, 1, 2, 0, 0)])
123+
124+
s = s.astype("O")
125+
assert s.dtype == np.object_
126+
127+
s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])
128+
129+
s[1] = np.nan
130+
assert s.dtype == "M8[ns]"
131+
132+
s = s.astype("O")
133+
assert s.dtype == np.object_
134+
135+
def test_astype_datetime64tz(self):
136+
s = Series(date_range("20130101", periods=3, tz="US/Eastern"))
137+
138+
# astype
139+
result = s.astype(object)
140+
expected = Series(s.astype(object), dtype=object)
141+
tm.assert_series_equal(result, expected)
142+
143+
result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz)
144+
tm.assert_series_equal(result, s)
145+
146+
# astype - object, preserves on construction
147+
result = Series(s.astype(object))
148+
expected = s.astype(object)
149+
tm.assert_series_equal(result, expected)
150+
151+
# astype - datetime64[ns, tz]
152+
result = Series(s.values).astype("datetime64[ns, US/Eastern]")
153+
tm.assert_series_equal(result, s)
154+
155+
result = Series(s.values).astype(s.dtype)
156+
tm.assert_series_equal(result, s)
157+
158+
result = s.astype("datetime64[ns, CET]")
159+
expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
160+
tm.assert_series_equal(result, expected)
161+
162+
def test_astype_str_cast_dt64(self):
163+
# see GH#9757
164+
ts = Series([Timestamp("2010-01-04 00:00:00")])
165+
s = ts.astype(str)
166+
167+
expected = Series(["2010-01-04"])
168+
tm.assert_series_equal(s, expected)
169+
170+
ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
171+
s = ts.astype(str)
172+
173+
expected = Series(["2010-01-04 00:00:00-05:00"])
174+
tm.assert_series_equal(s, expected)
175+
176+
def test_astype_str_cast_td64(self):
177+
# see GH#9757
178+
179+
td = Series([Timedelta(1, unit="d")])
180+
ser = td.astype(str)
181+
182+
expected = Series(["1 days"])
183+
tm.assert_series_equal(ser, expected)
184+
185+
def test_dt64_series_astype_object(self):
186+
dt64ser = Series(date_range("20130101", periods=3))
187+
result = dt64ser.astype(object)
188+
assert isinstance(result.iloc[0], datetime)
189+
assert result.dtype == np.object_
190+
191+
def test_td64_series_astype_object(self):
192+
tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
193+
result = tdser.astype(object)
194+
assert isinstance(result.iloc[0], timedelta)
195+
assert result.dtype == np.object_
196+
30197
@pytest.mark.parametrize(
31198
"values",
32199
[
@@ -70,3 +237,122 @@ def test_astype_to_str_preserves_na(self, value, string_value):
70237
result = s.astype(str)
71238
expected = Series(["a", "b", string_value], dtype=object)
72239
tm.assert_series_equal(result, expected)
240+
241+
@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
def test_astype(self, dtype):
    """The cast result has the requested dtype and keeps the Series name."""
    original = Series(np.random.randn(5), name="foo")
    converted = original.astype(dtype)

    assert converted.dtype == dtype
    assert converted.name == original.name
248+
249+
@pytest.mark.parametrize("value", [np.nan, np.inf])
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
def test_astype_cast_nan_inf_int(self, dtype, value):
    """Casting NaN or inf to an integer dtype raises ``ValueError``."""
    # gh-14265: check NaN and inf raise error when converting to int
    non_finite = Series([value])
    msg = r"Cannot convert non-finite values \(NA or inf\) to integer"

    with pytest.raises(ValueError, match=msg):
        non_finite.astype(dtype)
258+
259+
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
def test_astype_cast_object_int_fail(self, dtype):
    """Non-numeric strings cannot be cast to an integer dtype."""
    words = Series(["car", "house", "tree", "1"])
    with pytest.raises(
        ValueError, match=r"invalid literal for int\(\) with base 10: 'car'"
    ):
        words.astype(dtype)
265+
266+
def test_astype_cast_object_int(self):
267+
arr = Series(["1", "2", "3", "4"], dtype=object)
268+
result = arr.astype(int)
269+
270+
tm.assert_series_equal(result, Series(np.arange(1, 5)))
271+
272+
def test_astype_unicode(self):
273+
# see GH#7758: A bit of magic is required to set
274+
# default encoding to utf-8
275+
digits = string.digits
276+
test_series = [
277+
Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
278+
Series(["データーサイエンス、お前はもう死んでいる"]),
279+
]
280+
281+
former_encoding = None
282+
283+
if sys.getdefaultencoding() == "utf-8":
284+
test_series.append(Series(["野菜食べないとやばい".encode()]))
285+
286+
for s in test_series:
287+
res = s.astype("unicode")
288+
expec = s.map(str)
289+
tm.assert_series_equal(res, expec)
290+
291+
# Restore the former encoding
292+
if former_encoding is not None and former_encoding != "utf-8":
293+
reload(sys)
294+
sys.setdefaultencoding(former_encoding)
295+
296+
297+
class TestAstypeCategorical:
    """``Series.astype`` with categorical target dtypes."""

    def test_astype_categoricaldtype(self):
        """Cast strings to ``CategoricalDtype`` with matching and wider categories."""
        ser = Series(["a", "b", "a"])

        for ordered in (True, False):
            result = ser.astype(CategoricalDtype(["a", "b"], ordered=ordered))
            expected = Series(Categorical(["a", "b", "a"], ordered=ordered))
            tm.assert_series_equal(result, expected)

        # Target dtype declares a category that does not occur in the data.
        result = ser.astype(CategoricalDtype(["a", "b", "c"], ordered=False))
        wide_cat = Categorical(
            ["a", "b", "a"], categories=["a", "b", "c"], ordered=False
        )
        tm.assert_series_equal(result, Series(wide_cat))
        tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        """Categorical-to-categorical casts over all ordered/name combinations."""
        # GH#10696, GH#18593
        values = list("abcaacbab")
        source_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        ser = Series(values, dtype=source_dtype, name=name)

        # unspecified categories
        result = ser.astype(CategoricalDtype(ordered=dtype_ordered))
        inherited = CategoricalDtype(source_dtype.categories, dtype_ordered)
        tm.assert_series_equal(result, Series(values, name=name, dtype=inherited))

        # different categories
        target = CategoricalDtype(list("adc"), dtype_ordered)
        result = ser.astype(target)
        tm.assert_series_equal(result, Series(values, name=name, dtype=target))

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            tm.assert_series_equal(ser.astype("category"), ser)

    def test_astype_bool_missing_to_categorical(self):
        """An object Series of booleans plus NaN casts to boolean categories."""
        # GH-19182
        ser = Series([True, False, np.nan])
        assert ser.dtypes == np.object_

        result = ser.astype(CategoricalDtype(categories=[True, False]))
        expected_values = Categorical([True, False, np.nan], categories=[True, False])
        tm.assert_series_equal(result, Series(expected_values))

    def test_astype_categories_raises(self):
        """Passing ``categories``/``ordered`` keywords to ``astype`` raises."""
        # deprecated GH#17636, removed in GH#27141
        ser = Series(["a", "b", "a"])
        with pytest.raises(TypeError, match="got an unexpected"):
            ser.astype("category", categories=["a", "b"], ordered=True)
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
import numpy as np
2+
3+
from pandas import Series
4+
import pandas._testing as tm
5+
6+
7+
class TestInferObjects:
    """Checks for ``Series.infer_objects``."""

    def test_infer_objects_series(self):
        """Object arrays are soft-converted; mixed content stays object dtype."""
        # GH#11221
        ints = np.array([1, 2, 3], dtype="O")
        tm.assert_series_equal(Series(ints).infer_objects(), Series([1, 2, 3]))

        ints_and_none = np.array([1, 2, 3, None], dtype="O")
        tm.assert_series_equal(
            Series(ints_and_none).infer_objects(), Series([1.0, 2.0, 3.0, np.nan])
        )

        # only soft conversions; values that cannot be converted pass
        # through unchanged
        mixed = Series(np.array([1, 2, 3, None, "a"], dtype="O")).infer_objects()
        expected = Series([1, 2, 3, None, "a"])

        assert mixed.dtype == "object"
        tm.assert_series_equal(mixed, expected)

0 commit comments

Comments
 (0)