Skip to content

REF/TST: collect astype tests #37282

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


class TestConvertDtypes:
    # Smoke test: DataFrame.convert_dtypes works column-wise on a small frame.

    @pytest.mark.parametrize(
        "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
    )
    def test_convert_dtypes(self, convert_integer, expected):
        # Specific types are tested in tests/series/test_dtypes.py
        # Just check that it works for DataFrame here
        int_col = pd.Series([1, 2, 3], dtype=np.dtype("int32"))
        obj_col = pd.Series(["x", "y", "z"], dtype=np.dtype("O"))
        frame = pd.DataFrame({"a": int_col, "b": obj_col})

        # The parametrized flag is passed as the third positional argument.
        converted = frame.convert_dtypes(True, True, convert_integer, False)

        # "a" takes the parametrized dtype; "b" is expected to become "string".
        want = pd.DataFrame(
            {
                "a": pd.Series([1, 2, 3], dtype=expected),
                "b": pd.Series(["x", "y", "z"], dtype="string"),
            }
        )
        tm.assert_frame_equal(converted, want)
21 changes: 0 additions & 21 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,27 +245,6 @@ def test_str_to_small_float_conversion_type(self):
expected = DataFrame(col_data, columns=["A"], dtype=float)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
)
def test_convert_dtypes(self, convert_integer, expected):
# Specific types are tested in tests/series/test_dtypes.py
# Just check that it works for DataFrame here
df = DataFrame(
{
"a": Series([1, 2, 3], dtype=np.dtype("int32")),
"b": Series(["x", "y", "z"], dtype=np.dtype("O")),
}
)
result = df.convert_dtypes(True, True, convert_integer, False)
expected = DataFrame(
{
"a": Series([1, 2, 3], dtype=expected),
"b": Series(["x", "y", "z"], dtype="string"),
}
)
tm.assert_frame_equal(result, expected)


class TestDataFrameDatetimeWithTZ:
def test_interleave(self, timezone_frame):
Expand Down
288 changes: 287 additions & 1 deletion pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,97 @@
from datetime import datetime, timedelta
from importlib import reload
import string
import sys

import numpy as np
import pytest

from pandas import NA, Interval, Series, Timestamp, date_range
from pandas._libs.tslibs import iNaT

from pandas import (
NA,
Categorical,
CategoricalDtype,
Index,
Interval,
Series,
Timedelta,
Timestamp,
date_range,
)
import pandas._testing as tm


class TestAstypeAPI:
    # Argument handling of Series.astype: the ``errors`` kwarg and dict-like dtypes.

    def test_arg_for_errors_in_astype(self):
        # see GH#14878
        values = Series([1, 2, 3])

        expected_msg = (
            r"Expected value of kwarg 'errors' to be one of \['raise', "
            r"'ignore'\]\. Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=expected_msg):
            values.astype(np.float64, errors=False)

        # A valid ``errors`` value must go through without raising.
        values.astype(np.int8, errors="raise")

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        # see GH#7271
        ser = Series(range(0, 10, 2), name="abc")

        # Mappings keyed by the Series name are honoured.
        as_str = ser.astype(dtype_class({"abc": str}))
        tm.assert_series_equal(as_str, Series(["0", "2", "4", "6", "8"], name="abc"))

        as_float = ser.astype(dtype_class({"abc": "float64"}))
        tm.assert_series_equal(
            as_float, Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
        )

        msg = (
            "Only the Series name can be used for the key in Series dtype "
            r"mappings\."
        )

        # GH#16717
        # if dtypes provided is empty, it should error
        if dtype_class is Series:
            empty_mapping = dtype_class({}, dtype=object)
        else:
            empty_mapping = dtype_class({})

        # Extra key, wrong key, and no key at all are all rejected the same way.
        rejected = [
            dtype_class({"abc": str, "def": str}),
            dtype_class({0: str}),
            empty_mapping,
        ]
        for mapping in rejected:
            with pytest.raises(KeyError, match=msg):
                ser.astype(mapping)


class TestAstype:
@pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
def test_astype_generic_timestamp_no_frequency(self, dtype, request):
# see GH#15524, GH#15987
data = [1]
s = Series(data)

if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
request.node.add_marker(mark)

msg = (
fr"The '{dtype.__name__}' dtype has no unit\. "
fr"Please pass in '{dtype.__name__}\[ns\]' instead."
)
with pytest.raises(ValueError, match=msg):
s.astype(dtype)

def test_astype_dt64_to_str(self):
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
dti = date_range("2012-01-01", periods=3)
Expand All @@ -27,6 +113,87 @@ def test_astype_dt64tz_to_str(self):
)
tm.assert_series_equal(result, expected)

def test_astype_datetime(self):
s = Series(iNaT, dtype="M8[ns]", index=range(5))

s = s.astype("O")
assert s.dtype == np.object_

s = Series([datetime(2001, 1, 2, 0, 0)])

s = s.astype("O")
assert s.dtype == np.object_

s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])

s[1] = np.nan
assert s.dtype == "M8[ns]"

s = s.astype("O")
assert s.dtype == np.object_

def test_astype_datetime64tz(self):
s = Series(date_range("20130101", periods=3, tz="US/Eastern"))

# astype
result = s.astype(object)
expected = Series(s.astype(object), dtype=object)
tm.assert_series_equal(result, expected)

result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz)
tm.assert_series_equal(result, s)

# astype - object, preserves on construction
result = Series(s.astype(object))
expected = s.astype(object)
tm.assert_series_equal(result, expected)

# astype - datetime64[ns, tz]
result = Series(s.values).astype("datetime64[ns, US/Eastern]")
tm.assert_series_equal(result, s)

result = Series(s.values).astype(s.dtype)
tm.assert_series_equal(result, s)

result = s.astype("datetime64[ns, CET]")
expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
tm.assert_series_equal(result, expected)

def test_astype_str_cast_dt64(self):
# see GH#9757
ts = Series([Timestamp("2010-01-04 00:00:00")])
s = ts.astype(str)

expected = Series(["2010-01-04"])
tm.assert_series_equal(s, expected)

ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
s = ts.astype(str)

expected = Series(["2010-01-04 00:00:00-05:00"])
tm.assert_series_equal(s, expected)

def test_astype_str_cast_td64(self):
# see GH#9757

td = Series([Timedelta(1, unit="d")])
ser = td.astype(str)

expected = Series(["1 days"])
tm.assert_series_equal(ser, expected)

def test_dt64_series_astype_object(self):
dt64ser = Series(date_range("20130101", periods=3))
result = dt64ser.astype(object)
assert isinstance(result.iloc[0], datetime)
assert result.dtype == np.object_

def test_td64_series_astype_object(self):
tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
result = tdser.astype(object)
assert isinstance(result.iloc[0], timedelta)
assert result.dtype == np.object_

@pytest.mark.parametrize(
"values",
[
Expand Down Expand Up @@ -70,3 +237,122 @@ def test_astype_to_str_preserves_na(self, value, string_value):
result = s.astype(str)
expected = Series(["a", "b", string_value], dtype=object)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
def test_astype(self, dtype):
s = Series(np.random.randn(5), name="foo")
as_typed = s.astype(dtype)

assert as_typed.dtype == dtype
assert as_typed.name == s.name

@pytest.mark.parametrize("value", [np.nan, np.inf])
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
def test_astype_cast_nan_inf_int(self, dtype, value):
# gh-14265: check NaN and inf raise error when converting to int
msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
s = Series([value])

with pytest.raises(ValueError, match=msg):
s.astype(dtype)

@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
def test_astype_cast_object_int_fail(self, dtype):
arr = Series(["car", "house", "tree", "1"])
msg = r"invalid literal for int\(\) with base 10: 'car'"
with pytest.raises(ValueError, match=msg):
arr.astype(dtype)

def test_astype_cast_object_int(self):
arr = Series(["1", "2", "3", "4"], dtype=object)
result = arr.astype(int)

tm.assert_series_equal(result, Series(np.arange(1, 5)))

def test_astype_unicode(self):
# see GH#7758: A bit of magic is required to set
# default encoding to utf-8
digits = string.digits
test_series = [
Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
Series(["データーサイエンス、お前はもう死んでいる"]),
]

former_encoding = None

if sys.getdefaultencoding() == "utf-8":
test_series.append(Series(["野菜食べないとやばい".encode()]))

for s in test_series:
res = s.astype("unicode")
expec = s.map(str)
tm.assert_series_equal(res, expec)

# Restore the former encoding
if former_encoding is not None and former_encoding != "utf-8":
reload(sys)
sys.setdefaultencoding(former_encoding)


class TestAstypeCategorical:
    # Series.astype to, and between, categorical dtypes.

    def test_astype_categoricaldtype(self):
        ser = Series(["a", "b", "a"])

        # same categories, both ordered and unordered
        for ordered in (True, False):
            result = ser.astype(CategoricalDtype(["a", "b"], ordered=ordered))
            expected = Series(Categorical(["a", "b", "a"], ordered=ordered))
            tm.assert_series_equal(result, expected)

        # a category that does not occur in the data is still kept
        wider = CategoricalDtype(["a", "b", "c"], ordered=False)
        result = ser.astype(wider)
        expected = Series(
            Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
        )
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        # GH#10696, GH#18593
        raw = list("abcaacbab")
        source_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        ser = Series(raw, dtype=source_dtype, name=name)

        # unspecified categories
        result = ser.astype(CategoricalDtype(ordered=dtype_ordered))
        same_cats = CategoricalDtype(source_dtype.categories, dtype_ordered)
        tm.assert_series_equal(result, Series(raw, name=name, dtype=same_cats))

        # different categories
        other_cats = CategoricalDtype(list("adc"), dtype_ordered)
        result = ser.astype(other_cats)
        tm.assert_series_equal(result, Series(raw, name=name, dtype=other_cats))

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            tm.assert_series_equal(ser.astype("category"), ser)

    def test_astype_bool_missing_to_categorical(self):
        # GH-19182
        ser = Series([True, False, np.nan])
        assert ser.dtypes == np.object_

        target = CategoricalDtype(categories=[True, False])
        result = ser.astype(target)

        expected = Series(Categorical([True, False, np.nan], categories=[True, False]))
        tm.assert_series_equal(result, expected)

    def test_astype_categories_raises(self):
        # deprecated GH#17636, removed in GH#27141
        ser = Series(["a", "b", "a"])
        with pytest.raises(TypeError, match="got an unexpected"):
            ser.astype("category", categories=["a", "b"], ordered=True)
23 changes: 23 additions & 0 deletions pandas/tests/series/methods/test_infer_objects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import numpy as np

from pandas import Series
import pandas._testing as tm


class TestInferObjects:
    # Series.infer_objects on object-dtype input.

    def test_infer_objects_series(self):
        # GH#11221
        # all integers: expected to equal a plain integer Series
        all_ints = Series(np.array([1, 2, 3], dtype="O")).infer_objects()
        tm.assert_series_equal(all_ints, Series([1, 2, 3]))

        # integers plus None: expected to equal a float Series with NaN
        with_none = Series(np.array([1, 2, 3, None], dtype="O")).infer_objects()
        tm.assert_series_equal(with_none, Series([1.0, 2.0, 3.0, np.nan]))

        # only soft conversions; unconvertible values pass through unchanged
        mixed = Series(np.array([1, 2, 3, None, "a"], dtype="O")).infer_objects()
        want_mixed = Series([1, 2, 3, None, "a"])

        assert mixed.dtype == "object"
        tm.assert_series_equal(mixed, want_mixed)
Loading