Skip to content

REF: implement test_astype #33734

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
562 changes: 562 additions & 0 deletions pandas/tests/frame/methods/test_astype.py

Large diffs are not rendered by default.

546 changes: 2 additions & 544 deletions pandas/tests/frame/test_dtypes.py

Large diffs are not rendered by default.

66 changes: 66 additions & 0 deletions pandas/tests/indexes/categorical/test_astype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import numpy as np
import pytest

from pandas import Categorical, CategoricalDtype, CategoricalIndex, Index, IntervalIndex
import pandas._testing as tm


class TestAstype:
    """Tests for ``CategoricalIndex.astype``."""

    def test_astype(self):
        """Cast a CategoricalIndex to object and to "interval".

        The object cast must give a plain ``Index`` that still compares equal
        to the source; the interval cast must give back the interval
        categories, with the ``-1`` code showing up as a missing entry.
        """
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = ci.astype(object)
        tm.assert_index_equal(result, Index(np.array(ci)))

        # this IS equal, but not the same class
        assert result.equals(ci)
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

        # interval
        ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right")
        codes = [0, 1, -1]

        ci = CategoricalIndex(
            Categorical.from_codes(codes, categories=ii, ordered=True)
        )

        result = ci.astype("interval")
        # A code of -1 is a missing value in the categorical. Ask take() for
        # the NA fill explicitly: with fill_value left as None a bare -1 is an
        # ordinary "last element" position, which is not what we expect here.
        expected = ii.take(codes, allow_fill=True, fill_value=np.nan)
        tm.assert_index_equal(result, expected)

        # round-trip through the underlying values
        result = IntervalIndex(result.values)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("index_ordered", [True, False])
    def test_astype_category(self, name, dtype_ordered, index_ordered):
        """GH#18630: cast a CategoricalIndex to another categorical dtype.

        Parameters
        ----------
        name : str or None
            Name given to the index under test (left unnamed when falsy).
        dtype_ordered : bool
            ``ordered`` flag of the target ``CategoricalDtype``.
        index_ordered : bool
            ``ordered`` flag of the index being cast.
        """
        index = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=index_ordered
        )
        if name:
            index = index.rename(name)

        # standard categories: the dtype only carries `ordered`, so the
        # expected result keeps the categories of the source index
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(
            index.tolist(),
            name=name,
            categories=index.categories,
            ordered=dtype_ordered,
        )
        tm.assert_index_equal(result, expected)

        # non-standard categories: drop the last unique value from the
        # target categories
        dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype)
        tm.assert_index_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = index.astype("category")
            expected = index
            tm.assert_index_equal(result, expected)
61 changes: 1 addition & 60 deletions pandas/tests/indexes/categorical/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@

from pandas._libs import index as libindex

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
from pandas import Categorical, IntervalIndex
from pandas import Categorical
import pandas._testing as tm
from pandas.core.indexes.api import CategoricalIndex, Index

Expand Down Expand Up @@ -196,63 +194,6 @@ def test_delete(self):
# Either depending on NumPy version
ci.delete(10)

def test_astype(self):
    """Cast a CategoricalIndex to object and to "interval".

    The object cast must give a plain ``Index`` that still compares equal to
    the source; the interval cast must give back the interval categories,
    with the ``-1`` code showing up as a missing entry.
    """
    # NOTE(review): create_index is defined outside this hunk; presumably it
    # returns the CategoricalIndex fixture for this test class -- confirm.
    ci = self.create_index()
    result = ci.astype(object)
    tm.assert_index_equal(result, Index(np.array(ci)))

    # this IS equal, but not the same class
    assert result.equals(ci)
    assert isinstance(result, Index)
    assert not isinstance(result, CategoricalIndex)

    # interval
    ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right")
    codes = [0, 1, -1]

    ci = CategoricalIndex(
        Categorical.from_codes(codes, categories=ii, ordered=True)
    )

    result = ci.astype("interval")
    # A code of -1 is a missing value in the categorical. Ask take() for the
    # NA fill explicitly: with fill_value left as None a bare -1 is an
    # ordinary "last element" position, which is not what we expect here.
    expected = ii.take(codes, allow_fill=True, fill_value=np.nan)
    tm.assert_index_equal(result, expected)

    # round-trip through the underlying values
    result = IntervalIndex(result.values)
    tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("name", [None, "foo"])
@pytest.mark.parametrize("dtype_ordered", [True, False])
@pytest.mark.parametrize("index_ordered", [True, False])
def test_astype_category(self, name, dtype_ordered, index_ordered):
    """GH 18630: cast the index fixture to several categorical dtypes.

    ``name`` is applied to the index when truthy, ``index_ordered`` is
    forwarded to ``create_index`` and ``dtype_ordered`` is the ``ordered``
    flag of the target dtype.
    """
    source = self.create_index(ordered=index_ordered)
    if name:
        source = source.rename(name)
    values = source.tolist()

    # standard categories
    plain_dtype = CategoricalDtype(ordered=dtype_ordered)
    expected = CategoricalIndex(
        values, name=name, categories=source.categories, ordered=dtype_ordered
    )
    tm.assert_index_equal(source.astype(plain_dtype), expected)

    # non-standard categories
    trimmed_categories = source.unique().tolist()[:-1]
    trimmed_dtype = CategoricalDtype(trimmed_categories, dtype_ordered)
    expected = CategoricalIndex(values, name=name, dtype=trimmed_dtype)
    tm.assert_index_equal(source.astype(trimmed_dtype), expected)

    # dtype='category' can't specify ordered, so only test once
    if dtype_ordered is False:
        tm.assert_index_equal(source.astype("category"), source)

@pytest.mark.parametrize(
"data, non_lexsorted_data",
[[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
Expand Down
44 changes: 15 additions & 29 deletions pandas/tests/indexes/datetimes/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
Int64Index,
NaT,
PeriodIndex,
Series,
Timestamp,
date_range,
)
Expand Down Expand Up @@ -65,37 +64,21 @@ def test_astype_with_tz(self):
)
tm.assert_index_equal(result, expected)

# BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex
result = pd.Series(pd.date_range("2012-01-01", periods=3)).astype(str)
expected = pd.Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
tm.assert_series_equal(result, expected)

result = Series(pd.date_range("2012-01-01", periods=3, tz="US/Eastern")).astype(
str
)
expected = Series(
[
"2012-01-01 00:00:00-05:00",
"2012-01-02 00:00:00-05:00",
"2012-01-03 00:00:00-05:00",
],
dtype=object,
)
tm.assert_series_equal(result, expected)

def test_astype_tzaware_to_tzaware(self):
# GH 18951: tz-aware to tz-aware
idx = date_range("20170101", periods=4, tz="US/Pacific")
result = idx.astype("datetime64[ns, US/Eastern]")
expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern")
tm.assert_index_equal(result, expected)

def test_astype_tznaive_to_tzaware(self):
# GH 18951: tz-naive to tz-aware
idx = date_range("20170101", periods=4)
result = idx.astype("datetime64[ns, US/Eastern]")
expected = date_range("20170101", periods=4, tz="US/Eastern")
tm.assert_index_equal(result, expected)

def test_astype_str_compat(self):
def test_astype_str_nat(self):
# GH 13149, GH 13209
# verify that we are returning NaT as a string (and not unicode)

Expand All @@ -106,18 +89,19 @@ def test_astype_str_compat(self):

def test_astype_str(self):
# test astype string - #10442
result = date_range("2012-01-01", periods=4, name="test_name").astype(str)
dti = date_range("2012-01-01", periods=4, name="test_name")
result = dti.astype(str)
expected = Index(
["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"],
name="test_name",
dtype=object,
)
tm.assert_index_equal(result, expected)

def test_astype_str_tz_and_name(self):
# test astype string with tz and name
result = date_range(
"2012-01-01", periods=3, name="test_name", tz="US/Eastern"
).astype(str)
dti = date_range("2012-01-01", periods=3, name="test_name", tz="US/Eastern")
result = dti.astype(str)
expected = Index(
[
"2012-01-01 00:00:00-05:00",
Expand All @@ -129,21 +113,23 @@ def test_astype_str(self):
)
tm.assert_index_equal(result, expected)

def test_astype_str_freq_and_name(self):
# test astype string with freqH and name
result = date_range("1/1/2011", periods=3, freq="H", name="test_name").astype(
str
)
dti = date_range("1/1/2011", periods=3, freq="H", name="test_name")
result = dti.astype(str)
expected = Index(
["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"],
name="test_name",
dtype=object,
)
tm.assert_index_equal(result, expected)

def test_astype_str_freq_and_tz(self):
# test astype string with freqH and timezone
result = date_range(
dti = date_range(
"3/6/2012 00:00", periods=2, freq="H", tz="Europe/London", name="test_name"
).astype(str)
)
result = dti.astype(str)
expected = Index(
["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"],
dtype=object,
Expand Down
83 changes: 83 additions & 0 deletions pandas/tests/indexes/numeric/test_astype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import re

import numpy as np
import pytest

from pandas.core.dtypes.common import pandas_dtype

from pandas import Float64Index, Index, Int64Index
import pandas._testing as tm


class TestAstype:
    """Tests for ``astype`` on Float64Index and object-dtype Index."""

    def test_astype_float64_to_object(self):
        """Casting a Float64Index to object gives an equal, plain Index."""
        float_index = Float64Index([0.0, 2.5, 5.0, 7.5, 10.0])
        result = float_index.astype(object)
        assert result.equals(float_index)
        assert float_index.equals(result)
        # separate asserts so a failure names the condition that broke
        assert isinstance(result, Index)
        assert not isinstance(result, Float64Index)

    def test_astype_float64_mixed_to_object(self):
        """Same as the object cast above, for mixed int/float input with a name."""
        # mixed int-float
        idx = Float64Index([1.5, 2, 3, 4, 5])
        idx.name = "foo"
        result = idx.astype(object)
        assert result.equals(idx)
        assert idx.equals(result)
        # separate asserts so a failure names the condition that broke
        assert isinstance(result, Index)
        assert not isinstance(result, Float64Index)

    @pytest.mark.parametrize("dtype", ["int16", "int32", "int64"])
    def test_astype_float64_to_int_dtype(self, dtype):
        """GH#12881: casting floats to an integer dtype gives an Int64Index."""
        # a float astype int
        idx = Float64Index([0, 1, 2])
        result = idx.astype(dtype)
        expected = Int64Index([0, 1, 2])
        tm.assert_index_equal(result, expected)

        # a fractional value (1.1) is expected to come back as 1
        idx = Float64Index([0, 1.1, 2])
        result = idx.astype(dtype)
        expected = Int64Index([0, 1, 2])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["float32", "float64"])
    def test_astype_float64_to_float_dtype(self, dtype):
        """GH#12881: casting floats to a float dtype keeps the values."""
        # a float astype float
        idx = Float64Index([0, 1, 2])
        result = idx.astype(dtype)
        expected = idx
        tm.assert_index_equal(result, expected)

        idx = Float64Index([0, 1.1, 2])
        result = idx.astype(dtype)
        expected = Index(idx.values.astype(dtype))
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
    def test_cannot_cast_to_datetimelike(self, dtype):
        """Casting a Float64Index to datetime64/timedelta64 raises TypeError."""
        idx = Float64Index([0, 1.1, 2])

        msg = (
            f"Cannot convert Float64Index to dtype {pandas_dtype(dtype)}; "
            "integer values are required for conversion"
        )
        # the message contains regex metacharacters, hence re.escape
        with pytest.raises(TypeError, match=re.escape(msg)):
            idx.astype(dtype)

    @pytest.mark.parametrize("dtype", [int, "int16", "int32", "int64"])
    @pytest.mark.parametrize("non_finite", [np.inf, np.nan])
    def test_cannot_cast_inf_to_int(self, non_finite, dtype):
        """GH#13149: inf or NaN cannot be cast to an integer dtype."""
        idx = Float64Index([1, 2, non_finite])

        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with pytest.raises(ValueError, match=msg):
            idx.astype(dtype)

    def test_astype_from_object(self):
        """An object-dtype Index of floats and NaN casts to a Float64Index."""
        index = Index([1.0, np.nan, 0.2], dtype="object")
        result = index.astype(float)
        expected = Float64Index([1.0, np.nan, 0.2])
        assert result.dtype == expected.dtype
        tm.assert_index_equal(result, expected)
Loading