From dfca8008f3ab5b5be1b9d6641ca4c631131e96a3 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 Feb 2021 13:01:16 -0800 Subject: [PATCH 1/2] TST: collect tests by method --- pandas/tests/frame/methods/test_astype.py | 29 +++++ pandas/tests/series/methods/test_astype.py | 61 ++++++++++ pandas/tests/series/methods/test_reindex.py | 11 ++ pandas/tests/series/test_constructors.py | 9 ++ pandas/tests/series/test_dtypes.py | 121 -------------------- 5 files changed, 110 insertions(+), 121 deletions(-) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 35e958ff3a2b1..8c11f659e8454 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -648,3 +648,32 @@ def test_astype_bytes(self): # GH#39474 result = DataFrame(["foo", "bar", "baz"]).astype(bytes) assert result.dtypes[0] == np.dtype("S3") + + +class TestAstypeCategorical: + def test_astype_from_categorical3(self): + df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]}) + cats = Categorical([1, 2, 3, 4, 5, 6]) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") + tm.assert_frame_equal(exp_df, df) + + def test_astype_from_categorical4(self): + df = DataFrame( + {"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]} + ) + cats = Categorical(["a", "b", "b", "a", "a", "d"]) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") + tm.assert_frame_equal(exp_df, df) + + def test_categorical_astype_to_int(self, any_int_or_nullable_int_dtype): + # GH#39402 + + df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])}) + df.col1 = df.col1.astype("category") + df.col1 = df.col1.astype(any_int_or_nullable_int_dtype) + expected = DataFrame( + {"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)} + ) + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index a3785518c860d..4e068690c41e5 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -351,6 +351,48 @@ def test_astype_bytes(self): class TestAstypeCategorical: + def test_astype_categorical_to_other(self): + cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) + ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values() + ser = cut(ser, range(0, 10500, 500), right=False, labels=cat) + + expected = ser + tm.assert_series_equal(ser.astype("category"), expected) + tm.assert_series_equal(ser.astype(CategoricalDtype()), expected) + msg = r"Cannot cast object dtype to float64" + with pytest.raises(ValueError, match=msg): + ser.astype("float64") + + cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])) + exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"]) + tm.assert_series_equal(cat.astype("str"), exp) + s2 = Series(Categorical(["1", "2", "3", "4"])) + exp2 = Series([1, 2, 3, 4]).astype("int") + tm.assert_series_equal(s2.astype("int"), exp2) + + # object don't sort correctly, so just compare that we have the same + # values + def cmp(a, b): + tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b))) + + expected = Series(np.array(ser.values), name="value_group") + cmp(ser.astype("object"), expected) + cmp(ser.astype(np.object_), expected) + + # array conversion + tm.assert_almost_equal(np.array(ser), np.array(ser.values)) + + tm.assert_series_equal(ser.astype("category"), ser) + tm.assert_series_equal(ser.astype(CategoricalDtype()), ser) + + roundtrip_expected = ser.cat.set_categories( + ser.cat.categories.sort_values() + ).cat.remove_unused_categories() + result = ser.astype("object").astype("category") + tm.assert_series_equal(result, roundtrip_expected) + result = ser.astype("object").astype(CategoricalDtype()) + tm.assert_series_equal(result, roundtrip_expected) + def test_astype_categorical_invalid_conversions(self): # invalid conversion (these are NOT a dtype) cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) @@ -427,3 +469,22 @@ def test_astype_categories_raises(self): s = Series(["a", "b", "a"]) with pytest.raises(TypeError, match="got an unexpected"): s.astype("category", categories=["a", "b"], ordered=True) + + @pytest.mark.parametrize("items", [["a", "b", "c", "a"], [1, 2, 3, 1]]) + def test_astype_from_categorical(self, items): + ser = Series(items) + exp = Series(Categorical(items)) + res = ser.astype("category") + tm.assert_series_equal(res, exp) + + def test_astype_from_categorical_with_keywords(self): + # with keywords + lst = ["a", "b", "c", "a"] + ser = Series(lst) + exp = Series(Categorical(lst, ordered=True)) + res = ser.astype(CategoricalDtype(None, ordered=True)) + tm.assert_series_equal(res, exp) + + exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True)) + res = ser.astype(CategoricalDtype(list("abcdef"), ordered=True)) + tm.assert_series_equal(res, exp) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index ecf122679f7ca..8e54cbeb313c4 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -237,6 +237,17 @@ def test_reindex_categorical(): tm.assert_series_equal(result, expected) +def test_reindex_astype_order_consistency(): + # GH#17444 + ser = Series([1, 2, 3], index=[2, 0, 1]) + new_index = [0, 1, 2] + temp_dtype = "category" + new_dtype = str + result = ser.reindex(new_index).astype(temp_dtype).astype(new_dtype) + expected = ser.astype(temp_dtype).reindex(new_index).astype(new_dtype) + tm.assert_series_equal(result, expected) + + def test_reindex_fill_value(): # ----------------------------------------------------------- # floats diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 6cd2a1dd180c1..c2d0bf5975059 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -403,6 +403,15 @@ def test_constructor_categorical_with_coercion(self): result = x.person_name.loc[0] assert result == expected + def test_constructor_series_to_categorical(self): + # see GH#16524: test conversion of Series to Categorical + series = Series(["a", "b", "c"]) + + result = Series(series, dtype="category") + expected = Series(["a", "b", "c"], dtype="category") + + tm.assert_series_equal(result, expected) + def test_constructor_categorical_dtype(self): result = Series( ["a", "b"], dtype=CategoricalDtype(["a", "b", "c"], ordered=True) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 9121a5a5b6b82..abc0e5d13aaf7 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -1,15 +1,4 @@ import numpy as np -import pytest - -from pandas.core.dtypes.dtypes import CategoricalDtype - -import pandas as pd -from pandas import ( - Categorical, - DataFrame, - Series, -) -import pandas._testing as tm class TestSeriesDtypes: @@ -17,113 +6,3 @@ def test_dtype(self, datetime_series): assert datetime_series.dtype == np.dtype("float64") assert datetime_series.dtypes == np.dtype("float64") - - def test_astype_from_categorical(self): - items = ["a", "b", "c", "a"] - s = Series(items) - exp = Series(Categorical(items)) - res = s.astype("category") - tm.assert_series_equal(res, exp) - - items = [1, 2, 3, 1] - s = Series(items) - exp = Series(Categorical(items)) - res = s.astype("category") - tm.assert_series_equal(res, exp) - - df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]}) - cats = Categorical([1, 2, 3, 4, 5, 6]) - exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) - df["cats"] = df["cats"].astype("category") - tm.assert_frame_equal(exp_df, df) - - df = DataFrame( - {"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]} - ) - cats = Categorical(["a", "b", "b", "a", "a", "d"]) - exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) - df["cats"] = df["cats"].astype("category") - tm.assert_frame_equal(exp_df, df) - - # with keywords - lst = ["a", "b", "c", "a"] - s = Series(lst) - exp = Series(Categorical(lst, ordered=True)) - res = s.astype(CategoricalDtype(None, ordered=True)) - tm.assert_series_equal(res, exp) - - exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True)) - res = s.astype(CategoricalDtype(list("abcdef"), ordered=True)) - tm.assert_series_equal(res, exp) - - def test_astype_categorical_to_other(self): - cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) - ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values() - ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat) - - expected = ser - tm.assert_series_equal(ser.astype("category"), expected) - tm.assert_series_equal(ser.astype(CategoricalDtype()), expected) - msg = r"Cannot cast object dtype to float64" - with pytest.raises(ValueError, match=msg): - ser.astype("float64") - - cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])) - exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"]) - tm.assert_series_equal(cat.astype("str"), exp) - s2 = Series(Categorical(["1", "2", "3", "4"])) - exp2 = Series([1, 2, 3, 4]).astype("int") - tm.assert_series_equal(s2.astype("int"), exp2) - - # object don't sort correctly, so just compare that we have the same - # values - def cmp(a, b): - tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b))) - - expected = Series(np.array(ser.values), name="value_group") - cmp(ser.astype("object"), expected) - cmp(ser.astype(np.object_), expected) - - # array conversion - tm.assert_almost_equal(np.array(ser), np.array(ser.values)) - - tm.assert_series_equal(ser.astype("category"), ser) - tm.assert_series_equal(ser.astype(CategoricalDtype()), ser) - - roundtrip_expected = ser.cat.set_categories( - ser.cat.categories.sort_values() - ).cat.remove_unused_categories() - result = ser.astype("object").astype("category") - tm.assert_series_equal(result, roundtrip_expected) - result = ser.astype("object").astype(CategoricalDtype()) - tm.assert_series_equal(result, roundtrip_expected) - - def test_categorical_astype_to_int(self, any_int_or_nullable_int_dtype): - # GH 39402 - - df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])}) - df.col1 = df.col1.astype("category") - df.col1 = df.col1.astype(any_int_or_nullable_int_dtype) - expected = DataFrame( - {"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)} - ) - tm.assert_frame_equal(df, expected) - - def test_series_to_categorical(self): - # see gh-16524: test conversion of Series to Categorical - series = Series(["a", "b", "c"]) - - result = Series(series, dtype="category") - expected = Series(["a", "b", "c"], dtype="category") - - tm.assert_series_equal(result, expected) - - def test_reindex_astype_order_consistency(self): - # GH 17444 - s = Series([1, 2, 3], index=[2, 0, 1]) - new_index = [0, 1, 2] - temp_dtype = "category" - new_dtype = str - s1 = s.reindex(new_index).astype(temp_dtype).astype(new_dtype) - s2 = s.astype(temp_dtype).reindex(new_index).astype(new_dtype) - tm.assert_series_equal(s1, s2) From b878689d6259a8079d2f1cf560a39e79c79ea37d Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 Feb 2021 13:02:43 -0800 Subject: [PATCH 2/2] mv test_dtypes --- pandas/tests/series/{ => methods}/test_dtypes.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pandas/tests/series/{ => methods}/test_dtypes.py (100%) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/methods/test_dtypes.py similarity index 100% rename from pandas/tests/series/test_dtypes.py rename to pandas/tests/series/methods/test_dtypes.py