From 35a03885a1ba8a388e7c22ec9857ce3b6b704d0f Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Dec 2020 12:38:19 -0800 Subject: [PATCH 1/3] TST/REF: collect Series tests by method --- .../tests/indexes/datetimes/test_datetime.py | 9 -- .../tests/indexes/datetimes/test_npfuncs.py | 13 +++ pandas/tests/series/methods/test_is_unique.py | 41 ++++++++ pandas/tests/series/methods/test_nunique.py | 21 ++++ pandas/tests/series/methods/test_unique.py | 49 ++++++++++ pandas/tests/series/test_duplicates.py | 97 ------------------- pandas/tests/series/test_npfuncs.py | 5 + pandas/tests/series/test_ufunc.py | 2 +- 8 files changed, 130 insertions(+), 107 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/test_npfuncs.py create mode 100644 pandas/tests/series/methods/test_is_unique.py create mode 100644 pandas/tests/series/methods/test_nunique.py create mode 100644 pandas/tests/series/methods/test_unique.py delete mode 100644 pandas/tests/series/test_duplicates.py diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 789510b452969..9352b3b907295 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -8,8 +8,6 @@ from pandas import DataFrame, DatetimeIndex, Index, Timestamp, date_range, offsets import pandas._testing as tm -randn = np.random.randn - class TestDatetimeIndex: def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): @@ -409,10 +407,3 @@ def test_to_frame_datetime_tz(self): result = idx.to_frame() expected = DataFrame(idx, index=idx) tm.assert_frame_equal(result, expected) - - def test_split_non_utc(self): - # GH 14042 - indices = date_range("2016-01-01 00:00:00+0200", freq="S", periods=10) - result = np.split(indices, indices_or_sections=[])[0] - expected = indices._with_freq(None) - tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_npfuncs.py b/pandas/tests/indexes/datetimes/test_npfuncs.py new file mode 100644 index 0000000000000..301466c0da41c --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_npfuncs.py @@ -0,0 +1,13 @@ +import numpy as np + +from pandas import date_range +import pandas._testing as tm + + +class TestSplit: + def test_split_non_utc(self): + # GH#14042 + indices = date_range("2016-01-01 00:00:00+0200", freq="S", periods=10) + result = np.split(indices, indices_or_sections=[])[0] + expected = indices._with_freq(None) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/series/methods/test_is_unique.py b/pandas/tests/series/methods/test_is_unique.py new file mode 100644 index 0000000000000..c696d365662ea --- /dev/null +++ b/pandas/tests/series/methods/test_is_unique.py @@ -0,0 +1,41 @@ +import numpy as np +import pytest + +from pandas import Series +from pandas.core.construction import create_series_with_explicit_dtype + + +@pytest.mark.parametrize( + "data, expected", + [ + (np.random.randint(0, 10, size=1000), False), + (np.arange(1000), True), + ([], True), + ([np.nan], True), + (["foo", "bar", np.nan], True), + (["foo", "foo", np.nan], False), + (["foo", "bar", np.nan, np.nan], False), + ], +) +def test_is_unique(data, expected): + # GH#11946 / GH#25180 + ser = create_series_with_explicit_dtype(data, dtype_if_empty=object) + assert ser.is_unique is expected + + +def test_is_unique_class_ne(capsys): + # GH#20661 + class Foo: + def __init__(self, val): + self._value = val + + def __ne__(self, other): + raise Exception("NEQ not supported") + + with capsys.disabled(): + li = [Foo(i) for i in range(5)] + ser = Series(li, index=list(range(5))) + + ser.is_unique + captured = capsys.readouterr() + assert len(captured.err) == 0 diff --git a/pandas/tests/series/methods/test_nunique.py b/pandas/tests/series/methods/test_nunique.py new file mode 100644 index 0000000000000..d2d94183aa21b --- /dev/null +++ b/pandas/tests/series/methods/test_nunique.py @@ -0,0 +1,21 @@ +import numpy as np + +from pandas import Categorical, Series + + +def test_nunique(): + # basics.rst doc example + series = Series(np.random.randn(500)) + series[20:500] = np.nan + series[10:20] = 5000 + result = series.nunique() + assert result == 11 + + +def test_nunique_categorical(): + # GH#18051 + ser = Series(Categorical([])) + assert ser.nunique() == 0 + + ser = Series(Categorical([np.nan])) + assert ser.nunique() == 0 diff --git a/pandas/tests/series/methods/test_unique.py b/pandas/tests/series/methods/test_unique.py new file mode 100644 index 0000000000000..b777d9ba1676a --- /dev/null +++ b/pandas/tests/series/methods/test_unique.py @@ -0,0 +1,49 @@ +import numpy as np + +from pandas import Categorical, Series +import pandas._testing as tm + + +class TestUnique: + def test_unique_data_ownership(self): + # it works! GH#1807 + Series(Series(["a", "c", "b"]).unique()).sort_values() + + def test_unique(self): + # GH#714 also, dtype=float + ser = Series([1.2345] * 100) + ser[::2] = np.nan + result = ser.unique() + assert len(result) == 2 + + # explicit f4 dtype + ser = Series([1.2345] * 100, dtype="f4") + ser[::2] = np.nan + result = ser.unique() + assert len(result) == 2 + + def test_unique_nan_object_dtype(self): + # NAs in object arrays GH#714 + ser = Series(["foo"] * 100, dtype="O") + ser[::2] = np.nan + result = ser.unique() + assert len(result) == 2 + + def test_unique_none(self): + # decision about None + ser = Series([1, 2, 3, None, None, None], dtype=object) + result = ser.unique() + expected = np.array([1, 2, 3, None], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_unique_categorical(self): + # GH#18051 + cat = Categorical([]) + ser = Series(cat) + result = ser.unique() + tm.assert_categorical_equal(result, cat) + + cat = Categorical([np.nan]) + ser = Series(cat) + result = ser.unique() + tm.assert_categorical_equal(result, cat) diff --git a/pandas/tests/series/test_duplicates.py b/pandas/tests/series/test_duplicates.py deleted file mode 100644 index 672be981fd7d3..0000000000000 --- a/pandas/tests/series/test_duplicates.py +++ /dev/null @@ -1,97 +0,0 @@ -import numpy as np -import pytest - -from pandas import Categorical, Series -import pandas._testing as tm -from pandas.core.construction import create_series_with_explicit_dtype - - -def test_nunique(): - # basics.rst doc example - series = Series(np.random.randn(500)) - series[20:500] = np.nan - series[10:20] = 5000 - result = series.nunique() - assert result == 11 - - # GH 18051 - s = Series(Categorical([])) - assert s.nunique() == 0 - s = Series(Categorical([np.nan])) - assert s.nunique() == 0 - - -def test_numpy_unique(datetime_series): - # it works! - np.unique(datetime_series) - - -def test_unique(): - # GH714 also, dtype=float - s = Series([1.2345] * 100) - s[::2] = np.nan - result = s.unique() - assert len(result) == 2 - - s = Series([1.2345] * 100, dtype="f4") - s[::2] = np.nan - result = s.unique() - assert len(result) == 2 - - # NAs in object arrays #714 - s = Series(["foo"] * 100, dtype="O") - s[::2] = np.nan - result = s.unique() - assert len(result) == 2 - - # decision about None - s = Series([1, 2, 3, None, None, None], dtype=object) - result = s.unique() - expected = np.array([1, 2, 3, None], dtype=object) - tm.assert_numpy_array_equal(result, expected) - - # GH 18051 - s = Series(Categorical([])) - tm.assert_categorical_equal(s.unique(), Categorical([])) - s = Series(Categorical([np.nan])) - tm.assert_categorical_equal(s.unique(), Categorical([np.nan])) - - -def test_unique_data_ownership(): - # it works! #1807 - Series(Series(["a", "c", "b"]).unique()).sort_values() - - -@pytest.mark.parametrize( - "data, expected", - [ - (np.random.randint(0, 10, size=1000), False), - (np.arange(1000), True), - ([], True), - ([np.nan], True), - (["foo", "bar", np.nan], True), - (["foo", "foo", np.nan], False), - (["foo", "bar", np.nan, np.nan], False), - ], -) -def test_is_unique(data, expected): - # GH11946 / GH25180 - s = create_series_with_explicit_dtype(data, dtype_if_empty=object) - assert s.is_unique is expected - - -def test_is_unique_class_ne(capsys): - # GH 20661 - class Foo: - def __init__(self, val): - self._value = val - - def __ne__(self, other): - raise Exception("NEQ not supported") - - with capsys.disabled(): - li = [Foo(i) for i in range(5)] - s = Series(li, index=list(range(5))) - s.is_unique - captured = capsys.readouterr() - assert len(captured.err) == 0 diff --git a/pandas/tests/series/test_npfuncs.py b/pandas/tests/series/test_npfuncs.py index 645a849015c23..a0b672fffa84a 100644 --- a/pandas/tests/series/test_npfuncs.py +++ b/pandas/tests/series/test_npfuncs.py @@ -14,3 +14,8 @@ def test_ptp(self): arr = np.random.randn(N) ser = Series(arr) assert np.ptp(ser) == np.ptp(arr) + + +def test_numpy_unique(datetime_series): + # it works! + np.unique(datetime_series) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index bcd6a7a7308a3..271ac31d303ae 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -167,7 +167,7 @@ def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("ufunc", [np.divmod]) # any others? +@pytest.mark.parametrize("ufunc", [np.divmod]) # TODO: any others? @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) @pytest.mark.parametrize("shuffle", SHUFFLE) @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") From f45a5a111bbcfe468274430aac807a8b29ea88e9 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Dec 2020 18:41:12 -0800 Subject: [PATCH 2/3] TST/REF: collect indexing tests by method --- pandas/tests/series/indexing/test_getitem.py | 18 +++++++++++++ pandas/tests/series/indexing/test_numeric.py | 28 -------------------- pandas/tests/series/indexing/test_setitem.py | 24 ++++++++++++++++- 3 files changed, 41 insertions(+), 29 deletions(-) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index b4c30cb6d4cd2..8c34921e13da4 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -26,6 +26,13 @@ class TestSeriesGetitemScalars: + def test_getitem_negative_out_of_bounds(self): + ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) + + msg = "index -11 is out of bounds for axis 0 with size 10" + with pytest.raises(IndexError, match=msg): + ser[-11] + def test_getitem_out_of_bounds_indexerror(self, datetime_series): # don't segfault, GH#495 msg = r"index \d+ is out of bounds for axis 0 with size \d+" @@ -186,6 +193,17 @@ def test_getitem_slice_date(self, slc, positions): expected = ser.take(positions) tm.assert_series_equal(result, expected) + def test_getitem_slice_float_raises(self, datetime_series): + msg = ( + "cannot do slice indexing on DatetimeIndex with these indexers " + r"\[{key}\] of type float" + ) + with pytest.raises(TypeError, match=msg.format(key=r"4\.0")): + datetime_series[4.0:10.0] + + with pytest.raises(TypeError, match=msg.format(key=r"4\.5")): + datetime_series[4.5:10.0] + class TestSeriesGetitemListLike: @pytest.mark.parametrize("box", [list, np.array, Index, pd.Series]) diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index 2ad21d8221e25..4caf6d03d8d80 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -1,5 +1,4 @@ import numpy as np -import pytest from pandas import DataFrame, Index, Series import pandas._testing as tm @@ -30,16 +29,6 @@ def test_slice_float64(): tm.assert_frame_equal(result, expected) -def test_getitem_negative_out_of_bounds(): - s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) - - msg = "index -11 is out of bounds for axis 0 with size 10" - with pytest.raises(IndexError, match=msg): - s[-11] - with pytest.raises(IndexError, match=msg): - s[-11] = "foo" - - def test_getitem_setitem_slice_bug(): s = Series(range(10), index=list(range(10))) result = s[-12:] @@ -69,20 +58,3 @@ def test_getitem_setitem_slice_integers(): s[:4] = 0 assert (s[:4] == 0).all() assert not (s[4:] == 0).any() - - -def test_slice_float_get_set(datetime_series): - msg = ( - "cannot do slice indexing on DatetimeIndex with these indexers " - r"\[{key}\] of type float" - ) - with pytest.raises(TypeError, match=msg.format(key=r"4\.0")): - datetime_series[4.0:10.0] - - with pytest.raises(TypeError, match=msg.format(key=r"4\.0")): - datetime_series[4.0:10.0] = 0 - - with pytest.raises(TypeError, match=msg.format(key=r"4\.5")): - datetime_series[4.5:10.0] - with pytest.raises(TypeError, match=msg.format(key=r"4\.5")): - datetime_series[4.5:10.0] = 0 diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 119019da529e4..5f09283249fe3 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -12,8 +12,8 @@ date_range, period_range, ) +import pandas._testing as tm from pandas.core.indexing import IndexingError -import pandas.testing as tm from pandas.tseries.offsets import BDay @@ -84,6 +84,28 @@ def test_setitem_na_period_dtype_casts_to_nat(self, na_val): assert ser[4] is NaT +class TestSetitemScalarIndexer: + def test_setitem_negative_out_of_bounds(self): + ser = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) + + msg = "index -11 is out of bounds for axis 0 with size 10" + with pytest.raises(IndexError, match=msg): + ser[-11] = "foo" + + +class TestSetitemSlices: + def test_setitem_slice_float_raises(self, datetime_series): + msg = ( + "cannot do slice indexing on DatetimeIndex with these indexers " + r"\[{key}\] of type float" + ) + with pytest.raises(TypeError, match=msg.format(key=r"4\.0")): + datetime_series[4.0:10.0] = 0 + + with pytest.raises(TypeError, match=msg.format(key=r"4\.5")): + datetime_series[4.5:10.0] = 0 + + class TestSetitemBooleanMask: def test_setitem_boolean(self, string_series): mask = string_series > string_series.median() From f7528e90a502a68e5856aa745a42060109a60fec Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 19 Dec 2020 19:12:19 -0800 Subject: [PATCH 3/3] TST/REF: collect tests by method --- pandas/tests/series/indexing/test_getitem.py | 11 +++++++++++ pandas/tests/series/test_api.py | 10 ---------- pandas/tests/series/test_dtypes.py | 15 --------------- 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 8c34921e13da4..2022bca514540 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -479,3 +479,14 @@ def test_getitem_1tuple_slice_without_multiindex(): result = ser[key] expected = ser[key[0]] tm.assert_series_equal(result, expected) + + +def test_getitem_preserve_name(datetime_series): + result = datetime_series[datetime_series > 0] + assert result.name == datetime_series.name + + result = datetime_series[[0, 2, 4]] + assert result.name == datetime_series.name + + result = datetime_series[5:10] + assert result.name == datetime_series.name diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index ea0e1203e22ed..2f255d92d86e3 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -9,16 +9,6 @@ class TestSeriesMisc: - def test_getitem_preserve_name(self, datetime_series): - result = datetime_series[datetime_series > 0] - assert result.name == datetime_series.name - - result = datetime_series[[0, 2, 4]] - assert result.name == datetime_series.name - - result = datetime_series[5:10] - assert result.name == datetime_series.name - def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 865ae565b6501..d59f0c05c7462 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -94,21 +94,6 @@ def cmp(a, b): result = ser.astype("object").astype(CategoricalDtype()) tm.assert_series_equal(result, roundtrip_expected) - def test_astype_categorical_invalid_conversions(self): - # invalid conversion (these are NOT a dtype) - cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)]) - ser = Series(np.random.RandomState(0).randint(0, 10000, 100)).sort_values() - ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat) - - msg = ( - "dtype '' " - "not understood" - ) - with pytest.raises(TypeError, match=msg): - ser.astype(Categorical) - with pytest.raises(TypeError, match=msg): - ser.astype("object").astype(Categorical) - def test_series_to_categorical(self): # see gh-16524: test conversion of Series to Categorical series = Series(["a", "b", "c"])