From 761d4e5669a00a357ab6f915396a04a7370e03a4 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Thu, 27 Feb 2020 23:09:02 +0100 Subject: [PATCH 1/5] fixturized test_searchsorted and test_validate_bool_args --- pandas/tests/base/test_ops.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index f85d823cb2fac..4d38248625f03 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -820,22 +820,26 @@ def test_memory_usage(self): diff = res_deep - sys.getsizeof(o) assert abs(diff) < 100 - def test_searchsorted(self): + def test_searchsorted(self, index_or_series_obj): # See gh-12238 - for o in self.objs: - index = np.searchsorted(o, max(o)) - assert 0 <= index <= len(o) + obj = index_or_series_obj + + if isinstance(obj, pd.MultiIndex): + pytest.skip("np.searchsorted doesn't work on pd.MultiIndex") - index = np.searchsorted(o, max(o), sorter=range(len(o))) - assert 0 <= index <= len(o) + max_obj = max(obj, default=0) + index = np.searchsorted(obj, max_obj) + assert 0 <= index <= len(obj) - def test_validate_bool_args(self): - invalid_values = [1, "True", [1, 2, 3], 5.0] + index = np.searchsorted(obj, max_obj, sorter=range(len(obj))) + assert 0 <= index <= len(obj) - for value in invalid_values: - msg = "expected type bool" - with pytest.raises(ValueError, match=msg): - self.int_series.drop_duplicates(inplace=value) + @pytest.mark.parametrize("invalid_value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, invalid_value, series_with_simple_index): + series = series_with_simple_index + msg = "expected type bool" + with pytest.raises(ValueError, match=msg): + series.drop_duplicates(inplace=invalid_value) def test_getitem(self): for i in self.indexes: From 76ec85551c3535f77e091b39e00d6c67c960f383 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Thu, 27 Feb 2020 23:20:20 +0100 Subject: [PATCH 2/5] refactored test_memory_usage --- pandas/tests/base/test_ops.py | 51 ++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 4d38248625f03..39e5d38705bf4 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -11,6 +11,7 @@ from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.dtypes.common import ( + is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_object_dtype, @@ -797,28 +798,36 @@ def test_fillna(self): assert o is not result @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") - def test_memory_usage(self): - for o in self.objs: - res = o.memory_usage() - res_deep = o.memory_usage(deep=True) - - if is_object_dtype(o) or ( - isinstance(o, Series) and is_object_dtype(o.index) - ): - # if there are objects, only deep will pick them up - assert res_deep > res - else: - assert res == res_deep + def test_memory_usage(self, index_or_series_obj): + obj = index_or_series_obj + res = obj.memory_usage() + res_deep = obj.memory_usage(deep=True) - if isinstance(o, Series): - assert ( - o.memory_usage(index=False) + o.index.memory_usage() - ) == o.memory_usage(index=True) - - # sys.getsizeof will call the .memory_usage with - # deep=True, and add on some GC overhead - diff = res_deep - sys.getsizeof(o) - assert abs(diff) < 100 + is_object = is_object_dtype(obj) or ( + isinstance(obj, Series) and is_object_dtype(obj.index) + ) + is_categorical = is_categorical_dtype(obj) or ( + isinstance(obj, Series) and is_categorical_dtype(obj.index) + ) + + if len(obj) == 0: + assert res_deep == res == 0 + elif is_object or is_categorical: + # only deep will pick them up + assert res_deep > res + else: + assert res == res_deep + + if isinstance(obj, Series): + total_usage = obj.memory_usage(index=True) + non_index_usage = obj.memory_usage(index=False) + index_usage = obj.index.memory_usage() + assert total_usage == non_index_usage + index_usage + + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = res_deep - sys.getsizeof(obj) + assert abs(diff) < 100 def test_searchsorted(self, index_or_series_obj): # See gh-12238 From 6f5fe30bfcd53806c1c6897d02cedbc3f5852ec3 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Thu, 27 Feb 2020 23:27:29 +0100 Subject: [PATCH 3/5] refactored test_getitem --- pandas/tests/base/test_ops.py | 38 ++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 39e5d38705bf4..4344b666dd453 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -850,22 +850,28 @@ def test_validate_bool_args(self, invalid_value, series_with_simple_index): with pytest.raises(ValueError, match=msg): series.drop_duplicates(inplace=invalid_value) - def test_getitem(self): - for i in self.indexes: - s = pd.Series(i) - - assert i[0] == s.iloc[0] - assert i[5] == s.iloc[5] - assert i[-1] == s.iloc[-1] - - assert i[-1] == i[9] - - msg = "index 20 is out of bounds for axis 0 with size 10" - with pytest.raises(IndexError, match=msg): - i[20] - msg = "single positional indexer is out-of-bounds" - with pytest.raises(IndexError, match=msg): - s.iloc[20] + def test_getitem(self, indices): + index = indices + + if len(index) == 0: + pytest.skip("Test doesn't make sense on empty data") + elif isinstance(index, pd.MultiIndex): + pytest.skip("Can't instantiate Series from MultiIndex") + + series = pd.Series(index) + assert index[0] == series.iloc[0] + assert index[5] == series.iloc[5] + assert index[-1] == series.iloc[-1] + + size = len(index) + assert index[-1] == index[size - 1] + + msg = f"index {size} is out of bounds for axis 0 with size {size}" + with pytest.raises(IndexError, match=msg): + index[size] + msg = "single positional indexer is out-of-bounds" + with pytest.raises(IndexError, match=msg): + series.iloc[size] @pytest.mark.parametrize("indexer_klass", [list, pd.Index]) @pytest.mark.parametrize( From 983e9f294e294d918ae01c0f191a69308a88a52c Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Fri, 28 Feb 2020 10:53:48 +0100 Subject: [PATCH 4/5] split out a memory components test for series from test_memory_usage --- pandas/conftest.py | 10 ++++++++++ pandas/tests/base/test_ops.py | 20 ++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index be44e6c2b36da..834015371bb49 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1047,6 +1047,16 @@ def series_with_simple_index(indices): for dtype in _narrow_dtypes } + +@pytest.fixture(params=_narrow_series.keys()) +def narrow_series(request): + """ + Fixture for Series with low precision data types + """ + # copy to avoid mutation, e.g. setting .name + return _narrow_series[request.param].copy() + + _index_or_series_objs = {**indices_dict, **_series, **_narrow_series} diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 4344b666dd453..35942a15ccd4e 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -818,17 +818,25 @@ def test_memory_usage(self, index_or_series_obj): else: assert res == res_deep - if isinstance(obj, Series): - total_usage = obj.memory_usage(index=True) - non_index_usage = obj.memory_usage(index=False) - index_usage = obj.index.memory_usage() - assert total_usage == non_index_usage + index_usage - # sys.getsizeof will call the .memory_usage with # deep=True, and add on some GC overhead diff = res_deep - sys.getsizeof(obj) assert abs(diff) < 100 + def test_memory_usage_components_series(self, series_with_simple_index): + series = series_with_simple_index + total_usage = series.memory_usage(index=True) + non_index_usage = series.memory_usage(index=False) + index_usage = series.index.memory_usage() + assert total_usage == non_index_usage + index_usage + + def test_memory_usage_components_narrow_series(self, narrow_series): + series = narrow_series + total_usage = series.memory_usage(index=True) + non_index_usage = series.memory_usage(index=False) + index_usage = series.index.memory_usage() + assert total_usage == non_index_usage + index_usage + def test_searchsorted(self, index_or_series_obj): # See gh-12238 obj = index_or_series_obj From fc7d05d31bd6eef198532856b3bb8d316e4bc9cc Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Mon, 2 Mar 2020 23:28:15 +0100 Subject: [PATCH 5/5] review comments --- pandas/tests/base/test_ops.py | 11 +++-------- pandas/tests/series/test_validate.py | 10 +++++++++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 35942a15ccd4e..c1fa03d7396dd 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -838,10 +838,12 @@ def test_memory_usage_components_narrow_series(self, narrow_series): assert total_usage == non_index_usage + index_usage def test_searchsorted(self, index_or_series_obj): + # numpy.searchsorted calls obj.searchsorted under the hood. # See gh-12238 obj = index_or_series_obj if isinstance(obj, pd.MultiIndex): + # See gh-14833 pytest.skip("np.searchsorted doesn't work on pd.MultiIndex") max_obj = max(obj, default=0) @@ -851,14 +853,7 @@ def test_searchsorted(self, index_or_series_obj): index = np.searchsorted(obj, max_obj, sorter=range(len(obj))) assert 0 <= index <= len(obj) - @pytest.mark.parametrize("invalid_value", [1, "True", [1, 2, 3], 5.0]) - def test_validate_bool_args(self, invalid_value, series_with_simple_index): - series = series_with_simple_index - msg = "expected type bool" - with pytest.raises(ValueError, match=msg): - series.drop_duplicates(inplace=invalid_value) - - def test_getitem(self, indices): + def test_access_by_position(self, indices): index = indices if len(index) == 0: diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py index 511d24ca7fa29..e2f050650b298 100644 --- a/pandas/tests/series/test_validate.py +++ b/pandas/tests/series/test_validate.py @@ -3,7 +3,15 @@ @pytest.mark.parametrize( "func", - ["reset_index", "_set_name", "sort_values", "sort_index", "rename", "dropna"], + [ + "reset_index", + "_set_name", + "sort_values", + "sort_index", + "rename", + "dropna", + "drop_duplicates", + ], ) @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) def test_validate_bool_args(string_series, func, inplace):