From d70d86072e6c6a09adbc30c35c57dffe4054d616 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Mon, 9 Mar 2020 20:12:39 +0100 Subject: [PATCH 01/13] TST: Add tests for duplicated and drop_duplicates --- .../indexes/categorical/test_category.py | 73 +++++++++++++++++-- pandas/tests/indexes/datetimes/test_ops.py | 49 +++++++------ pandas/tests/indexes/period/test_ops.py | 49 +++++++------ pandas/tests/indexes/timedeltas/test_ops.py | 49 +++++++------ 4 files changed, 152 insertions(+), 68 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index c18cd1f252c83..5df72a58a7e1a 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -8,7 +8,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import Categorical, IntervalIndex +from pandas import Categorical, IntervalIndex, Series import pandas._testing as tm from pandas.core.indexes.api import CategoricalIndex, Index @@ -353,16 +353,79 @@ def test_is_monotonic(self, data, non_lexsorted_data): assert c.is_monotonic_decreasing is False def test_has_duplicates(self): - idx = CategoricalIndex([0, 0, 0], name="foo") assert idx.is_unique is False assert idx.has_duplicates is True + idx = CategoricalIndex([0, 1], categories=[2,3], name="foo") + assert idx.is_unique is False + assert idx.has_duplicates is True + + idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo") + assert idx.is_unique is True + assert idx.has_duplicates is False + + + def _test_drop_duplicates(self, idx, keep, expected): + for k, e in zip(keep, expected): + np.testing.assert_equal(idx.duplicated(keep=k), e) + exp = idx[~e] + + result = idx.drop_duplicates(keep=k) + tm.assert_index_equal(result, exp) + + result = Series(idx).drop_duplicates(keep=k) + tm.assert_series_equal(result, Series(exp)) + def test_drop_duplicates(self): + keep = ["first"]#, "last", False] + + categories = [[1, 2, 3], list('abc')] + expected = [ + np.array([False, True, True]), + np.array([True, True, False]), + np.array([True, True, True]) + ] + for c in categories: + idx = pd.CategoricalIndex([1, 1, 1], categories=c, name="foo") + self._test_drop_duplicates(idx, keep, expected) + + categories = ['a', 'b', 'c'] + idx = CategoricalIndex([2, 'a', 'b'], categories=categories, name="foo") + expected = np.zeros(shape=(3, 3), dtype=np.bool) + self._test_drop_duplicates(idx, keep, expected) + + idx = CategoricalIndex(list('abb'), categories=categories, name="foo") + expected = [ + np.array([False, False, True]), + np.array([False, True, False]), + np.array([True, True, False]) + ] + self._test_drop_duplicates(idx, keep, expected) + + + def test_unique(self): + + categories = [1, 2, 3] + idx = CategoricalIndex([1, 1, 1], categories=categories) + expected = CategoricalIndex([1], categories=[1]) + tm.assert_index_equal(idx.unique(), expected) - idx = CategoricalIndex([0, 0, 0], name="foo") - expected = CategoricalIndex([0], name="foo") - tm.assert_index_equal(idx.drop_duplicates(), expected) + + categories = list('abc') + idx = CategoricalIndex([1, 1, 1], categories=categories) + expected = CategoricalIndex([np.nan], categories=[]) + tm.assert_index_equal(idx.unique(), expected) + + + categories = [1, 2, 3] + idx = CategoricalIndex([1, 2, 'a'], categories=categories) + expected = CategoricalIndex([1, 2, np.nan], categories=[1, 2]) + tm.assert_index_equal(idx.unique(), expected) + + categories = list('abc') + idx = CategoricalIndex([2, 'a', 'b'], categories=categories) + expected = CategoricalIndex([np.nan, 'a', 'b'], categories=['a', 'b']) tm.assert_index_equal(idx.unique(), expected) def test_repr_roundtrip(self): diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 8ed98410ad9a4..72a8a538bc6f6 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -259,9 +259,12 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None - def test_drop_duplicates_metadata(self): + @pytest.mark.parametrize( + "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] + ) + def test_drop_duplicates_metadata(self, freq): # GH 10115 - idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx") + idx = pd.date_range("2011-01-01", freq=freq, periods=10, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq @@ -272,26 +275,30 @@ def test_drop_duplicates_metadata(self): tm.assert_index_equal(idx, result) assert result.freq is None - def test_drop_duplicates(self): + @pytest.mark.parametrize( + "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] + ) + @pytest.mark.parametrize( + "keep, expected, index", + [ + ("first", np.concatenate(([False]*10, [True]*5)), np.arange(0, 10)), + ("last", np.concatenate(([True]*5, [False]*10)), np.arange(5, 15)), + (False, np.concatenate(([True]*5, [False]*5, [True]*5)), np.arange(5, 10)), + ] + ) + def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat - base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx") - idx = base.append(base[:5]) - - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) - - res = idx.drop_duplicates(keep="last") - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep="last") - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) - - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + idx = pd.date_range("2011-01-01", freq=freq, periods=10, name="idx") + idx = idx.append(idx[:5]) + + np.testing.assert_equal(idx.duplicated(keep=keep), expected) + expected = idx[~expected] + + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, expected) + + result = Series(idx).drop_duplicates(keep=keep) + tm.assert_series_equal(result, Series(expected, index=index)) @pytest.mark.parametrize( "freq", diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 196946e696c8d..1f9d5252bdec1 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -81,9 +81,12 @@ def test_value_counts_unique(self): tm.assert_index_equal(idx.unique(), exp_idx) - def test_drop_duplicates_metadata(self): + @pytest.mark.parametrize( + "freq", ["D"]#, "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] + ) + def test_drop_duplicates_metadata(self, freq): # GH 10115 - idx = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx") + idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq @@ -93,26 +96,30 @@ def test_drop_duplicates_metadata(self): tm.assert_index_equal(idx, result) assert idx.freq == result.freq - def test_drop_duplicates(self): + @pytest.mark.parametrize( + "freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"] + ) + @pytest.mark.parametrize( + "keep, expected, index", + [ + ("first", np.concatenate(([False]*10, [True]*5)), np.arange(0, 10)), + ("last", np.concatenate(([True]*5, [False]*10)), np.arange(5, 15)), + (False, np.concatenate(([True]*5, [False]*5, [True]*5)), np.arange(5, 10)), + ] + ) + def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat - base = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx") - idx = base.append(base[:5]) - - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) - - res = idx.drop_duplicates(keep="last") - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep="last") - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) - - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") + idx = idx.append(idx[:5]) + + np.testing.assert_equal(idx.duplicated(keep=keep), expected) + expected = idx[~expected] + + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, expected) + + result = Series(idx).drop_duplicates(keep=keep) + tm.assert_series_equal(result, Series(expected, index=index)) def test_order_compat(self): def _check_freq(index, expected_index): diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 6606507dabc29..6a398d72f1f79 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -136,9 +136,12 @@ def test_order(self): tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None - def test_drop_duplicates_metadata(self): + @pytest.mark.parametrize( + "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] + ) + def test_drop_duplicates_metadata(self, freq): # GH 10115 - idx = pd.timedelta_range("1 day", "31 day", freq="D", name="idx") + idx = pd.timedelta_range("1 day", periods=10, freq=freq, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq @@ -149,26 +152,30 @@ def test_drop_duplicates_metadata(self): tm.assert_index_equal(idx, result) assert result.freq is None - def test_drop_duplicates(self): + @pytest.mark.parametrize( + "freq", ["D",]# "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] + ) + @pytest.mark.parametrize( + "keep, expected, index", + [ + ("first", np.concatenate(([False]*10, [True]*5)), np.arange(0, 10)), + ("last", np.concatenate(([True]*5, [False]*10)), np.arange(5, 15)), + (False, np.concatenate(([True]*5, [False]*5, [True]*5)), np.arange(5, 10)), + ] + ) + def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat - base = pd.timedelta_range("1 day", "31 day", freq="D", name="idx") - idx = base.append(base[:5]) - - res = idx.drop_duplicates() - tm.assert_index_equal(res, base) - res = Series(idx).drop_duplicates() - tm.assert_series_equal(res, Series(base)) - - res = idx.drop_duplicates(keep="last") - exp = base[5:].append(base[:5]) - tm.assert_index_equal(res, exp) - res = Series(idx).drop_duplicates(keep="last") - tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) - - res = idx.drop_duplicates(keep=False) - tm.assert_index_equal(res, base[5:]) - res = Series(idx).drop_duplicates(keep=False) - tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + idx = pd.timedelta_range("1 day", periods=10, freq=freq, name="idx") + idx = idx.append(idx[:5]) + + np.testing.assert_equal(idx.duplicated(keep=keep), expected) + expected = idx[~expected] + + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, expected) + + result = Series(idx).drop_duplicates(keep=keep) + tm.assert_series_equal(result, Series(expected, index=index)) @pytest.mark.parametrize( "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] From 59d9119b80edc72d3ef4dec88374965a8cdac575 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Tue, 10 Mar 2020 10:43:08 +0100 Subject: [PATCH 02/13] TST: Fix syntax --- pandas/tests/indexes/categorical/test_category.py | 8 ++------ pandas/tests/indexes/datetimes/test_ops.py | 8 ++++---- pandas/tests/indexes/period/test_ops.py | 8 ++++---- pandas/tests/indexes/timedeltas/test_ops.py | 10 +++++----- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 5df72a58a7e1a..02fa4e36b67d4 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -357,7 +357,7 @@ def test_has_duplicates(self): assert idx.is_unique is False assert idx.has_duplicates is True - idx = CategoricalIndex([0, 1], categories=[2,3], name="foo") + idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo") assert idx.is_unique is False assert idx.has_duplicates is True @@ -365,7 +365,6 @@ def test_has_duplicates(self): assert idx.is_unique is True assert idx.has_duplicates is False - def _test_drop_duplicates(self, idx, keep, expected): for k, e in zip(keep, expected): np.testing.assert_equal(idx.duplicated(keep=k), e) @@ -378,7 +377,7 @@ def _test_drop_duplicates(self, idx, keep, expected): tm.assert_series_equal(result, Series(exp)) def test_drop_duplicates(self): - keep = ["first"]#, "last", False] + keep = ["first", "last", False] categories = [[1, 2, 3], list('abc')] expected = [ @@ -403,7 +402,6 @@ def test_drop_duplicates(self): ] self._test_drop_duplicates(idx, keep, expected) - def test_unique(self): categories = [1, 2, 3] @@ -411,13 +409,11 @@ def test_unique(self): expected = CategoricalIndex([1], categories=[1]) tm.assert_index_equal(idx.unique(), expected) - categories = list('abc') idx = CategoricalIndex([1, 1, 1], categories=categories) expected = CategoricalIndex([np.nan], categories=[]) tm.assert_index_equal(idx.unique(), expected) - categories = [1, 2, 3] idx = CategoricalIndex([1, 2, 'a'], categories=categories) expected = CategoricalIndex([1, 2, np.nan], categories=[1, 2]) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 72a8a538bc6f6..a87a7296c9d95 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -279,11 +279,11 @@ def test_drop_duplicates_metadata(self, freq): "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] ) @pytest.mark.parametrize( - "keep, expected, index", + "keep, expected, index", [ - ("first", np.concatenate(([False]*10, [True]*5)), np.arange(0, 10)), - ("last", np.concatenate(([True]*5, [False]*10)), np.arange(5, 15)), - (False, np.concatenate(([True]*5, [False]*5, [True]*5)), np.arange(5, 10)), + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10)), ] ) def test_drop_duplicates(self, freq, keep, expected, index): diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 1f9d5252bdec1..d1524774a095f 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -82,7 +82,7 @@ def test_value_counts_unique(self): tm.assert_index_equal(idx.unique(), exp_idx) @pytest.mark.parametrize( - "freq", ["D"]#, "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] + "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] ) def test_drop_duplicates_metadata(self, freq): # GH 10115 @@ -102,9 +102,9 @@ def test_drop_duplicates_metadata(self, freq): @pytest.mark.parametrize( "keep, expected, index", [ - ("first", np.concatenate(([False]*10, [True]*5)), np.arange(0, 10)), - ("last", np.concatenate(([True]*5, [False]*10)), np.arange(5, 15)), - (False, np.concatenate(([True]*5, [False]*5, [True]*5)), np.arange(5, 10)), + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10)), ] ) def test_drop_duplicates(self, freq, keep, expected, index): diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 6a398d72f1f79..666503ff5b021 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -153,14 +153,14 @@ def test_drop_duplicates_metadata(self, freq): assert result.freq is None @pytest.mark.parametrize( - "freq", ["D",]# "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] + "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] ) @pytest.mark.parametrize( - "keep, expected, index", + "keep, expected, index", [ - ("first", np.concatenate(([False]*10, [True]*5)), np.arange(0, 10)), - ("last", np.concatenate(([True]*5, [False]*10)), np.arange(5, 15)), - (False, np.concatenate(([True]*5, [False]*5, [True]*5)), np.arange(5, 10)), + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10)), ] ) def test_drop_duplicates(self, freq, keep, expected, index): From dc6b993434046d846971b3d63cce16c674adfe75 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Tue, 10 Mar 2020 10:47:20 +0100 Subject: [PATCH 03/13] TST: Fix indent --- pandas/tests/indexes/datetimes/test_ops.py | 13 ++++++++----- pandas/tests/indexes/period/test_ops.py | 13 ++++++++----- pandas/tests/indexes/timedeltas/test_ops.py | 13 ++++++++----- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index a87a7296c9d95..977fc2dc72c36 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -280,11 +280,14 @@ def test_drop_duplicates_metadata(self, freq): ) @pytest.mark.parametrize( "keep, expected, index", - [ - ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10)), - ] + [ + ("first", np.concatenate(([False] * 10, [True] * 5)), + np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), + np.arange(5, 15)), + (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10)), + ] ) def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index d1524774a095f..f722a07cc69a7 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -101,11 +101,14 @@ def test_drop_duplicates_metadata(self, freq): ) @pytest.mark.parametrize( "keep, expected, index", - [ - ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10)), - ] + [ + ("first", np.concatenate(([False] * 10, [True] * 5)), + np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), + np.arange(5, 15)), + (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10)), + ] ) def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 666503ff5b021..abc33bbe68909 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -157,11 +157,14 @@ def test_drop_duplicates_metadata(self, freq): ) @pytest.mark.parametrize( "keep, expected, index", - [ - ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10)), - ] + [ + ("first", np.concatenate(([False] * 10, [True] * 5)), + np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), + np.arange(5, 15)), + (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10)), + ] ) def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat From 8e9e0ad4613fe700617590400e3df7b37a13c3d2 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Tue, 10 Mar 2020 10:48:53 +0100 Subject: [PATCH 04/13] TST: Fix trailing whitespaces --- pandas/tests/indexes/datetimes/test_ops.py | 8 ++++---- pandas/tests/indexes/period/test_ops.py | 6 +++--- pandas/tests/indexes/timedeltas/test_ops.py | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 977fc2dc72c36..a2c77cb46a3bf 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -281,11 +281,11 @@ def test_drop_duplicates_metadata(self, freq): @pytest.mark.parametrize( "keep, expected, index", [ - ("first", np.concatenate(([False] * 10, [True] * 5)), + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), - np.arange(5, 15)), - (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + ("last", np.concatenate(([True] * 5, [False] * 10)), + np.arange(5, 15)), + (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10)), ] ) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index f722a07cc69a7..9b8b80050bdee 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -102,11 +102,11 @@ def test_drop_duplicates_metadata(self, freq): @pytest.mark.parametrize( "keep, expected, index", [ - ("first", np.concatenate(([False] * 10, [True] * 5)), + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10)), ] ) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index abc33bbe68909..0ce573faced25 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -158,11 +158,11 @@ def test_drop_duplicates_metadata(self, freq): @pytest.mark.parametrize( "keep, expected, index", [ - ("first", np.concatenate(([False] * 10, [True] * 5)), + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10)), ] ) From ed3798fd80d8b1b8f4a10a647a1d406356ba9359 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Tue, 10 Mar 2020 20:39:58 +0100 Subject: [PATCH 05/13] TST: Delete numpy testing --- pandas/tests/indexes/categorical/test_category.py | 2 +- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tests/indexes/period/test_ops.py | 2 +- pandas/tests/indexes/timedeltas/test_ops.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 02fa4e36b67d4..1dac0ae8cfafe 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -367,7 +367,7 @@ def test_has_duplicates(self): def _test_drop_duplicates(self, idx, keep, expected): for k, e in zip(keep, expected): - np.testing.assert_equal(idx.duplicated(keep=k), e) + tm.assert_numpy_array_equal(idx.duplicated(keep=k), e) exp = idx[~e] result = idx.drop_duplicates(keep=k) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index a2c77cb46a3bf..c80c910d14dd5 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -294,7 +294,7 @@ def test_drop_duplicates(self, freq, keep, expected, index): idx = pd.date_range("2011-01-01", freq=freq, periods=10, name="idx") idx = idx.append(idx[:5]) - np.testing.assert_equal(idx.duplicated(keep=keep), expected) + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) expected = idx[~expected] result = idx.drop_duplicates(keep=keep) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 9b8b80050bdee..814178460ca25 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -115,7 +115,7 @@ def test_drop_duplicates(self, freq, keep, expected, index): idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") idx = idx.append(idx[:5]) - np.testing.assert_equal(idx.duplicated(keep=keep), expected) + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) expected = idx[~expected] result = idx.drop_duplicates(keep=keep) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 0ce573faced25..308eb953be0a6 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -171,7 +171,7 @@ def test_drop_duplicates(self, freq, keep, expected, index): idx = pd.timedelta_range("1 day", periods=10, freq=freq, name="idx") idx = idx.append(idx[:5]) - np.testing.assert_equal(idx.duplicated(keep=keep), expected) + tm.testing.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) expected = idx[~expected] result = idx.drop_duplicates(keep=keep) From e7d388220c02fb3803a7249be1c709d9857f7731 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Tue, 10 Mar 2020 20:59:24 +0100 Subject: [PATCH 06/13] TST: Fix test --- pandas/tests/indexes/timedeltas/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 308eb953be0a6..fc223c7b483cf 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -171,7 +171,7 @@ def test_drop_duplicates(self, freq, keep, expected, index): idx = pd.timedelta_range("1 day", periods=10, freq=freq, name="idx") idx = idx.append(idx[:5]) - tm.testing.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) expected = idx[~expected] result = idx.drop_duplicates(keep=keep) From 8b094506f913f73ee71a1f0025253dad0bf7412e Mon Sep 17 00:00:00 2001 From: mproszewska Date: Tue, 10 Mar 2020 21:17:17 +0100 Subject: [PATCH 07/13] TST: Delete negative values in period --- pandas/tests/indexes/period/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 814178460ca25..a20f4614684f7 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -82,7 +82,7 @@ def test_value_counts_unique(self): tm.assert_index_equal(idx.unique(), exp_idx) @pytest.mark.parametrize( - "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] + "freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"] ) def test_drop_duplicates_metadata(self, freq): # GH 10115 From 0067641c7d3cd27858702b53403c3f0388419fd0 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Tue, 10 Mar 2020 21:51:16 +0100 Subject: [PATCH 08/13] TST: Fix categorical tests --- .../indexes/categorical/test_category.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 1dac0ae8cfafe..248c05ae08c0f 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -365,16 +365,16 @@ def test_has_duplicates(self): assert idx.is_unique is True assert idx.has_duplicates is False - def _test_drop_duplicates(self, idx, keep, expected): - for k, e in zip(keep, expected): + def _test_drop_duplicates(self, idx, keep, expected, index): + for k, e, i in zip(keep, expected, index): tm.assert_numpy_array_equal(idx.duplicated(keep=k), e) - exp = idx[~e] + e = idx[~e] result = idx.drop_duplicates(keep=k) - tm.assert_index_equal(result, exp) + tm.assert_index_equal(result, e) result = Series(idx).drop_duplicates(keep=k) - tm.assert_series_equal(result, Series(exp)) + tm.assert_series_equal(result, Series(e, i)) def test_drop_duplicates(self): keep = ["first", "last", False] @@ -385,22 +385,25 @@ def test_drop_duplicates(self): np.array([True, True, False]), np.array([True, True, True]) ] + index = [[0], [2], np.empty(shape=(0), dtype=int)] for c in categories: idx = pd.CategoricalIndex([1, 1, 1], categories=c, name="foo") - self._test_drop_duplicates(idx, keep, expected) + self._test_drop_duplicates(idx, keep, expected, index) categories = ['a', 'b', 'c'] idx = CategoricalIndex([2, 'a', 'b'], categories=categories, name="foo") expected = np.zeros(shape=(3, 3), dtype=np.bool) - self._test_drop_duplicates(idx, keep, expected) + index = [[0, 1, 2], [0, 1, 2], [0, 1, 2]] + self._test_drop_duplicates(idx, keep, expected, index) idx = CategoricalIndex(list('abb'), categories=categories, name="foo") expected = [ np.array([False, False, True]), np.array([False, True, False]), - np.array([True, True, False]) + np.array([False, True, True]) ] - self._test_drop_duplicates(idx, keep, expected) + index = [[0, 1], [0, 2], [0]] + self._test_drop_duplicates(idx, keep, expected, index) def test_unique(self): From 79cba23445e7f8ca426b0996b09f17a048b67039 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Tue, 10 Mar 2020 22:36:45 +0100 Subject: [PATCH 09/13] TST: Fix format with black --- .../indexes/categorical/test_category.py | 22 +++++++++--------- pandas/tests/indexes/datetimes/test_ops.py | 15 ++++++------ pandas/tests/indexes/period/test_ops.py | 23 ++++++++----------- pandas/tests/indexes/timedeltas/test_ops.py | 15 ++++++------ 4 files changed, 37 insertions(+), 38 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 248c05ae08c0f..0280d89576e09 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -379,28 +379,28 @@ def _test_drop_duplicates(self, idx, keep, expected, index): def test_drop_duplicates(self): keep = ["first", "last", False] - categories = [[1, 2, 3], list('abc')] + categories = [[1, 2, 3], list("abc")] expected = [ np.array([False, True, True]), np.array([True, True, False]), - np.array([True, True, True]) + np.array([True, True, True]), ] index = [[0], [2], np.empty(shape=(0), dtype=int)] for c in categories: idx = pd.CategoricalIndex([1, 1, 1], categories=c, name="foo") self._test_drop_duplicates(idx, keep, expected, index) - categories = ['a', 'b', 'c'] - idx = CategoricalIndex([2, 'a', 'b'], categories=categories, name="foo") + categories = ["a", "b", "c"] + idx = CategoricalIndex([2, "a", "b"], categories=categories, name="foo") expected = np.zeros(shape=(3, 3), dtype=np.bool) index = [[0, 1, 2], [0, 1, 2], [0, 1, 2]] self._test_drop_duplicates(idx, keep, expected, index) - idx = CategoricalIndex(list('abb'), categories=categories, name="foo") + idx = CategoricalIndex(list("abb"), categories=categories, name="foo") expected = [ np.array([False, False, True]), np.array([False, True, False]), - np.array([False, True, True]) + np.array([False, True, True]), ] index = [[0, 1], [0, 2], [0]] self._test_drop_duplicates(idx, keep, expected, index) @@ -412,19 +412,19 @@ def test_unique(self): expected = CategoricalIndex([1], categories=[1]) tm.assert_index_equal(idx.unique(), expected) - categories = list('abc') + categories = list("abc") idx = CategoricalIndex([1, 1, 1], categories=categories) expected = CategoricalIndex([np.nan], categories=[]) tm.assert_index_equal(idx.unique(), expected) categories = [1, 2, 3] - idx = CategoricalIndex([1, 2, 'a'], categories=categories) + idx = CategoricalIndex([1, 2, "a"], categories=categories) expected = CategoricalIndex([1, 2, np.nan], categories=[1, 2]) tm.assert_index_equal(idx.unique(), expected) - categories = list('abc') - idx = CategoricalIndex([2, 'a', 'b'], categories=categories) - expected = CategoricalIndex([np.nan, 'a', 'b'], categories=['a', 'b']) + categories = list("abc") + idx = CategoricalIndex([2, "a", "b"], categories=categories) + expected = CategoricalIndex([np.nan, "a", "b"], categories=["a", "b"]) tm.assert_index_equal(idx.unique(), expected) def test_repr_roundtrip(self): diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index c80c910d14dd5..1c2987981a19e 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -281,13 +281,14 @@ def test_drop_duplicates_metadata(self, freq): @pytest.mark.parametrize( "keep, expected, index", [ - ("first", np.concatenate(([False] * 10, [True] * 5)), - np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), - np.arange(5, 15)), - (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), - np.arange(5, 10)), - ] + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + ( + False, + np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10), + ), + ], ) def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index a20f4614684f7..fc44226f9d72f 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -81,9 +81,7 @@ def test_value_counts_unique(self): tm.assert_index_equal(idx.unique(), exp_idx) - @pytest.mark.parametrize( - "freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"] - ) + @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) def test_drop_duplicates_metadata(self, freq): # GH 10115 idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") @@ -96,19 +94,18 @@ def test_drop_duplicates_metadata(self, freq): tm.assert_index_equal(idx, result) assert idx.freq == result.freq - @pytest.mark.parametrize( - "freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"] - ) + @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) @pytest.mark.parametrize( "keep, expected, index", [ - ("first", np.concatenate(([False] * 10, [True] * 5)), - np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), - np.arange(5, 15)), - (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), - np.arange(5, 10)), - ] + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + ( + False, + np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10), + ), + ], ) def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index fc223c7b483cf..dac4862599bd1 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -158,13 +158,14 @@ def test_drop_duplicates_metadata(self, freq): @pytest.mark.parametrize( "keep, expected, index", [ - ("first", np.concatenate(([False] * 10, [True] * 5)), - np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), - np.arange(5, 15)), - (False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), - np.arange(5, 10)), - ] + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + ( + False, + np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10), + ), + ], ) def test_drop_duplicates(self, freq, keep, expected, index): # to check Index/Series compat From 5931a48dd243d24b6c2b6cf7ab9f460ff5dcfdb5 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 14 Mar 2020 12:19:01 +0100 Subject: [PATCH 10/13] TST: Apply requested changes --- .../indexes/categorical/test_category.py | 104 ++++++++---------- pandas/tests/indexes/conftest.py | 10 ++ pandas/tests/indexes/datetimes/test_ops.py | 44 ++------ pandas/tests/indexes/timedeltas/test_ops.py | 23 ++-- 4 files changed, 72 insertions(+), 109 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 0280d89576e09..63cadb34abb00 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -365,66 +365,58 @@ def test_has_duplicates(self): assert idx.is_unique is True assert idx.has_duplicates is False - def _test_drop_duplicates(self, idx, keep, expected, index): - for k, e, i in zip(keep, expected, index): - tm.assert_numpy_array_equal(idx.duplicated(keep=k), e) - e = idx[~e] - result = idx.drop_duplicates(keep=k) - tm.assert_index_equal(result, e) - - result = Series(idx).drop_duplicates(keep=k) - tm.assert_series_equal(result, Series(e, i)) - - def test_drop_duplicates(self): - keep = ["first", "last", False] - - categories = [[1, 2, 3], list("abc")] - expected = [ - np.array([False, True, True]), - np.array([True, True, False]), - np.array([True, True, True]), - ] - index = [[0], [2], np.empty(shape=(0), dtype=int)] - for c in categories: - idx = pd.CategoricalIndex([1, 1, 1], categories=c, name="foo") - self._test_drop_duplicates(idx, keep, expected, index) - - categories = ["a", "b", "c"] - idx = CategoricalIndex([2, "a", "b"], categories=categories, name="foo") - expected = np.zeros(shape=(3, 3), dtype=np.bool) - index = [[0, 1, 2], [0, 1, 2], [0, 1, 2]] - self._test_drop_duplicates(idx, keep, expected, index) - - idx = CategoricalIndex(list("abb"), categories=categories, name="foo") - expected = [ - np.array([False, False, True]), - np.array([False, True, False]), - np.array([False, True, True]), - ] - index = [[0, 1], [0, 2], [0]] - self._test_drop_duplicates(idx, keep, expected, index) - - def test_unique(self): - - categories = [1, 2, 3] - idx = CategoricalIndex([1, 1, 1], categories=categories) - expected = CategoricalIndex([1], categories=[1]) - tm.assert_index_equal(idx.unique(), expected) + @pytest.mark.parametrize( + "data, categories, expected", + [ + ([1, 1, 1], [1, 2, 3], + { + "first" : np.array([False, True, True]), + "last" : np.array([True, True, False]), + False : np.array([True, True, True]), + }), + ([1, 1, 1], list("abc"), + { + "first" : np.array([False, True, True]), + "last" : np.array([True, True, False]), + False : np.array([True, True, True]), + }), + ([2, "a", "b"], list('abc'), + { + "first" : np.zeros(shape=(3), dtype=np.bool), + "last" : np.zeros(shape=(3), dtype=np.bool), + False : np.zeros(shape=(3), dtype=np.bool), + }), + (list("abb"), list('abc'), + { + "first" : np.array([False, False, True]), + "last" : np.array([False, True, False]), + False : np.array([False, True, True]), + }), + ], + ) + def test_drop_duplicates(self, data, categories, expected): - categories = list("abc") - idx = CategoricalIndex([1, 1, 1], categories=categories) - expected = CategoricalIndex([np.nan], categories=[]) - tm.assert_index_equal(idx.unique(), expected) + idx = CategoricalIndex(data, categories=categories, name="foo") + for keep, e in expected.items(): + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e) + e = idx[~e] + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, e) - categories = [1, 2, 3] - idx = CategoricalIndex([1, 2, "a"], categories=categories) - expected = CategoricalIndex([1, 2, np.nan], categories=[1, 2]) - tm.assert_index_equal(idx.unique(), expected) + @pytest.mark.parametrize( + "data, categories, expected_data, expected_categories", + [ + ([1, 1, 1], [1, 2, 3], [1], [1]), + ([1, 1, 1], list('abc'), [np.nan], []), + ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan], [1, 2]), + ([2, "a", "b"], list("abc"), [np.nan, "a", "b"], ["a", "b"]), + ], + ) + def test_unique(self, data, categories, expected_data, expected_categories): - categories = list("abc") - idx = CategoricalIndex([2, "a", "b"], categories=categories) - expected = CategoricalIndex([np.nan, "a", "b"], categories=["a", "b"]) + idx = CategoricalIndex(data, categories=categories) + expected = CategoricalIndex(expected_data, categories=expected_categories) tm.assert_index_equal(idx.unique(), expected) def test_repr_roundtrip(self): diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index a9fb228073ab4..de84833b393e3 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -16,3 +16,13 @@ def sort(request): in in the Index setops methods. """ return request.param + +@pytest.fixture( + params=["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] +) +def freq_sample(request): + """ + Valid values for 'freq' parameter used to create date_range and + timedelta_range.. + """ + return request.param diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 1c2987981a19e..91507f19e9d71 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -259,12 +259,9 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None - @pytest.mark.parametrize( - "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] - ) - def test_drop_duplicates_metadata(self, freq): + def test_drop_duplicates_metadata(self, freq_sample): # GH 10115 - idx = pd.date_range("2011-01-01", freq=freq, periods=10, name="idx") + idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq @@ -275,9 +272,6 @@ def test_drop_duplicates_metadata(self, freq): tm.assert_index_equal(idx, result) assert result.freq is None - @pytest.mark.parametrize( - "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] - ) @pytest.mark.parametrize( "keep, expected, index", [ @@ -290,9 +284,9 @@ def test_drop_duplicates_metadata(self, freq): ), ], ) - def test_drop_duplicates(self, freq, keep, expected, index): + def test_drop_duplicates(self, freq_sample, keep, expected, index): # to check Index/Series compat - idx = pd.date_range("2011-01-01", freq=freq, periods=10, name="idx") + idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") idx = idx.append(idx[:5]) tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) @@ -304,36 +298,12 @@ def test_drop_duplicates(self, freq, keep, expected, index): result = Series(idx).drop_duplicates(keep=keep) tm.assert_series_equal(result, Series(expected, index=index)) - @pytest.mark.parametrize( - "freq", - [ - "A", - "2A", - "-2A", - "Q", - "-1Q", - "M", - "-1M", - "D", - "3D", - "-3D", - "W", - "-1W", - "H", - "2H", - "-2H", - "T", - "2T", - "S", - "-3S", - ], - ) - def test_infer_freq(self, freq): + def test_infer_freq(self, freq_sample): # GH 11018 - idx = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10) + idx = pd.date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) result = pd.DatetimeIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) - assert result.freq == freq + assert result.freq == freq_sample def test_nat(self, tz_naive_fixture): tz = tz_naive_fixture diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index dac4862599bd1..329b19a84268c 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -136,12 +136,9 @@ def test_order(self): tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None - @pytest.mark.parametrize( - "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] - ) - def test_drop_duplicates_metadata(self, freq): + def test_drop_duplicates_metadata(self, freq_sample): # GH 10115 - idx = pd.timedelta_range("1 day", periods=10, freq=freq, name="idx") + idx = pd.timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq @@ -152,9 +149,6 @@ def test_drop_duplicates_metadata(self, freq): tm.assert_index_equal(idx, result) assert result.freq is None - @pytest.mark.parametrize( - "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] - ) @pytest.mark.parametrize( "keep, expected, index", [ @@ -167,9 +161,9 @@ def test_drop_duplicates_metadata(self, freq): ), ], ) - def test_drop_duplicates(self, freq, keep, expected, index): + def test_drop_duplicates(self, freq_sample, keep, expected, index): # to check Index/Series compat - idx = pd.timedelta_range("1 day", periods=10, freq=freq, name="idx") + idx = pd.timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") idx = idx.append(idx[:5]) tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) @@ -181,15 +175,12 @@ def test_drop_duplicates(self, freq, keep, expected, index): result = Series(idx).drop_duplicates(keep=keep) tm.assert_series_equal(result, Series(expected, index=index)) - @pytest.mark.parametrize( - "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] - ) - def test_infer_freq(self, freq): + def test_infer_freq(self, freq_sample): # GH#11018 - idx = pd.timedelta_range("1", freq=freq, periods=10) + idx = pd.timedelta_range("1", freq=freq_sample, periods=10) result = pd.TimedeltaIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) - assert result.freq == freq + assert result.freq == freq_sample def test_repeat(self): index = pd.timedelta_range("1 days", periods=2, freq="D") From d228fd2b16f970133355c8f2e493d5ca2afec88e Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 14 Mar 2020 13:52:39 +0100 Subject: [PATCH 11/13] TST: Fix syntax --- .../indexes/categorical/test_category.py | 23 +++++++++++-------- pandas/tests/indexes/conftest.py | 1 + 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 63cadb34abb00..954f04643495a 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -8,7 +8,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import Categorical, IntervalIndex, Series +from pandas import Categorical, IntervalIndex import pandas._testing as tm from pandas.core.indexes.api import CategoricalIndex, Index @@ -365,36 +365,39 @@ def test_has_duplicates(self): assert idx.is_unique is True assert idx.has_duplicates is False - @pytest.mark.parametrize( "data, categories, expected", [ - ([1, 1, 1], [1, 2, 3], + ([1, 1, 1], [1, 2, 3], { "first" : np.array([False, True, True]), "last" : np.array([True, True, False]), False : np.array([True, True, True]), - }), + } + ), ([1, 1, 1], list("abc"), { "first" : np.array([False, True, True]), "last" : np.array([True, True, False]), False : np.array([True, True, True]), - }), + } + ), ([2, "a", "b"], list('abc'), { "first" : np.zeros(shape=(3), dtype=np.bool), "last" : np.zeros(shape=(3), dtype=np.bool), False : np.zeros(shape=(3), dtype=np.bool), - }), + } + ), (list("abb"), list('abc'), { "first" : np.array([False, False, True]), "last" : np.array([False, True, False]), False : np.array([False, True, True]), - }), + } + ), ], - ) + ) def test_drop_duplicates(self, data, categories, expected): idx = CategoricalIndex(data, categories=categories, name="foo") @@ -402,7 +405,7 @@ def test_drop_duplicates(self, data, categories, expected): tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e) e = idx[~e] result = idx.drop_duplicates(keep=keep) - tm.assert_index_equal(result, e) + tm.assert_index_equal(result, e) @pytest.mark.parametrize( "data, categories, expected_data, expected_categories", @@ -412,7 +415,7 @@ def test_drop_duplicates(self, data, categories, expected): ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan], [1, 2]), ([2, "a", "b"], list("abc"), [np.nan, "a", "b"], ["a", "b"]), ], - ) + ) def test_unique(self, data, categories, expected_data, expected_categories): idx = CategoricalIndex(data, categories=categories) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index de84833b393e3..990ecbf6be1ce 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -17,6 +17,7 @@ def sort(request): """ return request.param + @pytest.fixture( params=["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] ) From 444c081f696299edb34f85748ec35f678a711b23 Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 21 Mar 2020 17:33:57 +0100 Subject: [PATCH 12/13] TST: Fix indentation --- .../indexes/categorical/test_category.py | 50 +++++++++++-------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 954f04643495a..28b7991e95541 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -368,33 +368,41 @@ def test_has_duplicates(self): @pytest.mark.parametrize( "data, categories, expected", [ - ([1, 1, 1], [1, 2, 3], + ( + [1, 1, 1], + [1, 2, 3], { - "first" : np.array([False, True, True]), - "last" : np.array([True, True, False]), - False : np.array([True, True, True]), - } + "first": np.array([False, True, True]), + "last": np.array([True, True, False]), + False: np.array([True, True, True]), + }, ), - ([1, 1, 1], list("abc"), + ( + [1, 1, 1], + list("abc"), { - "first" : np.array([False, True, True]), - "last" : np.array([True, True, False]), - False : np.array([True, True, True]), - } + "first": np.array([False, True, True]), + "last": np.array([True, True, False]), + False: np.array([True, True, True]), + }, ), - ([2, "a", "b"], list('abc'), + ( + [2, "a", "b"], + list("abc"), { - "first" : np.zeros(shape=(3), dtype=np.bool), - "last" : np.zeros(shape=(3), dtype=np.bool), - False : np.zeros(shape=(3), dtype=np.bool), - } + "first": np.zeros(shape=(3), dtype=np.bool), + "last": np.zeros(shape=(3), dtype=np.bool), + False: np.zeros(shape=(3), dtype=np.bool), + }, ), - (list("abb"), list('abc'), + ( + list("abb"), + list("abc"), { - "first" : np.array([False, False, True]), - "last" : np.array([False, True, False]), - False : np.array([False, True, True]), - } + "first": np.array([False, False, True]), + "last": np.array([False, True, False]), + False: np.array([False, True, True]), + }, ), ], ) @@ -411,7 +419,7 @@ def test_drop_duplicates(self, data, categories, expected): "data, categories, expected_data, expected_categories", [ ([1, 1, 1], [1, 2, 3], [1], [1]), - ([1, 1, 1], list('abc'), [np.nan], []), + ([1, 1, 1], list("abc"), [np.nan], []), ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan], [1, 2]), ([2, "a", "b"], list("abc"), [np.nan, "a", "b"], ["a", "b"]), ], From 815c0ee156d333ad4e0bd3bb66a64a8efbee853d Mon Sep 17 00:00:00 2001 From: mproszewska Date: Sat, 21 Mar 2020 17:59:53 +0100 Subject: [PATCH 13/13] TST: Reformat conftest file with black --- pandas/tests/indexes/conftest.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py index 990ecbf6be1ce..fb17e1df6341b 100644 --- a/pandas/tests/indexes/conftest.py +++ b/pandas/tests/indexes/conftest.py @@ -18,9 +18,7 @@ def sort(request): return request.param -@pytest.fixture( - params=["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] -) +@pytest.fixture(params=["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"]) def freq_sample(request): """ Valid values for 'freq' parameter used to create date_range and