diff --git a/pandas/conftest.py b/pandas/conftest.py
index c1376670ffbf0..a979c3fc3bfac 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -248,7 +248,19 @@ def tz_aware_fixture(request):
     return request.param
 
 
-@pytest.fixture(params=[str, 'str', 'U'])
+UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
+SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
+ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
+
+FLOAT_DTYPES = [float, "float32", "float64"]
+COMPLEX_DTYPES = [complex, "complex64", "complex128"]
+STRING_DTYPES = [str, 'str', 'U']
+
+ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES
+ALL_NUMPY_DTYPES = ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES
+
+
+@pytest.fixture(params=STRING_DTYPES)
 def string_dtype(request):
     """Parametrized fixture for string dtypes.
 
@@ -259,9 +271,6 @@ def string_dtype(request):
     return request.param
 
 
-FLOAT_DTYPES = [float, "float32", "float64"]
-
-
 @pytest.fixture(params=FLOAT_DTYPES)
 def float_dtype(request):
     """
@@ -274,7 +283,7 @@ def float_dtype(request):
     return request.param
 
 
-@pytest.fixture(params=[complex, "complex64", "complex128"])
+@pytest.fixture(params=COMPLEX_DTYPES)
 def complex_dtype(request):
     """
     Parameterized fixture for complex dtypes.
@@ -286,12 +295,6 @@ def complex_dtype(request):
     return request.param
 
 
-UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
-SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"]
-ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
-ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES
-
-
 @pytest.fixture(params=SIGNED_INT_DTYPES)
 def sint_dtype(request):
     """
@@ -358,6 +361,31 @@ def any_real_dtype(request):
     return request.param
 
 
+@pytest.fixture(params=ALL_NUMPY_DTYPES)
+def any_numpy_dtype(request):
+    """
+    Parameterized fixture for all numpy dtypes.
+
+    * int8
+    * uint8
+    * int16
+    * uint16
+    * int32
+    * uint32
+    * int64
+    * uint64
+    * float32
+    * float64
+    * complex64
+    * complex128
+    * str
+    * 'str'
+    * 'U'
+    """
+
+    return request.param
+
+
 @pytest.fixture
 def mock():
     """
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index fd14118bd833f..28a77bbb1d3fa 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -907,144 +907,6 @@ def test_matmul(self):
         pytest.raises(Exception, a.dot, a.values[:3])
         pytest.raises(ValueError, a.dot, b.T)
 
-    def test_value_counts_nunique(self):
-
-        # basics.rst doc example
-        series = Series(np.random.randn(500))
-        series[20:500] = np.nan
-        series[10:20] = 5000
-        result = series.nunique()
-        assert result == 11
-
-        # GH 18051
-        s = pd.Series(pd.Categorical([]))
-        assert s.nunique() == 0
-        s = pd.Series(pd.Categorical([np.nan]))
-        assert s.nunique() == 0
-
-    def test_unique(self):
-
-        # 714 also, dtype=float
-        s = Series([1.2345] * 100)
-        s[::2] = np.nan
-        result = s.unique()
-        assert len(result) == 2
-
-        s = Series([1.2345] * 100, dtype='f4')
-        s[::2] = np.nan
-        result = s.unique()
-        assert len(result) == 2
-
-        # NAs in object arrays #714
-        s = Series(['foo'] * 100, dtype='O')
-        s[::2] = np.nan
-        result = s.unique()
-        assert len(result) == 2
-
-        # decision about None
-        s = Series([1, 2, 3, None, None, None], dtype=object)
-        result = s.unique()
-        expected = np.array([1, 2, 3, None], dtype=object)
-        tm.assert_numpy_array_equal(result, expected)
-
-        # GH 18051
-        s = pd.Series(pd.Categorical([]))
-        tm.assert_categorical_equal(s.unique(), pd.Categorical([]),
-                                    check_dtype=False)
-        s = pd.Series(pd.Categorical([np.nan]))
-        tm.assert_categorical_equal(s.unique(), pd.Categorical([np.nan]),
-                                    check_dtype=False)
-
-    @pytest.mark.parametrize(
-        "tc1, tc2",
-        [
-            (
-                Series([1, 2, 3, 3], dtype=np.dtype('int_')),
-                Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('int_'))
-            ),
-            (
-                Series([1, 2, 3, 3], dtype=np.dtype('uint')),
-                Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('uint'))
-            ),
-            (
-                Series([1, 2, 3, 3], dtype=np.dtype('float_')),
-                Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('float_'))
-            ),
-            (
-                Series([1, 2, 3, 3], dtype=np.dtype('unicode_')),
-                Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype('unicode_'))
-            )
-        ]
-    )
-    def test_drop_duplicates_non_bool(self, tc1, tc2):
-        # Test case 1
-        expected = Series([False, False, False, True])
-        assert_series_equal(tc1.duplicated(), expected)
-        assert_series_equal(tc1.drop_duplicates(), tc1[~expected])
-        sc = tc1.copy()
-        sc.drop_duplicates(inplace=True)
-        assert_series_equal(sc, tc1[~expected])
-
-        expected = Series([False, False, True, False])
-        assert_series_equal(tc1.duplicated(keep='last'), expected)
-        assert_series_equal(tc1.drop_duplicates(keep='last'), tc1[~expected])
-        sc = tc1.copy()
-        sc.drop_duplicates(keep='last', inplace=True)
-        assert_series_equal(sc, tc1[~expected])
-
-        expected = Series([False, False, True, True])
-        assert_series_equal(tc1.duplicated(keep=False), expected)
-        assert_series_equal(tc1.drop_duplicates(keep=False), tc1[~expected])
-        sc = tc1.copy()
-        sc.drop_duplicates(keep=False, inplace=True)
-        assert_series_equal(sc, tc1[~expected])
-
-        # Test case 2
-        expected = Series([False, False, False, False, True, True, False])
-        assert_series_equal(tc2.duplicated(), expected)
-        assert_series_equal(tc2.drop_duplicates(), tc2[~expected])
-        sc = tc2.copy()
-        sc.drop_duplicates(inplace=True)
-        assert_series_equal(sc, tc2[~expected])
-
-        expected = Series([False, True, True, False, False, False, False])
-        assert_series_equal(tc2.duplicated(keep='last'), expected)
-        assert_series_equal(tc2.drop_duplicates(keep='last'), tc2[~expected])
-        sc = tc2.copy()
-        sc.drop_duplicates(keep='last', inplace=True)
-        assert_series_equal(sc, tc2[~expected])
-
-        expected = Series([False, True, True, False, True, True, False])
-        assert_series_equal(tc2.duplicated(keep=False), expected)
-        assert_series_equal(tc2.drop_duplicates(keep=False), tc2[~expected])
-        sc = tc2.copy()
-        sc.drop_duplicates(keep=False, inplace=True)
-        assert_series_equal(sc, tc2[~expected])
-
-    def test_drop_duplicates_bool(self):
-        tc = Series([True, False, True, False])
-
-        expected = Series([False, False, True, True])
-        assert_series_equal(tc.duplicated(), expected)
-        assert_series_equal(tc.drop_duplicates(), tc[~expected])
-        sc = tc.copy()
-        sc.drop_duplicates(inplace=True)
-        assert_series_equal(sc, tc[~expected])
-
-        expected = Series([True, True, False, False])
-        assert_series_equal(tc.duplicated(keep='last'), expected)
-        assert_series_equal(tc.drop_duplicates(keep='last'), tc[~expected])
-        sc = tc.copy()
-        sc.drop_duplicates(keep='last', inplace=True)
-        assert_series_equal(sc, tc[~expected])
-
-        expected = Series([True, True, True, True])
-        assert_series_equal(tc.duplicated(keep=False), expected)
-        assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected])
-        sc = tc.copy()
-        sc.drop_duplicates(keep=False, inplace=True)
-        assert_series_equal(sc, tc[~expected])
-
     def test_clip(self):
         val = self.ts.median()
 
@@ -1416,7 +1278,8 @@ def test_ptp(self):
         N = 1000
         arr = np.random.randn(N)
         ser = Series(arr)
-        assert np.ptp(ser) == np.ptp(arr)
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            assert np.ptp(ser) == np.ptp(arr)
 
         # GH11163
         s = Series([3, 5, np.nan, -3, 10])
@@ -1457,10 +1320,6 @@ def test_empty_timeseries_redections_return_nat(self):
            assert Series([], dtype=dtype).min() is pd.NaT
            assert Series([], dtype=dtype).max() is pd.NaT
 
-    def test_unique_data_ownership(self):
-        # it works! #1807
-        Series(Series(["a", "c", "b"]).unique()).sort_values()
-
     def test_repeat(self):
         s = Series(np.random.randn(3), index=['a', 'b', 'c'])
 
@@ -1537,29 +1396,6 @@ def test_searchsorted_sorter(self):
         e = np.array([0, 2], dtype=np.intp)
         tm.assert_numpy_array_equal(r, e)
 
-    def test_is_unique(self):
-        # GH11946
-        s = Series(np.random.randint(0, 10, size=1000))
-        assert not s.is_unique
-        s = Series(np.arange(1000))
-        assert s.is_unique
-
-    def test_is_unique_class_ne(self, capsys):
-        # GH 20661
-        class Foo(object):
-            def __init__(self, val):
-                self._value = val
-
-            def __ne__(self, other):
-                raise Exception("NEQ not supported")
-
-        li = [Foo(i) for i in range(5)]
-        s = pd.Series(li, index=[i for i in range(5)])
-        _, err = capsys.readouterr()
-        s.is_unique
-        _, err = capsys.readouterr()
-        assert len(err) == 0
-
     def test_is_monotonic(self):
         s = Series(np.random.randint(0, 10, size=1000))
 
diff --git a/pandas/tests/series/test_duplicates.py b/pandas/tests/series/test_duplicates.py
new file mode 100644
index 0000000000000..2e4d64188307c
--- /dev/null
+++ b/pandas/tests/series/test_duplicates.py
@@ -0,0 +1,140 @@
+# coding=utf-8
+
+import pytest
+
+import numpy as np
+
+from pandas import Series, Categorical
+import pandas.util.testing as tm
+
+
+def test_value_counts_nunique():
+    # basics.rst doc example
+    series = Series(np.random.randn(500))
+    series[20:500] = np.nan
+    series[10:20] = 5000
+    result = series.nunique()
+    assert result == 11
+
+    # GH 18051
+    s = Series(Categorical([]))
+    assert s.nunique() == 0
+    s = Series(Categorical([np.nan]))
+    assert s.nunique() == 0
+
+
+def test_unique():
+    # GH714 also, dtype=float
+    s = Series([1.2345] * 100)
+    s[::2] = np.nan
+    result = s.unique()
+    assert len(result) == 2
+
+    s = Series([1.2345] * 100, dtype='f4')
+    s[::2] = np.nan
+    result = s.unique()
+    assert len(result) == 2
+
+    # NAs in object arrays #714
+    s = Series(['foo'] * 100, dtype='O')
+    s[::2] = np.nan
+    result = s.unique()
+    assert len(result) == 2
+
+    # decision about None
+    s = Series([1, 2, 3, None, None, None], dtype=object)
+    result = s.unique()
+    expected = np.array([1, 2, 3, None], dtype=object)
+    tm.assert_numpy_array_equal(result, expected)
+
+    # GH 18051
+    s = Series(Categorical([]))
+    tm.assert_categorical_equal(s.unique(), Categorical([]), check_dtype=False)
+    s = Series(Categorical([np.nan]))
+    tm.assert_categorical_equal(s.unique(), Categorical([np.nan]),
+                                check_dtype=False)
+
+
+def test_unique_data_ownership():
+    # it works! #1807
+    Series(Series(["a", "c", "b"]).unique()).sort_values()
+
+
+def test_is_unique():
+    # GH11946
+    s = Series(np.random.randint(0, 10, size=1000))
+    assert not s.is_unique
+    s = Series(np.arange(1000))
+    assert s.is_unique
+
+
+def test_is_unique_class_ne(capsys):
+    # GH 20661
+    class Foo(object):
+        def __init__(self, val):
+            self._value = val
+
+        def __ne__(self, other):
+            raise Exception("NEQ not supported")
+
+    li = [Foo(i) for i in range(5)]
+    s = Series(li, index=[i for i in range(5)])
+    _, err = capsys.readouterr()
+    s.is_unique
+    _, err = capsys.readouterr()
+    assert len(err) == 0
+
+
+@pytest.mark.parametrize(
+    'keep, expected',
+    [
+        ('first', Series([False, False, False, False, True, True, False])),
+        ('last', Series([False, True, True, False, False, False, False])),
+        (False, Series([False, True, True, False, True, True, False]))
+    ])
+def test_drop_duplicates_non_bool(any_numpy_dtype, keep, expected):
+    tc = Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(any_numpy_dtype))
+
+    tm.assert_series_equal(tc.duplicated(keep=keep), expected)
+    tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
+    sc = tc.copy()
+    sc.drop_duplicates(keep=keep, inplace=True)
+    tm.assert_series_equal(sc, tc[~expected])
+
+
+@pytest.mark.parametrize('keep, expected',
+                         [('first', Series([False, False, True, True])),
+                          ('last', Series([True, True, False, False])),
+                          (False, Series([True, True, True, True]))])
+def test_drop_duplicates_bool(keep, expected):
+    tc = Series([True, False, True, False])
+
+    tm.assert_series_equal(tc.duplicated(keep=keep), expected)
+    tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
+    sc = tc.copy()
+    sc.drop_duplicates(keep=keep, inplace=True)
+    tm.assert_series_equal(sc, tc[~expected])
+
+
+@pytest.mark.parametrize('keep, expected', [
+    ('first', Series([False, False, True, False, True], name='name')),
+    ('last', Series([True, True, False, False, False], name='name')),
+    (False, Series([True, True, True, False, True], name='name'))
+])
+def test_duplicated_keep(keep, expected):
+    s = Series(['a', 'b', 'b', 'c', 'a'], name='name')
+
+    result = s.duplicated(keep=keep)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize('keep, expected', [
+    ('first', Series([False, False, True, False, True])),
+    ('last', Series([True, True, False, False, False])),
+    (False, Series([True, True, True, False, True]))
+])
+def test_duplicated_nan_none(keep, expected):
+    s = Series([np.nan, 3, 3, None, np.nan], dtype=object)
+
+    result = s.duplicated(keep=keep)
+    tm.assert_series_equal(result, expected)
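
Not part of the patch: a minimal sketch of what the fixture-driven parametrization of test_drop_duplicates_non_bool expands to, for reviewing the migration away from the old tc1/tc2 cases. The dtype list below is an illustrative subset; the real test fans out over every entry of ALL_NUMPY_DTYPES defined in the conftest change above.

# Sketch only: hand-rolled equivalent of the any_numpy_dtype x keep
# cross-product that pytest generates for test_drop_duplicates_non_bool.
import numpy as np
from pandas import Series
import pandas.util.testing as tm

KEEP_CASES = [
    ('first', Series([False, False, False, False, True, True, False])),
    ('last', Series([False, True, True, False, False, False, False])),
    (False, Series([False, True, True, False, True, True, False])),
]

# Subset of ALL_NUMPY_DTYPES, for brevity.
for dtype in ["uint8", "int64", "float64", "complex128", str]:
    for keep, expected in KEEP_CASES:
        tc = Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype))
        tm.assert_series_equal(tc.duplicated(keep=keep), expected)
        tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])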