From 714e8c23b8ba72d75687955a05e28675f1b9779c Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 28 Nov 2018 23:24:04 +0100 Subject: [PATCH 1/5] TST: fix fixture for numpy dtypes --- pandas/conftest.py | 115 +++++++++++++++---------- pandas/tests/series/test_duplicates.py | 27 +++--- 2 files changed, 81 insertions(+), 61 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 29c3abf4d1e1b..0b68a4ab1b9eb 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -386,8 +386,14 @@ def tz_aware_fixture(request): COMPLEX_DTYPES = [complex, "complex64", "complex128"] STRING_DTYPES = [str, 'str', 'U'] +TIME_DTYPES = ['datetime64[ns]', 'M8[ns]', 'timedelta64[ns]', 'm8[ns]'] +BOOL_DTYPES = [bool, 'bool'] +BYTES_DTYPES = [bytes, 'bytes'] +OBJECT_DTYPES = [object, 'object'] + ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES -ALL_NUMPY_DTYPES = ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES +ALL_NUMPY_DTYPES = (ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES + + TIME_DTYPES + BOOL_DTYPES + BYTES_DTYPES + OBJECT_DTYPES) @pytest.fixture(params=STRING_DTYPES) @@ -406,8 +412,9 @@ def float_dtype(request): """ Parameterized fixture for float dtypes. - * float32 - * float64 + * float + * 'float32' + * 'float64' """ return request.param @@ -418,8 +425,9 @@ def complex_dtype(request): """ Parameterized fixture for complex dtypes. - * complex64 - * complex128 + * complex + * 'complex64' + * 'complex128' """ return request.param @@ -430,10 +438,11 @@ def sint_dtype(request): """ Parameterized fixture for signed integer dtypes. - * int8 - * int16 - * int32 - * int64 + * int + * 'int8' + * 'int16' + * 'int32' + * 'int64' """ return request.param @@ -444,10 +453,10 @@ def uint_dtype(request): """ Parameterized fixture for unsigned integer dtypes. - * uint8 - * uint16 - * uint32 - * uint64 + * 'uint8' + * 'uint16' + * 'uint32' + * 'uint64' """ return request.param @@ -456,16 +465,17 @@ def uint_dtype(request): @pytest.fixture(params=ALL_INT_DTYPES) def any_int_dtype(request): """ - Parameterized fixture for any integer dtypes. + Parameterized fixture for any integer dtype. - * int8 - * uint8 - * int16 - * uint16 - * int32 - * uint32 - * int64 - * uint64 + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' """ return request.param @@ -474,18 +484,20 @@ def any_int_dtype(request): @pytest.fixture(params=ALL_REAL_DTYPES) def any_real_dtype(request): """ - Parameterized fixture for any (purely) real numeric dtypes. + Parameterized fixture for any (purely) real numeric dtype. - * int8 - * uint8 - * int16 - * uint16 - * int32 - * uint32 - * int64 - * uint64 - * float32 - * float64 + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' """ return request.param @@ -496,21 +508,34 @@ def any_numpy_dtype(request): """ Parameterized fixture for all numpy dtypes. - * int8 - * uint8 - * int16 - * uint16 - * int32 - * uint32 - * int64 - * uint64 - * float32 - * float64 - * complex64 - * complex128 + * bool + * 'bool' + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + * complex + * 'complex64' + * 'complex128' * str * 'str' * 'U' + * bytes + * 'bytes' + * 'datetime64[ns]' + * 'M8[ns]' + * 'timedelta64[ns]' + * 'm8[ns]' + * object + * 'object' """ return request.param diff --git a/pandas/tests/series/test_duplicates.py b/pandas/tests/series/test_duplicates.py index f41483405f6cc..ccd2239b7c922 100644 --- a/pandas/tests/series/test_duplicates.py +++ b/pandas/tests/series/test_duplicates.py @@ -91,22 +91,17 @@ def __ne__(self, other): ('last', Series([False, True, True, False, False, False, False])), (False, Series([False, True, True, False, True, True, False])) ]) -def test_drop_duplicates_non_bool(any_numpy_dtype, keep, expected): - tc = Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(any_numpy_dtype)) - - tm.assert_series_equal(tc.duplicated(keep=keep), expected) - tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected]) - sc = tc.copy() - sc.drop_duplicates(keep=keep, inplace=True) - tm.assert_series_equal(sc, tc[~expected]) - - -@pytest.mark.parametrize('keep, expected', - [('first', Series([False, False, True, True])), - ('last', Series([True, True, False, False])), - (False, Series([True, True, True, True]))]) -def test_drop_duplicates_bool(keep, expected): - tc = Series([True, False, True, False]) +def test_drop_duplicates(any_numpy_dtype, keep, expected): + tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype)) + + if tc.dtype == 'bool': + # all non-zero values are True and hence duplicate + if keep == 'first': + expected = Series([False, False, True, True, True, True, True]) + elif keep == 'last': + expected = Series([True, True, True, True, True, False, False]) + else: + expected = Series([True] * 7) tm.assert_series_equal(tc.duplicated(keep=keep), expected) tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected]) From 51c33b0e568f22ddcd2908e923caff41461cb398 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 29 Nov 2018 00:14:53 +0100 Subject: [PATCH 2/5] Review (jreback) --- pandas/tests/series/test_duplicates.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pandas/tests/series/test_duplicates.py b/pandas/tests/series/test_duplicates.py index ccd2239b7c922..26222637e3509 100644 --- a/pandas/tests/series/test_duplicates.py +++ b/pandas/tests/series/test_duplicates.py @@ -95,13 +95,21 @@ def test_drop_duplicates(any_numpy_dtype, keep, expected): tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype)) if tc.dtype == 'bool': - # all non-zero values are True and hence duplicate - if keep == 'first': - expected = Series([False, False, True, True, True, True, True]) - elif keep == 'last': - expected = Series([True, True, True, True, True, False, False]) - else: - expected = Series([True] * 7) + pytest.skip('tested separately in test_drop_duplicates_bool') + + tm.assert_series_equal(tc.duplicated(keep=keep), expected) + tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(keep=keep, inplace=True) + tm.assert_series_equal(sc, tc[~expected]) + + +@pytest.mark.parametrize('keep, expected', + [('first', Series([False, False, True, True])), + ('last', Series([True, True, False, False])), + (False, Series([True, True, True, True]))]) +def test_drop_duplicates_bool(keep, expected): + tc = Series([True, False, True, False]) tm.assert_series_equal(tc.duplicated(keep=keep), expected) tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected]) From 2205e27e2ad6ca0f7bb2e56f3e355031dcd04589 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 29 Nov 2018 07:38:37 +0100 Subject: [PATCH 3/5] Review + fix --- pandas/conftest.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 0b68a4ab1b9eb..69f5959736034 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -386,14 +386,15 @@ def tz_aware_fixture(request): COMPLEX_DTYPES = [complex, "complex64", "complex128"] STRING_DTYPES = [str, 'str', 'U'] -TIME_DTYPES = ['datetime64[ns]', 'M8[ns]', 'timedelta64[ns]', 'm8[ns]'] +DATETIMEDELTA_DTYPES = ['datetime64[ns]', 'M8[ns]', 'timedelta64[ns]', 'm8[ns]'] BOOL_DTYPES = [bool, 'bool'] BYTES_DTYPES = [bytes, 'bytes'] OBJECT_DTYPES = [object, 'object'] ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES ALL_NUMPY_DTYPES = (ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES - + TIME_DTYPES + BOOL_DTYPES + BYTES_DTYPES + OBJECT_DTYPES) + + DATETIMEDELTA_DTYPES + BOOL_DTYPES + OBJECT_DTYPES + + BYTES_DTYPES * PY3) # only add bytes for PY3 @pytest.fixture(params=STRING_DTYPES) From 5b17f2da4daa5b14be39b7dac34442cd31789628 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 29 Nov 2018 07:43:02 +0100 Subject: [PATCH 4/5] Lint --- pandas/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 69f5959736034..2063136a391d7 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -386,7 +386,8 @@ def tz_aware_fixture(request): COMPLEX_DTYPES = [complex, "complex64", "complex128"] STRING_DTYPES = [str, 'str', 'U'] -DATETIMEDELTA_DTYPES = ['datetime64[ns]', 'M8[ns]', 'timedelta64[ns]', 'm8[ns]'] +DATETIMEDELTA_DTYPES = ['datetime64[ns]', 'M8[ns]', + 'timedelta64[ns]', 'm8[ns]'] BOOL_DTYPES = [bool, 'bool'] BYTES_DTYPES = [bytes, 'bytes'] OBJECT_DTYPES = [object, 'object'] From 62e8f8a42590c7adc2abfa7168a9cc1a2b3640a8 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 29 Nov 2018 17:36:42 +0100 Subject: [PATCH 5/5] Review (jreback) --- pandas/conftest.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 2063136a391d7..20f97bdec1107 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -386,16 +386,17 @@ def tz_aware_fixture(request): COMPLEX_DTYPES = [complex, "complex64", "complex128"] STRING_DTYPES = [str, 'str', 'U'] -DATETIMEDELTA_DTYPES = ['datetime64[ns]', 'M8[ns]', - 'timedelta64[ns]', 'm8[ns]'] +DATETIME_DTYPES = ['datetime64[ns]', 'M8[ns]'] +TIMEDELTA_DTYPES = ['timedelta64[ns]', 'm8[ns]'] + BOOL_DTYPES = [bool, 'bool'] BYTES_DTYPES = [bytes, 'bytes'] OBJECT_DTYPES = [object, 'object'] ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES ALL_NUMPY_DTYPES = (ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES - + DATETIMEDELTA_DTYPES + BOOL_DTYPES + OBJECT_DTYPES - + BYTES_DTYPES * PY3) # only add bytes for PY3 + + DATETIME_DTYPES + TIMEDELTA_DTYPES + BOOL_DTYPES + + OBJECT_DTYPES + BYTES_DTYPES * PY3) # bytes only for PY3 @pytest.fixture(params=STRING_DTYPES)