From 4a7c62ab2e1c43ea7cbc12086146143448bf12dd Mon Sep 17 00:00:00 2001 From: ssolanki Date: Tue, 3 Apr 2018 12:11:37 +0530 Subject: [PATCH 01/13] TST, fix for issue #17978. Addition of "hypothesis usage" in test cases of tests/reshape/test_util.py as kind of POC. --- pandas/tests/reshape/test_util.py | 105 ++++++++++++++++++++++++++---- 1 file changed, 92 insertions(+), 13 deletions(-) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index e4a9591b95c26..b1b0593b94c1f 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -4,31 +4,101 @@ import pandas.util.testing as tm from pandas.core.reshape.util import cartesian_product +from hypothesis import strategies as st +from hypothesis import given, settings, assume +from datetime import date +from dateutil import relativedelta +import string + + +NO_OF_EXAMPLES_PER_TEST_CASE = 20 + + +def get_elements(elem_type): + strategy = st.nothing() + if elem_type == bool: + strategy = st.booleans() + elif elem_type == int: + strategy = st.integers() + elif elem_type == float: + strategy = st.floats() + elif elem_type == str: + strategy = st.text(string.ascii_letters, max_size=10) + return strategy + + +@st.composite +def get_seq(draw, types, mixed=False, min_size=None, max_size=None, transform_func=None): + """helper function to generate strategy for creating lists. parameters define the nature of to be generated list. + :param types: what type of elements constitute the list + :param mixed: if True, list will contains elements from all types listed in arg, oterwise it will have elements only from types[0]. + :param min_size: minimum size of the list. + :param max_size: maximum size of the list. + :param transform_func: a callable which can be applied to whole list after it has been generated. 
+ """ + strategy = st.nothing() + if min_size is None: + min_size = draw(st.integers(min_value=0, max_value=100)) + + if max_size is None: + max_size = draw(st.integers(min_value=min_size, max_value=100)) + + assert min_size <= max_size, 'max_size must be greater than equal to min_size' + + elem_strategies = [] + for elem_type in types: + elem_strategies.append(get_elements(elem_type)) + if not mixed: + break + + if transform_func: + strategy = draw(st.lists(st.one_of(elem_strategies), + min_size=min_size, max_size=max_size).map(transform_func)) + else: + strategy = draw(st.lists(st.one_of(elem_strategies), + min_size=min_size, max_size=max_size)) + return strategy + class TestCartesianProduct(object): - def test_simple(self): - x, y = list('ABC'), [1, 22] + @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) + @given(get_seq((str,), False, 1, 1), + get_seq((int,), False, 1, 2)) + def test_simple(self, x, y): + x = list(x[0]) + # non-empty test case is handled in test_empty, therefore ignore it here + assume(len(x) != 0) result1, result2 = cartesian_product([x, y]) - expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C']) - expected2 = np.array([1, 22, 1, 22, 1, 22]) + expected1 = np.array([item1 for item1 in x for item2 in y]) + expected2 = np.array([item2 for item1 in x for item2 in y]) + tm.assert_numpy_array_equal(result1, expected1) tm.assert_numpy_array_equal(result2, expected2) + @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) def test_datetimeindex(self): # regression test for GitHub issue #6439 # make sure that the ordering on datetimeindex is consistent - x = date_range('2000-01-01', periods=2) + d = st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1)).example() + n = d + relativedelta.relativedelta(days=1) + x = date_range(d, periods=2) result1, result2 = [Index(y).day for y in cartesian_product([x, x])] - expected1 = Index([1, 1, 2, 2]) - expected2 = Index([1, 2, 1, 2]) + expected1 = Index([d.day, d.day, n.day, n.day]) + expected2 = 
Index([d.day, n.day, d.day, n.day]) + tm.assert_index_equal(result1, expected1) tm.assert_index_equal(result2, expected2) - def test_empty(self): + @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) + @given(st.lists(st.nothing()), + get_seq((int,), False), + get_seq((str,), False)) + def test_empty(self, empty_list, list_of_int, list_of_str): # product of empty factors - X = [[], [0, 1], []] - Y = [[], [], ['a', 'b', 'c']] + X = [empty_list, list_of_int, empty_list] + Y = [empty_list, empty_list, list_of_str] + for x, y in zip(X, Y): expected1 = np.array([], dtype=np.asarray(x).dtype) expected2 = np.array([], dtype=np.asarray(y).dtype) @@ -37,13 +107,22 @@ def test_empty(self): tm.assert_numpy_array_equal(result2, expected2) # empty product (empty input): - result = cartesian_product([]) + result = cartesian_product(empty_list) expected = [] assert result == expected + @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) def test_invalid_input(self): - invalid_inputs = [1, [1], [1, 2], [[1], 2], - 'a', ['a'], ['a', 'b'], [['a'], 'b']] + invalid_inputs = [st.integers().example(), + st.tuples(st.integers()).example(), + st.tuples(st.integers(), st.integers()).example(), + st.text(string.ascii_letters, min_size=1, max_size=1).example(), + st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1)).example(), + st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1), + st.text(string.ascii_letters, min_size=1, max_size=1)).example(), + st.tuples(st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1)), + st.text(string.ascii_letters, min_size=1, max_size=1)).example()] + msg = "Input must be a list-like of list-likes" for X in invalid_inputs: tm.assert_raises_regex(TypeError, msg, cartesian_product, X=X) From 77a4f0637e4f4078ed228ca263a2c88debdfef7e Mon Sep 17 00:00:00 2001 From: ssolanki Date: Tue, 3 Apr 2018 19:27:06 +0530 Subject: [PATCH 02/13] add hypothesis package requirement in *.run and *.yaml file. 
--- ci/environment-dev.yaml | 1 + ci/requirements-2.7.run | 1 + ci/requirements-2.7_COMPAT.run | 1 + ci/requirements-2.7_LOCALE.run | 1 + ci/requirements-2.7_SLOW.run | 1 + ci/requirements-2.7_WIN.run | 1 + ci/requirements-3.5.run | 1 + ci/requirements-3.5_ASCII.run | 1 + ci/requirements-3.5_OSX.run | 1 + ci/requirements-3.6.run | 1 + ci/requirements-3.6_LOCALE.run | 1 + ci/requirements-3.6_LOCALE_SLOW.run | 1 + ci/requirements-3.6_WIN.run | 1 + 13 files changed, 13 insertions(+) diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml index 1337fc54e9aac..6c2e653891824 100644 --- a/ci/environment-dev.yaml +++ b/ci/environment-dev.yaml @@ -13,3 +13,4 @@ dependencies: - pytz - setuptools>=3.3 - sphinx + - hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run index 7c10b98fb6e14..fa42d33f1a582 100644 --- a/ci/requirements-2.7.run +++ b/ci/requirements-2.7.run @@ -18,3 +18,4 @@ patsy pymysql=0.6.3 jinja2=2.8 xarray=0.8.0 +hypothesis>=3.46.0 diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run index c3daed6e6e1da..bc1d1dcac7bf8 100644 --- a/ci/requirements-2.7_COMPAT.run +++ b/ci/requirements-2.7_COMPAT.run @@ -12,3 +12,4 @@ pymysql=0.6.0 sqlalchemy=0.7.8 xlsxwriter=0.5.2 jinja2=2.8 +hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 0a809a7dd6e5d..515dfe52a7af8 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -10,3 +10,4 @@ matplotlib=1.4.3 sqlalchemy=0.8.1 lxml scipy +hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run index db95a6ccb2314..d9cc8cd64f2ed 100644 --- a/ci/requirements-2.7_SLOW.run +++ b/ci/requirements-2.7_SLOW.run @@ -17,3 +17,4 @@ psycopg2 pymysql html5lib beautifulsoup4 +hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-2.7_WIN.run b/ci/requirements-2.7_WIN.run 
index c4ca7fc736bb1..6202c3ee0c8ba 100644 --- a/ci/requirements-2.7_WIN.run +++ b/ci/requirements-2.7_WIN.run @@ -16,3 +16,4 @@ bottleneck html5lib beautifulsoup4 jinja2=2.8 +hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run index 669cf437f2164..3e173300df435 100644 --- a/ci/requirements-3.5.run +++ b/ci/requirements-3.5.run @@ -18,3 +18,4 @@ psycopg2 s3fs beautifulsoup4 ipython +hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-3.5_ASCII.run b/ci/requirements-3.5_ASCII.run index b9d543f557d06..9b7a834178120 100644 --- a/ci/requirements-3.5_ASCII.run +++ b/ci/requirements-3.5_ASCII.run @@ -1,3 +1,4 @@ python-dateutil pytz numpy +hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-3.5_OSX.run b/ci/requirements-3.5_OSX.run index 1d83474d10f2f..9cfa800ae9c56 100644 --- a/ci/requirements-3.5_OSX.run +++ b/ci/requirements-3.5_OSX.run @@ -14,3 +14,4 @@ bottleneck xarray s3fs beautifulsoup4 +hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index 822144a80bc9a..c3144ebf6d19e 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -23,3 +23,4 @@ beautifulsoup4 s3fs xarray ipython +hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-3.6_LOCALE.run b/ci/requirements-3.6_LOCALE.run index ad54284c6f7e3..5232a5fc22c48 100644 --- a/ci/requirements-3.6_LOCALE.run +++ b/ci/requirements-3.6_LOCALE.run @@ -20,3 +20,4 @@ beautifulsoup4 s3fs xarray ipython +hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-3.6_LOCALE_SLOW.run b/ci/requirements-3.6_LOCALE_SLOW.run index ad54284c6f7e3..5232a5fc22c48 100644 --- a/ci/requirements-3.6_LOCALE_SLOW.run +++ b/ci/requirements-3.6_LOCALE_SLOW.run @@ -20,3 +20,4 @@ beautifulsoup4 s3fs xarray ipython +hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/requirements-3.6_WIN.run b/ci/requirements-3.6_WIN.run 
index 3042888763863..49ed6006d98e5 100644 --- a/ci/requirements-3.6_WIN.run +++ b/ci/requirements-3.6_WIN.run @@ -15,3 +15,4 @@ blosc thrift=0.10* fastparquet pyarrow +hypothesis>=3.46.0 \ No newline at end of file From 7e2ed0caf73e2f93217b1a1d24b850b9f543d283 Mon Sep 17 00:00:00 2001 From: ssolanki Date: Wed, 4 Apr 2018 14:52:50 +0530 Subject: [PATCH 03/13] TST, fix for issue #17978. Addition of "hypothesis usage" in test cases of tests/reshape/test_util.py as kind of POC. Incorporate review comments. Resolve flake8 warning. --- pandas/tests/reshape/test_util.py | 98 +++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 23 deletions(-) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index b1b0593b94c1f..03a1bc4ee37f4 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -28,13 +28,55 @@ def get_elements(elem_type): @st.composite -def get_seq(draw, types, mixed=False, min_size=None, max_size=None, transform_func=None): - """helper function to generate strategy for creating lists. parameters define the nature of to be generated list. - :param types: what type of elements constitute the list - :param mixed: if True, list will contains elements from all types listed in arg, oterwise it will have elements only from types[0]. - :param min_size: minimum size of the list. - :param max_size: maximum size of the list. - :param transform_func: a callable which can be applied to whole list after it has been generated. +def get_seq(draw, types, mixed=False, min_size=None, max_size=None, + transform_func=None): + """ + Helper function to generate strategy for creating lists. + What constitute in the generated list is driven by the different + parameters. + + Parameters + ---------- + types: iterable sequence like tuple or list + types which can be in the generated list. 
+ mixed: bool + if True, list will contains elements from all types listed in arg, + otherwise it will have elements only from types[0]. + min_size: int + minimum size of the list. + max_size: int + maximum size of the list. + transform_func: callable + a callable which can be applied to whole list after it has been + generated. It can think of as providing functionality of filter + and map function. + + Returns + ------- + hypothesis lists strategy. + + Examples + -------- + seq_strategy = get_seq((int, str, bool), + mixed=True, min_size=1, max_size=5) + seq_strategy.example() + Out[12]: ['lkYMSn', -2501, 35, 'J'] + seq_strategy.example() + Out[13]: [True] + seq_strategy.example() + Out[14]: ['dRWgQYrBrW', True, False, 'gmsujJVDBM', 'Z'] + + seq_strategy = get_seq((int, bool), + mixed=False, + min_size=1, + max_size=5, + transform_func=lambda seq: [str(x) for x in seq]) + seq_strategy.example() + Out[19]: ['-1892'] + seq_strategy.example() + Out[20]: ['22', '66', '14785', '-26312', '32'] + seq_strategy.example() + Out[21]: ['22890', '-15537', '96'] """ strategy = st.nothing() if min_size is None: @@ -43,7 +85,8 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None, transform_fu if max_size is None: max_size = draw(st.integers(min_value=min_size, max_value=100)) - assert min_size <= max_size, 'max_size must be greater than equal to min_size' + assert min_size <= max_size, \ + 'max_size must be greater than equal to min_size' elem_strategies = [] for elem_type in types: @@ -53,10 +96,12 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None, transform_fu if transform_func: strategy = draw(st.lists(st.one_of(elem_strategies), - min_size=min_size, max_size=max_size).map(transform_func)) + min_size=min_size, + max_size=max_size).map(transform_func)) else: strategy = draw(st.lists(st.one_of(elem_strategies), - min_size=min_size, max_size=max_size)) + min_size=min_size, + max_size=max_size)) return strategy @@ -67,7 +112,8 @@ class 
TestCartesianProduct(object): get_seq((int,), False, 1, 2)) def test_simple(self, x, y): x = list(x[0]) - # non-empty test case is handled in test_empty, therefore ignore it here + # non-empty test case is handled in test_empty, + # therefore ignore it here. assume(len(x) != 0) result1, result2 = cartesian_product([x, y]) expected1 = np.array([item1 for item1 in x for item2 in y]) @@ -77,10 +123,10 @@ def test_simple(self, x, y): tm.assert_numpy_array_equal(result2, expected2) @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) - def test_datetimeindex(self): + @given(st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1))) + def test_datetimeindex(self, d): # regression test for GitHub issue #6439 # make sure that the ordering on datetimeindex is consistent - d = st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1)).example() n = d + relativedelta.relativedelta(days=1) x = date_range(d, periods=2) result1, result2 = [Index(y).day for y in cartesian_product([x, x])] @@ -112,16 +158,22 @@ def test_empty(self, empty_list, list_of_int, list_of_str): assert result == expected @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) - def test_invalid_input(self): - invalid_inputs = [st.integers().example(), - st.tuples(st.integers()).example(), - st.tuples(st.integers(), st.integers()).example(), - st.text(string.ascii_letters, min_size=1, max_size=1).example(), - st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1)).example(), - st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1), - st.text(string.ascii_letters, min_size=1, max_size=1)).example(), - st.tuples(st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1)), - st.text(string.ascii_letters, min_size=1, max_size=1)).example()] + @given(st.integers(), + st.text(string.ascii_letters, min_size=1), + get_seq((int, str), True, min_size=1), + st.lists(st.one_of(st.integers(), + st.text(string.ascii_letters, min_size=1), + get_seq((int,), min_size=1) + ), + 
min_size=1).filter( + lambda x: len(x) == 1 and type(x[0]) != list) + ) + def test_invalid_input(self, number, text, seq, mixed_seq): + + invalid_inputs = [number, + text, + seq, + mixed_seq] msg = "Input must be a list-like of list-likes" for X in invalid_inputs: From 11eb77732f3ef82ee6b0c54621eb3fed1ccb453b Mon Sep 17 00:00:00 2001 From: ssolanki Date: Wed, 4 Apr 2018 19:34:13 +0530 Subject: [PATCH 04/13] Using the filter function makes hypothesis fail in test_invalid_input, so build the mixed list with st.builds instead. Add hypothesis package requirement. --- ci/requirements-3.6_NUMPY_DEV.run | 1 + pandas/tests/reshape/test_util.py | 10 +++------- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/ci/requirements-3.6_NUMPY_DEV.run b/ci/requirements-3.6_NUMPY_DEV.run index af44f198c687e..20cdf454a3cce 100644 --- a/ci/requirements-3.6_NUMPY_DEV.run +++ b/ci/requirements-3.6_NUMPY_DEV.run @@ -1 +1,2 @@ pytz +hypothesis>=3.46.0 \ No newline at end of file diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index 03a1bc4ee37f4..ff96712d2ecf7 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -161,13 +161,9 @@ def test_empty(self, empty_list, list_of_int, list_of_str): @given(st.integers(), st.text(string.ascii_letters, min_size=1), get_seq((int, str), True, min_size=1), - st.lists(st.one_of(st.integers(), - st.text(string.ascii_letters, min_size=1), - get_seq((int,), min_size=1) - ), - min_size=1).filter( - lambda x: len(x) == 1 and type(x[0]) != list) - ) + st.builds(lambda *x: list(x), st.integers(), + st.text(string.ascii_letters, min_size=1), + st.lists(st.integers(), min_size=1))) def test_invalid_input(self, number, text, seq, mixed_seq): invalid_inputs = [number, From fca259c92722b98f07e0212746a905e73cc586ca Mon Sep 17 00:00:00 2001 From: ssolanki Date: Wed, 4 Apr 2018 21:43:40 +0530 Subject: [PATCH 05/13] TST, fix for issue #17978.
Addition of "hypothesis usage" in test cases of tests/reshape/test_util.py as kind of POC. Incorporate review comments. Resolve flake8 warning. Add section for hypothesis in contributing.rst --- doc/source/contributing.rst | 60 +++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 6d5ac31c39a62..a64f684d46a4a 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -775,6 +775,66 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex test_cool_feature.py::test_dtypes[int8] PASSED test_cool_feature.py::test_series[int8] PASSED +Transitioning to ``hypothesis`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +With the transition to pytest, things have become easier for testing by having reduced boilerplate for test cases and also by utilizing pytest's features like parametizing, skipping and marking test cases. +However, one has to still come up with input data examples which can be tested against the functionality. There is always a possibility to skip testing an example which could have failed the test case. +Imagine if some framework could generate random input examples based on the property/specification of the function being tested. That is exactly what hypothesis does by generating the input data based on some set of specifications provided by the user. +e.g suppose we have to test python's sum function for a list of int. + +Here is a sample test case using pytest: +.. code-block:: python + import pytest + + @pytest.mark.parametrize('seq', [ + [0, 0, 0], + [1, 2, 3, 4], + [-3, 5, -8, 23], + [12345678, 9876543, 567894321] + ]) + def test_sum_using_pytest(seq): + total = 0 + for item in seq: + total += item + assert sum(seq) == total + +output of test cases: +.. code-block:: shell + collecting ... 
collected 4 items + pytest_example.py::test_sum_using_pytest[seq0] PASSED [ 25%] + pytest_example.py::test_sum_using_pytest[seq1] PASSED [ 50%] + pytest_example.py::test_sum_using_pytest[seq2] PASSED [ 75%] + pytest_example.py::test_sum_using_pytest[seq3] PASSED [100%] + + ========================== 4 passed in 0.06 seconds =========================== + + +Compare it with below example for the same test case using hypothesis. +.. code-block:: python + from hypothesis import strategies as st + from hypothesis import given + + + @given(st.lists(st.integers())) + def test_sum(seq): + total = 0 + for item in seq: + total += item + assert sum(seq) == total + +output of test cases: +.. code-block:: shell + collecting ... collected 1 item + hypothesis_example.py::test_sum PASSED [100%] + + ========================== 1 passed in 0.33 seconds =========================== + +The main difference in above example is use of a decorator "@given(st.lists(st.integers()))" which if applied to test case function, generates some random list of int, which is then assigned to parameter of test case. +For more information about hypothesis or in general about property based testing, check below links: +- https://hypothesis.readthedocs.io/en/latest/quickstart.html +- https://hypothesis.works/articles/what-is-property-based-testing/ +- http://blog.jessitron.com/2013/04/property-based-testing-what-is-it.html + Running the test suite ---------------------- From d30d6fd2c493451adeb9f33ce84b2db4ab8be3e7 Mon Sep 17 00:00:00 2001 From: ssolanki Date: Wed, 4 Apr 2018 22:03:10 +0530 Subject: [PATCH 06/13] TST, fix for issue #17978. Addition of "hypothesis usage" in test cases of tests/reshape/test_util.py as kind of POC. Incorporate review comments. Resolve flake8 warning. 
Add section for hypothesis in contributing.rst --- doc/source/contributing.rst | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index a64f684d46a4a..122d5f7e49c7b 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -776,14 +776,18 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex test_cool_feature.py::test_series[int8] PASSED Transitioning to ``hypothesis`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ With the transition to pytest, things have become easier for testing by having reduced boilerplate for test cases and also by utilizing pytest's features like parametizing, skipping and marking test cases. + However, one has to still come up with input data examples which can be tested against the functionality. There is always a possibility to skip testing an example which could have failed the test case. + Imagine if some framework could generate random input examples based on the property/specification of the function being tested. That is exactly what hypothesis does by generating the input data based on some set of specifications provided by the user. e.g suppose we have to test python's sum function for a list of int. Here is a sample test case using pytest: + .. code-block:: python + import pytest @pytest.mark.parametrize('seq', [ @@ -799,7 +803,9 @@ Here is a sample test case using pytest: assert sum(seq) == total output of test cases: + .. code-block:: shell + collecting ... collected 4 items pytest_example.py::test_sum_using_pytest[seq0] PASSED [ 25%] pytest_example.py::test_sum_using_pytest[seq1] PASSED [ 50%] @@ -810,7 +816,9 @@ output of test cases: Compare it with below example for the same test case using hypothesis. + .. 
code-block:: python + from hypothesis import strategies as st from hypothesis import given @@ -822,8 +830,11 @@ Compare it with below example for the same test case using hypothesis. total += item assert sum(seq) == total + output of test cases: + .. code-block:: shell + collecting ... collected 1 item hypothesis_example.py::test_sum PASSED [100%] @@ -831,6 +842,7 @@ output of test cases: The main difference in above example is use of a decorator "@given(st.lists(st.integers()))" which if applied to test case function, generates some random list of int, which is then assigned to parameter of test case. For more information about hypothesis or in general about property based testing, check below links: + - https://hypothesis.readthedocs.io/en/latest/quickstart.html - https://hypothesis.works/articles/what-is-property-based-testing/ - http://blog.jessitron.com/2013/04/property-based-testing-what-is-it.html From 090773b9281e007157a9e2ed7f0b094f454cb914 Mon Sep 17 00:00:00 2001 From: ssolanki Date: Sun, 8 Apr 2018 11:56:09 +0530 Subject: [PATCH 07/13] TST, fix for issue #17978. Moved the generic helpers to pandas/util/_hypothesis.py. Not sure what exactly was required to change, but tried to change the content as per review comments.
--- doc/source/contributing.rst | 8 ++- pandas/tests/reshape/test_util.py | 101 +++--------------------------- pandas/util/_hypothesis.py | 97 ++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 97 deletions(-) create mode 100644 pandas/util/_hypothesis.py diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 122d5f7e49c7b..d4e7442b550d7 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -775,13 +775,13 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex test_cool_feature.py::test_dtypes[int8] PASSED test_cool_feature.py::test_series[int8] PASSED -Transitioning to ``hypothesis`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Using ``hypothesis`` +~~~~~~~~~~~~~~~~~~~~ With the transition to pytest, things have become easier for testing by having reduced boilerplate for test cases and also by utilizing pytest's features like parametizing, skipping and marking test cases. However, one has to still come up with input data examples which can be tested against the functionality. There is always a possibility to skip testing an example which could have failed the test case. -Imagine if some framework could generate random input examples based on the property/specification of the function being tested. That is exactly what hypothesis does by generating the input data based on some set of specifications provided by the user. +Hypothesis is a python package which helps in overcoming this issue by generating the input data based on some set of specifications provided by the user. e.g suppose we have to test python's sum function for a list of int. 
Here is a sample test case using pytest: @@ -841,6 +841,8 @@ output of test cases: ========================== 1 passed in 0.33 seconds =========================== The main difference in above example is use of a decorator "@given(st.lists(st.integers()))" which if applied to test case function, generates some random list of int, which is then assigned to parameter of test case. +Above example clearly helps in adding more coverage for our test functions. + For more information about hypothesis or in general about property based testing, check below links: - https://hypothesis.readthedocs.io/en/latest/quickstart.html diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index ff96712d2ecf7..3ab53cfebbaff 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -4,105 +4,18 @@ import pandas.util.testing as tm from pandas.core.reshape.util import cartesian_product -from hypothesis import strategies as st -from hypothesis import given, settings, assume +import string from datetime import date from dateutil import relativedelta -import string +from pandas.util._hypothesis import (st, + given, + settings, + get_seq, + assume) -NO_OF_EXAMPLES_PER_TEST_CASE = 20 - -def get_elements(elem_type): - strategy = st.nothing() - if elem_type == bool: - strategy = st.booleans() - elif elem_type == int: - strategy = st.integers() - elif elem_type == float: - strategy = st.floats() - elif elem_type == str: - strategy = st.text(string.ascii_letters, max_size=10) - return strategy - - -@st.composite -def get_seq(draw, types, mixed=False, min_size=None, max_size=None, - transform_func=None): - """ - Helper function to generate strategy for creating lists. - What constitute in the generated list is driven by the different - parameters. - - Parameters - ---------- - types: iterable sequence like tuple or list - types which can be in the generated list. 
- mixed: bool - if True, list will contains elements from all types listed in arg, - otherwise it will have elements only from types[0]. - min_size: int - minimum size of the list. - max_size: int - maximum size of the list. - transform_func: callable - a callable which can be applied to whole list after it has been - generated. It can think of as providing functionality of filter - and map function. - - Returns - ------- - hypothesis lists strategy. - - Examples - -------- - seq_strategy = get_seq((int, str, bool), - mixed=True, min_size=1, max_size=5) - seq_strategy.example() - Out[12]: ['lkYMSn', -2501, 35, 'J'] - seq_strategy.example() - Out[13]: [True] - seq_strategy.example() - Out[14]: ['dRWgQYrBrW', True, False, 'gmsujJVDBM', 'Z'] - - seq_strategy = get_seq((int, bool), - mixed=False, - min_size=1, - max_size=5, - transform_func=lambda seq: [str(x) for x in seq]) - seq_strategy.example() - Out[19]: ['-1892'] - seq_strategy.example() - Out[20]: ['22', '66', '14785', '-26312', '32'] - seq_strategy.example() - Out[21]: ['22890', '-15537', '96'] - """ - strategy = st.nothing() - if min_size is None: - min_size = draw(st.integers(min_value=0, max_value=100)) - - if max_size is None: - max_size = draw(st.integers(min_value=min_size, max_value=100)) - - assert min_size <= max_size, \ - 'max_size must be greater than equal to min_size' - - elem_strategies = [] - for elem_type in types: - elem_strategies.append(get_elements(elem_type)) - if not mixed: - break - - if transform_func: - strategy = draw(st.lists(st.one_of(elem_strategies), - min_size=min_size, - max_size=max_size).map(transform_func)) - else: - strategy = draw(st.lists(st.one_of(elem_strategies), - min_size=min_size, - max_size=max_size)) - return strategy +NO_OF_EXAMPLES_PER_TEST_CASE = 20 class TestCartesianProduct(object): diff --git a/pandas/util/_hypothesis.py b/pandas/util/_hypothesis.py new file mode 100644 index 0000000000000..6e1977123df25 --- /dev/null +++ b/pandas/util/_hypothesis.py @@ -0,0 
+1,97 @@ +import string +from hypothesis import (given, + settings, + assume, + strategies as st, + ) + + +def get_elements(elem_type): + strategy = st.nothing() + if elem_type == bool: + strategy = st.booleans() + elif elem_type == int: + strategy = st.integers() + elif elem_type == float: + strategy = st.floats() + elif elem_type == str: + strategy = st.text(string.ascii_letters, max_size=10) + return strategy + + +@st.composite +def get_seq(draw, types, mixed=False, min_size=None, max_size=None, + transform_func=None): + """ + Helper function to generate strategy for creating lists. + What constitute in the generated list is driven by the different + parameters. + + Parameters + ---------- + types: iterable sequence like tuple or list + types which can be in the generated list. + mixed: bool + if True, list will contains elements from all types listed in arg, + otherwise it will have elements only from types[0]. + min_size: int + minimum size of the list. + max_size: int + maximum size of the list. + transform_func: callable + a callable which can be applied to whole list after it has been + generated. It can think of as providing functionality of filter + and map function. + + Returns + ------- + hypothesis lists strategy. 
+ + Examples + -------- + seq_strategy = get_seq((int, str, bool), + mixed=True, min_size=1, max_size=5) + seq_strategy.example() + Out[12]: ['lkYMSn', -2501, 35, 'J'] + seq_strategy.example() + Out[13]: [True] + seq_strategy.example() + Out[14]: ['dRWgQYrBrW', True, False, 'gmsujJVDBM', 'Z'] + + seq_strategy = get_seq((int, bool), + mixed=False, + min_size=1, + max_size=5, + transform_func=lambda seq: [str(x) for x in seq]) + seq_strategy.example() + Out[19]: ['-1892'] + seq_strategy.example() + Out[20]: ['22', '66', '14785', '-26312', '32'] + seq_strategy.example() + Out[21]: ['22890', '-15537', '96'] + """ + strategy = st.nothing() + if min_size is None: + min_size = draw(st.integers(min_value=0, max_value=100)) + + if max_size is None: + max_size = draw(st.integers(min_value=min_size, max_value=100)) + + assert min_size <= max_size, \ + 'max_size must be greater than equal to min_size' + + elem_strategies = [] + for elem_type in types: + elem_strategies.append(get_elements(elem_type)) + if not mixed: + break + + if transform_func: + strategy = draw(st.lists(st.one_of(elem_strategies), + min_size=min_size, + max_size=max_size).map(transform_func)) + else: + strategy = draw(st.lists(st.one_of(elem_strategies), + min_size=min_size, + max_size=max_size)) + return strategy From b8d2b820e006a30f78cb2c826bbeaa7d5b6645cb Mon Sep 17 00:00:00 2001 From: ssolanki Date: Tue, 10 Apr 2018 18:07:56 +0530 Subject: [PATCH 08/13] TST, fix for issue #17978. test_empty was failing due to "hypothesis.errors.FailedHealthCheck" error on travis only, therefore decrease the size for lists. 
--- pandas/tests/reshape/test_util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index 3ab53cfebbaff..e36bde2a387db 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -51,8 +51,8 @@ def test_datetimeindex(self, d): @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) @given(st.lists(st.nothing()), - get_seq((int,), False), - get_seq((str,), False)) + get_seq((int,), False, min_size=1, max_size=10), + get_seq((str,), False, min_size=1, max_size=10)) def test_empty(self, empty_list, list_of_int, list_of_str): # product of empty factors X = [empty_list, list_of_int, empty_list] From 8e6701515dcc69d48dd9e0f27bb6c73a278051c1 Mon Sep 17 00:00:00 2001 From: ssolanki Date: Thu, 12 Apr 2018 16:08:51 +0530 Subject: [PATCH 09/13] TST, fix for issue #17978. Incorporate review comments. --- doc/source/contributing.rst | 40 +------------ pandas/util/_hypothesis.py | 111 +++++++++++++++++++++++++++++------- 2 files changed, 92 insertions(+), 59 deletions(-) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index d4e7442b550d7..1169022ec7f6a 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -777,45 +777,12 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex Using ``hypothesis`` ~~~~~~~~~~~~~~~~~~~~ -With the transition to pytest, things have become easier for testing by having reduced boilerplate for test cases and also by utilizing pytest's features like parametizing, skipping and marking test cases. +With the usage of pytest, things have become easier for testing by having reduced boilerplate for test cases and also by utilizing pytest's features like parametizing, skipping and marking test cases. However, one has to still come up with input data examples which can be tested against the functionality. 
There is always a possibility to skip testing an example which could have failed the test case. Hypothesis is a python package which helps in overcoming this issue by generating the input data based on some set of specifications provided by the user. -e.g suppose we have to test python's sum function for a list of int. - -Here is a sample test case using pytest: - -.. code-block:: python - - import pytest - - @pytest.mark.parametrize('seq', [ - [0, 0, 0], - [1, 2, 3, 4], - [-3, 5, -8, 23], - [12345678, 9876543, 567894321] - ]) - def test_sum_using_pytest(seq): - total = 0 - for item in seq: - total += item - assert sum(seq) == total - -output of test cases: - -.. code-block:: shell - - collecting ... collected 4 items - pytest_example.py::test_sum_using_pytest[seq0] PASSED [ 25%] - pytest_example.py::test_sum_using_pytest[seq1] PASSED [ 50%] - pytest_example.py::test_sum_using_pytest[seq2] PASSED [ 75%] - pytest_example.py::test_sum_using_pytest[seq3] PASSED [100%] - - ========================== 4 passed in 0.06 seconds =========================== - - -Compare it with below example for the same test case using hypothesis. +e.g. consider the test case for testing Python's sum function for a list of int using hypothesis. .. code-block:: python @@ -840,8 +807,7 @@ output of test cases: ========================== 1 passed in 0.33 seconds =========================== -The main difference in above example is use of a decorator "@given(st.lists(st.integers()))" which if applied to test case function, generates some random list of int, which is then assigned to parameter of test case. -Above example clearly helps in adding more coverage for our test functions. +In the above example, by applying the decorator "@given(st.lists(st.integers()))" to the unit test function, we have directed hypothesis to generate random lists of int as input for the test function, which in turn helps in adding more coverage for our test functions by generating random input data. 
For more information about hypothesis or in general about property based testing, check below links: diff --git a/pandas/util/_hypothesis.py b/pandas/util/_hypothesis.py index 6e1977123df25..b0b5eb2d019a6 100644 --- a/pandas/util/_hypothesis.py +++ b/pandas/util/_hypothesis.py @@ -1,3 +1,11 @@ +""" +This module houses utility functions to generate hypothesis strategies which + can be used to generate random input test data for various test cases. +It is for internal use by different test case files like pandas/test/test*.py + files only and should not be used beyond this purpose. +For more information on hypothesis, check +(http://hypothesis.readthedocs.io/en/latest/). +""" import string from hypothesis import (given, settings, @@ -7,6 +15,62 @@ def get_elements(elem_type): + """ + Helper function to return hypothesis strategy whose elements depends on + the input data-type. + Currently only four types are supported namely, bool, int, float and str. + + Parameters + ---------- + elem_type: type + type of the elements for the strategy. + + Returns + ------- + hypothesis strategy. 
+ + Examples + -------- + >>> strat = get_elements(str) + >>> strat.example() + 'KWAo' + + >>> strat.example() + 'OfAlBH' + + >>> strat = get_elements(int) + >>> strat.example() + 31911 + + >>> strat.example() + 25288 + + >>> strat = get_elements(float) + >>> strat.example() + nan + + >>> strat.example() + inf + + >>> strat.example() + -2.2250738585072014e-308 + + >>> strat.example() + 0.5 + + >>> strat.example() + 1.7976931348623157e+308 + + >>> strat = get_elements(bool) + >>> strat.example() + True + + >>> strat.example() + True + + >>> strat.example() + False + """ strategy = st.nothing() if elem_type == bool: strategy = st.booleans() @@ -49,28 +113,32 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None, Examples -------- - seq_strategy = get_seq((int, str, bool), - mixed=True, min_size=1, max_size=5) - seq_strategy.example() - Out[12]: ['lkYMSn', -2501, 35, 'J'] - seq_strategy.example() - Out[13]: [True] - seq_strategy.example() - Out[14]: ['dRWgQYrBrW', True, False, 'gmsujJVDBM', 'Z'] - - seq_strategy = get_seq((int, bool), - mixed=False, - min_size=1, - max_size=5, - transform_func=lambda seq: [str(x) for x in seq]) - seq_strategy.example() - Out[19]: ['-1892'] - seq_strategy.example() - Out[20]: ['22', '66', '14785', '-26312', '32'] - seq_strategy.example() - Out[21]: ['22890', '-15537', '96'] + >>> seq_strategy = get_seq((int, str, bool), mixed=True, min_size=1, max_size=5) + + >>> seq_strategy.example() + ['lkYMSn', -2501, 35, 'J'] + + >>> seq_strategy.example() + [True] + + >>> seq_strategy.example() + ['dRWgQYrBrW', True, False, 'gmsujJVDBM', 'Z'] + + >>> seq_strategy = get_seq((int, bool), +... mixed=False, +... min_size=1, +... max_size=5, +... 
transform_func=lambda seq: [str(x) for x in seq]) + + >>> seq_strategy.example() + ['9552', '124', '-24024'] + + >>> seq_strategy.example() + ['-1892'] + + >>> seq_strategy.example() + ['22', '66', '14785', '-26312', '32'] """ - strategy = st.nothing() if min_size is None: min_size = draw(st.integers(min_value=0, max_value=100)) @@ -85,7 +153,6 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None, elem_strategies.append(get_elements(elem_type)) if not mixed: break - if transform_func: strategy = draw(st.lists(st.one_of(elem_strategies), min_size=min_size, From f70c7b0390f55e2bffdc08d50cafc4df92c20903 Mon Sep 17 00:00:00 2001 From: ssolanki Date: Thu, 12 Apr 2018 16:09:35 +0530 Subject: [PATCH 10/13] TST, fix for issue #17978. Incorporate review comments. --- pandas/tests/reshape/test_util.py | 44 +++++++++++++------------------ 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index e36bde2a387db..bc746541c3fa3 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -8,26 +8,18 @@ from datetime import date from dateutil import relativedelta -from pandas.util._hypothesis import (st, - given, - settings, - get_seq, - assume) - +from pandas.util import _hypothesis as hp NO_OF_EXAMPLES_PER_TEST_CASE = 20 class TestCartesianProduct(object): - @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) - @given(get_seq((str,), False, 1, 1), - get_seq((int,), False, 1, 2)) + @hp.settings(max_examples=20) + @hp.given(hp.st.lists(hp.st.text(string.ascii_letters, min_size=1, max_size=1), + min_size=1, max_size=3), + hp.get_seq((int,), False, 1, 2)) def test_simple(self, x, y): - x = list(x[0]) - # non-empty test case is handled in test_empty, - # therefore ignore it here. 
- assume(len(x) != 0) result1, result2 = cartesian_product([x, y]) expected1 = np.array([item1 for item1 in x for item2 in y]) expected2 = np.array([item2 for item1 in x for item2 in y]) @@ -35,8 +27,8 @@ def test_simple(self, x, y): tm.assert_numpy_array_equal(result1, expected1) tm.assert_numpy_array_equal(result2, expected2) - @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) - @given(st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1))) + @hp.settings(max_examples=20) + @hp.given(hp.st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1))) def test_datetimeindex(self, d): # regression test for GitHub issue #6439 # make sure that the ordering on datetimeindex is consistent @@ -49,10 +41,10 @@ def test_datetimeindex(self, d): tm.assert_index_equal(result1, expected1) tm.assert_index_equal(result2, expected2) - @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) - @given(st.lists(st.nothing()), - get_seq((int,), False, min_size=1, max_size=10), - get_seq((str,), False, min_size=1, max_size=10)) + @hp.settings(max_examples=20) + @hp.given(hp.st.lists(hp.st.nothing()), + hp.get_seq((int,), False, min_size=1, max_size=10), + hp.get_seq((str,), False, min_size=1, max_size=10)) def test_empty(self, empty_list, list_of_int, list_of_str): # product of empty factors X = [empty_list, list_of_int, empty_list] @@ -70,13 +62,13 @@ def test_empty(self, empty_list, list_of_int, list_of_str): expected = [] assert result == expected - @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) - @given(st.integers(), - st.text(string.ascii_letters, min_size=1), - get_seq((int, str), True, min_size=1), - st.builds(lambda *x: list(x), st.integers(), - st.text(string.ascii_letters, min_size=1), - st.lists(st.integers(), min_size=1))) + @hp.settings(max_examples=20) + @hp.given(hp.st.integers(), + hp.st.text(string.ascii_letters, min_size=1), + hp.get_seq((int, str), True, min_size=1), + hp.st.builds(lambda *x: list(x), hp.st.integers(), + 
hp.st.text(string.ascii_letters, min_size=1), + hp.st.lists(hp.st.integers(), min_size=1))) def test_invalid_input(self, number, text, seq, mixed_seq): invalid_inputs = [number, From da492e38579e58cf7032ce4427d8278fb31e09ef Mon Sep 17 00:00:00 2001 From: ssolanki Date: Thu, 19 Apr 2018 22:00:58 +0530 Subject: [PATCH 11/13] TST, fix for issue #17978. Remove hypothesis requirement from *.run files. --- ci/install_circle.sh | 1 + ci/install_travis.sh | 1 + ci/requirements-2.7.run | 1 - ci/requirements-2.7_COMPAT.run | 3 +-- ci/requirements-2.7_LOCALE.run | 3 +-- ci/requirements-2.7_WIN.run | 3 +-- ci/requirements-3.5.run | 3 +-- ci/requirements-3.5_ASCII.run | 3 +-- ci/requirements-3.5_OSX.run | 3 +-- ci/requirements-3.6.run | 3 +-- ci/requirements-3.6_LOCALE.run | 3 +-- ci/requirements-3.6_LOCALE_SLOW.run | 3 +-- ci/requirements-3.6_NUMPY_DEV.run | 3 +-- ci/requirements-3.6_SLOW.run | 3 +-- ci/requirements-3.6_WIN.run | 3 +-- 15 files changed, 14 insertions(+), 25 deletions(-) diff --git a/ci/install_circle.sh b/ci/install_circle.sh index fd79f907625e9..c06fc980d7f86 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -65,6 +65,7 @@ fi echo "[create env: ${REQ_BUILD}]" time conda create -n pandas -q --file=${REQ_BUILD} || exit 1 time conda install -n pandas pytest>=3.1.0 || exit 1 +time conda install -n pandas hypothesis>=3.46.0 || exit 1 source activate pandas time pip install moto || exit 1 diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 9ccb4baf25505..23ea235004f31 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -104,6 +104,7 @@ if [ -e ${REQ} ]; then fi time conda install -n pandas pytest>=3.1.0 +time conda install -n pandas hypothesis>=3.46.0 time pip install -q pytest-xdist moto if [ "$LINT" ]; then diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run index fa42d33f1a582..7c10b98fb6e14 100644 --- a/ci/requirements-2.7.run +++ b/ci/requirements-2.7.run @@ -18,4 +18,3 @@ patsy pymysql=0.6.3 jinja2=2.8 
xarray=0.8.0 -hypothesis>=3.46.0 diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run index bc1d1dcac7bf8..954fa5388a039 100644 --- a/ci/requirements-2.7_COMPAT.run +++ b/ci/requirements-2.7_COMPAT.run @@ -11,5 +11,4 @@ psycopg2 pymysql=0.6.0 sqlalchemy=0.7.8 xlsxwriter=0.5.2 -jinja2=2.8 -hypothesis>=3.46.0 \ No newline at end of file +jinja2=2.8 \ No newline at end of file diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 515dfe52a7af8..0116e0c719883 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -9,5 +9,4 @@ bottleneck=1.0.0 matplotlib=1.4.3 sqlalchemy=0.8.1 lxml -scipy -hypothesis>=3.46.0 \ No newline at end of file +scipy \ No newline at end of file diff --git a/ci/requirements-2.7_WIN.run b/ci/requirements-2.7_WIN.run index 6202c3ee0c8ba..27317fdc8f885 100644 --- a/ci/requirements-2.7_WIN.run +++ b/ci/requirements-2.7_WIN.run @@ -15,5 +15,4 @@ s3fs bottleneck html5lib beautifulsoup4 -jinja2=2.8 -hypothesis>=3.46.0 \ No newline at end of file +jinja2=2.8 \ No newline at end of file diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run index 3e173300df435..d879ba303aeab 100644 --- a/ci/requirements-3.5.run +++ b/ci/requirements-3.5.run @@ -17,5 +17,4 @@ pymysql psycopg2 s3fs beautifulsoup4 -ipython -hypothesis>=3.46.0 \ No newline at end of file +ipython \ No newline at end of file diff --git a/ci/requirements-3.5_ASCII.run b/ci/requirements-3.5_ASCII.run index 9b7a834178120..cb627467eaf68 100644 --- a/ci/requirements-3.5_ASCII.run +++ b/ci/requirements-3.5_ASCII.run @@ -1,4 +1,3 @@ python-dateutil pytz -numpy -hypothesis>=3.46.0 \ No newline at end of file +numpy \ No newline at end of file diff --git a/ci/requirements-3.5_OSX.run b/ci/requirements-3.5_OSX.run index 9cfa800ae9c56..78ae7c1ea703e 100644 --- a/ci/requirements-3.5_OSX.run +++ b/ci/requirements-3.5_OSX.run @@ -13,5 +13,4 @@ jinja2 bottleneck xarray s3fs -beautifulsoup4 -hypothesis>=3.46.0 \ No 
newline at end of file +beautifulsoup4 \ No newline at end of file diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run index c3144ebf6d19e..2df5719ca77b6 100644 --- a/ci/requirements-3.6.run +++ b/ci/requirements-3.6.run @@ -22,5 +22,4 @@ fastparquet beautifulsoup4 s3fs xarray -ipython -hypothesis>=3.46.0 \ No newline at end of file +ipython \ No newline at end of file diff --git a/ci/requirements-3.6_LOCALE.run b/ci/requirements-3.6_LOCALE.run index 5232a5fc22c48..9b651b337d349 100644 --- a/ci/requirements-3.6_LOCALE.run +++ b/ci/requirements-3.6_LOCALE.run @@ -19,5 +19,4 @@ psycopg2 beautifulsoup4 s3fs xarray -ipython -hypothesis>=3.46.0 \ No newline at end of file +ipython \ No newline at end of file diff --git a/ci/requirements-3.6_LOCALE_SLOW.run b/ci/requirements-3.6_LOCALE_SLOW.run index 5232a5fc22c48..9b651b337d349 100644 --- a/ci/requirements-3.6_LOCALE_SLOW.run +++ b/ci/requirements-3.6_LOCALE_SLOW.run @@ -19,5 +19,4 @@ psycopg2 beautifulsoup4 s3fs xarray -ipython -hypothesis>=3.46.0 \ No newline at end of file +ipython \ No newline at end of file diff --git a/ci/requirements-3.6_NUMPY_DEV.run b/ci/requirements-3.6_NUMPY_DEV.run index 20cdf454a3cce..9850437cd17f4 100644 --- a/ci/requirements-3.6_NUMPY_DEV.run +++ b/ci/requirements-3.6_NUMPY_DEV.run @@ -1,2 +1 @@ -pytz -hypothesis>=3.46.0 \ No newline at end of file +pytz \ No newline at end of file diff --git a/ci/requirements-3.6_SLOW.run b/ci/requirements-3.6_SLOW.run index 90af3b38f59c5..ab158da00bea4 100644 --- a/ci/requirements-3.6_SLOW.run +++ b/ci/requirements-3.6_SLOW.run @@ -16,5 +16,4 @@ s3fs psycopg2 pymysql html5lib -beautifulsoup4 -hypothesis>=3.46.0 \ No newline at end of file +beautifulsoup4 \ No newline at end of file diff --git a/ci/requirements-3.6_WIN.run b/ci/requirements-3.6_WIN.run index 49ed6006d98e5..57e7dae8978cd 100644 --- a/ci/requirements-3.6_WIN.run +++ b/ci/requirements-3.6_WIN.run @@ -14,5 +14,4 @@ matplotlib blosc thrift=0.10* fastparquet -pyarrow 
-hypothesis>=3.46.0 \ No newline at end of file +pyarrow \ No newline at end of file From 96557b63eeec436245cdc053c1d173a51b1bb7de Mon Sep 17 00:00:00 2001 From: ssolanki Date: Mon, 21 May 2018 23:29:54 +0530 Subject: [PATCH 12/13] add hypothesis dependency in all ci/*.yaml --- ci/circle-27-compat.yaml | 1 + ci/circle-35-ascii.yaml | 1 + ci/circle-36-locale.yaml | 1 + ci/circle-36-locale_slow.yaml | 1 + ci/environment-dev.yaml | 1 - ci/travis-27-locale.yaml | 1 + ci/travis-35-osx.yaml | 1 + 7 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/circle-27-compat.yaml b/ci/circle-27-compat.yaml index 81a48d4edf11c..5ff26cc2640a9 100644 --- a/ci/circle-27-compat.yaml +++ b/ci/circle-27-compat.yaml @@ -22,6 +22,7 @@ dependencies: # universal - pytest - pytest-xdist + - hypothesis - pip: - html5lib==1.0b2 - beautifulsoup4==4.2.1 diff --git a/ci/circle-35-ascii.yaml b/ci/circle-35-ascii.yaml index 602c414b49bb2..fc464f307ca5b 100644 --- a/ci/circle-35-ascii.yaml +++ b/ci/circle-35-ascii.yaml @@ -11,3 +11,4 @@ dependencies: # universal - pytest - pytest-xdist + - hypothesis diff --git a/ci/circle-36-locale.yaml b/ci/circle-36-locale.yaml index cc852c1e2aeeb..263a7842c19fc 100644 --- a/ci/circle-36-locale.yaml +++ b/ci/circle-36-locale.yaml @@ -31,3 +31,4 @@ dependencies: - pytest - pytest-xdist - moto + - hypothesis diff --git a/ci/circle-36-locale_slow.yaml b/ci/circle-36-locale_slow.yaml index cc852c1e2aeeb..263a7842c19fc 100644 --- a/ci/circle-36-locale_slow.yaml +++ b/ci/circle-36-locale_slow.yaml @@ -31,3 +31,4 @@ dependencies: - pytest - pytest-xdist - moto + - hypothesis diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml index c18046f29dd3c..f9f9208519d61 100644 --- a/ci/environment-dev.yaml +++ b/ci/environment-dev.yaml @@ -13,4 +13,3 @@ dependencies: - pytz - setuptools>=24.2.0 - sphinx - - hypothesis>=3.46.0 \ No newline at end of file diff --git a/ci/travis-27-locale.yaml b/ci/travis-27-locale.yaml index 1312c1296d46a..c22fdcb41def0 100644 
--- a/ci/travis-27-locale.yaml +++ b/ci/travis-27-locale.yaml @@ -22,6 +22,7 @@ dependencies: # universal - pytest - pytest-xdist + - hypothesis - pip: - html5lib==1.0b2 - beautifulsoup4==4.2.1 diff --git a/ci/travis-35-osx.yaml b/ci/travis-35-osx.yaml index e74abac4c9775..3db389c15bf85 100644 --- a/ci/travis-35-osx.yaml +++ b/ci/travis-35-osx.yaml @@ -23,5 +23,6 @@ dependencies: # universal - pytest - pytest-xdist + - hypothesis - pip: - python-dateutil==2.5.3 From fa5bd75ba9306616030cf47f074fabb642d8f102 Mon Sep 17 00:00:00 2001 From: ssolanki Date: Tue, 22 May 2018 09:39:09 +0530 Subject: [PATCH 13/13] fix lint issues. --- pandas/tests/reshape/test_util.py | 22 ++++++++++++---------- pandas/util/_hypothesis.py | 12 +++++++----- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index bc746541c3fa3..ad39afc237ceb 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -16,9 +16,10 @@ class TestCartesianProduct(object): @hp.settings(max_examples=20) - @hp.given(hp.st.lists(hp.st.text(string.ascii_letters, min_size=1, max_size=1), + @hp.given(hp.st.lists(hp.st.text(string.ascii_letters, + min_size=1, max_size=1), min_size=1, max_size=3), - hp.get_seq((int,), False, 1, 2)) + hp.get_seq((int,), False, 1, 2)) def test_simple(self, x, y): result1, result2 = cartesian_product([x, y]) expected1 = np.array([item1 for item1 in x for item2 in y]) @@ -28,7 +29,8 @@ def test_simple(self, x, y): tm.assert_numpy_array_equal(result2, expected2) @hp.settings(max_examples=20) - @hp.given(hp.st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1))) + @hp.given(hp.st.dates(min_value=date(1900, 1, 1), + max_value=date(2100, 1, 1))) def test_datetimeindex(self, d): # regression test for GitHub issue #6439 # make sure that the ordering on datetimeindex is consistent @@ -43,8 +45,8 @@ def test_datetimeindex(self, d): @hp.settings(max_examples=20) 
@hp.given(hp.st.lists(hp.st.nothing()), - hp.get_seq((int,), False, min_size=1, max_size=10), - hp.get_seq((str,), False, min_size=1, max_size=10)) + hp.get_seq((int,), False, min_size=1, max_size=10), + hp.get_seq((str,), False, min_size=1, max_size=10)) def test_empty(self, empty_list, list_of_int, list_of_str): # product of empty factors X = [empty_list, list_of_int, empty_list] @@ -64,11 +66,11 @@ def test_empty(self, empty_list, list_of_int, list_of_str): @hp.settings(max_examples=20) @hp.given(hp.st.integers(), - hp.st.text(string.ascii_letters, min_size=1), - hp.get_seq((int, str), True, min_size=1), - hp.st.builds(lambda *x: list(x), hp.st.integers(), - hp.st.text(string.ascii_letters, min_size=1), - hp.st.lists(hp.st.integers(), min_size=1))) + hp.st.text(string.ascii_letters, min_size=1), + hp.get_seq((int, str), True, min_size=1), + hp.st.builds(lambda *x: list(x), hp.st.integers(), + hp.st.text(string.ascii_letters, min_size=1), + hp.st.lists(hp.st.integers(), min_size=1))) def test_invalid_input(self, number, text, seq, mixed_seq): invalid_inputs = [number, diff --git a/pandas/util/_hypothesis.py b/pandas/util/_hypothesis.py index b0b5eb2d019a6..9ea247cd3dbfd 100644 --- a/pandas/util/_hypothesis.py +++ b/pandas/util/_hypothesis.py @@ -7,9 +7,9 @@ (http://hypothesis.readthedocs.io/en/latest/). """ import string -from hypothesis import (given, - settings, - assume, +from hypothesis import (given, # noqa:F401 + settings, # noqa:F401 + assume, # noqa:F401 strategies as st, ) @@ -113,7 +113,8 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None, Examples -------- - >>> seq_strategy = get_seq((int, str, bool), mixed=True, min_size=1, max_size=5) + >>> seq_strategy = get_seq((int, str, bool), mixed=True, min_size=1, +... max_size=5) >>> seq_strategy.example() ['lkYMSn', -2501, 35, 'J'] @@ -128,7 +129,8 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None, ... mixed=False, ... min_size=1, ... max_size=5, -... 
transform_func=lambda seq: [str(x) for x in seq]) +... transform_func=lambda seq: +... [str(x) for x in seq]) >>> seq_strategy.example() ['9552', '124', '-24024']