diff --git a/ci/appveyor-27.yaml b/ci/appveyor-27.yaml index 84107c605b14f..0f1bedd5b01a0 100644 --- a/ci/appveyor-27.yaml +++ b/ci/appveyor-27.yaml @@ -27,3 +27,4 @@ dependencies: - pytest - pytest-xdist - moto + - hypothesis diff --git a/ci/appveyor-36.yaml b/ci/appveyor-36.yaml index 5e370de39958a..9f2499a24aeac 100644 --- a/ci/appveyor-36.yaml +++ b/ci/appveyor-36.yaml @@ -25,3 +25,4 @@ dependencies: - cython - pytest - pytest-xdist + - hypothesis diff --git a/ci/circle-27-compat.yaml b/ci/circle-27-compat.yaml index 81a48d4edf11c..5ff26cc2640a9 100644 --- a/ci/circle-27-compat.yaml +++ b/ci/circle-27-compat.yaml @@ -22,6 +22,7 @@ dependencies: # universal - pytest - pytest-xdist + - hypothesis - pip: - html5lib==1.0b2 - beautifulsoup4==4.2.1 diff --git a/ci/circle-35-ascii.yaml b/ci/circle-35-ascii.yaml index 602c414b49bb2..fc464f307ca5b 100644 --- a/ci/circle-35-ascii.yaml +++ b/ci/circle-35-ascii.yaml @@ -11,3 +11,4 @@ dependencies: # universal - pytest - pytest-xdist + - hypothesis diff --git a/ci/circle-36-locale.yaml b/ci/circle-36-locale.yaml index cc852c1e2aeeb..263a7842c19fc 100644 --- a/ci/circle-36-locale.yaml +++ b/ci/circle-36-locale.yaml @@ -31,3 +31,4 @@ dependencies: - pytest - pytest-xdist - moto + - hypothesis diff --git a/ci/circle-36-locale_slow.yaml b/ci/circle-36-locale_slow.yaml index cc852c1e2aeeb..263a7842c19fc 100644 --- a/ci/circle-36-locale_slow.yaml +++ b/ci/circle-36-locale_slow.yaml @@ -31,3 +31,4 @@ dependencies: - pytest - pytest-xdist - moto + - hypothesis diff --git a/ci/travis-27-locale.yaml b/ci/travis-27-locale.yaml index 1312c1296d46a..c22fdcb41def0 100644 --- a/ci/travis-27-locale.yaml +++ b/ci/travis-27-locale.yaml @@ -22,6 +22,7 @@ dependencies: # universal - pytest - pytest-xdist + - hypothesis - pip: - html5lib==1.0b2 - beautifulsoup4==4.2.1 diff --git a/ci/travis-27.yaml b/ci/travis-27.yaml index 22b993a2da886..da842bb3924f9 100644 --- a/ci/travis-27.yaml +++ b/ci/travis-27.yaml @@ -42,6 +42,7 @@ dependencies: - 
pytest - pytest-xdist - moto + - hypothesis - pip: - backports.lzma - cpplint diff --git a/ci/travis-35-osx.yaml b/ci/travis-35-osx.yaml index e74abac4c9775..3db389c15bf85 100644 --- a/ci/travis-35-osx.yaml +++ b/ci/travis-35-osx.yaml @@ -23,5 +23,6 @@ dependencies: # universal - pytest - pytest-xdist + - hypothesis - pip: - python-dateutil==2.5.3 diff --git a/ci/travis-36.yaml b/ci/travis-36.yaml index fe057e714761e..4bdad9469d367 100644 --- a/ci/travis-36.yaml +++ b/ci/travis-36.yaml @@ -42,6 +42,7 @@ dependencies: - pytest-xdist - pytest-cov - moto + - hypothesis - pip: - brotlipy - coverage diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index e9939250052f1..5de78b1b90f0d 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -775,6 +775,46 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex test_cool_feature.py::test_dtypes[int8] PASSED test_cool_feature.py::test_series[int8] PASSED +Using ``hypothesis`` +~~~~~~~~~~~~~~~~~~~~ +Using ``pytest`` has made testing easier by reducing the boilerplate in test cases and by providing features like parametrizing, skipping and marking test cases. + +However, one still has to come up with input data examples to test the functionality against, and it is always possible to miss an example that would have made the test case fail. + +Hypothesis is a Python package which helps in overcoming this issue by generating the input data based on a set of specifications provided by the user. +For example, consider a test case that uses hypothesis to check Python's ``sum`` function against a list of integers. + +.. code-block:: python + + from hypothesis import strategies as st + from hypothesis import given + + + @given(st.lists(st.integers())) + def test_sum(seq): + total = 0 + for item in seq: + total += item + assert sum(seq) == total + + +Output of the test case: + +.. code-block:: shell + + collecting ... 
collected 1 item + hypothesis_example.py::test_sum PASSED [100%] + + ========================== 1 passed in 0.33 seconds =========================== + +In the above example, by applying the decorator ``@given(st.lists(st.integers()))`` to the unit test function, we have directed hypothesis to generate random lists of integers as input for the test function, which helps add more coverage to our test functions by generating random input data. + +For more information about hypothesis, or about property-based testing in general, see the links below: + +- https://hypothesis.readthedocs.io/en/latest/quickstart.html +- https://hypothesis.works/articles/what-is-property-based-testing/ +- http://blog.jessitron.com/2013/04/property-based-testing-what-is-it.html + Running the test suite ---------------------- diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index e4a9591b95c26..ad39afc237ceb 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -4,31 +4,54 @@ import pandas.util.testing as tm from pandas.core.reshape.util import cartesian_product +import string +from datetime import date +from dateutil import relativedelta + +from pandas.util import _hypothesis as hp + +NO_OF_EXAMPLES_PER_TEST_CASE = 20 + class TestCartesianProduct(object): - def test_simple(self): - x, y = list('ABC'), [1, 22] + @hp.settings(max_examples=20) + @hp.given(hp.st.lists(hp.st.text(string.ascii_letters, + min_size=1, max_size=1), + min_size=1, max_size=3), + hp.get_seq((int,), False, 1, 2)) + def test_simple(self, x, y): result1, result2 = cartesian_product([x, y]) - expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C']) - expected2 = np.array([1, 22, 1, 22, 1, 22]) + expected1 = np.array([item1 for item1 in x for item2 in y]) + expected2 = np.array([item2 for item1 in x for item2 in y]) + tm.assert_numpy_array_equal(result1, expected1) tm.assert_numpy_array_equal(result2, expected2) - def test_datetimeindex(self): + 
@hp.settings(max_examples=20) + @hp.given(hp.st.dates(min_value=date(1900, 1, 1), + max_value=date(2100, 1, 1))) + def test_datetimeindex(self, d): # regression test for GitHub issue #6439 # make sure that the ordering on datetimeindex is consistent - x = date_range('2000-01-01', periods=2) + n = d + relativedelta.relativedelta(days=1) + x = date_range(d, periods=2) result1, result2 = [Index(y).day for y in cartesian_product([x, x])] - expected1 = Index([1, 1, 2, 2]) - expected2 = Index([1, 2, 1, 2]) + expected1 = Index([d.day, d.day, n.day, n.day]) + expected2 = Index([d.day, n.day, d.day, n.day]) + tm.assert_index_equal(result1, expected1) tm.assert_index_equal(result2, expected2) - def test_empty(self): + @hp.settings(max_examples=20) + @hp.given(hp.st.lists(hp.st.nothing()), + hp.get_seq((int,), False, min_size=1, max_size=10), + hp.get_seq((str,), False, min_size=1, max_size=10)) + def test_empty(self, empty_list, list_of_int, list_of_str): # product of empty factors - X = [[], [0, 1], []] - Y = [[], [], ['a', 'b', 'c']] + X = [empty_list, list_of_int, empty_list] + Y = [empty_list, empty_list, list_of_str] + for x, y in zip(X, Y): expected1 = np.array([], dtype=np.asarray(x).dtype) expected2 = np.array([], dtype=np.asarray(y).dtype) @@ -37,13 +60,24 @@ def test_empty(self): tm.assert_numpy_array_equal(result2, expected2) # empty product (empty input): - result = cartesian_product([]) + result = cartesian_product(empty_list) expected = [] assert result == expected - def test_invalid_input(self): - invalid_inputs = [1, [1], [1, 2], [[1], 2], - 'a', ['a'], ['a', 'b'], [['a'], 'b']] + @hp.settings(max_examples=20) + @hp.given(hp.st.integers(), + hp.st.text(string.ascii_letters, min_size=1), + hp.get_seq((int, str), True, min_size=1), + hp.st.builds(lambda *x: list(x), hp.st.integers(), + hp.st.text(string.ascii_letters, min_size=1), + hp.st.lists(hp.st.integers(), min_size=1))) + def test_invalid_input(self, number, text, seq, mixed_seq): + + invalid_inputs = 
"""
This module houses utility functions to generate hypothesis strategies which
can be used to generate random input test data for various test cases.
It is for internal use by the test case files under pandas/tests only and
should not be used beyond this purpose.
For more information on hypothesis, check
(http://hypothesis.readthedocs.io/en/latest/).
"""
import string

from hypothesis import (given,  # noqa:F401
                        settings,  # noqa:F401
                        assume,  # noqa:F401
                        strategies as st,
                        )


def get_elements(elem_type):
    """
    Return a hypothesis strategy whose elements depend on the requested
    data type.

    Only four types are supported: bool, int, float and str. Any other
    type yields ``st.nothing()``, i.e. a strategy that produces no values.

    Parameters
    ----------
    elem_type : type
        Type of the elements the strategy should generate.

    Returns
    -------
    hypothesis strategy
        ``st.booleans()`` for bool, ``st.integers()`` for int,
        ``st.floats()`` for float, text made of ASCII letters with at most
        10 characters for str, and ``st.nothing()`` otherwise.

    Examples
    --------
    The generated values are random; the outputs below are illustrative.

    >>> strat = get_elements(str)
    >>> strat.example()
    'KWAo'

    >>> strat = get_elements(int)
    >>> strat.example()
    31911

    >>> strat = get_elements(float)
    >>> strat.example()
    nan

    >>> strat.example()
    0.5

    >>> strat = get_elements(bool)
    >>> strat.example()
    True
    """
    if elem_type == bool:
        return st.booleans()
    if elem_type == int:
        return st.integers()
    if elem_type == float:
        return st.floats()
    if elem_type == str:
        return st.text(string.ascii_letters, max_size=10)
    # Unsupported type: hand back the empty strategy rather than failing.
    return st.nothing()


@st.composite
def get_seq(draw, types, mixed=False, min_size=None, max_size=None,
            transform_func=None):
    """
    Composite strategy that generates lists.

    What ends up in the generated list is driven by the parameters.
    ``draw`` is supplied by ``st.composite``; callers pass the remaining
    arguments only.

    Parameters
    ----------
    types : iterable such as a tuple or list
        Element types that may appear in the generated list. Each entry is
        turned into an element strategy via ``get_elements``.
    mixed : bool, default False
        If True, every element is drawn from any one of the listed types.
        Otherwise only ``types[0]`` is used.
    min_size : int, optional
        Minimum size of the list. When omitted, a value between 0 and 100
        is drawn.
    max_size : int, optional
        Maximum size of the list. When omitted, a value between
        ``min_size`` and 100 is drawn.
    transform_func : callable, optional
        Applied to the whole list after it has been generated. It can be
        used to provide the functionality of ``filter`` and ``map``.

    Returns
    -------
    hypothesis strategy
        Strategy producing lists, or whatever ``transform_func`` returns
        for them.

    Raises
    ------
    ValueError
        If ``min_size`` is greater than ``max_size``.

    Examples
    --------
    The generated values are random; the outputs below are illustrative.

    >>> seq_strategy = get_seq((int, str, bool), mixed=True, min_size=1,
    ...                        max_size=5)

    >>> seq_strategy.example()
    ['lkYMSn', -2501, 35, 'J']

    >>> seq_strategy.example()
    [True]

    >>> seq_strategy = get_seq((int, bool),
    ...                        mixed=False,
    ...                        min_size=1,
    ...                        max_size=5,
    ...                        transform_func=lambda seq:
    ...                        [str(x) for x in seq])

    >>> seq_strategy.example()
    ['9552', '124', '-24024']

    >>> seq_strategy.example()
    ['-1892']
    """
    if min_size is None:
        min_size = draw(st.integers(min_value=0, max_value=100))

    if max_size is None:
        max_size = draw(st.integers(min_value=min_size, max_value=100))

    # Raise explicitly: an ``assert`` would be stripped when Python runs
    # with -O and the invalid bounds would go unnoticed.
    if min_size > max_size:
        raise ValueError(
            'max_size must be greater than or equal to min_size')

    elem_strategies = []
    for elem_type in types:
        elem_strategies.append(get_elements(elem_type))
        if not mixed:
            # Homogeneous list requested: only the first type counts.
            break

    seq_strategy = st.lists(st.one_of(elem_strategies),
                            min_size=min_size,
                            max_size=max_size)
    if transform_func is not None:
        seq_strategy = seq_strategy.map(transform_func)

    return draw(seq_strategy)