From 4a7c62ab2e1c43ea7cbc12086146143448bf12dd Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Tue, 3 Apr 2018 12:11:37 +0530
Subject: [PATCH 01/13] TST, fix for issue #17978. Addition of "hypothesis
 usage" in test cases of tests/reshape/test_util.py as a kind of POC.

---
 pandas/tests/reshape/test_util.py | 105 ++++++++++++++++++++++++++----
 1 file changed, 92 insertions(+), 13 deletions(-)

diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index e4a9591b95c26..b1b0593b94c1f 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -4,31 +4,101 @@
 import pandas.util.testing as tm
 from pandas.core.reshape.util import cartesian_product
 
+from hypothesis import strategies as st
+from hypothesis import given, settings, assume
+from datetime import date
+from dateutil import relativedelta
+import string
+
+
+NO_OF_EXAMPLES_PER_TEST_CASE = 20
+
+
+def get_elements(elem_type):
+    strategy = st.nothing()
+    if elem_type == bool:
+        strategy = st.booleans()
+    elif elem_type == int:
+        strategy = st.integers()
+    elif elem_type == float:
+        strategy = st.floats()
+    elif elem_type == str:
+        strategy = st.text(string.ascii_letters, max_size=10)
+    return strategy
+
+
+@st.composite
+def get_seq(draw, types, mixed=False, min_size=None, max_size=None, transform_func=None):
+    """helper function to generate strategy for creating lists. parameters define the nature of to be generated list.
+    :param types: what type of elements constitute the list
+    :param mixed: if True, list will contains elements from all types listed in arg, oterwise it will have elements only from types[0].
+    :param min_size: minimum size of the list.
+    :param max_size: maximum size of the list.
+    :param transform_func: a callable which can be applied to whole list after it has been generated.
+    """
+    strategy = st.nothing()
+    if min_size is None:
+        min_size = draw(st.integers(min_value=0, max_value=100))
+
+    if max_size is None:
+        max_size = draw(st.integers(min_value=min_size, max_value=100))
+
+    assert min_size <= max_size, 'max_size must be greater than equal to min_size'
+
+    elem_strategies = []
+    for elem_type in types:
+        elem_strategies.append(get_elements(elem_type))
+        if not mixed:
+            break
+
+    if transform_func:
+        strategy = draw(st.lists(st.one_of(elem_strategies),
+                                 min_size=min_size, max_size=max_size).map(transform_func))
+    else:
+        strategy = draw(st.lists(st.one_of(elem_strategies),
+                                 min_size=min_size, max_size=max_size))
+    return strategy
+
 
 class TestCartesianProduct(object):
 
-    def test_simple(self):
-        x, y = list('ABC'), [1, 22]
+    @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
+    @given(get_seq((str,), False, 1, 1),
+           get_seq((int,), False, 1, 2))
+    def test_simple(self, x, y):
+        x = list(x[0])
+        # non-empty test case is handled in test_empty, therefore ignore it here
+        assume(len(x) != 0)
         result1, result2 = cartesian_product([x, y])
-        expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C'])
-        expected2 = np.array([1, 22, 1, 22, 1, 22])
+        expected1 = np.array([item1 for item1 in x for item2 in y])
+        expected2 = np.array([item2 for item1 in x for item2 in y])
+
         tm.assert_numpy_array_equal(result1, expected1)
         tm.assert_numpy_array_equal(result2, expected2)
 
+    @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
     def test_datetimeindex(self):
         # regression test for GitHub issue #6439
         # make sure that the ordering on datetimeindex is consistent
-        x = date_range('2000-01-01', periods=2)
+        d = st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1)).example()
+        n = d + relativedelta.relativedelta(days=1)
+        x = date_range(d, periods=2)
         result1, result2 = [Index(y).day for y in cartesian_product([x, x])]
-        expected1 = Index([1, 1, 2, 2])
-        expected2 = Index([1, 2, 1, 2])
+        expected1 = Index([d.day, d.day, n.day, n.day])
+        expected2 = Index([d.day, n.day, d.day, n.day])
+
         tm.assert_index_equal(result1, expected1)
         tm.assert_index_equal(result2, expected2)
 
-    def test_empty(self):
+    @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
+    @given(st.lists(st.nothing()),
+           get_seq((int,), False),
+           get_seq((str,), False))
+    def test_empty(self, empty_list, list_of_int, list_of_str):
         # product of empty factors
-        X = [[], [0, 1], []]
-        Y = [[], [], ['a', 'b', 'c']]
+        X = [empty_list, list_of_int, empty_list]
+        Y = [empty_list, empty_list, list_of_str]
+
         for x, y in zip(X, Y):
             expected1 = np.array([], dtype=np.asarray(x).dtype)
             expected2 = np.array([], dtype=np.asarray(y).dtype)
@@ -37,13 +107,22 @@ def test_empty(self):
             tm.assert_numpy_array_equal(result2, expected2)
 
         # empty product (empty input):
-        result = cartesian_product([])
+        result = cartesian_product(empty_list)
         expected = []
         assert result == expected
 
+    @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
     def test_invalid_input(self):
-        invalid_inputs = [1, [1], [1, 2], [[1], 2],
-                          'a', ['a'], ['a', 'b'], [['a'], 'b']]
+        invalid_inputs = [st.integers().example(),
+                          st.tuples(st.integers()).example(),
+                          st.tuples(st.integers(), st.integers()).example(),
+                          st.text(string.ascii_letters, min_size=1, max_size=1).example(),
+                          st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1)).example(),
+                          st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1),
+                                    st.text(string.ascii_letters, min_size=1, max_size=1)).example(),
+                          st.tuples(st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1)),
+                                    st.text(string.ascii_letters, min_size=1, max_size=1)).example()]
+
         msg = "Input must be a list-like of list-likes"
         for X in invalid_inputs:
             tm.assert_raises_regex(TypeError, msg, cartesian_product, X=X)

From 77a4f0637e4f4078ed228ca263a2c88debdfef7e Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Tue, 3 Apr 2018 19:27:06 +0530
Subject: [PATCH 02/13] add hypothesis package requirement in *.run and *.yaml
 file.

---
 ci/environment-dev.yaml             | 1 +
 ci/requirements-2.7.run             | 1 +
 ci/requirements-2.7_COMPAT.run      | 1 +
 ci/requirements-2.7_LOCALE.run      | 1 +
 ci/requirements-2.7_SLOW.run        | 1 +
 ci/requirements-2.7_WIN.run         | 1 +
 ci/requirements-3.5.run             | 1 +
 ci/requirements-3.5_ASCII.run       | 1 +
 ci/requirements-3.5_OSX.run         | 1 +
 ci/requirements-3.6.run             | 1 +
 ci/requirements-3.6_LOCALE.run      | 1 +
 ci/requirements-3.6_LOCALE_SLOW.run | 1 +
 ci/requirements-3.6_WIN.run         | 1 +
 13 files changed, 13 insertions(+)

diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml
index 1337fc54e9aac..6c2e653891824 100644
--- a/ci/environment-dev.yaml
+++ b/ci/environment-dev.yaml
@@ -13,3 +13,4 @@ dependencies:
   - pytz
   - setuptools>=3.3
   - sphinx
+  - hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run
index 7c10b98fb6e14..fa42d33f1a582 100644
--- a/ci/requirements-2.7.run
+++ b/ci/requirements-2.7.run
@@ -18,3 +18,4 @@ patsy
 pymysql=0.6.3
 jinja2=2.8
 xarray=0.8.0
+hypothesis>=3.46.0
diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run
index c3daed6e6e1da..bc1d1dcac7bf8 100644
--- a/ci/requirements-2.7_COMPAT.run
+++ b/ci/requirements-2.7_COMPAT.run
@@ -12,3 +12,4 @@ pymysql=0.6.0
 sqlalchemy=0.7.8
 xlsxwriter=0.5.2
 jinja2=2.8
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run
index 0a809a7dd6e5d..515dfe52a7af8 100644
--- a/ci/requirements-2.7_LOCALE.run
+++ b/ci/requirements-2.7_LOCALE.run
@@ -10,3 +10,4 @@ matplotlib=1.4.3
 sqlalchemy=0.8.1
 lxml
 scipy
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run
index db95a6ccb2314..d9cc8cd64f2ed 100644
--- a/ci/requirements-2.7_SLOW.run
+++ b/ci/requirements-2.7_SLOW.run
@@ -17,3 +17,4 @@ psycopg2
 pymysql
 html5lib
 beautifulsoup4
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-2.7_WIN.run b/ci/requirements-2.7_WIN.run
index c4ca7fc736bb1..6202c3ee0c8ba 100644
--- a/ci/requirements-2.7_WIN.run
+++ b/ci/requirements-2.7_WIN.run
@@ -16,3 +16,4 @@ bottleneck
 html5lib
 beautifulsoup4
 jinja2=2.8
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run
index 669cf437f2164..3e173300df435 100644
--- a/ci/requirements-3.5.run
+++ b/ci/requirements-3.5.run
@@ -18,3 +18,4 @@ psycopg2
 s3fs
 beautifulsoup4
 ipython
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-3.5_ASCII.run b/ci/requirements-3.5_ASCII.run
index b9d543f557d06..9b7a834178120 100644
--- a/ci/requirements-3.5_ASCII.run
+++ b/ci/requirements-3.5_ASCII.run
@@ -1,3 +1,4 @@
 python-dateutil
 pytz
 numpy
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-3.5_OSX.run b/ci/requirements-3.5_OSX.run
index 1d83474d10f2f..9cfa800ae9c56 100644
--- a/ci/requirements-3.5_OSX.run
+++ b/ci/requirements-3.5_OSX.run
@@ -14,3 +14,4 @@ bottleneck
 xarray
 s3fs
 beautifulsoup4
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run
index 822144a80bc9a..c3144ebf6d19e 100644
--- a/ci/requirements-3.6.run
+++ b/ci/requirements-3.6.run
@@ -23,3 +23,4 @@ beautifulsoup4
 s3fs
 xarray
 ipython
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-3.6_LOCALE.run b/ci/requirements-3.6_LOCALE.run
index ad54284c6f7e3..5232a5fc22c48 100644
--- a/ci/requirements-3.6_LOCALE.run
+++ b/ci/requirements-3.6_LOCALE.run
@@ -20,3 +20,4 @@ beautifulsoup4
 s3fs
 xarray
 ipython
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-3.6_LOCALE_SLOW.run b/ci/requirements-3.6_LOCALE_SLOW.run
index ad54284c6f7e3..5232a5fc22c48 100644
--- a/ci/requirements-3.6_LOCALE_SLOW.run
+++ b/ci/requirements-3.6_LOCALE_SLOW.run
@@ -20,3 +20,4 @@ beautifulsoup4
 s3fs
 xarray
 ipython
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/requirements-3.6_WIN.run b/ci/requirements-3.6_WIN.run
index 3042888763863..49ed6006d98e5 100644
--- a/ci/requirements-3.6_WIN.run
+++ b/ci/requirements-3.6_WIN.run
@@ -15,3 +15,4 @@ blosc
 thrift=0.10*
 fastparquet
 pyarrow
+hypothesis>=3.46.0
\ No newline at end of file

From 7e2ed0caf73e2f93217b1a1d24b850b9f543d283 Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Wed, 4 Apr 2018 14:52:50 +0530
Subject: [PATCH 03/13] TST, fix for issue #17978. Addition of "hypothesis
 usage" in test cases of tests/reshape/test_util.py as kind of POC.

Incorporate review comments.
Resolve flake8 warning.
---
 pandas/tests/reshape/test_util.py | 98 +++++++++++++++++++++++--------
 1 file changed, 75 insertions(+), 23 deletions(-)

diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index b1b0593b94c1f..03a1bc4ee37f4 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -28,13 +28,55 @@ def get_elements(elem_type):
 
 
 @st.composite
-def get_seq(draw, types, mixed=False, min_size=None, max_size=None, transform_func=None):
-    """helper function to generate strategy for creating lists. parameters define the nature of to be generated list.
-    :param types: what type of elements constitute the list
-    :param mixed: if True, list will contains elements from all types listed in arg, oterwise it will have elements only from types[0].
-    :param min_size: minimum size of the list.
-    :param max_size: maximum size of the list.
-    :param transform_func: a callable which can be applied to whole list after it has been generated.
+def get_seq(draw, types, mixed=False, min_size=None, max_size=None,
+            transform_func=None):
+    """
+    Helper function to generate strategy for creating lists.
+    What constitute in the generated list is driven by the different
+    parameters.
+
+    Parameters
+    ----------
+    types: iterable sequence like tuple or list
+        types which can be in the generated list.
+    mixed: bool
+        if True, list will contains elements from all types listed in arg,
+        otherwise it will have elements only from types[0].
+    min_size: int
+        minimum size of the list.
+    max_size: int
+        maximum size of the list.
+    transform_func: callable
+        a callable which can be applied to whole list after it has been
+         generated. It can think of as providing functionality of filter
+         and map function.
+
+    Returns
+    -------
+    hypothesis lists strategy.
+
+    Examples
+    --------
+    seq_strategy = get_seq((int, str, bool),
+                            mixed=True, min_size=1, max_size=5)
+    seq_strategy.example()
+    Out[12]: ['lkYMSn', -2501, 35, 'J']
+    seq_strategy.example()
+    Out[13]: [True]
+    seq_strategy.example()
+    Out[14]: ['dRWgQYrBrW', True, False, 'gmsujJVDBM', 'Z']
+
+    seq_strategy = get_seq((int, bool),
+                            mixed=False,
+                            min_size=1,
+                            max_size=5,
+                            transform_func=lambda seq: [str(x) for x in seq])
+    seq_strategy.example()
+    Out[19]: ['-1892']
+    seq_strategy.example()
+    Out[20]: ['22', '66', '14785', '-26312', '32']
+    seq_strategy.example()
+    Out[21]: ['22890', '-15537', '96']
     """
     strategy = st.nothing()
     if min_size is None:
@@ -43,7 +85,8 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None, transform_fu
     if max_size is None:
         max_size = draw(st.integers(min_value=min_size, max_value=100))
 
-    assert min_size <= max_size, 'max_size must be greater than equal to min_size'
+    assert min_size <= max_size, \
+        'max_size must be greater than equal to min_size'
 
     elem_strategies = []
     for elem_type in types:
@@ -53,10 +96,12 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None, transform_fu
 
     if transform_func:
         strategy = draw(st.lists(st.one_of(elem_strategies),
-                                 min_size=min_size, max_size=max_size).map(transform_func))
+                                 min_size=min_size,
+                                 max_size=max_size).map(transform_func))
     else:
         strategy = draw(st.lists(st.one_of(elem_strategies),
-                                 min_size=min_size, max_size=max_size))
+                                 min_size=min_size,
+                                 max_size=max_size))
     return strategy
 
 
@@ -67,7 +112,8 @@ class TestCartesianProduct(object):
            get_seq((int,), False, 1, 2))
     def test_simple(self, x, y):
         x = list(x[0])
-        # non-empty test case is handled in test_empty, therefore ignore it here
+        # non-empty test case is handled in test_empty,
+        # therefore ignore it here.
         assume(len(x) != 0)
         result1, result2 = cartesian_product([x, y])
         expected1 = np.array([item1 for item1 in x for item2 in y])
@@ -77,10 +123,10 @@ def test_simple(self, x, y):
         tm.assert_numpy_array_equal(result2, expected2)
 
     @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
-    def test_datetimeindex(self):
+    @given(st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1)))
+    def test_datetimeindex(self, d):
         # regression test for GitHub issue #6439
         # make sure that the ordering on datetimeindex is consistent
-        d = st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1)).example()
         n = d + relativedelta.relativedelta(days=1)
         x = date_range(d, periods=2)
         result1, result2 = [Index(y).day for y in cartesian_product([x, x])]
@@ -112,16 +158,22 @@ def test_empty(self, empty_list, list_of_int, list_of_str):
         assert result == expected
 
     @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
-    def test_invalid_input(self):
-        invalid_inputs = [st.integers().example(),
-                          st.tuples(st.integers()).example(),
-                          st.tuples(st.integers(), st.integers()).example(),
-                          st.text(string.ascii_letters, min_size=1, max_size=1).example(),
-                          st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1)).example(),
-                          st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1),
-                                    st.text(string.ascii_letters, min_size=1, max_size=1)).example(),
-                          st.tuples(st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1)),
-                                    st.text(string.ascii_letters, min_size=1, max_size=1)).example()]
+    @given(st.integers(),
+           st.text(string.ascii_letters, min_size=1),
+           get_seq((int, str), True, min_size=1),
+           st.lists(st.one_of(st.integers(),
+                              st.text(string.ascii_letters, min_size=1),
+                              get_seq((int,), min_size=1)
+                              ),
+                    min_size=1).filter(
+               lambda x: len(x) == 1 and type(x[0]) != list)
+           )
+    def test_invalid_input(self, number, text, seq, mixed_seq):
+
+        invalid_inputs = [number,
+                          text,
+                          seq,
+                          mixed_seq]
 
         msg = "Input must be a list-like of list-likes"
         for X in invalid_inputs:

From 11eb77732f3ef82ee6b0c54621eb3fed1ccb453b Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Wed, 4 Apr 2018 19:34:13 +0530
Subject: [PATCH 04/13] Using the filter function makes hypothesis fail the
 test_invalid_input test. Add hypothesis package requirement.

---
 ci/requirements-3.6_NUMPY_DEV.run |  1 +
 pandas/tests/reshape/test_util.py | 10 +++-------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/ci/requirements-3.6_NUMPY_DEV.run b/ci/requirements-3.6_NUMPY_DEV.run
index af44f198c687e..20cdf454a3cce 100644
--- a/ci/requirements-3.6_NUMPY_DEV.run
+++ b/ci/requirements-3.6_NUMPY_DEV.run
@@ -1 +1,2 @@
 pytz
+hypothesis>=3.46.0
\ No newline at end of file
diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index 03a1bc4ee37f4..ff96712d2ecf7 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -161,13 +161,9 @@ def test_empty(self, empty_list, list_of_int, list_of_str):
     @given(st.integers(),
            st.text(string.ascii_letters, min_size=1),
            get_seq((int, str), True, min_size=1),
-           st.lists(st.one_of(st.integers(),
-                              st.text(string.ascii_letters, min_size=1),
-                              get_seq((int,), min_size=1)
-                              ),
-                    min_size=1).filter(
-               lambda x: len(x) == 1 and type(x[0]) != list)
-           )
+           st.builds(lambda *x: list(x), st.integers(),
+                     st.text(string.ascii_letters, min_size=1),
+                     st.lists(st.integers(), min_size=1)))
     def test_invalid_input(self, number, text, seq, mixed_seq):
 
         invalid_inputs = [number,

From fca259c92722b98f07e0212746a905e73cc586ca Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Wed, 4 Apr 2018 21:43:40 +0530
Subject: [PATCH 05/13] TST, fix for issue #17978. Addition of "hypothesis
 usage" in test cases of tests/reshape/test_util.py as kind of POC.

Incorporate review comments.
Resolve flake8 warning.
Add section for hypothesis in contributing.rst
---
 doc/source/contributing.rst | 60 +++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index 6d5ac31c39a62..a64f684d46a4a 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -775,6 +775,66 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex
    test_cool_feature.py::test_dtypes[int8] PASSED
    test_cool_feature.py::test_series[int8] PASSED
 
+Transitioning to ``hypothesis``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+With the transition to pytest, things have become easier for testing by having reduced boilerplate for test cases and also by utilizing pytest's features like parametizing, skipping and marking test cases.
+However, one has to still come up with input data examples which can be tested against the functionality. There is always a possibility to skip testing an example which could have failed the test case.
+Imagine if some framework could generate random input examples based on the property/specification of the function being tested. That is exactly what hypothesis does by generating the input data based on some set of specifications provided by the user.
+e.g suppose we have to test python's sum function for a list of int.
+
+Here is a sample test case using pytest:
+.. code-block:: python
+    import pytest
+
+    @pytest.mark.parametrize('seq', [
+        [0, 0, 0],
+        [1, 2, 3, 4],
+        [-3, 5, -8, 23],
+        [12345678, 9876543, 567894321]
+    ])
+    def test_sum_using_pytest(seq):
+        total = 0
+        for item in seq:
+            total += item
+        assert sum(seq) == total
+
+output of test cases:
+.. code-block:: shell
+    collecting ... collected 4 items
+    pytest_example.py::test_sum_using_pytest[seq0] PASSED                    [ 25%]
+    pytest_example.py::test_sum_using_pytest[seq1] PASSED                    [ 50%]
+    pytest_example.py::test_sum_using_pytest[seq2] PASSED                    [ 75%]
+    pytest_example.py::test_sum_using_pytest[seq3] PASSED                    [100%]
+
+    ========================== 4 passed in 0.06 seconds ===========================
+
+
+Compare it with below example for the same test case using hypothesis.
+.. code-block:: python
+    from hypothesis import strategies as st
+    from hypothesis import given
+
+
+    @given(st.lists(st.integers()))
+    def test_sum(seq):
+        total = 0
+        for item in seq:
+            total += item
+        assert sum(seq) == total
+
+output of test cases:
+.. code-block:: shell
+    collecting ... collected 1 item
+    hypothesis_example.py::test_sum PASSED                                   [100%]
+
+    ========================== 1 passed in 0.33 seconds ===========================
+
+The main difference in above example is use of a decorator "@given(st.lists(st.integers()))" which if applied to test case function, generates some random list of int, which is then assigned to parameter of test case.
+For more information about hypothesis or in general about property based testing, check below links:
+- https://hypothesis.readthedocs.io/en/latest/quickstart.html
+- https://hypothesis.works/articles/what-is-property-based-testing/
+- http://blog.jessitron.com/2013/04/property-based-testing-what-is-it.html
+
 
 Running the test suite
 ----------------------

From d30d6fd2c493451adeb9f33ce84b2db4ab8be3e7 Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Wed, 4 Apr 2018 22:03:10 +0530
Subject: [PATCH 06/13] TST, fix for issue #17978. Addition of "hypothesis
 usage" in test cases of tests/reshape/test_util.py as kind of POC.

Incorporate review comments.
Resolve flake8 warning.
Add section for hypothesis in contributing.rst
---
 doc/source/contributing.rst | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index a64f684d46a4a..122d5f7e49c7b 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -776,14 +776,18 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex
    test_cool_feature.py::test_series[int8] PASSED
 
 Transitioning to ``hypothesis``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 With the transition to pytest, things have become easier for testing by having reduced boilerplate for test cases and also by utilizing pytest's features like parametizing, skipping and marking test cases.
+
 However, one has to still come up with input data examples which can be tested against the functionality. There is always a possibility to skip testing an example which could have failed the test case.
+
 Imagine if some framework could generate random input examples based on the property/specification of the function being tested. That is exactly what hypothesis does by generating the input data based on some set of specifications provided by the user.
 e.g suppose we have to test python's sum function for a list of int.
 
 Here is a sample test case using pytest:
+
 .. code-block:: python
+
     import pytest
 
     @pytest.mark.parametrize('seq', [
@@ -799,7 +803,9 @@ Here is a sample test case using pytest:
         assert sum(seq) == total
 
 output of test cases:
+
 .. code-block:: shell
+
     collecting ... collected 4 items
     pytest_example.py::test_sum_using_pytest[seq0] PASSED                    [ 25%]
     pytest_example.py::test_sum_using_pytest[seq1] PASSED                    [ 50%]
@@ -810,7 +816,9 @@ output of test cases:
 
 
 Compare it with below example for the same test case using hypothesis.
+
 .. code-block:: python
+
     from hypothesis import strategies as st
     from hypothesis import given
 
@@ -822,8 +830,11 @@ Compare it with below example for the same test case using hypothesis.
             total += item
         assert sum(seq) == total
 
+
 output of test cases:
+
 .. code-block:: shell
+
     collecting ... collected 1 item
     hypothesis_example.py::test_sum PASSED                                   [100%]
 
@@ -831,6 +842,7 @@ output of test cases:
 
 The main difference in above example is use of a decorator "@given(st.lists(st.integers()))" which if applied to test case function, generates some random list of int, which is then assigned to parameter of test case.
 For more information about hypothesis or in general about property based testing, check below links:
+
 - https://hypothesis.readthedocs.io/en/latest/quickstart.html
 - https://hypothesis.works/articles/what-is-property-based-testing/
 - http://blog.jessitron.com/2013/04/property-based-testing-what-is-it.html

From 090773b9281e007157a9e2ed7f0b094f454cb914 Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Sun, 8 Apr 2018 11:56:09 +0530
Subject: [PATCH 07/13] TST, fix for issue #17978. Moved generic helpers to
 pandas/util/_hypothesis.py. Not sure exactly what was required to change,
 but tried to change the content as per review comments.

---
 doc/source/contributing.rst       |   8 ++-
 pandas/tests/reshape/test_util.py | 101 +++---------------------------
 pandas/util/_hypothesis.py        |  97 ++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 97 deletions(-)
 create mode 100644 pandas/util/_hypothesis.py

diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index 122d5f7e49c7b..d4e7442b550d7 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -775,13 +775,13 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex
    test_cool_feature.py::test_dtypes[int8] PASSED
    test_cool_feature.py::test_series[int8] PASSED
 
-Transitioning to ``hypothesis``
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Using ``hypothesis``
+~~~~~~~~~~~~~~~~~~~~
 With the transition to pytest, things have become easier for testing by having reduced boilerplate for test cases and also by utilizing pytest's features like parametizing, skipping and marking test cases.
 
 However, one has to still come up with input data examples which can be tested against the functionality. There is always a possibility to skip testing an example which could have failed the test case.
 
-Imagine if some framework could generate random input examples based on the property/specification of the function being tested. That is exactly what hypothesis does by generating the input data based on some set of specifications provided by the user.
+Hypothesis is a python package which helps in overcoming this issue by generating the input data based on some set of specifications provided by the user.
 e.g suppose we have to test python's sum function for a list of int.
 
 Here is a sample test case using pytest:
@@ -841,6 +841,8 @@ output of test cases:
     ========================== 1 passed in 0.33 seconds ===========================
 
 The main difference in above example is use of a decorator "@given(st.lists(st.integers()))" which if applied to test case function, generates some random list of int, which is then assigned to parameter of test case.
+Above example clearly helps in adding more coverage for our test functions.
+
 For more information about hypothesis or in general about property based testing, check below links:
 
 - https://hypothesis.readthedocs.io/en/latest/quickstart.html
diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index ff96712d2ecf7..3ab53cfebbaff 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -4,105 +4,18 @@
 import pandas.util.testing as tm
 from pandas.core.reshape.util import cartesian_product
 
-from hypothesis import strategies as st
-from hypothesis import given, settings, assume
+import string
 from datetime import date
 from dateutil import relativedelta
-import string
 
+from pandas.util._hypothesis import (st,
+                                     given,
+                                     settings,
+                                     get_seq,
+                                     assume)
 
-NO_OF_EXAMPLES_PER_TEST_CASE = 20
 
-
-def get_elements(elem_type):
-    strategy = st.nothing()
-    if elem_type == bool:
-        strategy = st.booleans()
-    elif elem_type == int:
-        strategy = st.integers()
-    elif elem_type == float:
-        strategy = st.floats()
-    elif elem_type == str:
-        strategy = st.text(string.ascii_letters, max_size=10)
-    return strategy
-
-
-@st.composite
-def get_seq(draw, types, mixed=False, min_size=None, max_size=None,
-            transform_func=None):
-    """
-    Helper function to generate strategy for creating lists.
-    What constitute in the generated list is driven by the different
-    parameters.
-
-    Parameters
-    ----------
-    types: iterable sequence like tuple or list
-        types which can be in the generated list.
-    mixed: bool
-        if True, list will contains elements from all types listed in arg,
-        otherwise it will have elements only from types[0].
-    min_size: int
-        minimum size of the list.
-    max_size: int
-        maximum size of the list.
-    transform_func: callable
-        a callable which can be applied to whole list after it has been
-         generated. It can think of as providing functionality of filter
-         and map function.
-
-    Returns
-    -------
-    hypothesis lists strategy.
-
-    Examples
-    --------
-    seq_strategy = get_seq((int, str, bool),
-                            mixed=True, min_size=1, max_size=5)
-    seq_strategy.example()
-    Out[12]: ['lkYMSn', -2501, 35, 'J']
-    seq_strategy.example()
-    Out[13]: [True]
-    seq_strategy.example()
-    Out[14]: ['dRWgQYrBrW', True, False, 'gmsujJVDBM', 'Z']
-
-    seq_strategy = get_seq((int, bool),
-                            mixed=False,
-                            min_size=1,
-                            max_size=5,
-                            transform_func=lambda seq: [str(x) for x in seq])
-    seq_strategy.example()
-    Out[19]: ['-1892']
-    seq_strategy.example()
-    Out[20]: ['22', '66', '14785', '-26312', '32']
-    seq_strategy.example()
-    Out[21]: ['22890', '-15537', '96']
-    """
-    strategy = st.nothing()
-    if min_size is None:
-        min_size = draw(st.integers(min_value=0, max_value=100))
-
-    if max_size is None:
-        max_size = draw(st.integers(min_value=min_size, max_value=100))
-
-    assert min_size <= max_size, \
-        'max_size must be greater than equal to min_size'
-
-    elem_strategies = []
-    for elem_type in types:
-        elem_strategies.append(get_elements(elem_type))
-        if not mixed:
-            break
-
-    if transform_func:
-        strategy = draw(st.lists(st.one_of(elem_strategies),
-                                 min_size=min_size,
-                                 max_size=max_size).map(transform_func))
-    else:
-        strategy = draw(st.lists(st.one_of(elem_strategies),
-                                 min_size=min_size,
-                                 max_size=max_size))
-    return strategy
+NO_OF_EXAMPLES_PER_TEST_CASE = 20
 
 
 class TestCartesianProduct(object):
diff --git a/pandas/util/_hypothesis.py b/pandas/util/_hypothesis.py
new file mode 100644
index 0000000000000..6e1977123df25
--- /dev/null
+++ b/pandas/util/_hypothesis.py
@@ -0,0 +1,97 @@
+import string
+from hypothesis import (given,
+                        settings,
+                        assume,
+                        strategies as st,
+                        )
+
+
+def get_elements(elem_type):
+    strategy = st.nothing()
+    if elem_type == bool:
+        strategy = st.booleans()
+    elif elem_type == int:
+        strategy = st.integers()
+    elif elem_type == float:
+        strategy = st.floats()
+    elif elem_type == str:
+        strategy = st.text(string.ascii_letters, max_size=10)
+    return strategy
+
+
+@st.composite
+def get_seq(draw, types, mixed=False, min_size=None, max_size=None,
+            transform_func=None):
+    """
+    Helper function to generate strategy for creating lists.
+    What the generated list contains is driven by the different
+    parameters.
+
+    Parameters
+    ----------
+    types: iterable sequence like tuple or list
+        types which can be in the generated list.
+    mixed: bool
+        if True, list will contain elements from all types listed in arg,
+        otherwise it will have elements only from types[0].
+    min_size: int
+        minimum size of the list.
+    max_size: int
+        maximum size of the list.
+    transform_func: callable
+        a callable which can be applied to the whole list after it has been
+        generated. It can be thought of as providing the functionality of
+        the filter and map functions.
+
+    Returns
+    -------
+    hypothesis lists strategy.
+
+    Examples
+    --------
+    seq_strategy = get_seq((int, str, bool),
+                            mixed=True, min_size=1, max_size=5)
+    seq_strategy.example()
+    Out[12]: ['lkYMSn', -2501, 35, 'J']
+    seq_strategy.example()
+    Out[13]: [True]
+    seq_strategy.example()
+    Out[14]: ['dRWgQYrBrW', True, False, 'gmsujJVDBM', 'Z']
+
+    seq_strategy = get_seq((int, bool),
+                            mixed=False,
+                            min_size=1,
+                            max_size=5,
+                            transform_func=lambda seq: [str(x) for x in seq])
+    seq_strategy.example()
+    Out[19]: ['-1892']
+    seq_strategy.example()
+    Out[20]: ['22', '66', '14785', '-26312', '32']
+    seq_strategy.example()
+    Out[21]: ['22890', '-15537', '96']
+    """
+    strategy = st.nothing()
+    if min_size is None:
+        min_size = draw(st.integers(min_value=0, max_value=100))
+
+    if max_size is None:
+        max_size = draw(st.integers(min_value=min_size, max_value=100))
+
+    assert min_size <= max_size, \
+        'max_size must be greater than equal to min_size'
+
+    elem_strategies = []
+    for elem_type in types:
+        elem_strategies.append(get_elements(elem_type))
+        if not mixed:
+            break
+
+    if transform_func:
+        strategy = draw(st.lists(st.one_of(elem_strategies),
+                                 min_size=min_size,
+                                 max_size=max_size).map(transform_func))
+    else:
+        strategy = draw(st.lists(st.one_of(elem_strategies),
+                                 min_size=min_size,
+                                 max_size=max_size))
+    return strategy

From b8d2b820e006a30f78cb2c826bbeaa7d5b6645cb Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Tue, 10 Apr 2018 18:07:56 +0530
Subject: [PATCH 08/13] TST, fix for issue #17978. test_empty was failing due
 to "hypothesis.errors.FailedHealthCheck" error on travis only, therefore
 decrease the size for lists.

---
 pandas/tests/reshape/test_util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index 3ab53cfebbaff..e36bde2a387db 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -51,8 +51,8 @@ def test_datetimeindex(self, d):
 
     @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
     @given(st.lists(st.nothing()),
-           get_seq((int,), False),
-           get_seq((str,), False))
+           get_seq((int,), False, min_size=1, max_size=10),
+           get_seq((str,), False, min_size=1, max_size=10))
     def test_empty(self, empty_list, list_of_int, list_of_str):
         # product of empty factors
         X = [empty_list, list_of_int, empty_list]

From 8e6701515dcc69d48dd9e0f27bb6c73a278051c1 Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Thu, 12 Apr 2018 16:08:51 +0530
Subject: [PATCH 09/13] TST, fix for issue #17978. Incorporate review comments.

---
 doc/source/contributing.rst |  40 +------------
 pandas/util/_hypothesis.py  | 111 +++++++++++++++++++++++++++++-------
 2 files changed, 92 insertions(+), 59 deletions(-)

diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index d4e7442b550d7..1169022ec7f6a 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -777,45 +777,12 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex
 
 Using ``hypothesis``
 ~~~~~~~~~~~~~~~~~~~~
-With the transition to pytest, things have become easier for testing by having reduced boilerplate for test cases and also by utilizing pytest's features like parametizing, skipping and marking test cases.
+Using pytest has made testing easier by reducing boilerplate for test cases and by providing features such as parametrizing, skipping and marking test cases.
 
 However, one has to still come up with input data examples which can be tested against the functionality. There is always a possibility to skip testing an example which could have failed the test case.
 
 Hypothesis is a python package which helps in overcoming this issue by generating the input data based on some set of specifications provided by the user.
-e.g suppose we have to test python's sum function for a list of int.
-
-Here is a sample test case using pytest:
-
-.. code-block:: python
-
-    import pytest
-
-    @pytest.mark.parametrize('seq', [
-        [0, 0, 0],
-        [1, 2, 3, 4],
-        [-3, 5, -8, 23],
-        [12345678, 9876543, 567894321]
-    ])
-    def test_sum_using_pytest(seq):
-        total = 0
-        for item in seq:
-            total += item
-        assert sum(seq) == total
-
-output of test cases:
-
-.. code-block:: shell
-
-    collecting ... collected 4 items
-    pytest_example.py::test_sum_using_pytest[seq0] PASSED                    [ 25%]
-    pytest_example.py::test_sum_using_pytest[seq1] PASSED                    [ 50%]
-    pytest_example.py::test_sum_using_pytest[seq2] PASSED                    [ 75%]
-    pytest_example.py::test_sum_using_pytest[seq3] PASSED                    [100%]
-
-    ========================== 4 passed in 0.06 seconds ===========================
-
-
-Compare it with below example for the same test case using hypothesis.
+For example, consider a test case that checks Python's ``sum`` function on a list of integers using hypothesis.
 
 .. code-block:: python
 
@@ -840,8 +807,7 @@ output of test cases:
 
     ========================== 1 passed in 0.33 seconds ===========================
 
-The main difference in above example is use of a decorator "@given(st.lists(st.integers()))" which if applied to test case function, generates some random list of int, which is then assigned to parameter of test case.
-Above example clearly helps in adding more coverage for our test functions.
+In the above example, applying the decorator ``@given(st.lists(st.integers()))`` to the test function directs hypothesis to generate random lists of integers as its input, which helps add coverage to our test functions by exercising them with randomly generated data.
 
 For more information about hypothesis or in general about property based testing, check below links:
 
diff --git a/pandas/util/_hypothesis.py b/pandas/util/_hypothesis.py
index 6e1977123df25..b0b5eb2d019a6 100644
--- a/pandas/util/_hypothesis.py
+++ b/pandas/util/_hypothesis.py
@@ -1,3 +1,11 @@
+"""
+This module houses utility functions to generate hypothesis strategies which
+ can be used to generate random input test data for various test cases.
+It is for internal use by different test case files like pandas/tests/test*.py
+ files only and should not be used beyond this purpose.
+For more information on hypothesis, check
+(http://hypothesis.readthedocs.io/en/latest/).
+"""
 import string
 from hypothesis import (given,
                         settings,
@@ -7,6 +15,62 @@
 
 
 def get_elements(elem_type):
+    """
+    Helper function to return a hypothesis strategy whose elements depend on
+    the input data-type.
+    Currently only four types are supported namely, bool, int, float and str.
+
+    Parameters
+    ----------
+    elem_type: type
+        type of the elements for the strategy.
+
+    Returns
+    -------
+    hypothesis strategy.
+
+    Examples
+    --------
+    >>> strat = get_elements(str)
+    >>> strat.example()
+    'KWAo'
+
+    >>> strat.example()
+    'OfAlBH'
+
+    >>> strat = get_elements(int)
+    >>> strat.example()
+    31911
+
+    >>> strat.example()
+    25288
+
+    >>> strat = get_elements(float)
+    >>> strat.example()
+    nan
+
+    >>> strat.example()
+    inf
+
+    >>> strat.example()
+    -2.2250738585072014e-308
+
+    >>> strat.example()
+    0.5
+
+    >>> strat.example()
+    1.7976931348623157e+308
+
+    >>> strat = get_elements(bool)
+    >>> strat.example()
+    True
+
+    >>> strat.example()
+    True
+
+    >>> strat.example()
+    False
+    """
     strategy = st.nothing()
     if elem_type == bool:
         strategy = st.booleans()
@@ -49,28 +113,32 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None,
 
     Examples
     --------
-    seq_strategy = get_seq((int, str, bool),
-                            mixed=True, min_size=1, max_size=5)
-    seq_strategy.example()
-    Out[12]: ['lkYMSn', -2501, 35, 'J']
-    seq_strategy.example()
-    Out[13]: [True]
-    seq_strategy.example()
-    Out[14]: ['dRWgQYrBrW', True, False, 'gmsujJVDBM', 'Z']
-
-    seq_strategy = get_seq((int, bool),
-                            mixed=False,
-                            min_size=1,
-                            max_size=5,
-                            transform_func=lambda seq: [str(x) for x in seq])
-    seq_strategy.example()
-    Out[19]: ['-1892']
-    seq_strategy.example()
-    Out[20]: ['22', '66', '14785', '-26312', '32']
-    seq_strategy.example()
-    Out[21]: ['22890', '-15537', '96']
+    >>> seq_strategy = get_seq((int, str, bool), mixed=True, min_size=1, max_size=5)
+
+    >>> seq_strategy.example()
+    ['lkYMSn', -2501, 35, 'J']
+
+    >>> seq_strategy.example()
+    [True]
+
+    >>> seq_strategy.example()
+    ['dRWgQYrBrW', True, False, 'gmsujJVDBM', 'Z']
+
+    >>> seq_strategy = get_seq((int, bool),
+...                             mixed=False,
+...                             min_size=1,
+...                             max_size=5,
+...                             transform_func=lambda seq: [str(x) for x in seq])
+
+    >>> seq_strategy.example()
+    ['9552', '124', '-24024']
+
+    >>> seq_strategy.example()
+    ['-1892']
+
+    >>> seq_strategy.example()
+    ['22', '66', '14785', '-26312', '32']
     """
-    strategy = st.nothing()
     if min_size is None:
         min_size = draw(st.integers(min_value=0, max_value=100))
 
@@ -85,7 +153,6 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None,
         elem_strategies.append(get_elements(elem_type))
         if not mixed:
             break
-
     if transform_func:
         strategy = draw(st.lists(st.one_of(elem_strategies),
                                  min_size=min_size,

From f70c7b0390f55e2bffdc08d50cafc4df92c20903 Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Thu, 12 Apr 2018 16:09:35 +0530
Subject: [PATCH 10/13] TST, fix for issue #17978. Incorporate review comments.

---
 pandas/tests/reshape/test_util.py | 44 +++++++++++++------------------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index e36bde2a387db..bc746541c3fa3 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -8,26 +8,18 @@
 from datetime import date
 from dateutil import relativedelta
 
-from pandas.util._hypothesis import (st,
-                                     given,
-                                     settings,
-                                     get_seq,
-                                     assume)
-
+from pandas.util import _hypothesis as hp
 
 NO_OF_EXAMPLES_PER_TEST_CASE = 20
 
 
 class TestCartesianProduct(object):
 
-    @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
-    @given(get_seq((str,), False, 1, 1),
-           get_seq((int,), False, 1, 2))
+    @hp.settings(max_examples=20)
+    @hp.given(hp.st.lists(hp.st.text(string.ascii_letters, min_size=1, max_size=1),
+                          min_size=1, max_size=3),
+           hp.get_seq((int,), False, 1, 2))
     def test_simple(self, x, y):
-        x = list(x[0])
-        # non-empty test case is handled in test_empty,
-        # therefore ignore it here.
-        assume(len(x) != 0)
         result1, result2 = cartesian_product([x, y])
         expected1 = np.array([item1 for item1 in x for item2 in y])
         expected2 = np.array([item2 for item1 in x for item2 in y])
@@ -35,8 +27,8 @@ def test_simple(self, x, y):
         tm.assert_numpy_array_equal(result1, expected1)
         tm.assert_numpy_array_equal(result2, expected2)
 
-    @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
-    @given(st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1)))
+    @hp.settings(max_examples=20)
+    @hp.given(hp.st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1)))
     def test_datetimeindex(self, d):
         # regression test for GitHub issue #6439
         # make sure that the ordering on datetimeindex is consistent
@@ -49,10 +41,10 @@ def test_datetimeindex(self, d):
         tm.assert_index_equal(result1, expected1)
         tm.assert_index_equal(result2, expected2)
 
-    @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
-    @given(st.lists(st.nothing()),
-           get_seq((int,), False, min_size=1, max_size=10),
-           get_seq((str,), False, min_size=1, max_size=10))
+    @hp.settings(max_examples=20)
+    @hp.given(hp.st.lists(hp.st.nothing()),
+           hp.get_seq((int,), False, min_size=1, max_size=10),
+           hp.get_seq((str,), False, min_size=1, max_size=10))
     def test_empty(self, empty_list, list_of_int, list_of_str):
         # product of empty factors
         X = [empty_list, list_of_int, empty_list]
@@ -70,13 +62,13 @@ def test_empty(self, empty_list, list_of_int, list_of_str):
         expected = []
         assert result == expected
 
-    @settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE)
-    @given(st.integers(),
-           st.text(string.ascii_letters, min_size=1),
-           get_seq((int, str), True, min_size=1),
-           st.builds(lambda *x: list(x), st.integers(),
-                     st.text(string.ascii_letters, min_size=1),
-                     st.lists(st.integers(), min_size=1)))
+    @hp.settings(max_examples=20)
+    @hp.given(hp.st.integers(),
+           hp.st.text(string.ascii_letters, min_size=1),
+           hp.get_seq((int, str), True, min_size=1),
+           hp.st.builds(lambda *x: list(x), hp.st.integers(),
+                     hp.st.text(string.ascii_letters, min_size=1),
+                     hp.st.lists(hp.st.integers(), min_size=1)))
     def test_invalid_input(self, number, text, seq, mixed_seq):
 
         invalid_inputs = [number,

From da492e38579e58cf7032ce4427d8278fb31e09ef Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Thu, 19 Apr 2018 22:00:58 +0530
Subject: [PATCH 11/13] TST, fix for issue #17978. Remove hypothesis
 requirement from *.run files.

---
 ci/install_circle.sh                | 1 +
 ci/install_travis.sh                | 1 +
 ci/requirements-2.7.run             | 1 -
 ci/requirements-2.7_COMPAT.run      | 3 +--
 ci/requirements-2.7_LOCALE.run      | 3 +--
 ci/requirements-2.7_WIN.run         | 3 +--
 ci/requirements-3.5.run             | 3 +--
 ci/requirements-3.5_ASCII.run       | 3 +--
 ci/requirements-3.5_OSX.run         | 3 +--
 ci/requirements-3.6.run             | 3 +--
 ci/requirements-3.6_LOCALE.run      | 3 +--
 ci/requirements-3.6_LOCALE_SLOW.run | 3 +--
 ci/requirements-3.6_NUMPY_DEV.run   | 3 +--
 ci/requirements-3.6_SLOW.run        | 3 +--
 ci/requirements-3.6_WIN.run         | 3 +--
 15 files changed, 14 insertions(+), 25 deletions(-)

diff --git a/ci/install_circle.sh b/ci/install_circle.sh
index fd79f907625e9..c06fc980d7f86 100755
--- a/ci/install_circle.sh
+++ b/ci/install_circle.sh
@@ -65,6 +65,7 @@ fi
 echo "[create env: ${REQ_BUILD}]"
 time conda create -n pandas -q --file=${REQ_BUILD} || exit 1
 time conda install -n pandas pytest>=3.1.0 || exit 1
+time conda install -n pandas hypothesis>=3.46.0 || exit 1
 
 source activate pandas
 time pip install moto || exit 1
diff --git a/ci/install_travis.sh b/ci/install_travis.sh
index 9ccb4baf25505..23ea235004f31 100755
--- a/ci/install_travis.sh
+++ b/ci/install_travis.sh
@@ -104,6 +104,7 @@ if [ -e ${REQ} ]; then
 fi
 
 time conda install -n pandas pytest>=3.1.0
+time conda install -n pandas hypothesis>=3.46.0
 time pip install -q pytest-xdist moto
 
 if [ "$LINT" ]; then
diff --git a/ci/requirements-2.7.run b/ci/requirements-2.7.run
index fa42d33f1a582..7c10b98fb6e14 100644
--- a/ci/requirements-2.7.run
+++ b/ci/requirements-2.7.run
@@ -18,4 +18,3 @@ patsy
 pymysql=0.6.3
 jinja2=2.8
 xarray=0.8.0
-hypothesis>=3.46.0
diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run
index bc1d1dcac7bf8..954fa5388a039 100644
--- a/ci/requirements-2.7_COMPAT.run
+++ b/ci/requirements-2.7_COMPAT.run
@@ -11,5 +11,4 @@ psycopg2
 pymysql=0.6.0
 sqlalchemy=0.7.8
 xlsxwriter=0.5.2
-jinja2=2.8
-hypothesis>=3.46.0
\ No newline at end of file
+jinja2=2.8
\ No newline at end of file
diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run
index 515dfe52a7af8..0116e0c719883 100644
--- a/ci/requirements-2.7_LOCALE.run
+++ b/ci/requirements-2.7_LOCALE.run
@@ -9,5 +9,4 @@ bottleneck=1.0.0
 matplotlib=1.4.3
 sqlalchemy=0.8.1
 lxml
-scipy
-hypothesis>=3.46.0
\ No newline at end of file
+scipy
\ No newline at end of file
diff --git a/ci/requirements-2.7_WIN.run b/ci/requirements-2.7_WIN.run
index 6202c3ee0c8ba..27317fdc8f885 100644
--- a/ci/requirements-2.7_WIN.run
+++ b/ci/requirements-2.7_WIN.run
@@ -15,5 +15,4 @@ s3fs
 bottleneck
 html5lib
 beautifulsoup4
-jinja2=2.8
-hypothesis>=3.46.0
\ No newline at end of file
+jinja2=2.8
\ No newline at end of file
diff --git a/ci/requirements-3.5.run b/ci/requirements-3.5.run
index 3e173300df435..d879ba303aeab 100644
--- a/ci/requirements-3.5.run
+++ b/ci/requirements-3.5.run
@@ -17,5 +17,4 @@ pymysql
 psycopg2
 s3fs
 beautifulsoup4
-ipython
-hypothesis>=3.46.0
\ No newline at end of file
+ipython
\ No newline at end of file
diff --git a/ci/requirements-3.5_ASCII.run b/ci/requirements-3.5_ASCII.run
index 9b7a834178120..cb627467eaf68 100644
--- a/ci/requirements-3.5_ASCII.run
+++ b/ci/requirements-3.5_ASCII.run
@@ -1,4 +1,3 @@
 python-dateutil
 pytz
-numpy
-hypothesis>=3.46.0
\ No newline at end of file
+numpy
\ No newline at end of file
diff --git a/ci/requirements-3.5_OSX.run b/ci/requirements-3.5_OSX.run
index 9cfa800ae9c56..78ae7c1ea703e 100644
--- a/ci/requirements-3.5_OSX.run
+++ b/ci/requirements-3.5_OSX.run
@@ -13,5 +13,4 @@ jinja2
 bottleneck
 xarray
 s3fs
-beautifulsoup4
-hypothesis>=3.46.0
\ No newline at end of file
+beautifulsoup4
\ No newline at end of file
diff --git a/ci/requirements-3.6.run b/ci/requirements-3.6.run
index c3144ebf6d19e..2df5719ca77b6 100644
--- a/ci/requirements-3.6.run
+++ b/ci/requirements-3.6.run
@@ -22,5 +22,4 @@ fastparquet
 beautifulsoup4
 s3fs
 xarray
-ipython
-hypothesis>=3.46.0
\ No newline at end of file
+ipython
\ No newline at end of file
diff --git a/ci/requirements-3.6_LOCALE.run b/ci/requirements-3.6_LOCALE.run
index 5232a5fc22c48..9b651b337d349 100644
--- a/ci/requirements-3.6_LOCALE.run
+++ b/ci/requirements-3.6_LOCALE.run
@@ -19,5 +19,4 @@ psycopg2
 beautifulsoup4
 s3fs
 xarray
-ipython
-hypothesis>=3.46.0
\ No newline at end of file
+ipython
\ No newline at end of file
diff --git a/ci/requirements-3.6_LOCALE_SLOW.run b/ci/requirements-3.6_LOCALE_SLOW.run
index 5232a5fc22c48..9b651b337d349 100644
--- a/ci/requirements-3.6_LOCALE_SLOW.run
+++ b/ci/requirements-3.6_LOCALE_SLOW.run
@@ -19,5 +19,4 @@ psycopg2
 beautifulsoup4
 s3fs
 xarray
-ipython
-hypothesis>=3.46.0
\ No newline at end of file
+ipython
\ No newline at end of file
diff --git a/ci/requirements-3.6_NUMPY_DEV.run b/ci/requirements-3.6_NUMPY_DEV.run
index 20cdf454a3cce..9850437cd17f4 100644
--- a/ci/requirements-3.6_NUMPY_DEV.run
+++ b/ci/requirements-3.6_NUMPY_DEV.run
@@ -1,2 +1 @@
-pytz
-hypothesis>=3.46.0
\ No newline at end of file
+pytz
\ No newline at end of file
diff --git a/ci/requirements-3.6_SLOW.run b/ci/requirements-3.6_SLOW.run
index 90af3b38f59c5..ab158da00bea4 100644
--- a/ci/requirements-3.6_SLOW.run
+++ b/ci/requirements-3.6_SLOW.run
@@ -16,5 +16,4 @@ s3fs
 psycopg2
 pymysql
 html5lib
-beautifulsoup4
-hypothesis>=3.46.0
\ No newline at end of file
+beautifulsoup4
\ No newline at end of file
diff --git a/ci/requirements-3.6_WIN.run b/ci/requirements-3.6_WIN.run
index 49ed6006d98e5..57e7dae8978cd 100644
--- a/ci/requirements-3.6_WIN.run
+++ b/ci/requirements-3.6_WIN.run
@@ -14,5 +14,4 @@ matplotlib
 blosc
 thrift=0.10*
 fastparquet
-pyarrow
-hypothesis>=3.46.0
\ No newline at end of file
+pyarrow
\ No newline at end of file

From 96557b63eeec436245cdc053c1d173a51b1bb7de Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Mon, 21 May 2018 23:29:54 +0530
Subject: [PATCH 12/13] add hypothesis dependency in all ci/*.yaml

---
 ci/circle-27-compat.yaml      | 1 +
 ci/circle-35-ascii.yaml       | 1 +
 ci/circle-36-locale.yaml      | 1 +
 ci/circle-36-locale_slow.yaml | 1 +
 ci/environment-dev.yaml       | 1 -
 ci/travis-27-locale.yaml      | 1 +
 ci/travis-35-osx.yaml         | 1 +
 7 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/ci/circle-27-compat.yaml b/ci/circle-27-compat.yaml
index 81a48d4edf11c..5ff26cc2640a9 100644
--- a/ci/circle-27-compat.yaml
+++ b/ci/circle-27-compat.yaml
@@ -22,6 +22,7 @@ dependencies:
   # universal
   - pytest
   - pytest-xdist
+  - hypothesis
   - pip:
     - html5lib==1.0b2
     - beautifulsoup4==4.2.1
diff --git a/ci/circle-35-ascii.yaml b/ci/circle-35-ascii.yaml
index 602c414b49bb2..fc464f307ca5b 100644
--- a/ci/circle-35-ascii.yaml
+++ b/ci/circle-35-ascii.yaml
@@ -11,3 +11,4 @@ dependencies:
   # universal
   - pytest
   - pytest-xdist
+  - hypothesis
diff --git a/ci/circle-36-locale.yaml b/ci/circle-36-locale.yaml
index cc852c1e2aeeb..263a7842c19fc 100644
--- a/ci/circle-36-locale.yaml
+++ b/ci/circle-36-locale.yaml
@@ -31,3 +31,4 @@ dependencies:
   - pytest
   - pytest-xdist
   - moto
+  - hypothesis
diff --git a/ci/circle-36-locale_slow.yaml b/ci/circle-36-locale_slow.yaml
index cc852c1e2aeeb..263a7842c19fc 100644
--- a/ci/circle-36-locale_slow.yaml
+++ b/ci/circle-36-locale_slow.yaml
@@ -31,3 +31,4 @@ dependencies:
   - pytest
   - pytest-xdist
   - moto
+  - hypothesis
diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml
index c18046f29dd3c..f9f9208519d61 100644
--- a/ci/environment-dev.yaml
+++ b/ci/environment-dev.yaml
@@ -13,4 +13,3 @@ dependencies:
   - pytz
   - setuptools>=24.2.0
   - sphinx
-  - hypothesis>=3.46.0
\ No newline at end of file
diff --git a/ci/travis-27-locale.yaml b/ci/travis-27-locale.yaml
index 1312c1296d46a..c22fdcb41def0 100644
--- a/ci/travis-27-locale.yaml
+++ b/ci/travis-27-locale.yaml
@@ -22,6 +22,7 @@ dependencies:
   # universal
   - pytest
   - pytest-xdist
+  - hypothesis
   - pip:
     - html5lib==1.0b2
     - beautifulsoup4==4.2.1
diff --git a/ci/travis-35-osx.yaml b/ci/travis-35-osx.yaml
index e74abac4c9775..3db389c15bf85 100644
--- a/ci/travis-35-osx.yaml
+++ b/ci/travis-35-osx.yaml
@@ -23,5 +23,6 @@ dependencies:
   # universal
   - pytest
   - pytest-xdist
+  - hypothesis
   - pip:
     - python-dateutil==2.5.3

From fa5bd75ba9306616030cf47f074fabb642d8f102 Mon Sep 17 00:00:00 2001
From: ssolanki <sushobhitsolanki@gmail.com>
Date: Tue, 22 May 2018 09:39:09 +0530
Subject: [PATCH 13/13] fix lint issues.

---
 pandas/tests/reshape/test_util.py | 22 ++++++++++++----------
 pandas/util/_hypothesis.py        | 12 +++++++-----
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py
index bc746541c3fa3..ad39afc237ceb 100644
--- a/pandas/tests/reshape/test_util.py
+++ b/pandas/tests/reshape/test_util.py
@@ -16,9 +16,10 @@
 class TestCartesianProduct(object):
 
     @hp.settings(max_examples=20)
-    @hp.given(hp.st.lists(hp.st.text(string.ascii_letters, min_size=1, max_size=1),
+    @hp.given(hp.st.lists(hp.st.text(string.ascii_letters,
+                                     min_size=1, max_size=1),
                           min_size=1, max_size=3),
-           hp.get_seq((int,), False, 1, 2))
+              hp.get_seq((int,), False, 1, 2))
     def test_simple(self, x, y):
         result1, result2 = cartesian_product([x, y])
         expected1 = np.array([item1 for item1 in x for item2 in y])
@@ -28,7 +29,8 @@ def test_simple(self, x, y):
         tm.assert_numpy_array_equal(result2, expected2)
 
     @hp.settings(max_examples=20)
-    @hp.given(hp.st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1)))
+    @hp.given(hp.st.dates(min_value=date(1900, 1, 1),
+                          max_value=date(2100, 1, 1)))
     def test_datetimeindex(self, d):
         # regression test for GitHub issue #6439
         # make sure that the ordering on datetimeindex is consistent
@@ -43,8 +45,8 @@ def test_datetimeindex(self, d):
 
     @hp.settings(max_examples=20)
     @hp.given(hp.st.lists(hp.st.nothing()),
-           hp.get_seq((int,), False, min_size=1, max_size=10),
-           hp.get_seq((str,), False, min_size=1, max_size=10))
+              hp.get_seq((int,), False, min_size=1, max_size=10),
+              hp.get_seq((str,), False, min_size=1, max_size=10))
     def test_empty(self, empty_list, list_of_int, list_of_str):
         # product of empty factors
         X = [empty_list, list_of_int, empty_list]
@@ -64,11 +66,11 @@ def test_empty(self, empty_list, list_of_int, list_of_str):
 
     @hp.settings(max_examples=20)
     @hp.given(hp.st.integers(),
-           hp.st.text(string.ascii_letters, min_size=1),
-           hp.get_seq((int, str), True, min_size=1),
-           hp.st.builds(lambda *x: list(x), hp.st.integers(),
-                     hp.st.text(string.ascii_letters, min_size=1),
-                     hp.st.lists(hp.st.integers(), min_size=1)))
+              hp.st.text(string.ascii_letters, min_size=1),
+              hp.get_seq((int, str), True, min_size=1),
+              hp.st.builds(lambda *x: list(x), hp.st.integers(),
+                           hp.st.text(string.ascii_letters, min_size=1),
+                           hp.st.lists(hp.st.integers(), min_size=1)))
     def test_invalid_input(self, number, text, seq, mixed_seq):
 
         invalid_inputs = [number,
diff --git a/pandas/util/_hypothesis.py b/pandas/util/_hypothesis.py
index b0b5eb2d019a6..9ea247cd3dbfd 100644
--- a/pandas/util/_hypothesis.py
+++ b/pandas/util/_hypothesis.py
@@ -7,9 +7,9 @@
 (http://hypothesis.readthedocs.io/en/latest/).
 """
 import string
-from hypothesis import (given,
-                        settings,
-                        assume,
+from hypothesis import (given,  # noqa:F401
+                        settings,   # noqa:F401
+                        assume, # noqa:F401
                         strategies as st,
                         )
 
@@ -113,7 +113,8 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None,
 
     Examples
     --------
-    >>> seq_strategy = get_seq((int, str, bool), mixed=True, min_size=1, max_size=5)
+    >>> seq_strategy = get_seq((int, str, bool), mixed=True, min_size=1,
+    ...                         max_size=5)
 
     >>> seq_strategy.example()
     ['lkYMSn', -2501, 35, 'J']
@@ -128,7 +129,8 @@ def get_seq(draw, types, mixed=False, min_size=None, max_size=None,
 ...                             mixed=False,
 ...                             min_size=1,
 ...                             max_size=5,
-...                             transform_func=lambda seq: [str(x) for x in seq])
+...                             transform_func=lambda seq:
+...                             [str(x) for x in seq])
 
     >>> seq_strategy.example()
     ['9552', '124', '-24024']