TST: add hypothesis-based tests #20590

Closed
wants to merge 15 commits into from
Changes from 12 commits
1 change: 1 addition & 0 deletions ci/environment-dev.yaml
@@ -13,3 +13,4 @@ dependencies:
- pytz
- setuptools>=3.3
- sphinx
- hypothesis>=3.46.0
Contributor

need a new-line

1 change: 1 addition & 0 deletions ci/install_circle.sh
@@ -65,6 +65,7 @@ fi
echo "[create env: ${REQ_BUILD}]"
time conda create -n pandas -q --file=${REQ_BUILD} || exit 1
time conda install -n pandas pytest>=3.1.0 || exit 1
Contributor

you can just add it on the previous line

time conda install -n pandas hypothesis>=3.46.0 || exit 1

source activate pandas
time pip install moto || exit 1
1 change: 1 addition & 0 deletions ci/install_travis.sh
@@ -104,6 +104,7 @@ if [ -e ${REQ} ]; then
fi

time conda install -n pandas pytest>=3.1.0
Contributor

same

time conda install -n pandas hypothesis>=3.46.0
time pip install -q pytest-xdist moto

if [ "$LINT" ]; then
2 changes: 1 addition & 1 deletion ci/requirements-2.7_COMPAT.run
@@ -11,4 +11,4 @@ psycopg2
pymysql=0.6.0
sqlalchemy=0.7.8
xlsxwriter=0.5.2
jinja2=2.8
Contributor

revert these, they shouldn't have any changes

Author

I will rebase my branch.

jinja2=2.8
2 changes: 1 addition & 1 deletion ci/requirements-2.7_LOCALE.run
@@ -9,4 +9,4 @@ bottleneck=1.0.0
matplotlib=1.4.3
sqlalchemy=0.8.1
lxml
scipy
scipy
2 changes: 1 addition & 1 deletion ci/requirements-2.7_WIN.run
@@ -15,4 +15,4 @@ s3fs
bottleneck
html5lib
beautifulsoup4
jinja2=2.8
jinja2=2.8
2 changes: 1 addition & 1 deletion ci/requirements-3.5.run
@@ -17,4 +17,4 @@ pymysql
psycopg2
s3fs
beautifulsoup4
ipython
ipython
2 changes: 1 addition & 1 deletion ci/requirements-3.5_ASCII.run
@@ -1,3 +1,3 @@
python-dateutil
pytz
numpy
numpy
2 changes: 1 addition & 1 deletion ci/requirements-3.5_OSX.run
@@ -13,4 +13,4 @@ jinja2
bottleneck
xarray
s3fs
beautifulsoup4
beautifulsoup4
2 changes: 1 addition & 1 deletion ci/requirements-3.6.run
@@ -22,4 +22,4 @@ fastparquet
beautifulsoup4
s3fs
xarray
ipython
ipython
2 changes: 1 addition & 1 deletion ci/requirements-3.6_LOCALE.run
@@ -19,4 +19,4 @@ psycopg2
beautifulsoup4
s3fs
xarray
ipython
ipython
2 changes: 1 addition & 1 deletion ci/requirements-3.6_LOCALE_SLOW.run
@@ -19,4 +19,4 @@ psycopg2
beautifulsoup4
s3fs
xarray
ipython
ipython
2 changes: 1 addition & 1 deletion ci/requirements-3.6_NUMPY_DEV.run
@@ -1 +1 @@
pytz
pytz
2 changes: 1 addition & 1 deletion ci/requirements-3.6_SLOW.run
@@ -16,4 +16,4 @@ s3fs
psycopg2
pymysql
html5lib
beautifulsoup4
beautifulsoup4
2 changes: 1 addition & 1 deletion ci/requirements-3.6_WIN.run
@@ -14,4 +14,4 @@ matplotlib
blosc
thrift=0.10*
fastparquet
pyarrow
pyarrow
40 changes: 40 additions & 0 deletions doc/source/contributing.rst
@@ -775,6 +775,46 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex
test_cool_feature.py::test_dtypes[int8] PASSED
test_cool_feature.py::test_series[int8] PASSED

Using ``hypothesis``
Contributor

add a ref tag here

~~~~~~~~~~~~~~~~~~~~
``pytest`` has made testing easier by reducing the boilerplate needed per test case and by providing features such as parametrizing, skipping, and marking test cases.
Contributor

use double-backticks around pytest


However, one still has to come up with example input data to test the functionality against, and there is always a chance of missing an input that would have failed the test.

`Hypothesis <https://hypothesis.readthedocs.io/>`_ is a Python package that helps overcome this issue by generating the input data from a specification provided by the user.
Contributor

link to the package

For example, consider a test of Python's ``sum`` function over lists of integers using hypothesis.

.. code-block:: python

from hypothesis import strategies as st
from hypothesis import given


@given(st.lists(st.integers()))
def test_sum(seq):
    total = 0
    for item in seq:
        total += item
    assert sum(seq) == total


Output of the test run:

Contributor

make this a bit more succinct. hypothesis is not going to replace much of our parametrized tests; rather, in some cases it will simply add more coverage. so downscale this.

Author

Does that mean changing only the content, or also the example?

Contributor

the content; make this whole section shorter

.. code-block:: shell

collecting ... collected 1 item
hypothesis_example.py::test_sum PASSED [100%]

========================== 1 passed in 0.33 seconds ===========================

In the above example, applying the decorator ``@given(st.lists(st.integers()))`` to the test function directs hypothesis to generate random lists of integers as input, which adds coverage by exercising inputs we would not have written by hand.
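When a particular input must always be covered (for instance the empty list), hypothesis also provides an ``@example`` decorator to pin explicit cases alongside the generated ones. A minimal sketch:

.. code-block:: python

from hypothesis import example, given
from hypothesis import strategies as st


@given(st.lists(st.integers()))
@example([])  # always run the empty list in addition to generated inputs
def test_sum(seq):
    assert sum(seq) == sum(reversed(seq))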

For more information about hypothesis, or about property-based testing in general, see the links below:

- https://hypothesis.readthedocs.io/en/latest/quickstart.html
- https://hypothesis.works/articles/what-is-property-based-testing/
- http://blog.jessitron.com/2013/04/property-based-testing-what-is-it.html


Running the test suite
----------------------
62 changes: 47 additions & 15 deletions pandas/tests/reshape/test_util.py
@@ -4,31 +4,52 @@
import pandas.util.testing as tm
from pandas.core.reshape.util import cartesian_product

import string
from datetime import date
from dateutil import relativedelta

from pandas.util import _hypothesis as hp

NO_OF_EXAMPLES_PER_TEST_CASE = 20
Contributor

hard-code this for now in the examples. have to see how this behaves.



class TestCartesianProduct(object):

def test_simple(self):
x, y = list('ABC'), [1, 22]
@hp.settings(max_examples=20)
Contributor

so this is a lot of boilerplate here; this has to be simpler. we have thousands of tests (sure, most cannot use this), but I think you would need to reduce this to basically a 1-liner to get people to use it. can you put some functions in _hypothesis to make this much more readable?

this comment applies to each of the additions here.

Author
@sushobhit27, Apr 22, 2018

@jreback I have not seen a case where a test passes and then sometimes fails. I have updated this pull request quite a few times and haven't seen non-deterministic behavior so far.

Yes, the test cases run just like parametrized test cases.
I am not completely sure about the seed mechanism, but a failing test case is always reproducible locally, as hypothesis keeps a local database of failing examples.
There is also always a seed reported for a failing example, which can be used to reproduce the same example again. For more info, see:
http://hypothesis.readthedocs.io/en/latest/reproducing.html
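A minimal sketch of pinning a seed to replay the same generated examples, using hypothesis's ``@seed`` decorator (the seed value here is illustrative; hypothesis reports the real one when an example fails):

from hypothesis import given, seed
from hypothesis import strategies as st


@seed(12345)  # illustrative value; use the seed hypothesis reported on failure
@given(st.lists(st.integers()))
def test_sum_is_order_independent(seq):
    # with a fixed seed, the same inputs are generated on every run,
    # so a failure seen on CI can be reproduced locally
    assert sum(seq) == sum(reversed(seq))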

Author

"""so this is a lot of boilerplate here, this has to be simpler, we have thousands of tests (sure most cannot use this), but I think you would need to have this reduced to basically a 1-liner to have people use it. can you put some functions in _hypthesis to make this much more readable."""

If you are talking about boilerplate like the decorators below, I don't think it can be reduced much further: just as we are bound to have different ``parametrize`` decorators for different test cases in pytest, the same applies to the code below. For some functions it will be less, for others more.
Maybe as the code evolves, more common code will emerge that can be refactored out.

@hp.settings(max_examples=20)
@hp.given(hp.st.lists(hp.st.text(string.ascii_letters, min_size=1, max_size=1),
min_size=1, max_size=3),
hp.get_seq((int,), False, 1, 2))
def test_simple(self, x, y):
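A rough sketch of what such a ``get_seq`` helper in ``pandas.util._hypothesis`` could look like; the helper itself is not shown in this diff, so the implementation below is only inferred from its call sites:

from hypothesis import strategies as st

# element strategies for the types used at the call sites
_STRATEGIES = {int: st.integers(), str: st.text(min_size=1)}


def get_seq(types, mixed, min_size=0, max_size=None):
    """Return a strategy generating lists of the given element types.

    If ``mixed`` is True, a single generated list may mix elements of
    all the given types; otherwise each list holds one type only.
    """
    elems = [_STRATEGIES[t] for t in types]
    if mixed:
        return st.lists(st.one_of(elems), min_size=min_size,
                        max_size=max_size)
    return st.one_of([st.lists(e, min_size=min_size, max_size=max_size)
                      for e in elems])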

@hp.given(hp.st.lists(hp.st.text(string.ascii_letters, min_size=1, max_size=1),
min_size=1, max_size=3),
hp.get_seq((int,), False, 1, 2))
def test_simple(self, x, y):
result1, result2 = cartesian_product([x, y])
expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C'])
expected2 = np.array([1, 22, 1, 22, 1, 22])
expected1 = np.array([item1 for item1 in x for item2 in y])
expected2 = np.array([item2 for item1 in x for item2 in y])

tm.assert_numpy_array_equal(result1, expected1)
tm.assert_numpy_array_equal(result2, expected2)

def test_datetimeindex(self):
@hp.settings(max_examples=20)
@hp.given(hp.st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1)))
def test_datetimeindex(self, d):
# regression test for GitHub issue #6439
# make sure that the ordering on datetimeindex is consistent
x = date_range('2000-01-01', periods=2)
n = d + relativedelta.relativedelta(days=1)
x = date_range(d, periods=2)
result1, result2 = [Index(y).day for y in cartesian_product([x, x])]
expected1 = Index([1, 1, 2, 2])
expected2 = Index([1, 2, 1, 2])
expected1 = Index([d.day, d.day, n.day, n.day])
expected2 = Index([d.day, n.day, d.day, n.day])

tm.assert_index_equal(result1, expected1)
tm.assert_index_equal(result2, expected2)

def test_empty(self):
@hp.settings(max_examples=20)
@hp.given(hp.st.lists(hp.st.nothing()),
hp.get_seq((int,), False, min_size=1, max_size=10),
hp.get_seq((str,), False, min_size=1, max_size=10))
def test_empty(self, empty_list, list_of_int, list_of_str):
# product of empty factors
X = [[], [0, 1], []]
Y = [[], [], ['a', 'b', 'c']]
X = [empty_list, list_of_int, empty_list]
Y = [empty_list, empty_list, list_of_str]

for x, y in zip(X, Y):
expected1 = np.array([], dtype=np.asarray(x).dtype)
expected2 = np.array([], dtype=np.asarray(y).dtype)
@@ -37,13 +37,24 @@ def test_empty(self):
tm.assert_numpy_array_equal(result2, expected2)

# empty product (empty input):
result = cartesian_product([])
result = cartesian_product(empty_list)
expected = []
assert result == expected

def test_invalid_input(self):
invalid_inputs = [1, [1], [1, 2], [[1], 2],
'a', ['a'], ['a', 'b'], [['a'], 'b']]
@hp.settings(max_examples=20)
@hp.given(hp.st.integers(),
hp.st.text(string.ascii_letters, min_size=1),
hp.get_seq((int, str), True, min_size=1),
hp.st.builds(lambda *x: list(x), hp.st.integers(),
hp.st.text(string.ascii_letters, min_size=1),
hp.st.lists(hp.st.integers(), min_size=1)))
def test_invalid_input(self, number, text, seq, mixed_seq):

invalid_inputs = [number,
text,
seq,
mixed_seq]

msg = "Input must be a list-like of list-likes"
for X in invalid_inputs:
tm.assert_raises_regex(TypeError, msg, cartesian_product, X=X)