-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TST: add hypothesis-based tests #20590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
4a7c62a
77a4f06
78a5820
7e2ed0c
11eb777
fca259c
d30d6fd
090773b
b8d2b82
8e67015
f70c7b0
da492e3
a80385d
96557b6
fa5bd75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,3 +13,4 @@ dependencies: | |
- pytz | ||
- setuptools>=3.3 | ||
- sphinx | ||
- hypothesis>=3.46.0 | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,3 +18,4 @@ patsy | |
pymysql=0.6.3 | ||
jinja2=2.8 | ||
xarray=0.8.0 | ||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,3 +12,4 @@ pymysql=0.6.0 | |
sqlalchemy=0.7.8 | ||
xlsxwriter=0.5.2 | ||
jinja2=2.8 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Revert these; they shouldn't have any changes. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I will rebase my branch. |
||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,3 +10,4 @@ matplotlib=1.4.3 | |
sqlalchemy=0.8.1 | ||
lxml | ||
scipy | ||
hypothesis>=3.46.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are user requirements, not testing requirements, so remove hypothesis from EACH of these files except environment-dev, and instead add it to ci/install_travis_travis, install_circle and appveyor.yaml (search for moto and put it next to that). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In exactly which file should I add the requirement? Earlier, the CI tests failed when the hypothesis package requirement was absent from *.run. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I put them above
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,3 +17,4 @@ psycopg2 | |
pymysql | ||
html5lib | ||
beautifulsoup4 | ||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,3 +16,4 @@ bottleneck | |
html5lib | ||
beautifulsoup4 | ||
jinja2=2.8 | ||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,3 +18,4 @@ psycopg2 | |
s3fs | ||
beautifulsoup4 | ||
ipython | ||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
python-dateutil | ||
pytz | ||
numpy | ||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,3 +14,4 @@ bottleneck | |
xarray | ||
s3fs | ||
beautifulsoup4 | ||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,3 +23,4 @@ beautifulsoup4 | |
s3fs | ||
xarray | ||
ipython | ||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,3 +20,4 @@ beautifulsoup4 | |
s3fs | ||
xarray | ||
ipython | ||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,3 +20,4 @@ beautifulsoup4 | |
s3fs | ||
xarray | ||
ipython | ||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,3 +15,4 @@ blosc | |
thrift=0.10* | ||
fastparquet | ||
pyarrow | ||
hypothesis>=3.46.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,31 +4,101 @@ | |
import pandas.util.testing as tm | ||
from pandas.core.reshape.util import cartesian_product | ||
|
||
from hypothesis import strategies as st | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Any hypothesis-related things that we want to import, please put in a separate file, pandas.util._hypothesis.py, and import from there. (These are the generic things, similar to what we do in conftest.py.) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agree. |
||
from hypothesis import given, settings, assume | ||
from datetime import date | ||
from dateutil import relativedelta | ||
import string | ||
|
||
|
||
NO_OF_EXAMPLES_PER_TEST_CASE = 20 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hard-code this for now in the examples. have to see how this behaves. |
||
|
||
|
||
def get_elements(elem_type): | ||
strategy = st.nothing() | ||
if elem_type == bool: | ||
strategy = st.booleans() | ||
elif elem_type == int: | ||
strategy = st.integers() | ||
elif elem_type == float: | ||
strategy = st.floats() | ||
elif elem_type == str: | ||
strategy = st.text(string.ascii_letters, max_size=10) | ||
return strategy | ||
|
||
|
||
@st.composite | ||
def get_seq(draw, types, mixed=False, min_size=None, max_size=None, transform_func=None): | ||
"""helper function to generate strategy for creating lists. parameters define the nature of to be generated list. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These lines are too long (check the log output or run flake8 locally) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, we use the numpy docstring standard: http://pandas-docs.github.io/pandas-docs-travis/contributing_docstring.html |
||
:param types: what type of elements constitute the list | ||
:param mixed: if True, list will contains elements from all types listed in arg, oterwise it will have elements only from types[0]. | ||
:param min_size: minimum size of the list. | ||
:param max_size: maximum size of the list. | ||
:param transform_func: a callable which can be applied to whole list after it has been generated. | ||
""" | ||
strategy = st.nothing() | ||
if min_size is None: | ||
min_size = draw(st.integers(min_value=0, max_value=100)) | ||
|
||
if max_size is None: | ||
max_size = draw(st.integers(min_value=min_size, max_value=100)) | ||
|
||
assert min_size <= max_size, 'max_size must be greater than equal to min_size' | ||
|
||
elem_strategies = [] | ||
for elem_type in types: | ||
elem_strategies.append(get_elements(elem_type)) | ||
if not mixed: | ||
break | ||
|
||
if transform_func: | ||
strategy = draw(st.lists(st.one_of(elem_strategies), | ||
min_size=min_size, max_size=max_size).map(transform_func)) | ||
else: | ||
strategy = draw(st.lists(st.one_of(elem_strategies), | ||
min_size=min_size, max_size=max_size)) | ||
return strategy | ||
|
||
|
||
class TestCartesianProduct(object): | ||
|
||
def test_simple(self): | ||
x, y = list('ABC'), [1, 22] | ||
@settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) | ||
@given(get_seq((str,), False, 1, 1), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is there any way to name these things for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are already giving them a name in some cases, as arguments in test case function. e.g but local function can be added like below: and then used as below but that would be too cumbersome. Instead, for each argument in given decorator, having comments explaining each returned strategy can be more suitable. |
||
get_seq((int,), False, 1, 2)) | ||
def test_simple(self, x, y): | ||
x = list(x[0]) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this a list? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To map the test case as closely as possible to the original test case, where x = list('ABC'). |
||
# non-empty test case is handled in test_empty, therefore ignore it here | ||
assume(len(x) != 0) | ||
result1, result2 = cartesian_product([x, y]) | ||
expected1 = np.array(['A', 'A', 'B', 'B', 'C', 'C']) | ||
expected2 = np.array([1, 22, 1, 22, 1, 22]) | ||
expected1 = np.array([item1 for item1 in x for item2 in y]) | ||
expected2 = np.array([item2 for item1 in x for item2 in y]) | ||
|
||
tm.assert_numpy_array_equal(result1, expected1) | ||
tm.assert_numpy_array_equal(result2, expected2) | ||
|
||
@settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) | ||
def test_datetimeindex(self): | ||
# regression test for GitHub issue #6439 | ||
# make sure that the ordering on datetimeindex is consistent | ||
x = date_range('2000-01-01', periods=2) | ||
d = st.dates(min_value=date(1900, 1, 1), max_value=date(2100, 1, 1)).example() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why generate them here instead of using There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No specific reason; it was copy-pasted after testing on the console, since the example() function makes it easy to check a generated value :) And yes, it can be moved to the "given" decorator. |
||
n = d + relativedelta.relativedelta(days=1) | ||
x = date_range(d, periods=2) | ||
result1, result2 = [Index(y).day for y in cartesian_product([x, x])] | ||
expected1 = Index([1, 1, 2, 2]) | ||
expected2 = Index([1, 2, 1, 2]) | ||
expected1 = Index([d.day, d.day, n.day, n.day]) | ||
expected2 = Index([d.day, n.day, d.day, n.day]) | ||
|
||
tm.assert_index_equal(result1, expected1) | ||
tm.assert_index_equal(result2, expected2) | ||
|
||
def test_empty(self): | ||
@settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) | ||
@given(st.lists(st.nothing()), | ||
get_seq((int,), False), | ||
get_seq((str,), False)) | ||
def test_empty(self, empty_list, list_of_int, list_of_str): | ||
# product of empty factors | ||
X = [[], [0, 1], []] | ||
Y = [[], [], ['a', 'b', 'c']] | ||
X = [empty_list, list_of_int, empty_list] | ||
Y = [empty_list, empty_list, list_of_str] | ||
|
||
for x, y in zip(X, Y): | ||
expected1 = np.array([], dtype=np.asarray(x).dtype) | ||
expected2 = np.array([], dtype=np.asarray(y).dtype) | ||
|
@@ -37,13 +107,22 @@ def test_empty(self): | |
tm.assert_numpy_array_equal(result2, expected2) | ||
|
||
# empty product (empty input): | ||
result = cartesian_product([]) | ||
result = cartesian_product(empty_list) | ||
expected = [] | ||
assert result == expected | ||
|
||
@settings(max_examples=NO_OF_EXAMPLES_PER_TEST_CASE) | ||
def test_invalid_input(self): | ||
invalid_inputs = [1, [1], [1, 2], [[1], 2], | ||
'a', ['a'], ['a', 'b'], [['a'], 'b']] | ||
invalid_inputs = [st.integers().example(), | ||
st.tuples(st.integers()).example(), | ||
st.tuples(st.integers(), st.integers()).example(), | ||
st.text(string.ascii_letters, min_size=1, max_size=1).example(), | ||
st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1)).example(), | ||
st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1), | ||
st.text(string.ascii_letters, min_size=1, max_size=1)).example(), | ||
st.tuples(st.tuples(st.text(string.ascii_letters, min_size=1, max_size=1)), | ||
st.text(string.ascii_letters, min_size=1, max_size=1)).example()] | ||
|
||
msg = "Input must be a list-like of list-likes" | ||
for X in invalid_inputs: | ||
tm.assert_raises_regex(TypeError, msg, cartesian_product, X=X) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
need a new-line