From 4d77cd8e60174f7adf32587755653624617f5b6c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 3 Feb 2018 15:08:35 -0600 Subject: [PATCH 01/31] PKG: Exclude data test files. --- MANIFEST.in | 33 +++++++++++++------ pandas/tests/io/conftest.py | 18 +++++++++-- pandas/tests/io/test_common.py | 2 ++ pandas/tests/io/test_html.py | 56 +++++++++++++++++---------------- pandas/tests/io/test_packers.py | 2 ++ pandas/tests/io/test_pickle.py | 2 ++ pandas/tests/plotting/common.py | 4 +++ pandas/util/_test_decorators.py | 2 ++ pandas/util/testing.py | 8 ++++- setup.py | 6 +--- 10 files changed, 88 insertions(+), 45 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 9773019c6e6e0..9416da89cc627 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,20 +10,33 @@ prune doc/build graft pandas -global-exclude *.so -global-exclude *.pyd +global-exclude *.bz2 +global-exclude *.csv +global-exclude *.dta +global-exclude *.gz +global-exclude *.h5 +global-exclude *.html +global-exclude *.json +global-exclude *.msgpack +global-exclude *.pickle +global-exclude *.png global-exclude *.pyc +global-exclude *.pyd +global-exclude *.sas7bdat +global-exclude *.so +global-exclude *.xls +global-exclude *.xlsm +global-exclude *.xlsx +global-exclude *.xpt +global-exclude *.xz +global-exclude *.zip global-exclude *~ -global-exclude \#* -global-exclude .git* global-exclude .DS_Store -global-exclude *.png +global-exclude .git* +global-exclude \#* + +recursive-exclude pandas/tests/io/data -# include examples/data/* -# recursive-include examples *.py -# recursive-include doc/source * -# recursive-include doc/sphinxext * -# recursive-include LICENSES * include versioneer.py include pandas/_version.py include pandas/io/formats/templates/*.tpl diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 57e72da2fd3f4..21d171ab5fc05 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -9,19 +9,30 @@ @pytest.fixture(scope='module') def tips_file(): """Path to the tips dataset""" - return os.path.join(HERE, 'parser', 'data', 'tips.csv') + path = os.path.join(HERE, 'parser', 'data', 'tips.csv') + if not os.path.exists(path): + pytest.skip("Data files not included in pandas distribution.") + + return path @pytest.fixture(scope='module') def jsonl_file(): """Path a JSONL dataset""" - return os.path.join(HERE, 'parser', 'data', 'items.jsonl') + path = os.path.join(HERE, 'parser', 'data', 'items.jsonl') + if not os.path.exists(path): + pytest.skip("Data files not included in pandas distribution.") + + return path @pytest.fixture(scope='module') def salaries_table(): """DataFrame with the salaries dataset""" path = os.path.join(HERE, 'parser', 'data', 'salaries.csv') + if not os.path.exists(path): + pytest.skip("Data files not included in pandas distribution.") + return read_table(path) @@ -53,6 +64,9 @@ def s3_resource(tips_file, jsonl_file): def add_tips_files(bucket_name): for s3_key, file_name in test_s3_files: + if not os.path.exists(file_name): + pytest.skip("Data files not included in pandas distribution.") + with open(file_name, 'rb') as f: conn.Bucket(bucket_name).put_object( Key=s3_key, diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a0070dce6a7f1..3c258e2b77b3f 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -166,6 +166,8 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): ]) def test_read_fspath_all(self, reader, module, path): pytest.importorskip(module) + if not os.path.exists(path): + pytest.skip("Data files not included in pandas distribution.") mypath = CustomFSPath(path) result = reader(mypath) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 151a0750b7f6e..3edaef14f30f6 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -65,9 +65,6 @@ def _skip_if_none_of(module_names): pytest.skip("Bad version of bs4: 4.2.0") -DATA_PATH = tm.get_data_path() - - def assert_framelist_equal(list1, list2, *args, **kwargs): assert len(list1) == len(list2), ('lists are not of equal size ' 'len(list1) == {0}, ' @@ -86,8 +83,8 @@ def test_bs4_version_fails(): _skip_if_none_of(('bs4', 'html5lib')) import bs4 if LooseVersion(bs4.__version__) == LooseVersion('4.2.0'): - tm.assert_raises(AssertionError, read_html, os.path.join(DATA_PATH, - "spam.html"), + tm.assert_raises(AssertionError, read_html, + os.path.join(tm.get_data_path(), "spam.html"), flavor='bs4') @@ -100,16 +97,17 @@ def read_html(self, *args, **kwargs): class TestReadHtml(ReadHtmlMixin): flavor = 'bs4' - spam_data = os.path.join(DATA_PATH, 'spam.html') - spam_data_kwargs = {} - if PY3: - spam_data_kwargs['encoding'] = 'UTF-8' - banklist_data = os.path.join(DATA_PATH, 'banklist.html') @classmethod def setup_class(cls): _skip_if_none_of(('bs4', 'html5lib')) + cls.spam_data = os.path.join(tm.get_data_path(), 'spam.html') + cls.spam_data_kwargs = {} + if PY3: + cls.spam_data_kwargs['encoding'] = 'UTF-8' + cls.banklist_data = os.path.join(tm.get_data_path(), 'banklist.html') + def test_to_html_compat(self): df = mkdf(4, 3, data_gen_f=lambda *args: rand(), c_idx_names=False, r_idx_names=False).applymap('{0:.3f}'.format).astype(float) @@ -382,7 +380,7 @@ def test_python_docs_table(self): @pytest.mark.slow def test_thousands_macau_stats(self): all_non_nan_table_index = -2 - macau_data = os.path.join(DATA_PATH, 'macau.html') + macau_data = os.path.join(tm.get_data_path(), 'macau.html') dfs = self.read_html(macau_data, index_col=0, attrs={'class': 'style1'}) df = dfs[all_non_nan_table_index] @@ -392,7 +390,7 @@ def test_thousands_macau_stats(self): @pytest.mark.slow def test_thousands_macau_index_col(self): all_non_nan_table_index = -2 - macau_data = os.path.join(DATA_PATH, 'macau.html') + macau_data = os.path.join(tm.get_data_path(), 'macau.html') dfs = self.read_html(macau_data, index_col=0, header=0) df = dfs[all_non_nan_table_index] @@ -520,7 +518,7 @@ def test_countries_municipalities(self): assert_framelist_equal(res1, res2) def test_nyse_wsj_commas_table(self): - data = os.path.join(DATA_PATH, 'nyse_wsj.html') + data = os.path.join(tm.get_data_path(), 'nyse_wsj.html') df = self.read_html(data, index_col=0, header=0, attrs={'class': 'mdcTable'})[0] @@ -542,7 +540,8 @@ def try_remove_ws(x): df = self.read_html(self.banklist_data, 'Metcalf', attrs={'id': 'table'})[0] - ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), + ground_truth = read_csv(os.path.join(tm.get_data_path(), + 'banklist.csv'), converters={'Updated Date': Timestamp, 'Closing Date': Timestamp}) assert df.shape == ground_truth.shape @@ -660,7 +659,7 @@ def test_parse_dates_combine(self): tm.assert_frame_equal(newdf, res[0]) def test_computer_sales_page(self): - data = os.path.join(DATA_PATH, 'computer_sales_page.html') + data = os.path.join(tm.get_data_path(), 'computer_sales_page.html') with tm.assert_raises_regex(ParserError, r"Passed header=\[0,1\] are " r"too many rows for this " @@ -668,7 +667,7 @@ def test_computer_sales_page(self): self.read_html(data, header=[0, 1]) def test_wikipedia_states_table(self): - data = os.path.join(DATA_PATH, 'wikipedia_states.html') + data = os.path.join(tm.get_data_path(), 'wikipedia_states.html') assert os.path.isfile(data), '%r is not a file' % data assert os.path.getsize(data), '%r is an empty file' % data result = self.read_html(data, 'Arizona', header=1)[0] @@ -788,11 +787,14 @@ def _lang_enc(filename): class TestReadHtmlEncoding(object): - files = glob.glob(os.path.join(DATA_PATH, 'html_encoding', '*.html')) flavor = 'bs4' @classmethod def setup_class(cls): + cls.files = glob.glob(os.path.join(tm.get_data_path(), + 'html_encoding', + '*.html')) + _skip_if_none_of((cls.flavor, 'html5lib')) def read_html(self, *args, **kwargs): @@ -847,8 +849,8 @@ def setup_class(cls): def test_data_fail(self): from lxml.etree import XMLSyntaxError - spam_data = os.path.join(DATA_PATH, 'spam.html') - banklist_data = os.path.join(DATA_PATH, 'banklist.html') + spam_data = os.path.join(tm.get_data_path(), 'spam.html') + banklist_data = os.path.join(tm.get_data_path(), 'banklist.html') with pytest.raises(XMLSyntaxError): self.read_html(spam_data) @@ -857,7 +859,7 @@ def test_data_fail(self): self.read_html(banklist_data) def test_works_on_valid_markup(self): - filename = os.path.join(DATA_PATH, 'valid_markup.html') + filename = os.path.join(tm.get_data_path(), 'valid_markup.html') dfs = self.read_html(filename, index_col=0) assert isinstance(dfs, list) assert isinstance(dfs[0], DataFrame) @@ -865,7 +867,7 @@ def test_works_on_valid_markup(self): @pytest.mark.slow def test_fallback_success(self): _skip_if_none_of(('bs4', 'html5lib')) - banklist_data = os.path.join(DATA_PATH, 'banklist.html') + banklist_data = os.path.join(tm.get_data_path(), 'banklist.html') self.read_html(banklist_data, '.*Water.*', flavor=['lxml', 'html5lib']) def test_to_html_timestamp(self): @@ -893,7 +895,7 @@ def test_parse_dates_combine(self): tm.assert_frame_equal(newdf, res[0]) def test_computer_sales_page(self): - data = os.path.join(DATA_PATH, 'computer_sales_page.html') + data = os.path.join(tm.get_data_path(), 'computer_sales_page.html') self.read_html(data, header=[0, 1]) @@ -914,7 +916,7 @@ def get_elements_from_file(url, element='table'): @pytest.mark.slow def test_bs4_finds_tables(): - filepath = os.path.join(DATA_PATH, "spam.html") + filepath = os.path.join(tm.get_data_path(), "spam.html") with warnings.catch_warnings(): warnings.filterwarnings('ignore') assert get_elements_from_file(filepath, 'table') @@ -929,19 +931,19 @@ def get_lxml_elements(url, element): @pytest.mark.slow def test_lxml_finds_tables(): - filepath = os.path.join(DATA_PATH, "spam.html") + filepath = os.path.join(tm.get_data_path(), "spam.html") assert get_lxml_elements(filepath, 'table') @pytest.mark.slow def test_lxml_finds_tbody(): - filepath = os.path.join(DATA_PATH, "spam.html") + filepath = os.path.join(tm.get_data_path(), "spam.html") assert get_lxml_elements(filepath, 'tbody') def test_same_ordering(): _skip_if_none_of(['bs4', 'lxml', 'html5lib']) - filename = os.path.join(DATA_PATH, 'valid_markup.html') + filename = os.path.join(tm.get_data_path(), 'valid_markup.html') dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) @@ -965,7 +967,7 @@ def test_importcheck_thread_safety(): pytest.importorskip('lxml') reload(pandas.io.html) - filename = os.path.join(DATA_PATH, 'valid_markup.html') + filename = os.path.join(tm.get_data_path(), 'valid_markup.html') helper_thread1 = ErrorThread(target=read_html, args=(filename,)) helper_thread2 = ErrorThread(target=read_html, args=(filename,)) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index c343e0105eb4f..06dbb60b587da 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -830,6 +830,8 @@ def test_default_encoding(self): def legacy_packers_versions(): # yield the packers versions path = tm.get_data_path('legacy_msgpack') + if not os.path.exists(path): + raise pytest.skip("Data file {} does not exist.".format(path)) for v in os.listdir(path): p = os.path.join(path, v) if os.path.isdir(p): diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 2ba3e174404c7..ab75416033f68 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -191,6 +191,8 @@ def compare_sp_frame_float(result, expected, typ, version): def legacy_pickle_versions(): # yield the pickle versions path = tm.get_data_path('legacy_pickle') + if not os.path.exists(path): + raise pytest.skip("Data path {} does not exists.".format(path)) for v in os.listdir(path): p = os.path.join(path, v) if os.path.isdir(p): diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 2e62b22b2b69e..58f03fe69ed60 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -77,6 +77,10 @@ def setup_method(self, method): from pandas import read_csv base = os.path.join(os.path.dirname(curpath()), os.pardir) path = os.path.join(base, 'tests', 'data', 'iris.csv') + + if not os.path.exists(path): + pytest.skip("Data files not included in pandas distribution.") + self.iris = read_csv(path) n = 100 diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 0fd5648739e5c..67a55d9ca21db 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -23,6 +23,8 @@ def test_foo(): For more information, refer to the ``pytest`` documentation on ``skipif``. """ +import functools +import os import pytest import locale diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 0009e26f8b100..1a6a1623fc96b 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -732,10 +732,16 @@ def get_data_path(f=''): """Return the path of a data file, these are relative to the current test directory. """ + import pytest + # get our callers file _, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1] base_dir = os.path.abspath(os.path.dirname(filename)) - return os.path.join(base_dir, 'data', f) + path = os.path.join(base_dir, 'data', f) + if not os.path.exists(path): + pytest.skip("Data files not included in pandas distribution.") + + return path # ----------------------------------------------------------------------------- # Comparators diff --git a/setup.py b/setup.py index 5397a1b84dc4d..ccf3a9f5be471 100755 --- a/setup.py +++ b/setup.py @@ -722,11 +722,7 @@ def pxd(name): maintainer=AUTHOR, version=versioneer.get_version(), packages=find_packages(include=['pandas', 'pandas.*']), - package_data={'': ['data/*', 'templates/*'], - 'pandas.tests.io': ['data/legacy_hdf/*.h5', - 'data/legacy_pickle/*/*.pickle', - 'data/legacy_msgpack/*/*.msgpack', - 'data/html_encoding/*.html']}, + package_data={'': ['templates/*']}, ext_modules=extensions, maintainer_email=EMAIL, description=DESCRIPTION, From 270e44248656cf7d8b690b0cb86ad467ac539eb6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 6 Feb 2018 11:33:31 -0600 Subject: [PATCH 02/31] Stuff --- ci/script_single.sh | 8 ++++---- pandas/tests/test_base.py | 2 ++ pandas/util/_test_decorators.py | 3 --- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/ci/script_single.sh b/ci/script_single.sh index 005c648ee025f..8154d1c304076 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -23,12 +23,12 @@ elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" elif [ "$COVERAGE" ]; then - echo pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas - pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + echo pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas else - echo pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas - pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest + echo pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas + pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest fi diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index df2547fc7b0da..a0d23a9625d60 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -511,6 +511,8 @@ def test_value_counts_unique_nunique_null(self): index=expected_index[9:1:-1], dtype='int64', name='a') + if isinstance(o, pd.PeriodIndex): + import pdb; pdb.set_trace() result_s_na = o.value_counts(dropna=False) tm.assert_series_equal(result_s_na, expected_s_na) assert result_s_na.index.name is None diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 67a55d9ca21db..aefe0401e55ac 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -23,9 +23,6 @@ def test_foo(): For more information, refer to the ``pytest`` documentation on ``skipif``. """ -import functools -import os - import pytest import locale from distutils.version import LooseVersion From 1804bccebaf2c34484ddb80b161718ea3f038dd2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 25 Feb 2018 14:20:06 -0600 Subject: [PATCH 03/31] Refactor data path handling --- pandas/conftest.py | 2 + pandas/tests/conftest.py | 32 +++ pandas/tests/io/conftest.py | 7 +- pandas/tests/reshape/merge/test_merge_asof.py | 250 ++++++++++-------- pandas/tests/reshape/test_tile.py | 6 +- pandas/tests/util/test_testing.py | 12 + setup.cfg | 1 + 7 files changed, 196 insertions(+), 114 deletions(-) create mode 100644 pandas/tests/conftest.py diff --git a/pandas/conftest.py b/pandas/conftest.py index 37f0a2f818a3b..76a02d076a373 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -16,6 +16,8 @@ def pytest_addoption(parser): help="run high memory tests") parser.addoption("--only-slow", action="store_true", help="run only slow tests") + parser.addoption("--strict-data-files", action="store_true", + help="Fail if a test is skipped for missing data file.") def pytest_runtest_setup(item): diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py new file mode 100644 index 0000000000000..3b49d9f213909 --- /dev/null +++ b/pandas/tests/conftest.py @@ -0,0 +1,32 @@ +import os + +import pytest + + +@pytest.fixture +def datapath(request): + """Get the path to a data file. + + Parameters + ---------- + path : str + Path to the file, relative to ``pandas/tests/`` + + Returns + ------- + path : path including ``pandas/tests``. + + Raises + ------ + ValueError + If the path doesn't exist and the --strict-data-files option is set. + """ + def deco(path): + path = os.path.join('pandas', 'tests', os.path.join(path)) + if not os.path.exists(path): + if request.config.getoption("--strict-data-files"): + raise ValueError("Failed.") + else: + pytest.skip("Data files not included in pandas distribution.") + return path + return deco diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 342008f59f851..bcf8471d1af45 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -11,11 +11,14 @@ def parser_data(request): @pytest.fixture -def tips_file(parser_data): +def tips_file(request, parser_data): """Path to the tips dataset""" path = os.path.join(parser_data, 'tips.csv') if not os.path.exists(path): - pytest.skip("Data files not included in pandas distribution.") + if request.config.getoption("--strict-data-files"): + raise ValueError("Failed.") + else: + pytest.skip("Data files not included in pandas distribution.") return path diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index cebbcc41c3e17..622bc787c7bf8 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1,4 +1,5 @@ import os + import pytest import pytz @@ -11,27 +12,71 @@ from pandas.util.testing import assert_frame_equal -class TestAsOfMerge(object): +def read_data(path, dedupe=False): + x = read_csv(path) + if dedupe: + x = (x.drop_duplicates(['time', 'ticker'], keep='last') + .reset_index(drop=True)) + x.time = to_datetime(x.time) + return x + + +@pytest.fixture +def trades(datapath): + return read_data(datapath(os.path.join('reshape', 'merge', 'data', + 'trades.csv'))) + + +@pytest.fixture +def trades2(datapath): + return read_data(datapath(os.path.join('reshape', 'merge', 'data', + 'trades2.csv'))) + + +@pytest.fixture +def quotes(datapath): + return read_data(datapath(os.path.join('reshape', 'merge', 'data', + 'quotes.csv')), dedupe=True) + + +@pytest.fixture +def quotes2(datapath): + return read_data(datapath(os.path.join('reshape', 'merge', 'data', + 'quotes2.csv')), dedupe=True) + + +@pytest.fixture +def asof(datapath): + return read_data(datapath(os.path.join('reshape', 'merge', 'data', + 'asof.csv'))) + + +@pytest.fixture +def asof2(datapath): + return read_data(datapath(os.path.join('reshape', 'merge', 'data', + 'asof2.csv'))) + + +@pytest.fixture +def tolerance(datapath): + return read_data(datapath(os.path.join('reshape', 'merge', 'data', + 'tolerance.csv'))) + - def read_data(self, name, dedupe=False): - path = os.path.join(tm.get_data_path(), name) - x = read_csv(path) - if dedupe: - x = (x.drop_duplicates(['time', 'ticker'], keep='last') - .reset_index(drop=True) - ) - x.time = to_datetime(x.time) - return x - - def setup_method(self, method): - - self.trades = self.read_data('trades.csv') - self.quotes = self.read_data('quotes.csv', dedupe=True) - self.asof = self.read_data('asof.csv') - self.tolerance = self.read_data('tolerance.csv') - self.allow_exact_matches = self.read_data('allow_exact_matches.csv') - self.allow_exact_matches_and_tolerance = self.read_data( - 'allow_exact_matches_and_tolerance.csv') +@pytest.fixture +def allow_exact_matches(datapath): + return read_data(datapath(os.path.join('reshape', 'merge', 'data', + 'allow_exact_matches.csv'))) + + +@pytest.fixture +def allow_exact_matches_and_tolerance(datapath): + return read_data(datapath(os.path.join( + 'reshape', 'merge', 'data', 'allow_exact_matches_and_tolerance.csv' + ))) + + +class TestAsOfMerge(object): def test_examples1(self): """ doc-string examples """ @@ -149,23 +194,20 @@ def test_examples4(self): result = pd.merge_asof(left, right, on='a', direction='nearest') assert_frame_equal(result, expected) - def test_basic(self): - - expected = self.asof - trades = self.trades - quotes = self.quotes + def test_basic(self, trades, quotes, asof): + expected = asof result = merge_asof(trades, quotes, on='time', by='ticker') assert_frame_equal(result, expected) - def test_basic_categorical(self): + def test_basic_categorical(self, asof, trades, quotes): - expected = self.asof - trades = self.trades.copy() + expected = asof + trades = trades.copy() trades.ticker = trades.ticker.astype('category') - quotes = self.quotes.copy() + quotes = quotes.copy() quotes.ticker = quotes.ticker.astype('category') expected.ticker = expected.ticker.astype('category') @@ -174,12 +216,12 @@ def test_basic_categorical(self): by='ticker') assert_frame_equal(result, expected) - def test_basic_left_index(self): + def test_basic_left_index(self, trades, quotes, asof): # GH14253 - expected = self.asof - trades = self.trades.set_index('time') - quotes = self.quotes + expected = asof + trades = trades.set_index('time') + quotes = quotes result = merge_asof(trades, quotes, left_index=True, @@ -191,11 +233,11 @@ def test_basic_left_index(self): expected = expected[result.columns] assert_frame_equal(result, expected) - def test_basic_right_index(self): + def test_basic_right_index(self, trades, quotes, asof): - expected = self.asof - trades = self.trades - quotes = self.quotes.set_index('time') + expected = asof + trades = trades + quotes = quotes.set_index('time') result = merge_asof(trades, quotes, left_on='time', @@ -203,11 +245,11 @@ def test_basic_right_index(self): by='ticker') assert_frame_equal(result, expected) - def test_basic_left_index_right_index(self): + def test_basic_left_index_right_index(self, trades, quotes, asof): - expected = self.asof.set_index('time') - trades = self.trades.set_index('time') - quotes = self.quotes.set_index('time') + expected = asof.set_index('time') + trades = trades.set_index('time') + quotes = quotes.set_index('time') result = merge_asof(trades, quotes, left_index=True, @@ -215,48 +257,48 @@ def test_basic_left_index_right_index(self): by='ticker') assert_frame_equal(result, expected) - def test_multi_index(self): + def test_multi_index(self, trades, quotes): # MultiIndex is prohibited - trades = self.trades.set_index(['time', 'price']) - quotes = self.quotes.set_index('time') + trades = trades.set_index(['time', 'price']) + quotes = quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True) - trades = self.trades.set_index('time') - quotes = self.quotes.set_index(['time', 'bid']) + def test_multi_index2(self, trades, quotes): + trades = trades.set_index('time') + quotes = quotes.set_index(['time', 'bid']) with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True) - def test_on_and_index(self): + def test_on_and_index_on_price(self, trades, quotes): # 'on' parameter and index together is prohibited - trades = self.trades.set_index('time') - quotes = self.quotes.set_index('time') + trades = trades.set_index('time') + quotes = quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, left_on='price', left_index=True, right_index=True) - trades = self.trades.set_index('time') - quotes = self.quotes.set_index('time') + def test_on_and_index_on_bid(self, trades, quotes): + trades = trades.set_index('time') + quotes = quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, right_on='bid', left_index=True, right_index=True) - def test_basic_left_by_right_by(self): + def test_basic_left_by_right_by(self, trades, quotes, asof): # GH14253 - expected = self.asof - trades = self.trades - quotes = self.quotes + expected = asof result = merge_asof(trades, quotes, on='time', @@ -264,11 +306,11 @@ def test_basic_left_by_right_by(self): right_by='ticker') assert_frame_equal(result, expected) - def test_missing_right_by(self): + def test_missing_right_by(self, trades, quotes, asof): - expected = self.asof - trades = self.trades - quotes = self.quotes + expected = asof + trades = trades + quotes = quotes q = quotes[quotes.ticker != 'MSFT'] result = merge_asof(trades, q, @@ -277,7 +319,7 @@ def test_missing_right_by(self): expected.loc[expected.ticker == 'MSFT', ['bid', 'ask']] = np.nan assert_frame_equal(result, expected) - def test_multiby(self): + def test_multiby(self, trades, quotes): # GH13936 trades = pd.DataFrame({ 'time': pd.to_datetime(['20160525 13:30:00.023', @@ -334,7 +376,7 @@ def test_multiby(self): by=['ticker', 'exch']) assert_frame_equal(result, expected) - def test_multiby_heterogeneous_types(self): + def test_multiby_heterogeneous_types(self, trades, quotes): # GH13936 trades = pd.DataFrame({ 'time': pd.to_datetime(['20160525 13:30:00.023', @@ -423,34 +465,31 @@ def test_multiby_indexed(self): pd.merge_asof(left, right, left_index=True, right_index=True, left_by=['k1', 'k2'], right_by=['k1']) - def test_basic2(self): - - expected = self.read_data('asof2.csv') - trades = self.read_data('trades2.csv') - quotes = self.read_data('quotes2.csv', dedupe=True) + def test_basic2(self, asof2, trades2, quotes2): - result = merge_asof(trades, quotes, + expected = asof2 + result = merge_asof(trades2, quotes2, on='time', by='ticker') assert_frame_equal(result, expected) - def test_basic_no_by(self): + def test_basic_no_by(self, asof, trades, quotes): f = lambda x: x[x.ticker == 'MSFT'].drop('ticker', axis=1) \ .reset_index(drop=True) # just use a single ticker - expected = f(self.asof) - trades = f(self.trades) - quotes = f(self.quotes) + expected = f(asof) + trades = f(trades) + quotes = f(quotes) result = merge_asof(trades, quotes, on='time') assert_frame_equal(result, expected) - def test_valid_join_keys(self): + def test_valid_join_keys(self, trades, quotes): - trades = self.trades - quotes = self.quotes + trades = trades + quotes = quotes with pytest.raises(MergeError): merge_asof(trades, quotes, @@ -467,14 +506,14 @@ def test_valid_join_keys(self): merge_asof(trades, quotes, by='ticker') - def test_with_duplicates(self): + def test_with_duplicates(self, asof, trades, quotes): - q = pd.concat([self.quotes, self.quotes]).sort_values( + q = pd.concat([quotes, quotes]).sort_values( ['time', 'ticker']).reset_index(drop=True) - result = merge_asof(self.trades, q, + result = merge_asof(trades, q, on='time', by='ticker') - expected = self.read_data('asof.csv') + expected = asof assert_frame_equal(result, expected) def test_with_duplicates_no_on(self): @@ -489,22 +528,14 @@ def test_with_duplicates_no_on(self): 'right_val': [1, 1, 3]}) assert_frame_equal(result, expected) - def test_valid_allow_exact_matches(self): - - trades = self.trades - quotes = self.quotes - + def test_valid_allow_exact_matches(self, trades, quotes): with pytest.raises(MergeError): merge_asof(trades, quotes, on='time', by='ticker', allow_exact_matches='foo') - def test_valid_tolerance(self): - - trades = self.trades - quotes = self.quotes - + def test_valid_tolerance(self, trades, quotes): # dti merge_asof(trades, quotes, on='time', @@ -544,10 +575,10 @@ def test_valid_tolerance(self): by='ticker', tolerance=-1) - def test_non_sorted(self): + def test_non_sorted(self, trades, quotes): - trades = self.trades.sort_values('time', ascending=False) - quotes = self.quotes.sort_values('time', ascending=False) + trades = trades.sort_values('time', ascending=False) + quotes = quotes.sort_values('time', ascending=False) # we require that we are already sorted on time & quotes assert not trades.time.is_monotonic @@ -557,7 +588,7 @@ def test_non_sorted(self): on='time', by='ticker') - trades = self.trades.sort_values('time') + trades = trades.sort_values('time') assert trades.time.is_monotonic assert not quotes.time.is_monotonic with pytest.raises(ValueError): @@ -565,26 +596,25 @@ def test_non_sorted(self): on='time', by='ticker') - quotes = self.quotes.sort_values('time') + quotes = quotes.sort_values('time') assert trades.time.is_monotonic assert quotes.time.is_monotonic # ok, though has dupes - merge_asof(trades, self.quotes, + merge_asof(trades, quotes, on='time', by='ticker') - def test_tolerance(self): + def test_tolerance(self, trades, quotes, tolerance): - trades = self.trades - quotes = self.quotes + trades = trades + quotes = quotes result = merge_asof(trades, quotes, on='time', by='ticker', tolerance=Timedelta('1day')) - expected = self.tolerance - assert_frame_equal(result, expected) + assert_frame_equal(result, tolerance) def test_tolerance_forward(self): # GH14887 @@ -641,11 +671,11 @@ def test_tolerance_tz(self): 'value2': list("BCDEE")}) assert_frame_equal(result, expected) - def test_index_tolerance(self): + def test_index_tolerance(self, trades, quotes, tolerance): # GH 15135 - expected = self.tolerance.set_index('time') - trades = self.trades.set_index('time') - quotes = self.quotes.set_index('time') + expected = tolerance.set_index('time') + trades = trades.set_index('time') + quotes = quotes.set_index('time') result = pd.merge_asof(trades, quotes, left_index=True, @@ -654,13 +684,13 @@ def test_index_tolerance(self): tolerance=pd.Timedelta('1day')) assert_frame_equal(result, expected) - def test_allow_exact_matches(self): + def test_allow_exact_matches(self, trades, quotes, allow_exact_matches): - result = merge_asof(self.trades, self.quotes, + result = merge_asof(trades, quotes, on='time', by='ticker', allow_exact_matches=False) - expected = self.allow_exact_matches + expected = allow_exact_matches assert_frame_equal(result, expected) def test_allow_exact_matches_forward(self): @@ -695,14 +725,16 @@ def test_allow_exact_matches_nearest(self): allow_exact_matches=False) assert_frame_equal(result, expected) - def test_allow_exact_matches_and_tolerance(self): + def test_allow_exact_matches_and_tolerance( + self, trades, quotes, + allow_exact_matches_and_tolerance): - result = merge_asof(self.trades, self.quotes, + result = merge_asof(trades, quotes, on='time', by='ticker', tolerance=Timedelta('100ms'), allow_exact_matches=False) - expected = self.allow_exact_matches_and_tolerance + expected = allow_exact_matches_and_tolerance assert_frame_equal(result, expected) def test_allow_exact_matches_and_tolerance2(self): diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index ff914273d47b1..195faf8aad039 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -287,10 +287,10 @@ def test_round_frac(self): result = tmod._round_frac(0.000123456, precision=2) assert result == 0.00012 - def test_qcut_binning_issues(self): + def test_qcut_binning_issues(self, datapath): # #1978, 1979 - path = os.path.join(tm.get_data_path(), 'cut_data.csv') - arr = np.loadtxt(path) + cut_file = datapath(os.path.join('reshape', 'data', 'cut_data.csv')) + arr = np.loadtxt(cut_file) result = qcut(arr, 20) diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index 1c878604b11a2..a3d91e2dcc75c 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -739,3 +739,15 @@ def test_locale(self): # GH9744 locales = tm.get_locales() assert len(locales) >= 1 + + +def test_datapath_missing(datapath, request): + if not request.config.getoption("--strict-data-files"): + pytest.skip("Need to set '--strict-data-files'") + + with pytest.raises(ValueError): + datapath('not_a_file') + + result = datapath('data/iris.csv') + expected = 'pandas/tests/data/iris.csv' + assert result == expected diff --git a/setup.cfg b/setup.cfg index 942b2b0a1a0bf..6f668776d35e3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,3 +32,4 @@ markers = slow: mark a test as slow network: mark a test as network high_memory: mark a test as a high-memory only +addopts = --strict-data-files \ No newline at end of file From 70221525cc368377b33205c259faf9dbba0e4840 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 25 Feb 2018 14:30:24 -0600 Subject: [PATCH 04/31] More fixtures --- pandas/tests/io/conftest.py | 30 ++++++------------------------ pandas/tests/io/test_common.py | 21 +++++++++++---------- 2 files changed, 17 insertions(+), 34 deletions(-) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index bcf8471d1af45..896690871b36e 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -11,36 +11,21 @@ def parser_data(request): @pytest.fixture -def tips_file(request, parser_data): +def tips_file(datapath): """Path to the tips dataset""" - path = os.path.join(parser_data, 'tips.csv') - if not os.path.exists(path): - if request.config.getoption("--strict-data-files"): - raise ValueError("Failed.") - else: - pytest.skip("Data files not included in pandas distribution.") - - return path + return datapath(os.path.join('io', 'parser', 'data', 'tips.csv')) @pytest.fixture -def jsonl_file(parser_data): +def jsonl_file(datapath): """Path a JSONL dataset""" - path = os.path.join(parser_data, 'items.jsonl') - if not os.path.exists(path): - pytest.skip("Data files not included in pandas distribution.") - - return path + return datapath(os.path.join('io', 'parser', 'data', 'items.jsonl')) @pytest.fixture -def salaries_table(parser_data): +def salaries_table(datapath): """DataFrame with the salaries dataset""" - path = os.path.join(parser_data, 'salaries.csv') - if not os.path.exists(path): - pytest.skip("Data files not included in pandas distribution.") - - return read_table(path) + return datapath(os.path.join('io', 'parser', 'data', 'salaries.csv')) @pytest.fixture @@ -71,9 +56,6 @@ def s3_resource(tips_file, jsonl_file): def add_tips_files(bucket_name): for s3_key, file_name in test_s3_files: - if not os.path.exists(file_name): - pytest.skip("Data files not included in pandas distribution.") - with open(file_name, 'rb') as f: conn.Bucket(bucket_name).put_object( Key=s3_key, diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 515254e43bbb3..7789525182521 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -234,13 +234,14 @@ def test_write_fspath_hdf5(self): tm.assert_frame_equal(result, expected) -class TestMMapWrapper(object): +@pytest.fixture +def mmap_file(datapath): + return datapath(os.path.join('io', 'data', 'test_mmap.csv')) + - def setup_method(self, method): - self.mmap_file = os.path.join(tm.get_data_path(), - 'test_mmap.csv') +class TestMMapWrapper(object): - def test_constructor_bad_file(self): + def test_constructor_bad_file(self, mmap_file): non_file = StringIO('I am not a file') non_file.fileno = lambda: -1 @@ -254,15 +255,15 @@ def test_constructor_bad_file(self): tm.assert_raises_regex(err, msg, common.MMapWrapper, non_file) - target = open(self.mmap_file, 'r') + target = open(mmap_file, 'r') target.close() msg = "I/O operation on closed file" tm.assert_raises_regex( ValueError, msg, common.MMapWrapper, target) - def test_get_attr(self): - with open(self.mmap_file, 'r') as target: + def test_get_attr(self, mmap_file): + with open(mmap_file, 'r') as target: wrapper = common.MMapWrapper(target) attrs = dir(wrapper.mmap) @@ -275,8 +276,8 @@ def test_get_attr(self): assert not hasattr(wrapper, 'foo') - def test_next(self): - with open(self.mmap_file, 'r') as target: + def test_next(self, mmap_file): + with open(mmap_file, 'r') as target: wrapper = common.MMapWrapper(target) lines = target.readlines() From 151ffdad453e5f563ba6dd25d986a6c55ef56620 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Mar 2018 15:59:16 -0500 Subject: [PATCH 05/31] Updated html --- pandas/tests/conftest.py | 4 +-- pandas/tests/io/test_html.py | 69 +++++++++++++++++++----------------- 2 files changed, 38 insertions(+), 35 deletions(-) diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py index 3b49d9f213909..3d83acc078f24 100644 --- a/pandas/tests/conftest.py +++ b/pandas/tests/conftest.py @@ -21,8 +21,8 @@ def datapath(request): ValueError If the path doesn't exist and the --strict-data-files option is set. """ - def deco(path): - path = os.path.join('pandas', 'tests', os.path.join(path)) + def deco(*args): + path = os.path.join('pandas', 'tests', *args) if not os.path.exists(path): if request.config.getoption("--strict-data-files"): raise ValueError("Failed.") diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 79b9a3715efd2..648845038f4a1 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -25,8 +25,7 @@ import pandas.util._test_decorators as td from pandas.util.testing import makeCustomDataframe as mkdf, network - -DATA_PATH = tm.get_data_path() +HERE = os.path.dirname(__file__) def assert_framelist_equal(list1, list2, *args, **kwargs): @@ -44,11 +43,11 @@ def assert_framelist_equal(list1, list2, *args, **kwargs): @td.skip_if_no('bs4') -def test_bs4_version_fails(monkeypatch): +def test_bs4_version_fails(monkeypatch, datapath): import bs4 monkeypatch.setattr(bs4, '__version__', '4.2') with tm.assert_raises_regex(ValueError, "minimum version"): - read_html(os.path.join(DATA_PATH, "spam.html"), flavor='bs4') + read_html(datapath("io", "data", "spam.html"), flavor='bs4') def test_invalid_flavor(): @@ -59,8 +58,8 @@ def test_invalid_flavor(): @td.skip_if_no('bs4') @td.skip_if_no('lxml') -def test_same_ordering(): - filename = os.path.join(DATA_PATH, 'valid_markup.html') +def test_same_ordering(datapath): + filename = datapath('io', 'data', 'valid_markup.html') dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) @@ -72,11 +71,14 @@ def test_same_ordering(): pytest.param('lxml', marks=pytest.mark.skipif( not td.safe_import('lxml'), reason='No lxml'))], scope="class") class TestReadHtml(object): - spam_data = os.path.join(DATA_PATH, 'spam.html') - spam_data_kwargs = {} - if PY3: - spam_data_kwargs['encoding'] = 'UTF-8' - banklist_data = os.path.join(DATA_PATH, 'banklist.html') + + @pytest.fixture(autouse=True) + def set_files(self, datapath): + self.spam_data = datapath('io', 'data', 'spam.html') + self.spam_data_kwargs = {} + if PY3: + self.spam_data_kwargs['encoding'] = 'UTF-8' + self.banklist_data = datapath("io", "data", "banklist.html") @pytest.fixture(autouse=True, scope="function") def set_defaults(self, flavor, request): @@ -272,7 +274,8 @@ def test_invalid_url(self): @pytest.mark.slow def test_file_url(self): url = self.banklist_data - dfs = self.read_html(file_path_to_url(url), 'First', + dfs = self.read_html(file_path_to_url(os.path.abspath(url)), + 'First', attrs={'id': 'table'}) assert isinstance(dfs, list) for df in dfs: @@ -326,7 +329,7 @@ def test_multiindex_header_index_skiprows(self): @pytest.mark.slow def test_regex_idempotency(self): url = self.banklist_data - dfs = self.read_html(file_path_to_url(url), + dfs = self.read_html(file_path_to_url(os.path.abspath(url)), match=re.compile(re.compile('Florida')), attrs={'id': 'table'}) assert isinstance(dfs, list) @@ -352,9 +355,9 @@ def test_python_docs_table(self): assert sorted(zz) == sorted(['Repo', 'What']) @pytest.mark.slow - def test_thousands_macau_stats(self): + def test_thousands_macau_stats(self, datapath): all_non_nan_table_index = -2 - macau_data = os.path.join(DATA_PATH, 'macau.html') + macau_data = datapath("io", "data", "macau.html") dfs = self.read_html(macau_data, index_col=0, attrs={'class': 'style1'}) df = dfs[all_non_nan_table_index] @@ -362,9 +365,9 @@ def test_thousands_macau_stats(self): assert not any(s.isna().any() for _, s in df.iteritems()) @pytest.mark.slow - def test_thousands_macau_index_col(self): + def test_thousands_macau_index_col(self, datapath): all_non_nan_table_index = -2 - macau_data = os.path.join(DATA_PATH, 'macau.html') + macau_data = datapath('io', 'data', 'macau.html') dfs = self.read_html(macau_data, index_col=0, header=0) df = dfs[all_non_nan_table_index] @@ -491,8 +494,8 @@ def test_countries_municipalities(self): res2 = self.read_html(data2, header=0) assert_framelist_equal(res1, res2) - def test_nyse_wsj_commas_table(self): - data = os.path.join(DATA_PATH, 'nyse_wsj.html') + def test_nyse_wsj_commas_table(self, datapath): + data = datapath('io', 'data', 'nyse_wsj.html') df = self.read_html(data, index_col=0, header=0, attrs={'class': 'mdcTable'})[0] @@ -503,7 +506,7 @@ def test_nyse_wsj_commas_table(self): tm.assert_index_equal(df.columns, columns) @pytest.mark.slow - def test_banklist_header(self): + def test_banklist_header(self, datapath): from pandas.io.html import _remove_whitespace def try_remove_ws(x): @@ -514,7 +517,7 @@ def try_remove_ws(x): df = self.read_html(self.banklist_data, 'Metcalf', attrs={'id': 'table'})[0] - ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), + ground_truth = read_csv(datapath('io', 'data', 'banklist.csv'), converters={'Updated Date': Timestamp, 'Closing Date': Timestamp}) assert df.shape == ground_truth.shape @@ -631,19 +634,19 @@ def test_parse_dates_combine(self): newdf = DataFrame({'datetime': raw_dates}) tm.assert_frame_equal(newdf, res[0]) - def test_computer_sales_page(self): - data = os.path.join(DATA_PATH, 'computer_sales_page.html') + def test_computer_sales_page(self, datapath): + data = datapath('io', 'data', 'computer_sales_page.html') with tm.assert_raises_regex(ParserError, r"Passed header=\[0,1\] are " r"too many rows for this " r"multi_index of columns"): self.read_html(data, header=[0, 1]) - data = os.path.join(DATA_PATH, 'computer_sales_page.html') + data = datapath('io', 'data', 'computer_sales_page.html') assert self.read_html(data, header=[1, 2]) - def test_wikipedia_states_table(self): - data = os.path.join(DATA_PATH, 'wikipedia_states.html') + def test_wikipedia_states_table(self, datapath): + data = datapath('io', 'data', 'wikipedia_states.html') assert os.path.isfile(data), '%r is not a file' % data assert os.path.getsize(data), '%r is an empty file' % data result = self.read_html(data, 'Arizona', header=1)[0] @@ -757,15 +760,15 @@ def test_multiple_header_rows(self): html_df = read_html(html, )[0] tm.assert_frame_equal(expected_df, html_df) - def test_works_on_valid_markup(self): - filename = os.path.join(DATA_PATH, 'valid_markup.html') + def test_works_on_valid_markup(self, datapath): + filename = datapath('io', 'data', 'valid_markup.html') dfs = self.read_html(filename, index_col=0) assert isinstance(dfs, list) assert isinstance(dfs[0], DataFrame) @pytest.mark.slow - def test_fallback_success(self): - banklist_data = os.path.join(DATA_PATH, 'banklist.html') + def test_fallback_success(self, datapath): + banklist_data = datapath('io', 'data', 'banklist.html') self.read_html(banklist_data, '.*Water.*', flavor=['lxml', 'html5lib']) def test_to_html_timestamp(self): @@ -809,7 +812,7 @@ def test_displayed_only(self, displayed_only, exp0, exp1): assert len(dfs) == 1 # Should not parse hidden table @pytest.mark.parametrize("f", glob.glob( - os.path.join(DATA_PATH, 'html_encoding', '*.html'))) + os.path.join(HERE, 'data', 'html_encoding', '*.html'))) def test_encode(self, f): _, encoding = os.path.splitext(os.path.basename(f))[0].split('_') @@ -879,7 +882,7 @@ def seekable(self): assert self.read_html(bad) @pytest.mark.slow - def test_importcheck_thread_safety(self): + def test_importcheck_thread_safety(self, datapath): # see gh-16928 class ErrorThread(threading.Thread): @@ -894,7 +897,7 @@ def run(self): # force import check by reinitalising global vars in html.py reload(pandas.io.html) - filename = os.path.join(DATA_PATH, 'valid_markup.html') + filename = datapath('io', 'data', 'valid_markup.html') helper_thread1 = ErrorThread(target=self.read_html, args=(filename,)) helper_thread2 = ErrorThread(target=self.read_html, args=(filename,)) From d9d65706706d1c1e7b0793f11557abf88570f3a0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Mar 2018 17:27:12 -0500 Subject: [PATCH 06/31] Remove os.path.joins --- pandas/tests/conftest.py | 3 +- pandas/tests/io/conftest.py | 14 ++------ pandas/tests/io/test_common.py | 34 +++++++++---------- pandas/tests/reshape/merge/test_merge_asof.py | 33 +++++++----------- pandas/tests/test_base.py | 2 -- 5 files changed, 34 insertions(+), 52 deletions(-) diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py index 3d83acc078f24..2c246fb9d371d 100644 --- a/pandas/tests/conftest.py +++ b/pandas/tests/conftest.py @@ -27,6 +27,7 @@ def deco(*args): if request.config.getoption("--strict-data-files"): raise ValueError("Failed.") else: - pytest.skip("Data files not included in pandas distribution.") + pytest.skip("{} not included in pandas distribution." + .format(path)) return path return deco diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 896690871b36e..7623587803b41 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -1,31 +1,23 @@ -import os - import pytest from pandas.io.parsers import read_table -from pandas.util import testing as tm - - -@pytest.fixture -def parser_data(request): - return os.path.join(tm.get_data_path(), '..', 'parser', 'data') @pytest.fixture def tips_file(datapath): """Path to the tips dataset""" - return datapath(os.path.join('io', 'parser', 'data', 'tips.csv')) + return datapath('io', 'parser', 'data', 'tips.csv') @pytest.fixture def jsonl_file(datapath): """Path a JSONL dataset""" - return datapath(os.path.join('io', 'parser', 'data', 'items.jsonl')) + return datapath('io', 'parser', 'data', 'items.jsonl') @pytest.fixture def salaries_table(datapath): """DataFrame with the salaries dataset""" - return datapath(os.path.join('io', 'parser', 'data', 'salaries.csv')) + return read_table(datapath('io', 'parser', 'data', 'salaries.csv')) @pytest.fixture diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 7789525182521..37097033f0c75 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -149,29 +149,27 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): reader(path) @pytest.mark.parametrize('reader, module, path', [ - (pd.read_csv, 'os', os.path.join(HERE, 'data', 'iris.csv')), - (pd.read_table, 'os', os.path.join(HERE, 'data', 'iris.csv')), - (pd.read_fwf, 'os', os.path.join(HERE, 'data', - 'fixed_width_format.txt')), - (pd.read_excel, 'xlrd', os.path.join(HERE, 'data', 'test1.xlsx')), - (pd.read_feather, 'feather', os.path.join(HERE, 'data', - 'feather-0_3_1.feather')), - (pd.read_hdf, 'tables', os.path.join(HERE, 'data', 'legacy_hdf', + (pd.read_csv, 'os', 'iris.csv'), + (pd.read_table, 'os', 'iris.csv'), + (pd.read_fwf, 'os', 'fixed_width_format.txt'), + (pd.read_excel, 'xlrd', 'test1.xlsx'), + (pd.read_feather, 'feather', 'feather-0_3_1.feather'), + (pd.read_hdf, 'tables', os.path.join('legacy_hdf', 'datetimetz_object.h5')), - (pd.read_stata, 'os', os.path.join(HERE, 'data', 'stata10_115.dta')), - (pd.read_sas, 'os', os.path.join(HERE, 'sas', 'data', + (pd.read_stata, 'os', 'stata10_115.dta'), + # in the function, we go down to tests/io/data + # so step back up a level before going into sas + (pd.read_sas, 'os', os.path.join('..', 'sas', 'data', 'test1.sas7bdat')), - (pd.read_json, 'os', os.path.join(HERE, 'json', 'data', + (pd.read_json, 'os', os.path.join('..', 'json', 'data', 'tsframe_v012.json')), - (pd.read_msgpack, 'os', os.path.join(HERE, 'msgpack', 'data', + (pd.read_msgpack, 'os', os.path.join('..', 'msgpack', 'data', 'frame.mp')), - (pd.read_pickle, 'os', os.path.join(HERE, 'data', - 'categorical_0_14_1.pickle')), + (pd.read_pickle, 'os', 'categorical_0_14_1.pickle'), ]) - def test_read_fspath_all(self, reader, module, path): + def test_read_fspath_all(self, reader, module, path, datapath): pytest.importorskip(module) - if not os.path.exists(path): - pytest.skip("Data files not included in pandas distribution.") + path = datapath('io', 'data', path) mypath = CustomFSPath(path) result = reader(mypath) @@ -236,7 +234,7 @@ def test_write_fspath_hdf5(self): @pytest.fixture def mmap_file(datapath): - return datapath(os.path.join('io', 'data', 'test_mmap.csv')) + return datapath('io', 'data', 'test_mmap.csv') class TestMMapWrapper(object): diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 622bc787c7bf8..bcb27e49f28fe 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1,5 +1,3 @@ -import os - import pytest import pytz @@ -23,57 +21,52 @@ def read_data(path, dedupe=False): @pytest.fixture def trades(datapath): - return read_data(datapath(os.path.join('reshape', 'merge', 'data', - 'trades.csv'))) + return read_data(datapath('reshape', 'merge', 'data', 'trades.csv')) @pytest.fixture def trades2(datapath): - return read_data(datapath(os.path.join('reshape', 'merge', 'data', - 'trades2.csv'))) + return read_data(datapath('reshape', 'merge', 'data', 'trades2.csv')) @pytest.fixture def quotes(datapath): - return read_data(datapath(os.path.join('reshape', 'merge', 'data', - 'quotes.csv')), dedupe=True) + return read_data(datapath('reshape', 'merge', 'data', 'quotes.csv'), + dedupe=True) @pytest.fixture def quotes2(datapath): - return read_data(datapath(os.path.join('reshape', 'merge', 'data', - 'quotes2.csv')), dedupe=True) + return read_data(datapath('reshape', 'merge', 'data', 'quotes2.csv'), + dedupe=True) @pytest.fixture def asof(datapath): - return read_data(datapath(os.path.join('reshape', 'merge', 'data', - 'asof.csv'))) + return read_data(datapath('reshape', 'merge', 'data', 'asof.csv')) @pytest.fixture def asof2(datapath): - return read_data(datapath(os.path.join('reshape', 'merge', 'data', - 'asof2.csv'))) + return read_data(datapath('reshape', 'merge', 'data', 'asof2.csv')) @pytest.fixture def tolerance(datapath): - return read_data(datapath(os.path.join('reshape', 'merge', 'data', - 'tolerance.csv'))) + return read_data(datapath('reshape', 'merge', 'data', 'tolerance.csv')) @pytest.fixture def allow_exact_matches(datapath): - return read_data(datapath(os.path.join('reshape', 'merge', 'data', - 'allow_exact_matches.csv'))) + return read_data(datapath('reshape', 'merge', 'data', + 'allow_exact_matches.csv')) @pytest.fixture def allow_exact_matches_and_tolerance(datapath): - return read_data(datapath(os.path.join( + return read_data(datapath( 'reshape', 'merge', 'data', 'allow_exact_matches_and_tolerance.csv' - ))) + )) class TestAsOfMerge(object): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 3e6b347991fa4..c4c02c0bf6f17 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -519,8 +519,6 @@ def test_value_counts_unique_nunique_null(self): index=expected_index[9:1:-1], dtype='int64', name='a') - if isinstance(o, pd.PeriodIndex): - import pdb; pdb.set_trace() result_s_na = o.value_counts(dropna=False) tm.assert_series_equal(result_s_na, expected_s_na) assert result_s_na.index.name is None From 584959114cf02d63b235b5d01928e2ce22d25d4d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Mar 2018 21:04:38 -0500 Subject: [PATCH 07/31] More modules --- pandas/tests/io/test_pytables.py | 23 ++++++++++---------- pandas/tests/io/test_sql.py | 4 ++-- pandas/tests/tseries/offsets/test_offsets.py | 16 +++++++------- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index e690b1e302d8b..2db19bf373872 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4416,28 +4416,27 @@ def f(): store.select('df') tm.assert_raises_regex(ClosedFileError, 'file is not open', f) - def test_pytables_native_read(self): - + def test_pytables_native_read(self, datapath): with ensure_clean_store( - tm.get_data_path('legacy_hdf/pytables_native.h5'), + datapath('io', 'data', 'legacy_hdf/pytables_native.h5'), mode='r') as store: d2 = store['detector/readout'] assert isinstance(d2, DataFrame) @pytest.mark.skipif(PY35 and is_platform_windows(), reason="native2 read fails oddly on windows / 3.5") - def test_pytables_native2_read(self): + def test_pytables_native2_read(self, datapath): with ensure_clean_store( - tm.get_data_path('legacy_hdf/pytables_native2.h5'), + datapath('io', 'data', 'legacy_hdf', 'pytables_native2.h5'), mode='r') as store: str(store) d1 = store['detector'] assert isinstance(d1, DataFrame) - def test_legacy_table_read(self): + def test_legacy_table_read(self, datapath): # legacy table types with ensure_clean_store( - tm.get_data_path('legacy_hdf/legacy_table.h5'), + datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5'), mode='r') as store: with catch_warnings(record=True): @@ -5082,7 +5081,7 @@ def test_fspath(self): with pd.HDFStore(path) as store: assert os.fspath(store) == str(path) - def test_read_py2_hdf_file_in_py3(self): + def test_read_py2_hdf_file_in_py3(self, datapath): # GH 16781 # tests reading a PeriodIndex DataFrame written in Python2 in Python3 @@ -5097,8 +5096,8 @@ def test_read_py2_hdf_file_in_py3(self): ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) with ensure_clean_store( - tm.get_data_path( - 'legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), + datapath('io', 'data', 'legacy_hdf', + 'periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), mode='r') as store: result = store['p'] assert_frame_equal(result, expected) @@ -5495,14 +5494,14 @@ def test_store_timezone(self): assert_frame_equal(result, df) - def test_legacy_datetimetz_object(self): + def test_legacy_datetimetz_object(self, datapath): # legacy from < 0.17.0 # 8260 expected = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), B=Timestamp('20130603', tz='CET')), index=range(5)) with ensure_clean_store( - tm.get_data_path('legacy_hdf/datetimetz_object.h5'), + datapath('io', 'data', 'legacy_hdf', 'datetimetz_object.h5'), mode='r') as store: result = store['df'] assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 4530cc9d2fba9..ca9b1749ecf8a 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -253,9 +253,9 @@ def _get_exec(self): else: return self.conn.cursor() - def _load_iris_data(self): + def _load_iris_data(self, datapath): import io - iris_csv_file = os.path.join(tm.get_data_path(), 'iris.csv') + iris_csv_file = datapath('io', 'data', 'iris.csv') self.drop_table('iris') self._get_exec().execute(SQL_STRINGS['create_iris'][self.flavor]) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index d96ebab615d12..03e2b42022967 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -455,14 +455,15 @@ def test_add(self, offset_types, tz): assert isinstance(result, Timestamp) assert result == expected_localize - def test_pickle_v0_15_2(self): + def test_pickle_v0_15_2(self, datapath): offsets = {'DateOffset': DateOffset(years=1), 'MonthBegin': MonthBegin(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'Week': Week(1)} - pickle_path = os.path.join(tm.get_data_path(), - 'dateoffset_0_15_2.pickle') + + pickle_path = datapath('tseries', 'offsets', 'data', + 'dateoffset_0_15_2.pickle') # This code was executed once on v0.15.2 to generate the pickle: # with open(pickle_path, 'wb') as f: pickle.dump(offsets, f) # @@ -1848,12 +1849,11 @@ def _check_roundtrip(obj): _check_roundtrip(self.offset2) _check_roundtrip(self.offset * 2) - def test_pickle_compat_0_14_1(self): + def test_pickle_compat_0_14_1(self, datapath): + # /Users/taugspurger/sandbox/pandas-ip/pandas/pandas/tests/tseries/offsets/test_offsets.py hdays = [datetime(2013, 1, 1) for ele in range(4)] - - pth = tm.get_data_path() - - cday0_14_1 = read_pickle(os.path.join(pth, 'cday-0.14.1.pickle')) + pth = datapath('tseries', 'offsets', 'data', 'cday-0.14.1.pickle') + cday0_14_1 = read_pickle(pth) cday = CDay(holidays=hdays) assert cday == cday0_14_1 From 31fb0b634b14e204fe1db87f7ee51065664692f4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 06:05:36 -0500 Subject: [PATCH 08/31] Some more --- pandas/tests/indexes/test_multi.py | 8 ++-- pandas/tests/io/formats/test_format.py | 4 +- pandas/tests/io/json/test_compression.py | 6 +-- pandas/tests/io/json/test_pandas.py | 8 ++-- pandas/tests/io/parser/common.py | 23 +++++----- pandas/tests/io/parser/compression.py | 4 +- pandas/tests/io/parser/dtypes.py | 6 +-- pandas/tests/io/parser/test_network.py | 53 +++++++++++------------ pandas/tests/io/parser/test_parsers.py | 6 ++- pandas/tests/io/parser/test_textreader.py | 5 ++- pandas/tests/io/sas/test_sas7bdat.py | 43 ++++++++---------- pandas/tests/io/sas/test_xport.py | 6 ++- pandas/tests/io/test_excel.py | 8 ++-- pandas/tests/io/test_packers.py | 3 +- pandas/tests/io/test_stata.py | 9 ++-- 15 files changed, 94 insertions(+), 98 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 34abf7052da8c..fa450a9262a82 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1141,12 +1141,12 @@ def test_iter(self): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] assert result == expected - def test_legacy_pickle(self): + def test_legacy_pickle(self, datapath): if PY3: pytest.skip("testing for legacy pickles not " "support on py3") - path = tm.get_data_path('multiindex_v1.pickle') + path = datapath('indexes', 'data', 'multiindex_v1.pickle') obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) @@ -1162,10 +1162,10 @@ def test_legacy_pickle(self): assert_almost_equal(res, exp) assert_almost_equal(exp, exp2) - def test_legacy_v2_unpickle(self): + def test_legacy_v2_unpickle(self, datapath): # 0.7.3 -> 0.8.0 format manage - path = tm.get_data_path('mindex_073.pickle') + path = datapath('indexes', 'data', 'mindex_073.pickle') obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 6c3b75cdfa6df..b3a04eeba0161 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -916,8 +916,8 @@ def test_unicode_problem_decoding_as_ascii(self): dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})}) compat.text_type(dm.to_string()) - def test_string_repr_encoding(self): - filepath = tm.get_data_path('unicode_series.csv') + def test_string_repr_encoding(self, datapath): + filepath = datapath('io', 'formats', 'data', 'unicode_series.csv') df = pd.read_csv(filepath, header=None, encoding='latin1') repr(df) repr(df[1]) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index c9074ca49e5be..05ceace20f5a4 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -21,11 +21,11 @@ def test_compression_roundtrip(compression): assert_frame_equal(df, pd.read_json(result)) -def test_read_zipped_json(): - uncompressed_path = tm.get_data_path("tsframe_v012.json") +def test_read_zipped_json(datapath): + uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json") uncompressed_df = pd.read_json(uncompressed_path) - compressed_path = tm.get_data_path("tsframe_v012.json.zip") + compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip") compressed_df = pd.read_json(compressed_path, compression='zip') assert_frame_equal(uncompressed_df, compressed_df) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7e497c395266f..bcbac4400c953 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -37,8 +37,9 @@ class TestPandasContainer(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(scope="function", autouse=True) + def setup(self, datapath): + self.dirpath = datapath("io", "json", "data") self.ts = tm.makeTimeSeries() self.ts.name = 'ts' @@ -59,7 +60,8 @@ def setup_method(self, method): self.mixed_frame = _mixed_frame.copy() self.categorical = _cat_frame.copy() - def teardown_method(self, method): + yield + del self.dirpath del self.ts diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index cf7ec9e2f2652..2c363f5f4adff 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -635,21 +635,19 @@ def test_read_csv_parse_simple_list(self): tm.assert_frame_equal(df, expected) @tm.network - def test_url(self): + def test_url(self, datapath): # HTTP(S) url = ('https://raw.github.com/pandas-dev/pandas/master/' 'pandas/tests/io/parser/data/salaries.csv') url_table = self.read_table(url) - dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.csv') + localtable = datapath('io', 'parser', 'data', 'salaries.csv') local_table = self.read_table(localtable) tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing @pytest.mark.slow - def test_file(self): - dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.csv') + def test_file(self, datapath): + localtable = datapath('io', 'parser', 'data', 'salaries.csv') local_table = self.read_table(localtable) try: @@ -739,8 +737,8 @@ def test_utf16_bom_skiprows(self): tm.assert_frame_equal(result, expected) - def test_utf16_example(self): - path = tm.get_data_path('utf16_ex.txt') + def test_utf16_example(self, datapath): + path = datapath('io', 'parser', 'data', 'utf16_ex.txt') # it works! and is the right length result = self.read_table(path, encoding='utf-16') @@ -751,8 +749,8 @@ def test_utf16_example(self): result = self.read_table(buf, encoding='utf-16') assert len(result) == 50 - def test_unicode_encoding(self): - pth = tm.get_data_path('unicode_series.csv') + def test_unicode_encoding(self, datapath): + pth = datapath('io', 'parser', 'data', 'unicode_series.csv') result = self.read_csv(pth, header=None, encoding='latin-1') result = result.set_index(0) @@ -1499,10 +1497,9 @@ def test_internal_eof_byte_to_file(self): result = self.read_csv(path) tm.assert_frame_equal(result, expected) - def test_sub_character(self): + def test_sub_character(self, datapath): # see gh-16893 - dirpath = tm.get_data_path() - filename = os.path.join(dirpath, "sub_char.csv") + filename = datapath('io', 'parser', 'data', 'sub_char.csv') expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"]) result = self.read_csv(filename) diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py index 01c6620e50d37..48b2cedb63811 100644 --- a/pandas/tests/io/parser/compression.py +++ b/pandas/tests/io/parser/compression.py @@ -121,9 +121,9 @@ def test_read_csv_infer_compression(self): inputs[3].close() - def test_read_csv_compressed_utf16_example(self): + def test_read_csv_compressed_utf16_example(self, datapath): # GH18071 - path = tm.get_data_path('utf16_ex_small.zip') + path = datapath('io', 'parser', 'data', 'utf16_ex_small.zip') result = self.read_csv(path, encoding='utf-16', compression='zip', sep='\t') diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py index b91ce04673e29..8060ebf2fbcd4 100644 --- a/pandas/tests/io/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -125,9 +125,9 @@ def test_categorical_dtype_high_cardinality_numeric(self): np.sort(actual.a.cat.categories), ordered=True) tm.assert_frame_equal(actual, expected) - def test_categorical_dtype_encoding(self): + def test_categorical_dtype_encoding(self, datapath): # GH 10153 - pth = tm.get_data_path('unicode_series.csv') + pth = datapath('io', 'parser', 'data', 'unicode_series.csv') encoding = 'latin-1' expected = self.read_csv(pth, header=None, encoding=encoding) expected[1] = Categorical(expected[1]) @@ -135,7 +135,7 @@ def test_categorical_dtype_encoding(self): dtype={1: 'category'}) tm.assert_frame_equal(actual, expected) - pth = tm.get_data_path('utf16_ex.txt') + pth = datapath('io', 'parser', 'data', 'utf16_ex.txt') encoding = 'utf-16' expected = self.read_table(pth, encoding=encoding) expected = expected.apply(Categorical) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index fdf45f307e953..e2243b8087a5b 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -48,10 +48,16 @@ def check_compressed_urls(salaries_table, compression, extension, mode, tm.assert_frame_equal(url_table, salaries_table) +@pytest.fixture +def tips_df(datapath): + """DataFrame with the tips dataset.""" + return read_csv(datapath('io', 'parser', 'data', 'tips.csv')) + + @pytest.mark.usefixtures("s3_resource") class TestS3(object): - def test_parse_public_s3_bucket(self): + def test_parse_public_s3_bucket(self, tips_df): pytest.importorskip('s3fs') # more of an integration test due to the not-public contents portion # can probably mock this though. @@ -60,45 +66,40 @@ def test_parse_public_s3_bucket(self): ext, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) # Read public file from bucket with not-public contents df = read_csv('s3://cant_get_it/tips.csv') assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_parse_public_s3n_bucket(self): + def test_parse_public_s3n_bucket(self, tips_df): # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3a_bucket(self): + def test_parse_public_s3a_bucket(self, tips_df): # Read from AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3_bucket_nrows(self): + def test_parse_public_s3_bucket_nrows(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3_bucket_chunked(self): + def test_parse_public_s3_bucket_chunked(self, tips_df): # Read with a chunksize chunksize = 5 - local_tips = read_csv(tm.get_data_path('tips.csv')) for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df_reader = read_csv('s3://pandas-test/tips.csv' + ext, chunksize=chunksize, compression=comp) @@ -109,14 +110,13 @@ def test_parse_public_s3_bucket_chunked(self): df = df_reader.get_chunk() assert isinstance(df, DataFrame) assert not df.empty - true_df = local_tips.iloc[ + true_df = tips_df.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - def test_parse_public_s3_bucket_chunked_python(self): + def test_parse_public_s3_bucket_chunked_python(self, tips_df): # Read with a chunksize using the Python parser chunksize = 5 - local_tips = read_csv(tm.get_data_path('tips.csv')) for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df_reader = read_csv('s3://pandas-test/tips.csv' + ext, chunksize=chunksize, compression=comp, @@ -127,36 +127,33 @@ def test_parse_public_s3_bucket_chunked_python(self): df = df_reader.get_chunk() assert isinstance(df, DataFrame) assert not df.empty - true_df = local_tips.iloc[ + true_df = tips_df.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - def test_parse_public_s3_bucket_python(self): + def test_parse_public_s3_bucket_python(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_infer_s3_compression(self): + def test_infer_s3_compression(self, tips_df): for ext in ['', '.gz', '.bz2']: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_parse_public_s3_bucket_nrows_python(self): + def test_parse_public_s3_bucket_nrows_python(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) def test_s3_fails(self): with pytest.raises(IOError): diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 7717102b64fc5..b6f13039641a2 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import os +import pytest import pandas.util.testing as tm from pandas import read_csv, read_table, DataFrame @@ -45,8 +46,9 @@ def read_table(self, *args, **kwargs): def float_precision_choices(self): raise com.AbstractMethodError(self) - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath('io', 'parser', 'data') self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') self.xls1 = os.path.join(self.dirpath, 'test.xls') diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index ab4c14034cd20..c1e0f1dc753e8 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -28,8 +28,9 @@ class TestTextReader(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath('io', 'parser', 'data') self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') self.xls1 = os.path.join(self.dirpath, 'test.xls') diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 5da347e47957c..ae40653c28f99 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -11,8 +11,9 @@ class TestSAS7BDAT(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") self.data = [] self.test_ix = [list(range(1, 16)), [16]] for j in 1, 2: @@ -123,9 +124,8 @@ def test_iterator_read_too_much(self): rdr.close() -def test_encoding_options(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "test1.sas7bdat") +def test_encoding_options(datapath): + fname = datapath("io", "sas", "data", "test1.sas7bdat") df1 = pd.read_sas(fname) df2 = pd.read_sas(fname, encoding='utf-8') for col in df1.columns: @@ -143,51 +143,46 @@ def test_encoding_options(): assert(x == y.decode()) -def test_productsales(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "productsales.sas7bdat") +def test_productsales(datapath): + fname = datapath("io", "sas", "data", "productsales.sas7bdat") df = pd.read_sas(fname, encoding='utf-8') - fname = os.path.join(dirpath, "productsales.csv") + fname = datapath("io", "sas", "data", "productsales.csv") df0 = pd.read_csv(fname, parse_dates=['MONTH']) vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"] df0[vn] = df0[vn].astype(np.float64) tm.assert_frame_equal(df, df0) -def test_12659(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "test_12659.sas7bdat") +def test_12659(datapath): + fname = datapath("io", "sas", "data", "test_12659.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "test_12659.csv") + fname = datapath("io", "sas", "data", "test_12659.csv") df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) tm.assert_frame_equal(df, df0) -def test_airline(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "airline.sas7bdat") +def test_airline(datapath): + fname = datapath("io", "sas", "data", "airline.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "airline.csv") + fname = datapath("io", "sas", "data", "airline.csv") df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) tm.assert_frame_equal(df, df0, check_exact=False) -def test_date_time(): +def test_date_time(datapath): # Support of different SAS date/datetime formats (PR #15871) - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "datetime.sas7bdat") + fname = datapath("io", "sas", "data", "datetime.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "datetime.csv") + fname = datapath("io", "sas", "data", "datetime.csv") df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime', 'DateTimeHi', 'Taiw']) tm.assert_frame_equal(df, df0) -def test_zero_variables(): +def test_zero_variables(datapath): # Check if the SAS file has zero variables (PR #18184) - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "zero_variables.sas7bdat") + fname = datapath("io", "sas", "data", "zero_variables.sas7bdat") with pytest.raises(EmptyDataError): pd.read_sas(fname) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index de31c3e36a8d5..6e5b2ab067aa5 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -1,3 +1,4 @@ +import pytest import pandas as pd import pandas.util.testing as tm from pandas.io.sas.sasreader import read_sas @@ -18,8 +19,9 @@ def numeric_as_float(data): class TestXport(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 6b39717213c0d..5fded9712a52a 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -40,8 +40,9 @@ @td.skip_if_no('xlrd', '0.9') class SharedItems(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "data") self.frame = _frame.copy() self.frame2 = _frame2.copy() self.tsframe = _tsframe.copy() @@ -50,7 +51,6 @@ def setup_method(self, method): def get_csv_refdf(self, basename): """ Obtain the reference data from read_csv with the Python engine. - Test data path is defined by pandas.util.testing.get_data_path() Parameters ---------- @@ -70,7 +70,6 @@ def get_csv_refdf(self, basename): def get_excelfile(self, basename, ext): """ Return test data ExcelFile instance. Test data path is defined by - pandas.util.testing.get_data_path() Parameters ---------- @@ -88,7 +87,6 @@ def get_excelfile(self, basename, ext): def get_exceldf(self, basename, ext, *args, **kwds): """ Return test data DataFrame. Test data path is defined by - pandas.util.testing.get_data_path() Parameters ---------- diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 3f3c0bb3b2a94..6e203787aec89 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -843,7 +843,8 @@ def test_default_encoding(self): assert_frame_equal(result, frame) -def legacy_packers_versions(): +@pytest.fixture +def legacy_packers_versions(datapath): # yield the packers versions path = tm.get_data_path('legacy_msgpack') if not os.path.exists(path): diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 49ad07b79d111..a4a1460026ee6 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -25,8 +25,8 @@ @pytest.fixture -def dirpath(): - return tm.get_data_path() +def dirpath(datapath): + return datapath("io", "data") @pytest.fixture @@ -39,8 +39,9 @@ def parsed_114(dirpath): class TestStata(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture + def setup_method(self, datapath): + self.dirpath = datapath("io", "data") self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta') self.dta1_117 = os.path.join(self.dirpath, 'stata1_117.dta') From e897f11e66c7e1258eafa10bde11b162c5673e90 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 08:47:51 -0500 Subject: [PATCH 09/31] Updated packers --- pandas/tests/io/test_packers.py | 53 ++++++++++++++------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 6e203787aec89..b5752f13cd62c 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -3,6 +3,7 @@ from warnings import catch_warnings import os import datetime +import glob import numpy as np import sys from distutils.version import LooseVersion @@ -843,16 +844,13 @@ def test_default_encoding(self): assert_frame_equal(result, frame) -@pytest.fixture -def legacy_packers_versions(datapath): - # yield the packers versions - path = tm.get_data_path('legacy_msgpack') - if not os.path.exists(path): - raise pytest.skip("Data file {} does not exist.".format(path)) - for v in os.listdir(path): - p = os.path.join(path, v) - if os.path.isdir(p): - yield v +files = glob.glob(os.path.join(os.path.dirname(__file__), "data", + "legacy_msgpack", "*", "*.msgpack")) + + +@pytest.fixture(params=files) +def legacy_packer(request, datapath): + return datapath(request.param) class TestMsgpack(object): @@ -929,24 +927,19 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): else: tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize('version', legacy_packers_versions()) def test_msgpacks_legacy(self, current_packers_data, all_packers_data, - version): - - pth = tm.get_data_path('legacy_msgpack/{0}'.format(version)) - n = 0 - for f in os.listdir(pth): - # GH12142 0.17 files packed in P2 can't be read in P3 - if (compat.PY3 and version.startswith('0.17.') and - f.split('.')[-4][-1] == '2'): - continue - vf = os.path.join(pth, f) - try: - with catch_warnings(record=True): - self.compare(current_packers_data, all_packers_data, - vf, version) - except ImportError: - # blosc not installed - continue - n += 1 - assert n > 0, 'Msgpack files are not tested' + legacy_packer, datapath): + + version = os.path.basename(os.path.dirname(legacy_packer)) + + # GH12142 0.17 files packed in P2 can't be read in P3 + if (compat.PY3 and version.startswith('0.17.') and + legacy_packer.split('.')[-4][-1] == '2'): + pytest.skip("Files packed in Py2 can't be read in Py3.") + try: + with catch_warnings(record=True): + self.compare(current_packers_data, all_packers_data, + legacy_packer, version) + except ImportError: + # blosc not installed + pass From 9cf30fd6d8c51a7aa8353a6e7449cb480c06288d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 08:56:14 -0500 Subject: [PATCH 10/31] Pickle --- pandas/tests/io/test_pickle.py | 40 +++++++++++++++------------------- pandas/util/testing.py | 15 ------------- 2 files changed, 17 insertions(+), 38 deletions(-) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 702ba11c6df13..aed444b1f07e2 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -12,7 +12,7 @@ 3. Move the created pickle to "data/legacy_pickle/" directory. """ - +import glob import pytest from warnings import catch_warnings @@ -185,29 +185,25 @@ def compare_sp_frame_float(result, expected, typ, version): tm.assert_sp_frame_equal(result, expected) +files = glob.glob(os.path.join(os.path.dirname(__file__), "data", + "legacy_pickle", "*", "*.pickle")) + + +@pytest.fixture(params=files) +def legacy_pickle(request, datapath): + return datapath(request.param) + + # --------------------- # tests # --------------------- -def legacy_pickle_versions(): - # yield the pickle versions - path = tm.get_data_path('legacy_pickle') - if not os.path.exists(path): - raise pytest.skip("Data path {} does not exists.".format(path)) - for v in os.listdir(path): - p = os.path.join(path, v) - if os.path.isdir(p): - for f in os.listdir(p): - yield (v, f) - - -@pytest.mark.parametrize('version, f', legacy_pickle_versions()) -def test_pickles(current_pickle_data, version, f): +def test_pickles(current_pickle_data, legacy_pickle): if not is_platform_little_endian(): pytest.skip("known failure on non-little endian") - vf = tm.get_data_path('legacy_pickle/{}/{}'.format(version, f)) + version = os.path.basename(os.path.dirname(legacy_pickle)) with catch_warnings(record=True): - compare(current_pickle_data, vf, version) + compare(current_pickle_data, legacy_pickle, version) def test_round_trip_current(current_pickle_data): @@ -263,12 +259,11 @@ def python_unpickler(path): compare_element(result, expected, typ) -def test_pickle_v0_14_1(): +def test_pickle_v0_14_1(datapath): cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, categories=['a', 'b', 'c', 'd']) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_14_1.pickle') + pickle_path = datapath('io', 'data', 'categorical_0_14_1.pickle') # This code was executed once on v0.14.1 to generate the pickle: # # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], @@ -278,14 +273,13 @@ def test_pickle_v0_14_1(): tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) -def test_pickle_v0_15_2(): +def test_pickle_v0_15_2(datapath): # ordered -> _ordered # GH 9347 cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, categories=['a', 'b', 'c', 'd']) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_15_2.pickle') + pickle_path = datapath('io', 'data', 'categorical_0_15_2.pickle') # This code was executed once on v0.15.2 to generate the pickle: # # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], diff --git a/pandas/util/testing.py b/pandas/util/testing.py index ec8a6cf76cb4d..684de95350580 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -729,21 +729,6 @@ def ensure_clean(filename=None, return_filelike=False): print("Exception on removing file: {error}".format(error=e)) -def get_data_path(f=''): - """Return the path of a data file, these are relative to the current test - directory. - """ - import pytest - - # get our callers file - _, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1] - base_dir = os.path.abspath(os.path.dirname(filename)) - path = os.path.join(base_dir, 'data', f) - if not os.path.exists(path): - pytest.skip("Data files not included in pandas distribution.") - - return path - # ----------------------------------------------------------------------------- # Comparators From 95cde7af6b22f8f4405a5e56de4693f28e4984dc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 09:00:38 -0500 Subject: [PATCH 11/31] Linting --- pandas/tests/io/test_sql.py | 1 - pandas/tests/tseries/offsets/test_offsets.py | 1 - pandas/util/testing.py | 1 - 3 files changed, 3 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index ca9b1749ecf8a..6a87d0dc952df 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -22,7 +22,6 @@ import pytest import sqlite3 import csv -import os import warnings import numpy as np diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 03e2b42022967..fcb48d5cf801f 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1,4 +1,3 @@ -import os from distutils.version import LooseVersion from datetime import date, datetime, timedelta diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 684de95350580..b8accd424edcf 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -6,7 +6,6 @@ import sys import tempfile import warnings -import inspect import os import subprocess import locale From 10ddddcd718d7e77b9ca6b2295fa3aee5eea1f32 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 09:05:41 -0500 Subject: [PATCH 12/31] Autouse stata --- pandas/tests/io/test_stata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index a4a1460026ee6..181ed1063f735 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -39,7 +39,7 @@ def parsed_114(dirpath): class TestStata(object): - @pytest.fixture + @pytest.fixture(autouse=True) def setup_method(self, datapath): self.dirpath = datapath("io", "data") self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta') From e1ea208b3b48fad6b3a2c4ee7e2116b3aa3a3c0b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 09:10:12 -0500 Subject: [PATCH 13/31] Remove filename --- pandas/tests/tseries/offsets/test_offsets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index fcb48d5cf801f..0ec95b56aacab 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1849,7 +1849,6 @@ def _check_roundtrip(obj): _check_roundtrip(self.offset * 2) def test_pickle_compat_0_14_1(self, datapath): - # /Users/taugspurger/sandbox/pandas-ip/pandas/pandas/tests/tseries/offsets/test_offsets.py hdays = [datetime(2013, 1, 1) for ele in range(4)] pth = datapath('tseries', 'offsets', 'data', 'cday-0.14.1.pickle') cday0_14_1 = read_pickle(pth) From 861687879e3362c15e10da4b262976610b061625 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 09:13:33 -0500 Subject: [PATCH 14/31] Autouse in merge_asof --- pandas/tests/reshape/merge/test_merge_asof.py | 246 ++++++++---------- 1 file changed, 111 insertions(+), 135 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index bcb27e49f28fe..59b53cd23010e 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -10,67 +10,30 @@ from pandas.util.testing import assert_frame_equal -def read_data(path, dedupe=False): - x = read_csv(path) - if dedupe: - x = (x.drop_duplicates(['time', 'ticker'], keep='last') - .reset_index(drop=True)) - x.time = to_datetime(x.time) - return x - - -@pytest.fixture -def trades(datapath): - return read_data(datapath('reshape', 'merge', 'data', 'trades.csv')) - - -@pytest.fixture -def trades2(datapath): - return read_data(datapath('reshape', 'merge', 'data', 'trades2.csv')) - - -@pytest.fixture -def quotes(datapath): - return read_data(datapath('reshape', 'merge', 'data', 'quotes.csv'), - dedupe=True) - - -@pytest.fixture -def quotes2(datapath): - return read_data(datapath('reshape', 'merge', 'data', 'quotes2.csv'), - dedupe=True) - - -@pytest.fixture -def asof(datapath): - return read_data(datapath('reshape', 'merge', 'data', 'asof.csv')) - - -@pytest.fixture -def asof2(datapath): - return read_data(datapath('reshape', 'merge', 'data', 'asof2.csv')) - - -@pytest.fixture -def tolerance(datapath): - return read_data(datapath('reshape', 'merge', 'data', 'tolerance.csv')) - - -@pytest.fixture -def allow_exact_matches(datapath): - return read_data(datapath('reshape', 'merge', 'data', - 'allow_exact_matches.csv')) - - -@pytest.fixture -def allow_exact_matches_and_tolerance(datapath): - return read_data(datapath( - 'reshape', 'merge', 'data', 'allow_exact_matches_and_tolerance.csv' - )) - - class TestAsOfMerge(object): + def read_data(self, datapath, name, dedupe=False): + path = datapath('reshape', 'merge', 'data', name) + x = read_csv(path) + if dedupe: + x = (x.drop_duplicates(['time', 'ticker'], keep='last') + .reset_index(drop=True) + ) + x.time = to_datetime(x.time) + return x + + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + + self.trades = self.read_data(datapath, 'trades.csv') + self.quotes = self.read_data(datapath, 'quotes.csv', dedupe=True) + self.asof = self.read_data(datapath, 'asof.csv') + self.tolerance = self.read_data(datapath, 'tolerance.csv') + self.allow_exact_matches = self.read_data(datapath, + 'allow_exact_matches.csv') + self.allow_exact_matches_and_tolerance = self.read_data( + datapath, 'allow_exact_matches_and_tolerance.csv') + def test_examples1(self): """ doc-string examples """ @@ -187,20 +150,23 @@ def test_examples4(self): result = pd.merge_asof(left, right, on='a', direction='nearest') assert_frame_equal(result, expected) - def test_basic(self, trades, quotes, asof): + def test_basic(self): + + expected = self.asof + trades = self.trades + quotes = self.quotes - expected = asof result = merge_asof(trades, quotes, on='time', by='ticker') assert_frame_equal(result, expected) - def test_basic_categorical(self, asof, trades, quotes): + def test_basic_categorical(self): - expected = asof - trades = trades.copy() + expected = self.asof + trades = self.trades.copy() trades.ticker = trades.ticker.astype('category') - quotes = quotes.copy() + quotes = self.quotes.copy() quotes.ticker = quotes.ticker.astype('category') expected.ticker = expected.ticker.astype('category') @@ -209,12 +175,12 @@ def test_basic_categorical(self, asof, trades, quotes): by='ticker') assert_frame_equal(result, expected) - def test_basic_left_index(self, trades, quotes, asof): + def test_basic_left_index(self): # GH14253 - expected = asof - trades = trades.set_index('time') - quotes = quotes + expected = self.asof + trades = self.trades.set_index('time') + quotes = self.quotes result = merge_asof(trades, quotes, left_index=True, @@ -226,11 +192,11 @@ def test_basic_left_index(self, trades, quotes, asof): expected = expected[result.columns] assert_frame_equal(result, expected) - def test_basic_right_index(self, trades, quotes, asof): + def test_basic_right_index(self): - expected = asof - trades = trades - quotes = quotes.set_index('time') + expected = self.asof + trades = self.trades + quotes = self.quotes.set_index('time') result = merge_asof(trades, quotes, left_on='time', @@ -238,11 +204,11 @@ def test_basic_right_index(self, trades, quotes, asof): by='ticker') assert_frame_equal(result, expected) - def test_basic_left_index_right_index(self, trades, quotes, asof): + def test_basic_left_index_right_index(self): - expected = asof.set_index('time') - trades = trades.set_index('time') - quotes = quotes.set_index('time') + expected = self.asof.set_index('time') + trades = self.trades.set_index('time') + quotes = self.quotes.set_index('time') result = merge_asof(trades, quotes, left_index=True, @@ -250,48 +216,48 @@ def test_basic_left_index_right_index(self, trades, quotes, asof): by='ticker') assert_frame_equal(result, expected) - def test_multi_index(self, trades, quotes): + def test_multi_index(self): # MultiIndex is prohibited - trades = trades.set_index(['time', 'price']) - quotes = quotes.set_index('time') + trades = self.trades.set_index(['time', 'price']) + quotes = self.quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True) - def test_multi_index2(self, trades, quotes): - trades = trades.set_index('time') - quotes = quotes.set_index(['time', 'bid']) + trades = self.trades.set_index('time') + quotes = self.quotes.set_index(['time', 'bid']) with pytest.raises(MergeError): merge_asof(trades, quotes, left_index=True, right_index=True) - def test_on_and_index_on_price(self, trades, quotes): + def test_on_and_index(self): # 'on' parameter and index together is prohibited - trades = trades.set_index('time') - quotes = quotes.set_index('time') + trades = self.trades.set_index('time') + quotes = self.quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, left_on='price', left_index=True, right_index=True) - def test_on_and_index_on_bid(self, trades, quotes): - trades = trades.set_index('time') - quotes = quotes.set_index('time') + trades = self.trades.set_index('time') + quotes = self.quotes.set_index('time') with pytest.raises(MergeError): merge_asof(trades, quotes, right_on='bid', left_index=True, right_index=True) - def test_basic_left_by_right_by(self, trades, quotes, asof): + def test_basic_left_by_right_by(self): # GH14253 - expected = asof + expected = self.asof + trades = self.trades + quotes = self.quotes result = merge_asof(trades, quotes, on='time', @@ -299,11 +265,11 @@ def test_basic_left_by_right_by(self, trades, quotes, asof): right_by='ticker') assert_frame_equal(result, expected) - def test_missing_right_by(self, trades, quotes, asof): + def test_missing_right_by(self): - expected = asof - trades = trades - quotes = quotes + expected = self.asof + trades = self.trades + quotes = self.quotes q = quotes[quotes.ticker != 'MSFT'] result = merge_asof(trades, q, @@ -312,7 +278,7 @@ def test_missing_right_by(self, trades, quotes, asof): expected.loc[expected.ticker == 'MSFT', ['bid', 'ask']] = np.nan assert_frame_equal(result, expected) - def test_multiby(self, trades, quotes): + def test_multiby(self): # GH13936 trades = pd.DataFrame({ 'time': pd.to_datetime(['20160525 13:30:00.023', @@ -369,7 +335,7 @@ def test_multiby(self, trades, quotes): by=['ticker', 'exch']) assert_frame_equal(result, expected) - def test_multiby_heterogeneous_types(self, trades, quotes): + def test_multiby_heterogeneous_types(self): # GH13936 trades = pd.DataFrame({ 'time': pd.to_datetime(['20160525 13:30:00.023', @@ -458,31 +424,34 @@ def test_multiby_indexed(self): pd.merge_asof(left, right, left_index=True, right_index=True, left_by=['k1', 'k2'], right_by=['k1']) - def test_basic2(self, asof2, trades2, quotes2): + def test_basic2(self, datapath): + + expected = self.read_data(datapath, 'asof2.csv') + trades = self.read_data(datapath, 'trades2.csv') + quotes = self.read_data(datapath, 'quotes2.csv', dedupe=True) - expected = asof2 - result = merge_asof(trades2, quotes2, + result = merge_asof(trades, quotes, on='time', by='ticker') assert_frame_equal(result, expected) - def test_basic_no_by(self, asof, trades, quotes): + def test_basic_no_by(self): f = lambda x: x[x.ticker == 'MSFT'].drop('ticker', axis=1) \ .reset_index(drop=True) # just use a single ticker - expected = f(asof) - trades = f(trades) - quotes = f(quotes) + expected = f(self.asof) + trades = f(self.trades) + quotes = f(self.quotes) result = merge_asof(trades, quotes, on='time') assert_frame_equal(result, expected) - def test_valid_join_keys(self, trades, quotes): + def test_valid_join_keys(self): - trades = trades - quotes = quotes + trades = self.trades + quotes = self.quotes with pytest.raises(MergeError): merge_asof(trades, quotes, @@ -499,14 +468,14 @@ def test_valid_join_keys(self, trades, quotes): merge_asof(trades, quotes, by='ticker') - def test_with_duplicates(self, asof, trades, quotes): + def test_with_duplicates(self, datapath): - q = pd.concat([quotes, quotes]).sort_values( + q = pd.concat([self.quotes, self.quotes]).sort_values( ['time', 'ticker']).reset_index(drop=True) - result = merge_asof(trades, q, + result = merge_asof(self.trades, q, on='time', by='ticker') - expected = asof + expected = self.read_data(datapath, 'asof.csv') assert_frame_equal(result, expected) def test_with_duplicates_no_on(self): @@ -521,14 +490,22 @@ def test_with_duplicates_no_on(self): 'right_val': [1, 1, 3]}) assert_frame_equal(result, expected) - def test_valid_allow_exact_matches(self, trades, quotes): + def test_valid_allow_exact_matches(self): + + trades = self.trades + quotes = self.quotes + with pytest.raises(MergeError): merge_asof(trades, quotes, on='time', by='ticker', allow_exact_matches='foo') - def test_valid_tolerance(self, trades, quotes): + def test_valid_tolerance(self): + + trades = self.trades + quotes = self.quotes + # dti merge_asof(trades, quotes, on='time', @@ -568,10 +545,10 @@ def test_valid_tolerance(self, trades, quotes): by='ticker', tolerance=-1) - def test_non_sorted(self, trades, quotes): + def test_non_sorted(self): - trades = trades.sort_values('time', ascending=False) - quotes = quotes.sort_values('time', ascending=False) + trades = self.trades.sort_values('time', ascending=False) + quotes = self.quotes.sort_values('time', ascending=False) # we require that we are already sorted on time & quotes assert not trades.time.is_monotonic @@ -581,7 +558,7 @@ def test_non_sorted(self, trades, quotes): on='time', by='ticker') - trades = trades.sort_values('time') + trades = self.trades.sort_values('time') assert trades.time.is_monotonic assert not quotes.time.is_monotonic with pytest.raises(ValueError): @@ -589,25 +566,26 @@ def test_non_sorted(self, trades, quotes): on='time', by='ticker') - quotes = quotes.sort_values('time') + quotes = self.quotes.sort_values('time') assert trades.time.is_monotonic assert quotes.time.is_monotonic # ok, though has dupes - merge_asof(trades, quotes, + merge_asof(trades, self.quotes, on='time', by='ticker') - def test_tolerance(self, trades, quotes, tolerance): + def test_tolerance(self): - trades = trades - quotes = quotes + trades = self.trades + quotes = self.quotes result = merge_asof(trades, quotes, on='time', by='ticker', tolerance=Timedelta('1day')) - assert_frame_equal(result, tolerance) + expected = self.tolerance + assert_frame_equal(result, expected) def test_tolerance_forward(self): # GH14887 @@ -664,11 +642,11 @@ def test_tolerance_tz(self): 'value2': list("BCDEE")}) assert_frame_equal(result, expected) - def test_index_tolerance(self, trades, quotes, tolerance): + def test_index_tolerance(self): # GH 15135 - expected = tolerance.set_index('time') - trades = trades.set_index('time') - quotes = quotes.set_index('time') + expected = self.tolerance.set_index('time') + trades = self.trades.set_index('time') + quotes = self.quotes.set_index('time') result = pd.merge_asof(trades, quotes, left_index=True, @@ -677,13 +655,13 @@ def test_index_tolerance(self, trades, quotes, tolerance): tolerance=pd.Timedelta('1day')) assert_frame_equal(result, expected) - def test_allow_exact_matches(self, trades, quotes, allow_exact_matches): + def test_allow_exact_matches(self): - result = merge_asof(trades, quotes, + result = merge_asof(self.trades, self.quotes, on='time', by='ticker', allow_exact_matches=False) - expected = allow_exact_matches + expected = self.allow_exact_matches assert_frame_equal(result, expected) def test_allow_exact_matches_forward(self): @@ -718,16 +696,14 @@ def test_allow_exact_matches_nearest(self): allow_exact_matches=False) assert_frame_equal(result, expected) - def test_allow_exact_matches_and_tolerance( - self, trades, quotes, - allow_exact_matches_and_tolerance): + def test_allow_exact_matches_and_tolerance(self): - result = merge_asof(trades, quotes, + result = merge_asof(self.trades, self.quotes, on='time', by='ticker', tolerance=Timedelta('100ms'), allow_exact_matches=False) - expected = allow_exact_matches_and_tolerance + expected = self.allow_exact_matches_and_tolerance assert_frame_equal(result, expected) def test_allow_exact_matches_and_tolerance2(self): From 77bf77c3475c23e42be34eef4926cc51412d7974 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 09:16:00 -0500 Subject: [PATCH 15/31] Cleanup plotting --- pandas/tests/plotting/common.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 7657130044e70..70c54cefa3833 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -45,7 +45,8 @@ def _ok_for_gaussian_kde(kind): @td.skip_if_no_mpl class TestPlotBase(object): - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): import matplotlib as mpl mpl.rcdefaults() @@ -76,11 +77,7 @@ def setup_method(self, method): self.default_tick_position = 'left' if self.mpl_ge_2_0_0 else 'default' # common test data from pandas import read_csv - base = os.path.join(os.path.dirname(curpath()), os.pardir) - path = os.path.join(base, 'tests', 'data', 'iris.csv') - - if not os.path.exists(path): - pytest.skip("Data files not included in pandas distribution.") + path = datapath('data', 'iris.csv') self.iris = read_csv(path) From 156e14bf1d28452af49d6eb7fc1721a938fb078a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 09:26:30 -0500 Subject: [PATCH 16/31] CLN: Simplify fspath --- pandas/tests/conftest.py | 7 ++++--- pandas/tests/io/test_common.py | 31 +++++++++++++------------------ 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py index 2c246fb9d371d..cdc1248834681 100644 --- a/pandas/tests/conftest.py +++ b/pandas/tests/conftest.py @@ -25,9 +25,10 @@ def deco(*args): path = os.path.join('pandas', 'tests', *args) if not os.path.exists(path): if request.config.getoption("--strict-data-files"): - raise ValueError("Failed.") + msg = "Could not find file {} and --strict-data-files is set." + raise ValueError(msg.format(path)) else: - pytest.skip("{} not included in pandas distribution." - .format(path)) + msg = "Could not find {}." + pytest.skip(msg.format(path)) return path return deco diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 37097033f0c75..5c9739be73393 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -149,27 +149,22 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): reader(path) @pytest.mark.parametrize('reader, module, path', [ - (pd.read_csv, 'os', 'iris.csv'), - (pd.read_table, 'os', 'iris.csv'), - (pd.read_fwf, 'os', 'fixed_width_format.txt'), - (pd.read_excel, 'xlrd', 'test1.xlsx'), - (pd.read_feather, 'feather', 'feather-0_3_1.feather'), - (pd.read_hdf, 'tables', os.path.join('legacy_hdf', - 'datetimetz_object.h5')), - (pd.read_stata, 'os', 'stata10_115.dta'), - # in the function, we go down to tests/io/data - # so step back up a level before going into sas - (pd.read_sas, 'os', os.path.join('..', 'sas', 'data', - 'test1.sas7bdat')), - (pd.read_json, 'os', os.path.join('..', 'json', 'data', - 'tsframe_v012.json')), - (pd.read_msgpack, 'os', os.path.join('..', 'msgpack', 'data', - 'frame.mp')), - (pd.read_pickle, 'os', 'categorical_0_14_1.pickle'), + (pd.read_csv, 'os', ('io', 'data', 'iris.csv')), + (pd.read_table, 'os', ('io', 'data', 'iris.csv')), + (pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')), + (pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')), + (pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')), + (pd.read_hdf, 'tables', ('io', 'data', 'legacy_hdf', + 'datetimetz_object.h5')), + (pd.read_stata, 'os', ('io', 'data', 'stata10_115.dta')), + (pd.read_sas, 'os', ('io', 'sas', 'data', 'test1.sas7bdat')), + (pd.read_json, 'os', ('io', 'json', 'data', 'tsframe_v012.json')), + (pd.read_msgpack, 'os', ('io', 'msgpack', 'data', 'frame.mp')), + (pd.read_pickle, 'os', ('io', 'data', 'categorical_0_14_1.pickle')), ]) def test_read_fspath_all(self, reader, module, path, datapath): pytest.importorskip(module) - path = datapath('io', 'data', path) + path = datapath(*path) mypath = CustomFSPath(path) result = reader(mypath) From f3f3662063241dfcdb7b51459f05b988face58e0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 11:27:57 -0500 Subject: [PATCH 17/31] Refactor sql tests --- pandas/tests/io/test_sql.py | 56 ++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 6a87d0dc952df..2d857bd24cfc3 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -183,9 +183,11 @@ class MixInBase(object): def teardown_method(self, method): - for tbl in self._get_all_tables(): - self.drop_table(tbl) - self._close_conn() + # if setup fails, there may not be a connection to close. + if hasattr(self, 'conn'): + for tbl in self._get_all_tables(): + self.drop_table(tbl) + self._close_conn() class MySQLMixIn(MixInBase): @@ -502,9 +504,10 @@ class _TestSQLApi(PandasSQLTest): flavor = 'sqlite' mode = None - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.conn = self.connect() - self._load_iris_data() + self._load_iris_data(datapath) self._load_iris_view() self._load_test1_data() self._load_test2_data() @@ -1024,8 +1027,9 @@ class _EngineToConnMixin(object): A mixin that causes setup_connect to create a conn rather than an engine. """ - def setup_method(self, method): - super(_EngineToConnMixin, self).setup_method(method) + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + super(_EngineToConnMixin, self).setup_method(datapath) engine = self.conn conn = engine.connect() self.__tx = conn.begin() @@ -1033,12 +1037,14 @@ def setup_method(self, method): self.__engine = engine self.conn = conn - def teardown_method(self, method): + yield + self.__tx.rollback() self.conn.close() self.conn = self.__engine self.pandasSQL = sql.SQLDatabase(self.__engine) - super(_EngineToConnMixin, self).teardown_method(method) + # XXX: + # super(_EngineToConnMixin, self).teardown_method(method) @pytest.mark.single @@ -1135,7 +1141,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): """ flavor = None - @classmethod + @pytest.fixture(autouse=True, scope='class') def setup_class(cls): cls.setup_import() cls.setup_driver() @@ -1148,10 +1154,11 @@ def setup_class(cls): msg = "{0} - can't connect to {1} server".format(cls, cls.flavor) pytest.skip(msg) - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.setup_connect() - self._load_iris_data() + self._load_iris_data(datapath) self._load_raw_sql() self._load_test1_data() @@ -1945,11 +1952,12 @@ class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest): def connect(cls): return sqlite3.connect(':memory:') - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.conn = self.connect() self.pandasSQL = sql.SQLiteDatabase(self.conn) - self._load_iris_data() + self._load_iris_data(datapath) self._load_test1_data() @@ -2160,8 +2168,9 @@ def _skip_if_no_pymysql(): @pytest.mark.single class TestXSQLite(SQLiteMixIn): - def setup_method(self, method): - self.method = method + @pytest.fixture(autouse=True) + def setup_method(self, request, datapath): + self.method = request.function self.conn = sqlite3.connect(':memory:') def test_basic(self): @@ -2241,7 +2250,7 @@ def test_execute_fail(self): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) @tm.capture_stdout - def test_execute_closed_connection(self): + def test_execute_closed_connection(self, request, datapath): create_sql = """ CREATE TABLE test ( @@ -2261,7 +2270,7 @@ def test_execute_closed_connection(self): tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setup_method(self.method) + self.setup_method(request, datapath) def test_na_roundtrip(self): pass @@ -2366,7 +2375,7 @@ def clean_up(test_table_to_drop): "if SQLAlchemy is not installed") class TestXMySQL(MySQLMixIn): - @classmethod + @pytest.fixture(autouse=True, scope='class') def setup_class(cls): _skip_if_no_pymysql() @@ -2395,7 +2404,8 @@ def setup_class(cls): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, request, datapath): _skip_if_no_pymysql() import pymysql try: @@ -2421,7 +2431,7 @@ def setup_method(self, method): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - self.method = method + self.method = request.function def test_basic(self): _skip_if_no_pymysql() @@ -2527,7 +2537,7 @@ def test_execute_fail(self): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) @tm.capture_stdout - def test_execute_closed_connection(self): + def test_execute_closed_connection(self, request, datapath): _skip_if_no_pymysql() drop_sql = "DROP TABLE IF EXISTS test" create_sql = """ @@ -2550,7 +2560,7 @@ def test_execute_closed_connection(self): tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setup_method(self.method) + self.setup_method(request, datapath) def test_na_roundtrip(self): _skip_if_no_pymysql() From aac3606642451f7116797125185e5d5f36a4827f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 12:17:28 -0500 Subject: [PATCH 18/31] Fixed docstrings --- pandas/tests/io/test_excel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 5fded9712a52a..9e6cefe79d5bb 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -69,7 +69,7 @@ def get_csv_refdf(self, basename): def get_excelfile(self, basename, ext): """ - Return test data ExcelFile instance. Test data path is defined by + Return test data ExcelFile instance. Parameters ---------- @@ -86,7 +86,7 @@ def get_excelfile(self, basename, ext): def get_exceldf(self, basename, ext, *args, **kwds): """ - Return test data DataFrame. Test data path is defined by + Return test data DataFrame. Parameters ---------- From 762a2d14d9101441b7cad82f8675b3553f20f8ad Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Mar 2018 12:33:59 -0500 Subject: [PATCH 19/31] Moved --- pandas/conftest.py | 35 ++++++++++++++++++++++++++++++++++- pandas/tests/conftest.py | 34 ---------------------------------- 2 files changed, 34 insertions(+), 35 deletions(-) delete mode 100644 pandas/tests/conftest.py diff --git a/pandas/conftest.py b/pandas/conftest.py index 2851921e1804c..e6593f4af21b2 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,6 +1,8 @@ -import pytest +import os +import pytest import numpy + import pandas import pandas.util._test_decorators as td @@ -89,3 +91,34 @@ def join_type(request): Fixture for trying all types of join operations """ return request.param + + +@pytest.fixture +def datapath(request): + """Get the path to a data file. + + Parameters + ---------- + path : str + Path to the file, relative to ``pandas/tests/`` + + Returns + ------- + path : path including ``pandas/tests``. + + Raises + ------ + ValueError + If the path doesn't exist and the --strict-data-files option is set. + """ + def deco(*args): + path = os.path.join('pandas', 'tests', *args) + if not os.path.exists(path): + if request.config.getoption("--strict-data-files"): + msg = "Could not find file {} and --strict-data-files is set." + raise ValueError(msg.format(path)) + else: + msg = "Could not find {}." + pytest.skip(msg.format(path)) + return path + return deco diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py deleted file mode 100644 index cdc1248834681..0000000000000 --- a/pandas/tests/conftest.py +++ /dev/null @@ -1,34 +0,0 @@ -import os - -import pytest - - -@pytest.fixture -def datapath(request): - """Get the path to a data file. - - Parameters - ---------- - path : str - Path to the file, relative to ``pandas/tests/`` - - Returns - ------- - path : path including ``pandas/tests``. - - Raises - ------ - ValueError - If the path doesn't exist and the --strict-data-files option is set. - """ - def deco(*args): - path = os.path.join('pandas', 'tests', *args) - if not os.path.exists(path): - if request.config.getoption("--strict-data-files"): - msg = "Could not find file {} and --strict-data-files is set." - raise ValueError(msg.format(path)) - else: - msg = "Could not find {}." - pytest.skip(msg.format(path)) - return path - return deco From 7c44b77711ab65ca2071131a262535430ad99284 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Mar 2018 09:10:02 -0500 Subject: [PATCH 20/31] Use fixture for iris plotting --- pandas/conftest.py | 6 ++++++ pandas/tests/plotting/common.py | 8 +------- pandas/tests/plotting/test_deprecated.py | 5 ++--- pandas/tests/plotting/test_misc.py | 16 ++++++++-------- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index e6593f4af21b2..1fa66a41c9243 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -122,3 +122,9 @@ def deco(*args): pytest.skip(msg.format(path)) return path return deco + + +@pytest.fixture +def iris(datapath): + """The iris dataset as a DataFrame.""" + return pandas.read_csv(datapath('data', 'iris.csv')) diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 70c54cefa3833..09687dd97bd43 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -45,8 +45,7 @@ def _ok_for_gaussian_kde(kind): @td.skip_if_no_mpl class TestPlotBase(object): - @pytest.fixture(autouse=True) - def setup_method(self, datapath): + def setup_method(self, method): import matplotlib as mpl mpl.rcdefaults() @@ -75,11 +74,6 @@ def setup_method(self, datapath): else: self.default_figsize = (8.0, 6.0) self.default_tick_position = 'left' if self.mpl_ge_2_0_0 else 'default' - # common test data - from pandas import read_csv - path = datapath('data', 'iris.csv') - - self.iris = read_csv(path) n = 100 with tm.RNGContext(42): diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py index 2c2d371921d2f..a45b17ec98261 100644 --- a/pandas/tests/plotting/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -46,10 +46,9 @@ def test_boxplot_deprecated(self): by='indic') @pytest.mark.slow - def test_radviz_deprecated(self): - df = self.iris + def test_radviz_deprecated(self, iris): with tm.assert_produces_warning(FutureWarning): - plotting.radviz(frame=df, class_column='Name') + plotting.radviz(frame=iris, class_column='Name') @pytest.mark.slow def test_plot_params(self): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index c5ce8aba9d80e..0704577f83d50 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -91,11 +91,11 @@ def test_scatter_matrix_axis(self): axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) @pytest.mark.slow - def test_andrews_curves(self): + def test_andrews_curves(self, iris): from pandas.plotting import andrews_curves from matplotlib import cm - df = self.iris + df = iris _check_plot_works(andrews_curves, frame=df, class_column='Name') @@ -156,11 +156,11 @@ def test_andrews_curves(self): andrews_curves(data=df, class_column='Name') @pytest.mark.slow - def test_parallel_coordinates(self): + def test_parallel_coordinates(self, iris): from pandas.plotting import parallel_coordinates from matplotlib import cm - df = self.iris + df = iris ax = _check_plot_works(parallel_coordinates, frame=df, class_column='Name') @@ -225,11 +225,11 @@ def test_parallel_coordinates_with_sorted_labels(self): assert prev[1] < nxt[1] and prev[0] < nxt[0] @pytest.mark.slow - def test_radviz(self): + def test_radviz(self, iris): from pandas.plotting import radviz from matplotlib import cm - df = self.iris + df = iris _check_plot_works(radviz, frame=df, class_column='Name') rgba = ('#556270', '#4ECDC4', '#C7F464') @@ -263,8 +263,8 @@ def test_radviz(self): self._check_colors(handles, facecolors=colors) @pytest.mark.slow - def test_subplot_titles(self): - df = self.iris.drop('Name', axis=1).head() + def test_subplot_titles(self, iris): + df = iris.drop('Name', axis=1).head() # Use the column names as the subplot titles title = list(df.columns) From ad0995144ce167c84d920fcce0f66a75c34ff059 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Mar 2018 16:56:42 -0500 Subject: [PATCH 21/31] Abs path for file test --- pandas/tests/io/parser/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 2c363f5f4adff..e5ceb4d52ba58 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -76,7 +76,7 @@ def test_read_csv(self): else: prefix = u("file://") - fname = prefix + compat.text_type(self.csv1) + fname = prefix + compat.text_type(os.path.abspath(self.csv1)) self.read_csv(fname, index_col=0, parse_dates=True) def test_1000_sep(self): From 6f02d6b4a5aad8d9d4e0eea6d82f88f4a774b665 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 29 Mar 2018 08:38:18 -0500 Subject: [PATCH 22/31] Removed stdout capture from sql tests This was interfering with pytest's dependency injection. --- pandas/tests/io/test_sql.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 2d857bd24cfc3..8d4c3ecf8ada9 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2249,7 +2249,6 @@ def test_execute_fail(self): with pytest.raises(Exception): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) - @tm.capture_stdout def test_execute_closed_connection(self, request, datapath): create_sql = """ CREATE TABLE test @@ -2536,7 +2535,6 @@ def test_execute_fail(self): with pytest.raises(Exception): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) - @tm.capture_stdout def test_execute_closed_connection(self, request, datapath): _skip_if_no_pymysql() drop_sql = "DROP TABLE IF EXISTS test" From bac438cde43ee0ba1a36064f71bef3adedea4236 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 12 Jun 2018 08:22:18 -0500 Subject: [PATCH 23/31] Cleanup Manifest --- MANIFEST.in | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 9416da89cc627..b417b8890fa24 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,11 +3,12 @@ include LICENSE include RELEASE.md include README.md include setup.py -include pyproject.toml graft doc prune doc/build +graft LICENSES + graft pandas global-exclude *.bz2 @@ -35,8 +36,6 @@ global-exclude .DS_Store global-exclude .git* global-exclude \#* -recursive-exclude pandas/tests/io/data - include versioneer.py include pandas/_version.py include pandas/io/formats/templates/*.tpl From 84ccdbfa3dbebd1a56caf544607162549af5b35d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 12 Jun 2018 09:08:54 -0500 Subject: [PATCH 24/31] fixed test test --- pandas/tests/util/test_testing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index c5e025f051e82..a06e58844b1da 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import os import pandas as pd import pytest import numpy as np @@ -851,5 +852,5 @@ def test_datapath_missing(datapath, request): datapath('not_a_file') result = datapath('data/iris.csv') - expected = 'pandas/tests/data/iris.csv' + expected = os.path.join('pandas', 'tests', 'data', 'iris.csv') assert result == expected From 7fd766055cb71ded2f67c7ec58f9fa1fb8174b33 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 20 Jun 2018 08:14:02 -0500 Subject: [PATCH 25/31] Fixed windows --- pandas/tests/util/test_testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index a06e58844b1da..4d34987e14f75 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -851,6 +851,6 @@ def test_datapath_missing(datapath, request): with pytest.raises(ValueError): datapath('not_a_file') - result = datapath('data/iris.csv') + result = datapath('data', 'iris.csv') expected = os.path.join('pandas', 'tests', 'data', 'iris.csv') assert result == expected From c187f8b106bd55e31eabdb0572d640718f619326 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 20 Jun 2018 08:18:01 -0500 Subject: [PATCH 26/31] whatsnew --- doc/source/whatsnew/v0.23.2.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 5b3e607956f7a..966efa4c2e123 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -37,6 +37,11 @@ Documentation Changes - - +Build Changes +------------- + +- The source and binary distributions no longer include test files, resulting in smaller download sizes. Tests relying on these files will be skipped when using ``pandas.test()``. (:issue:`19320`) + .. _whatsnew_0232.bug_fixes: Bug Fixes From 632a61d7d0ff2607f4a642953aad8b804c2dbff7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 20 Jun 2018 10:59:44 -0500 Subject: [PATCH 27/31] Clarify note [ci skip] [ci skip] --- doc/source/whatsnew/v0.23.2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 966efa4c2e123..45b78fa4a3e4e 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -40,7 +40,7 @@ Documentation Changes Build Changes ------------- -- The source and binary distributions no longer include test files, resulting in smaller download sizes. Tests relying on these files will be skipped when using ``pandas.test()``. (:issue:`19320`) +- The source and binary distributions no longer include test data files, resulting in smaller download sizes. Tests relying on these data files will be skipped when using ``pandas.test()``. (:issue:`19320`) .. _whatsnew_0232.bug_fixes: From b5b70c77b182cb27da617c1609db235c6fe8ea0f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 21 Jun 2018 09:13:30 -0500 Subject: [PATCH 28/31] TST: refactored html tests --- pandas/tests/io/test_html.py | 30 +++++++++++++++++++++++------- pandas/tests/io/test_packers.py | 3 ++- test_foo.py | 22 ++++++++++++++++++++++ 3 files changed, 47 insertions(+), 8 deletions(-) create mode 100644 test_foo.py diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 46e2b718e8343..371f85576812f 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -28,6 +28,22 @@ HERE = os.path.dirname(__file__) +def pytest_generate_tests(metafunc): + # Defers evaluation of the fixture until after collection. + # https://docs.pytest.org/en/latest/example/parametrize.html\ + # deferring-the-setup-of-parametrized-resources + if 'html_file' in metafunc.fixturenames: + paths = glob.glob( + os.path.join(HERE, 'data', 'html_encoding', '*.html') + ) + metafunc.parametrize("html_file", paths, indirect=True) + + +@pytest.fixture +def html_file(request, datapath): + return datapath(request.param) + + def assert_framelist_equal(list1, list2, *args, **kwargs): assert len(list1) == len(list2), ('lists are not of equal size ' 'len(list1) == {0}, ' @@ -838,22 +854,22 @@ def test_displayed_only(self, displayed_only, exp0, exp1): else: assert len(dfs) == 1 # Should not parse hidden table - @pytest.mark.parametrize("f", glob.glob( - os.path.join(HERE, 'data', 'html_encoding', '*.html'))) - def test_encode(self, f): - _, encoding = os.path.splitext(os.path.basename(f))[0].split('_') + def test_encode(self, html_file): + _, encoding = os.path.splitext( + os.path.basename(html_file) + )[0].split('_') try: - with open(f, 'rb') as fobj: + with open(html_file, 'rb') as fobj: from_string = self.read_html(fobj.read(), encoding=encoding, index_col=0).pop() - with open(f, 'rb') as fobj: + with open(html_file, 'rb') as fobj: from_file_like = self.read_html(BytesIO(fobj.read()), encoding=encoding, index_col=0).pop() - from_filename = self.read_html(f, encoding=encoding, + from_filename = self.read_html(html_file, encoding=encoding, index_col=0).pop() tm.assert_frame_equal(from_string, from_file_like) tm.assert_frame_equal(from_string, from_filename) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index aca623cb14aae..412e218f95c6f 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -928,7 +928,8 @@ def test_msgpacks_legacy(self, current_packers_data, all_packers_data, # GH12142 0.17 files packed in P2 can't be read in P3 if (compat.PY3 and version.startswith('0.17.') and legacy_packer.split('.')[-4][-1] == '2'): - pytest.skip("Files packed in Py2 can't be read in Py3.") + msg = "Files packed in Py2 can't be read in Py3 ({})" + pytest.skip(msg.format(version)) try: with catch_warnings(record=True): self.compare(current_packers_data, all_packers_data, diff --git a/test_foo.py b/test_foo.py new file mode 100644 index 0000000000000..93623d5fdb045 --- /dev/null +++ b/test_foo.py @@ -0,0 +1,22 @@ +import pytest + +CALL_COUNT = 0 + + +@pytest.fixture(scope="module") +def fixture(request, datapath): + global CALL_COUNT + CALL_COUNT += 1 + + return request.param + + +def pytest_generate_tests(metafunc): + if "fixture" in metafunc.fixturenames: + metafunc.parametrize("fixture", ["foo"], indirect=True, scope="module") + + +@pytest.mark.parametrize("param", ["bar", "zaz"]) +def test_1(fixture, param): + global CALL_COUNT + assert CALL_COUNT == 1 From 9954bba5b72e7d31e5e9326d9fe9cedc35af2372 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 22 Jun 2018 15:28:56 -0500 Subject: [PATCH 29/31] Remove auto-generated html fixtures --- pandas/tests/io/test_html.py | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 371f85576812f..b05d37a90cff0 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -28,20 +28,15 @@ HERE = os.path.dirname(__file__) -def pytest_generate_tests(metafunc): - # Defers evaluation of the fixture until after collection. - # https://docs.pytest.org/en/latest/example/parametrize.html\ - # deferring-the-setup-of-parametrized-resources - if 'html_file' in metafunc.fixturenames: - paths = glob.glob( - os.path.join(HERE, 'data', 'html_encoding', '*.html') - ) - metafunc.parametrize("html_file", paths, indirect=True) - - -@pytest.fixture -def html_file(request, datapath): - return datapath(request.param) +@pytest.fixture(params=[ + 'chinese_utf-16.html', + 'chinese_utf-32.html', + 'chinese_utf-8.html', + 'letz_latin1.html', +]) +def html_encoding_file(request, datapath): + """Parametrized fixture for HTML encoding test filenames.""" + return datapath('io', 'data', 'html_encoding', request.param) def assert_framelist_equal(list1, list2, *args, **kwargs): @@ -854,22 +849,23 @@ def test_displayed_only(self, displayed_only, exp0, exp1): else: assert len(dfs) == 1 # Should not parse hidden table - def test_encode(self, html_file): + def test_encode(self, html_encoding_file): _, encoding = os.path.splitext( - os.path.basename(html_file) + os.path.basename(html_encoding_file) )[0].split('_') try: - with open(html_file, 'rb') as fobj: + with open(html_encoding_file, 'rb') as fobj: from_string = self.read_html(fobj.read(), encoding=encoding, index_col=0).pop() - with open(html_file, 'rb') as fobj: + with open(html_encoding_file, 'rb') as fobj: from_file_like = self.read_html(BytesIO(fobj.read()), encoding=encoding, index_col=0).pop() - from_filename = self.read_html(html_file, encoding=encoding, + from_filename = self.read_html(html_encoding_file, + encoding=encoding, index_col=0).pop() tm.assert_frame_equal(from_string, from_file_like) tm.assert_frame_equal(from_string, from_filename) From c7718852e576d55c7352a19dc430fc4fed78dbce Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 22 Jun 2018 17:09:03 -0500 Subject: [PATCH 30/31] linting --- pandas/tests/io/test_html.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index b05d37a90cff0..9c6a8de7ed446 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1,6 +1,5 @@ from __future__ import print_function -import glob import os import re import threading From dd752708e42ac989fdfcbf16cb455a62aa74fa2f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 26 Jun 2018 07:15:48 -0500 Subject: [PATCH 31/31] Removed test test file --- test_foo.py | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 test_foo.py diff --git a/test_foo.py b/test_foo.py deleted file mode 100644 index 93623d5fdb045..0000000000000 --- a/test_foo.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -CALL_COUNT = 0 - - -@pytest.fixture(scope="module") -def fixture(request, datapath): - global CALL_COUNT - CALL_COUNT += 1 - - return request.param - - -def pytest_generate_tests(metafunc): - if "fixture" in metafunc.fixturenames: - metafunc.parametrize("fixture", ["foo"], indirect=True, scope="module") - - -@pytest.mark.parametrize("param", ["bar", "zaz"]) -def test_1(fixture, param): - global CALL_COUNT - assert CALL_COUNT == 1