From 4d77cd8e60174f7adf32587755653624617f5b6c Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 3 Feb 2018 15:08:35 -0600
Subject: [PATCH 01/31] PKG: Exclude data test files.

---
 MANIFEST.in                     | 33 +++++++++++++------
 pandas/tests/io/conftest.py     | 18 +++++++++--
 pandas/tests/io/test_common.py  |  2 ++
 pandas/tests/io/test_html.py    | 56 +++++++++++++++++----------------
 pandas/tests/io/test_packers.py |  2 ++
 pandas/tests/io/test_pickle.py  |  2 ++
 pandas/tests/plotting/common.py |  4 +++
 pandas/util/_test_decorators.py |  2 ++
 pandas/util/testing.py          |  8 ++++-
 setup.py                        |  6 +---
 10 files changed, 88 insertions(+), 45 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 9773019c6e6e0..9416da89cc627 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -10,20 +10,33 @@ prune doc/build
 
 graft pandas
 
-global-exclude *.so
-global-exclude *.pyd
+global-exclude *.bz2
+global-exclude *.csv
+global-exclude *.dta
+global-exclude *.gz
+global-exclude *.h5
+global-exclude *.html
+global-exclude *.json
+global-exclude *.msgpack
+global-exclude *.pickle
+global-exclude *.png
 global-exclude *.pyc
+global-exclude *.pyd
+global-exclude *.sas7bdat
+global-exclude *.so
+global-exclude *.xls
+global-exclude *.xlsm
+global-exclude *.xlsx
+global-exclude *.xpt
+global-exclude *.xz
+global-exclude *.zip
 global-exclude *~
-global-exclude \#*
-global-exclude .git*
 global-exclude .DS_Store
-global-exclude *.png
+global-exclude .git*
+global-exclude \#*
+
+recursive-exclude pandas/tests/io/data
 
-# include examples/data/*
-# recursive-include examples *.py
-# recursive-include doc/source *
-# recursive-include doc/sphinxext *
-# recursive-include LICENSES *
 include versioneer.py
 include pandas/_version.py
 include pandas/io/formats/templates/*.tpl
diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index 57e72da2fd3f4..21d171ab5fc05 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -9,19 +9,30 @@
 @pytest.fixture(scope='module')
 def tips_file():
     """Path to the tips dataset"""
-    return os.path.join(HERE, 'parser', 'data', 'tips.csv')
+    path = os.path.join(HERE, 'parser', 'data', 'tips.csv')
+    if not os.path.exists(path):
+        pytest.skip("Data files not included in pandas distribution.")
+
+    return path
 
 
 @pytest.fixture(scope='module')
 def jsonl_file():
     """Path a JSONL dataset"""
-    return os.path.join(HERE, 'parser', 'data', 'items.jsonl')
+    path = os.path.join(HERE, 'parser', 'data', 'items.jsonl')
+    if not os.path.exists(path):
+        pytest.skip("Data files not included in pandas distribution.")
+
+    return path
 
 
 @pytest.fixture(scope='module')
 def salaries_table():
     """DataFrame with the salaries dataset"""
     path = os.path.join(HERE, 'parser', 'data', 'salaries.csv')
+    if not os.path.exists(path):
+        pytest.skip("Data files not included in pandas distribution.")
+
     return read_table(path)
 
 
@@ -53,6 +64,9 @@ def s3_resource(tips_file, jsonl_file):
 
     def add_tips_files(bucket_name):
         for s3_key, file_name in test_s3_files:
+            if not os.path.exists(file_name):
+                pytest.skip("Data files not included in pandas distribution.")
+
             with open(file_name, 'rb') as f:
                 conn.Bucket(bucket_name).put_object(
                     Key=s3_key,
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index a0070dce6a7f1..3c258e2b77b3f 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -166,6 +166,8 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext):
     ])
     def test_read_fspath_all(self, reader, module, path):
         pytest.importorskip(module)
+        if not os.path.exists(path):
+            pytest.skip("Data files not included in pandas distribution.")
 
         mypath = CustomFSPath(path)
         result = reader(mypath)
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 151a0750b7f6e..3edaef14f30f6 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -65,9 +65,6 @@ def _skip_if_none_of(module_names):
                 pytest.skip("Bad version of bs4: 4.2.0")
 
 
-DATA_PATH = tm.get_data_path()
-
-
 def assert_framelist_equal(list1, list2, *args, **kwargs):
     assert len(list1) == len(list2), ('lists are not of equal size '
                                       'len(list1) == {0}, '
@@ -86,8 +83,8 @@ def test_bs4_version_fails():
     _skip_if_none_of(('bs4', 'html5lib'))
     import bs4
     if LooseVersion(bs4.__version__) == LooseVersion('4.2.0'):
-        tm.assert_raises(AssertionError, read_html, os.path.join(DATA_PATH,
-                                                                 "spam.html"),
+        tm.assert_raises(AssertionError, read_html,
+                         os.path.join(tm.get_data_path(), "spam.html"),
                          flavor='bs4')
 
 
@@ -100,16 +97,17 @@ def read_html(self, *args, **kwargs):
 
 class TestReadHtml(ReadHtmlMixin):
     flavor = 'bs4'
-    spam_data = os.path.join(DATA_PATH, 'spam.html')
-    spam_data_kwargs = {}
-    if PY3:
-        spam_data_kwargs['encoding'] = 'UTF-8'
-    banklist_data = os.path.join(DATA_PATH, 'banklist.html')
 
     @classmethod
     def setup_class(cls):
         _skip_if_none_of(('bs4', 'html5lib'))
 
+        cls.spam_data = os.path.join(tm.get_data_path(), 'spam.html')
+        cls.spam_data_kwargs = {}
+        if PY3:
+            cls.spam_data_kwargs['encoding'] = 'UTF-8'
+        cls.banklist_data = os.path.join(tm.get_data_path(), 'banklist.html')
+
     def test_to_html_compat(self):
         df = mkdf(4, 3, data_gen_f=lambda *args: rand(), c_idx_names=False,
                   r_idx_names=False).applymap('{0:.3f}'.format).astype(float)
@@ -382,7 +380,7 @@ def test_python_docs_table(self):
     @pytest.mark.slow
     def test_thousands_macau_stats(self):
         all_non_nan_table_index = -2
-        macau_data = os.path.join(DATA_PATH, 'macau.html')
+        macau_data = os.path.join(tm.get_data_path(), 'macau.html')
         dfs = self.read_html(macau_data, index_col=0,
                              attrs={'class': 'style1'})
         df = dfs[all_non_nan_table_index]
@@ -392,7 +390,7 @@ def test_thousands_macau_stats(self):
     @pytest.mark.slow
     def test_thousands_macau_index_col(self):
         all_non_nan_table_index = -2
-        macau_data = os.path.join(DATA_PATH, 'macau.html')
+        macau_data = os.path.join(tm.get_data_path(), 'macau.html')
         dfs = self.read_html(macau_data, index_col=0, header=0)
         df = dfs[all_non_nan_table_index]
 
@@ -520,7 +518,7 @@ def test_countries_municipalities(self):
         assert_framelist_equal(res1, res2)
 
     def test_nyse_wsj_commas_table(self):
-        data = os.path.join(DATA_PATH, 'nyse_wsj.html')
+        data = os.path.join(tm.get_data_path(), 'nyse_wsj.html')
         df = self.read_html(data, index_col=0, header=0,
                             attrs={'class': 'mdcTable'})[0]
 
@@ -542,7 +540,8 @@ def try_remove_ws(x):
 
         df = self.read_html(self.banklist_data, 'Metcalf',
                             attrs={'id': 'table'})[0]
-        ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'),
+        ground_truth = read_csv(os.path.join(tm.get_data_path(),
+                                             'banklist.csv'),
                                 converters={'Updated Date': Timestamp,
                                             'Closing Date': Timestamp})
         assert df.shape == ground_truth.shape
@@ -660,7 +659,7 @@ def test_parse_dates_combine(self):
         tm.assert_frame_equal(newdf, res[0])
 
     def test_computer_sales_page(self):
-        data = os.path.join(DATA_PATH, 'computer_sales_page.html')
+        data = os.path.join(tm.get_data_path(), 'computer_sales_page.html')
         with tm.assert_raises_regex(ParserError,
                                     r"Passed header=\[0,1\] are "
                                     r"too many rows for this "
@@ -668,7 +667,7 @@ def test_computer_sales_page(self):
             self.read_html(data, header=[0, 1])
 
     def test_wikipedia_states_table(self):
-        data = os.path.join(DATA_PATH, 'wikipedia_states.html')
+        data = os.path.join(tm.get_data_path(), 'wikipedia_states.html')
         assert os.path.isfile(data), '%r is not a file' % data
         assert os.path.getsize(data), '%r is an empty file' % data
         result = self.read_html(data, 'Arizona', header=1)[0]
@@ -788,11 +787,14 @@ def _lang_enc(filename):
 
 
 class TestReadHtmlEncoding(object):
-    files = glob.glob(os.path.join(DATA_PATH, 'html_encoding', '*.html'))
     flavor = 'bs4'
 
     @classmethod
     def setup_class(cls):
+        cls.files = glob.glob(os.path.join(tm.get_data_path(),
+                                           'html_encoding',
+                                           '*.html'))
+
         _skip_if_none_of((cls.flavor, 'html5lib'))
 
     def read_html(self, *args, **kwargs):
@@ -847,8 +849,8 @@ def setup_class(cls):
 
     def test_data_fail(self):
         from lxml.etree import XMLSyntaxError
-        spam_data = os.path.join(DATA_PATH, 'spam.html')
-        banklist_data = os.path.join(DATA_PATH, 'banklist.html')
+        spam_data = os.path.join(tm.get_data_path(), 'spam.html')
+        banklist_data = os.path.join(tm.get_data_path(), 'banklist.html')
 
         with pytest.raises(XMLSyntaxError):
             self.read_html(spam_data)
@@ -857,7 +859,7 @@ def test_data_fail(self):
             self.read_html(banklist_data)
 
     def test_works_on_valid_markup(self):
-        filename = os.path.join(DATA_PATH, 'valid_markup.html')
+        filename = os.path.join(tm.get_data_path(), 'valid_markup.html')
         dfs = self.read_html(filename, index_col=0)
         assert isinstance(dfs, list)
         assert isinstance(dfs[0], DataFrame)
@@ -865,7 +867,7 @@ def test_works_on_valid_markup(self):
     @pytest.mark.slow
     def test_fallback_success(self):
         _skip_if_none_of(('bs4', 'html5lib'))
-        banklist_data = os.path.join(DATA_PATH, 'banklist.html')
+        banklist_data = os.path.join(tm.get_data_path(), 'banklist.html')
         self.read_html(banklist_data, '.*Water.*', flavor=['lxml', 'html5lib'])
 
     def test_to_html_timestamp(self):
@@ -893,7 +895,7 @@ def test_parse_dates_combine(self):
         tm.assert_frame_equal(newdf, res[0])
 
     def test_computer_sales_page(self):
-        data = os.path.join(DATA_PATH, 'computer_sales_page.html')
+        data = os.path.join(tm.get_data_path(), 'computer_sales_page.html')
         self.read_html(data, header=[0, 1])
 
 
@@ -914,7 +916,7 @@ def get_elements_from_file(url, element='table'):
 
 @pytest.mark.slow
 def test_bs4_finds_tables():
-    filepath = os.path.join(DATA_PATH, "spam.html")
+    filepath = os.path.join(tm.get_data_path(), "spam.html")
     with warnings.catch_warnings():
         warnings.filterwarnings('ignore')
         assert get_elements_from_file(filepath, 'table')
@@ -929,19 +931,19 @@ def get_lxml_elements(url, element):
 
 @pytest.mark.slow
 def test_lxml_finds_tables():
-    filepath = os.path.join(DATA_PATH, "spam.html")
+    filepath = os.path.join(tm.get_data_path(), "spam.html")
     assert get_lxml_elements(filepath, 'table')
 
 
 @pytest.mark.slow
 def test_lxml_finds_tbody():
-    filepath = os.path.join(DATA_PATH, "spam.html")
+    filepath = os.path.join(tm.get_data_path(), "spam.html")
     assert get_lxml_elements(filepath, 'tbody')
 
 
 def test_same_ordering():
     _skip_if_none_of(['bs4', 'lxml', 'html5lib'])
-    filename = os.path.join(DATA_PATH, 'valid_markup.html')
+    filename = os.path.join(tm.get_data_path(), 'valid_markup.html')
     dfs_lxml = read_html(filename, index_col=0, flavor=['lxml'])
     dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4'])
     assert_framelist_equal(dfs_lxml, dfs_bs4)
@@ -965,7 +967,7 @@ def test_importcheck_thread_safety():
     pytest.importorskip('lxml')
     reload(pandas.io.html)
 
-    filename = os.path.join(DATA_PATH, 'valid_markup.html')
+    filename = os.path.join(tm.get_data_path(), 'valid_markup.html')
     helper_thread1 = ErrorThread(target=read_html, args=(filename,))
     helper_thread2 = ErrorThread(target=read_html, args=(filename,))
 
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index c343e0105eb4f..06dbb60b587da 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -830,6 +830,8 @@ def test_default_encoding(self):
 def legacy_packers_versions():
     # yield the packers versions
     path = tm.get_data_path('legacy_msgpack')
+    if not os.path.exists(path):
+        raise pytest.skip("Data file {} does not exist.".format(path))
     for v in os.listdir(path):
         p = os.path.join(path, v)
         if os.path.isdir(p):
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 2ba3e174404c7..ab75416033f68 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -191,6 +191,8 @@ def compare_sp_frame_float(result, expected, typ, version):
 def legacy_pickle_versions():
     # yield the pickle versions
     path = tm.get_data_path('legacy_pickle')
+    if not os.path.exists(path):
+        raise pytest.skip("Data path {} does not exists.".format(path))
     for v in os.listdir(path):
         p = os.path.join(path, v)
         if os.path.isdir(p):
diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py
index 2e62b22b2b69e..58f03fe69ed60 100644
--- a/pandas/tests/plotting/common.py
+++ b/pandas/tests/plotting/common.py
@@ -77,6 +77,10 @@ def setup_method(self, method):
         from pandas import read_csv
         base = os.path.join(os.path.dirname(curpath()), os.pardir)
         path = os.path.join(base, 'tests', 'data', 'iris.csv')
+
+        if not os.path.exists(path):
+            pytest.skip("Data files not included in pandas distribution.")
+
         self.iris = read_csv(path)
 
         n = 100
diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py
index 0fd5648739e5c..67a55d9ca21db 100644
--- a/pandas/util/_test_decorators.py
+++ b/pandas/util/_test_decorators.py
@@ -23,6 +23,8 @@ def test_foo():
 
 For more information, refer to the ``pytest`` documentation on ``skipif``.
 """
+import functools
+import os
 
 import pytest
 import locale
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 0009e26f8b100..1a6a1623fc96b 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -732,10 +732,16 @@ def get_data_path(f=''):
     """Return the path of a data file, these are relative to the current test
     directory.
     """
+    import pytest
+
     # get our callers file
     _, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1]
     base_dir = os.path.abspath(os.path.dirname(filename))
-    return os.path.join(base_dir, 'data', f)
+    path = os.path.join(base_dir, 'data', f)
+    if not os.path.exists(path):
+        pytest.skip("Data files not included in pandas distribution.")
+
+    return path
 
 # -----------------------------------------------------------------------------
 # Comparators
diff --git a/setup.py b/setup.py
index 5397a1b84dc4d..ccf3a9f5be471 100755
--- a/setup.py
+++ b/setup.py
@@ -722,11 +722,7 @@ def pxd(name):
       maintainer=AUTHOR,
       version=versioneer.get_version(),
       packages=find_packages(include=['pandas', 'pandas.*']),
-      package_data={'': ['data/*', 'templates/*'],
-                    'pandas.tests.io': ['data/legacy_hdf/*.h5',
-                                        'data/legacy_pickle/*/*.pickle',
-                                        'data/legacy_msgpack/*/*.msgpack',
-                                        'data/html_encoding/*.html']},
+      package_data={'': ['templates/*']},
       ext_modules=extensions,
       maintainer_email=EMAIL,
       description=DESCRIPTION,

From 270e44248656cf7d8b690b0cb86ad467ac539eb6 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 6 Feb 2018 11:33:31 -0600
Subject: [PATCH 02/31] CI: Report skipped tests; WIP debugging, drop unused imports

---
 ci/script_single.sh             | 8 ++++----
 pandas/tests/test_base.py       | 2 ++
 pandas/util/_test_decorators.py | 3 ---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/ci/script_single.sh b/ci/script_single.sh
index 005c648ee025f..8154d1c304076 100755
--- a/ci/script_single.sh
+++ b/ci/script_single.sh
@@ -23,12 +23,12 @@ elif [ "$DOC" ]; then
     echo "We are not running pytest as this is a doc-build"
 
 elif [ "$COVERAGE" ]; then
-    echo pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
-    pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
+    echo pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
+    pytest      -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
 
 else
-    echo pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas
-    pytest -m "single" -r xX  --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest
+    echo pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas
+    pytest      -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest
 
 fi
 
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index df2547fc7b0da..a0d23a9625d60 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -511,6 +511,8 @@ def test_value_counts_unique_nunique_null(self):
                                     index=expected_index[9:1:-1],
                                     dtype='int64', name='a')
 
+                if isinstance(o, pd.PeriodIndex):
+                    import pdb; pdb.set_trace()
                 result_s_na = o.value_counts(dropna=False)
                 tm.assert_series_equal(result_s_na, expected_s_na)
                 assert result_s_na.index.name is None
diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py
index 67a55d9ca21db..aefe0401e55ac 100644
--- a/pandas/util/_test_decorators.py
+++ b/pandas/util/_test_decorators.py
@@ -23,9 +23,6 @@ def test_foo():
 
 For more information, refer to the ``pytest`` documentation on ``skipif``.
 """
-import functools
-import os
-
 import pytest
 import locale
 from distutils.version import LooseVersion

From 1804bccebaf2c34484ddb80b161718ea3f038dd2 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sun, 25 Feb 2018 14:20:06 -0600
Subject: [PATCH 03/31] Refactor data path handling

---
 pandas/conftest.py                            |   2 +
 pandas/tests/conftest.py                      |  32 +++
 pandas/tests/io/conftest.py                   |   7 +-
 pandas/tests/reshape/merge/test_merge_asof.py | 250 ++++++++++--------
 pandas/tests/reshape/test_tile.py             |   6 +-
 pandas/tests/util/test_testing.py             |  12 +
 setup.cfg                                     |   1 +
 7 files changed, 196 insertions(+), 114 deletions(-)
 create mode 100644 pandas/tests/conftest.py

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 37f0a2f818a3b..76a02d076a373 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -16,6 +16,8 @@ def pytest_addoption(parser):
                      help="run high memory tests")
     parser.addoption("--only-slow", action="store_true",
                      help="run only slow tests")
+    parser.addoption("--strict-data-files", action="store_true",
+                     help="Fail if a test is skipped for missing data file.")
 
 
 def pytest_runtest_setup(item):
diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py
new file mode 100644
index 0000000000000..3b49d9f213909
--- /dev/null
+++ b/pandas/tests/conftest.py
@@ -0,0 +1,32 @@
+import os
+
+import pytest
+
+
+@pytest.fixture
+def datapath(request):
+    """Get a function that resolves data files under ``pandas/tests``.
+
+    The returned function takes ``path``, a str relative to
+    ``pandas/tests/``, and skips the requesting test if it is missing.
+
+    Returns
+    -------
+    callable
+        Maps ``path`` to a path anchored at this file's directory, so
+        the lookup does not depend on the current working directory.
+
+    Raises
+    ------
+    ValueError
+        If the path doesn't exist and the --strict-data-files option is set.
+    """
+    def deco(path):
+        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), path)
+        if os.path.exists(path):
+            return path
+        if request.config.getoption("--strict-data-files"):
+            msg = "Could not find file {} and --strict-data-files is set."
+            raise ValueError(msg.format(path))
+        pytest.skip("Data files not included in pandas distribution.")
+    return deco
diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index 342008f59f851..bcf8471d1af45 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -11,11 +11,14 @@ def parser_data(request):
 
 
 @pytest.fixture
-def tips_file(parser_data):
+def tips_file(request, parser_data):
     """Path to the tips dataset"""
     path = os.path.join(parser_data, 'tips.csv')
     if not os.path.exists(path):
-        pytest.skip("Data files not included in pandas distribution.")
+        if request.config.getoption("--strict-data-files"):
+            raise ValueError("Failed.")
+        else:
+            pytest.skip("Data files not included in pandas distribution.")
 
     return path
 
diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
index cebbcc41c3e17..622bc787c7bf8 100644
--- a/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -1,4 +1,5 @@
 import os
+
 import pytest
 
 import pytz
@@ -11,27 +12,71 @@
 from pandas.util.testing import assert_frame_equal
 
 
-class TestAsOfMerge(object):
+def read_data(path, dedupe=False):
+    """Load a CSV test file with ``time`` parsed; optionally de-duplicate."""
+    frame = read_csv(path)
+    if dedupe:  # keep only the last row per (time, ticker)
+        frame = frame.drop_duplicates(['time', 'ticker'], keep='last')
+        frame = frame.reset_index(drop=True)
+    return frame.assign(time=to_datetime(frame.time))
+
+
+@pytest.fixture
+def trades(datapath):  # frame read from trades.csv
+    parts = ('reshape', 'merge', 'data', 'trades.csv')
+    return read_data(datapath(os.path.join(*parts)))
+
+
+@pytest.fixture
+def trades2(datapath):  # frame read from trades2.csv
+    parts = ('reshape', 'merge', 'data', 'trades2.csv')
+    return read_data(datapath(os.path.join(*parts)))
+
+
+@pytest.fixture
+def quotes(datapath):  # de-duplicated frame read from quotes.csv
+    parts = ('reshape', 'merge', 'data', 'quotes.csv')
+    return read_data(datapath(os.path.join(*parts)), dedupe=True)
+
+
+@pytest.fixture
+def quotes2(datapath):  # de-duplicated frame read from quotes2.csv
+    parts = ('reshape', 'merge', 'data', 'quotes2.csv')
+    return read_data(datapath(os.path.join(*parts)), dedupe=True)
+
+
+@pytest.fixture
+def asof(datapath):  # frame read from asof.csv
+    parts = ('reshape', 'merge', 'data', 'asof.csv')
+    return read_data(datapath(os.path.join(*parts)))
+
+
+@pytest.fixture
+def asof2(datapath):  # frame read from asof2.csv
+    parts = ('reshape', 'merge', 'data', 'asof2.csv')
+    return read_data(datapath(os.path.join(*parts)))
+
+
+@pytest.fixture
+def tolerance(datapath):  # frame read from tolerance.csv
+    parts = ('reshape', 'merge', 'data', 'tolerance.csv')
+    return read_data(datapath(os.path.join(*parts)))
+
 
-    def read_data(self, name, dedupe=False):
-        path = os.path.join(tm.get_data_path(), name)
-        x = read_csv(path)
-        if dedupe:
-            x = (x.drop_duplicates(['time', 'ticker'], keep='last')
-                  .reset_index(drop=True)
-                 )
-        x.time = to_datetime(x.time)
-        return x
-
-    def setup_method(self, method):
-
-        self.trades = self.read_data('trades.csv')
-        self.quotes = self.read_data('quotes.csv', dedupe=True)
-        self.asof = self.read_data('asof.csv')
-        self.tolerance = self.read_data('tolerance.csv')
-        self.allow_exact_matches = self.read_data('allow_exact_matches.csv')
-        self.allow_exact_matches_and_tolerance = self.read_data(
-            'allow_exact_matches_and_tolerance.csv')
+@pytest.fixture
+def allow_exact_matches(datapath):  # frame from allow_exact_matches.csv
+    parts = ('reshape', 'merge', 'data', 'allow_exact_matches.csv')
+    return read_data(datapath(os.path.join(*parts)))
+
+
+@pytest.fixture
+def allow_exact_matches_and_tolerance(datapath):
+    # Frame read from allow_exact_matches_and_tolerance.csv.
+    name = 'allow_exact_matches_and_tolerance.csv'
+    return read_data(datapath(os.path.join('reshape', 'merge', 'data', name)))
+
+
+class TestAsOfMerge(object):
 
     def test_examples1(self):
         """ doc-string examples """
@@ -149,23 +194,20 @@ def test_examples4(self):
         result = pd.merge_asof(left, right, on='a', direction='nearest')
         assert_frame_equal(result, expected)
 
-    def test_basic(self):
-
-        expected = self.asof
-        trades = self.trades
-        quotes = self.quotes
+    def test_basic(self, trades, quotes, asof):
 
+        expected = asof
         result = merge_asof(trades, quotes,
                             on='time',
                             by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_basic_categorical(self):
+    def test_basic_categorical(self, asof, trades, quotes):
 
-        expected = self.asof
-        trades = self.trades.copy()
+        expected = asof
+        trades = trades.copy()
         trades.ticker = trades.ticker.astype('category')
-        quotes = self.quotes.copy()
+        quotes = quotes.copy()
         quotes.ticker = quotes.ticker.astype('category')
         expected.ticker = expected.ticker.astype('category')
 
@@ -174,12 +216,12 @@ def test_basic_categorical(self):
                             by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_basic_left_index(self):
+    def test_basic_left_index(self, trades, quotes, asof):
 
         # GH14253
-        expected = self.asof
-        trades = self.trades.set_index('time')
-        quotes = self.quotes
+        expected = asof
+        trades = trades.set_index('time')
+        quotes = quotes
 
         result = merge_asof(trades, quotes,
                             left_index=True,
@@ -191,11 +233,11 @@ def test_basic_left_index(self):
         expected = expected[result.columns]
         assert_frame_equal(result, expected)
 
-    def test_basic_right_index(self):
+    def test_basic_right_index(self, trades, quotes, asof):
 
-        expected = self.asof
-        trades = self.trades
-        quotes = self.quotes.set_index('time')
+        expected = asof
+        trades = trades
+        quotes = quotes.set_index('time')
 
         result = merge_asof(trades, quotes,
                             left_on='time',
@@ -203,11 +245,11 @@ def test_basic_right_index(self):
                             by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_basic_left_index_right_index(self):
+    def test_basic_left_index_right_index(self, trades, quotes, asof):
 
-        expected = self.asof.set_index('time')
-        trades = self.trades.set_index('time')
-        quotes = self.quotes.set_index('time')
+        expected = asof.set_index('time')
+        trades = trades.set_index('time')
+        quotes = quotes.set_index('time')
 
         result = merge_asof(trades, quotes,
                             left_index=True,
@@ -215,48 +257,48 @@ def test_basic_left_index_right_index(self):
                             by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_multi_index(self):
+    def test_multi_index(self, trades, quotes):
 
         # MultiIndex is prohibited
-        trades = self.trades.set_index(['time', 'price'])
-        quotes = self.quotes.set_index('time')
+        trades = trades.set_index(['time', 'price'])
+        quotes = quotes.set_index('time')
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
                        left_index=True,
                        right_index=True)
 
-        trades = self.trades.set_index('time')
-        quotes = self.quotes.set_index(['time', 'bid'])
+    def test_multi_index2(self, trades, quotes):
+        trades = trades.set_index('time')
+        quotes = quotes.set_index(['time', 'bid'])
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
                        left_index=True,
                        right_index=True)
 
-    def test_on_and_index(self):
+    def test_on_and_index_on_price(self, trades, quotes):
 
         # 'on' parameter and index together is prohibited
-        trades = self.trades.set_index('time')
-        quotes = self.quotes.set_index('time')
+        trades = trades.set_index('time')
+        quotes = quotes.set_index('time')
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
                        left_on='price',
                        left_index=True,
                        right_index=True)
 
-        trades = self.trades.set_index('time')
-        quotes = self.quotes.set_index('time')
+    def test_on_and_index_on_bid(self, trades, quotes):
+        trades = trades.set_index('time')
+        quotes = quotes.set_index('time')
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
                        right_on='bid',
                        left_index=True,
                        right_index=True)
 
-    def test_basic_left_by_right_by(self):
+    def test_basic_left_by_right_by(self, trades, quotes, asof):
 
         # GH14253
-        expected = self.asof
-        trades = self.trades
-        quotes = self.quotes
+        expected = asof
 
         result = merge_asof(trades, quotes,
                             on='time',
@@ -264,11 +306,11 @@ def test_basic_left_by_right_by(self):
                             right_by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_missing_right_by(self):
+    def test_missing_right_by(self, trades, quotes, asof):
 
-        expected = self.asof
-        trades = self.trades
-        quotes = self.quotes
+        expected = asof
+        trades = trades
+        quotes = quotes
 
         q = quotes[quotes.ticker != 'MSFT']
         result = merge_asof(trades, q,
@@ -277,7 +319,7 @@ def test_missing_right_by(self):
         expected.loc[expected.ticker == 'MSFT', ['bid', 'ask']] = np.nan
         assert_frame_equal(result, expected)
 
-    def test_multiby(self):
+    def test_multiby(self, trades, quotes):
         # GH13936
         trades = pd.DataFrame({
             'time': pd.to_datetime(['20160525 13:30:00.023',
@@ -334,7 +376,7 @@ def test_multiby(self):
                                by=['ticker', 'exch'])
         assert_frame_equal(result, expected)
 
-    def test_multiby_heterogeneous_types(self):
+    def test_multiby_heterogeneous_types(self, trades, quotes):
         # GH13936
         trades = pd.DataFrame({
             'time': pd.to_datetime(['20160525 13:30:00.023',
@@ -423,34 +465,31 @@ def test_multiby_indexed(self):
             pd.merge_asof(left, right, left_index=True, right_index=True,
                           left_by=['k1', 'k2'], right_by=['k1'])
 
-    def test_basic2(self):
-
-        expected = self.read_data('asof2.csv')
-        trades = self.read_data('trades2.csv')
-        quotes = self.read_data('quotes2.csv', dedupe=True)
+    def test_basic2(self, asof2, trades2, quotes2):
 
-        result = merge_asof(trades, quotes,
+        expected = asof2
+        result = merge_asof(trades2, quotes2,
                             on='time',
                             by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_basic_no_by(self):
+    def test_basic_no_by(self, asof, trades, quotes):
         f = lambda x: x[x.ticker == 'MSFT'].drop('ticker', axis=1) \
             .reset_index(drop=True)
 
         # just use a single ticker
-        expected = f(self.asof)
-        trades = f(self.trades)
-        quotes = f(self.quotes)
+        expected = f(asof)
+        trades = f(trades)
+        quotes = f(quotes)
 
         result = merge_asof(trades, quotes,
                             on='time')
         assert_frame_equal(result, expected)
 
-    def test_valid_join_keys(self):
+    def test_valid_join_keys(self, trades, quotes):
 
-        trades = self.trades
-        quotes = self.quotes
+        trades = trades
+        quotes = quotes
 
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
@@ -467,14 +506,14 @@ def test_valid_join_keys(self):
             merge_asof(trades, quotes,
                        by='ticker')
 
-    def test_with_duplicates(self):
+    def test_with_duplicates(self, asof, trades, quotes):
 
-        q = pd.concat([self.quotes, self.quotes]).sort_values(
+        q = pd.concat([quotes, quotes]).sort_values(
             ['time', 'ticker']).reset_index(drop=True)
-        result = merge_asof(self.trades, q,
+        result = merge_asof(trades, q,
                             on='time',
                             by='ticker')
-        expected = self.read_data('asof.csv')
+        expected = asof
         assert_frame_equal(result, expected)
 
     def test_with_duplicates_no_on(self):
@@ -489,22 +528,14 @@ def test_with_duplicates_no_on(self):
                                  'right_val': [1, 1, 3]})
         assert_frame_equal(result, expected)
 
-    def test_valid_allow_exact_matches(self):
-
-        trades = self.trades
-        quotes = self.quotes
-
+    def test_valid_allow_exact_matches(self, trades, quotes):
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
                        on='time',
                        by='ticker',
                        allow_exact_matches='foo')
 
-    def test_valid_tolerance(self):
-
-        trades = self.trades
-        quotes = self.quotes
-
+    def test_valid_tolerance(self, trades, quotes):
         # dti
         merge_asof(trades, quotes,
                    on='time',
@@ -544,10 +575,10 @@ def test_valid_tolerance(self):
                        by='ticker',
                        tolerance=-1)
 
-    def test_non_sorted(self):
+    def test_non_sorted(self, trades, quotes):
 
-        trades = self.trades.sort_values('time', ascending=False)
-        quotes = self.quotes.sort_values('time', ascending=False)
+        trades = trades.sort_values('time', ascending=False)
+        quotes = quotes.sort_values('time', ascending=False)
 
         # we require that we are already sorted on time & quotes
         assert not trades.time.is_monotonic
@@ -557,7 +588,7 @@ def test_non_sorted(self):
                        on='time',
                        by='ticker')
 
-        trades = self.trades.sort_values('time')
+        trades = trades.sort_values('time')
         assert trades.time.is_monotonic
         assert not quotes.time.is_monotonic
         with pytest.raises(ValueError):
@@ -565,26 +596,25 @@ def test_non_sorted(self):
                        on='time',
                        by='ticker')
 
-        quotes = self.quotes.sort_values('time')
+        quotes = quotes.sort_values('time')
         assert trades.time.is_monotonic
         assert quotes.time.is_monotonic
 
         # ok, though has dupes
-        merge_asof(trades, self.quotes,
+        merge_asof(trades, quotes,
                    on='time',
                    by='ticker')
 
-    def test_tolerance(self):
+    def test_tolerance(self, trades, quotes, tolerance):
 
-        trades = self.trades
-        quotes = self.quotes
+        trades = trades
+        quotes = quotes
 
         result = merge_asof(trades, quotes,
                             on='time',
                             by='ticker',
                             tolerance=Timedelta('1day'))
-        expected = self.tolerance
-        assert_frame_equal(result, expected)
+        assert_frame_equal(result, tolerance)
 
     def test_tolerance_forward(self):
         # GH14887
@@ -641,11 +671,11 @@ def test_tolerance_tz(self):
              'value2': list("BCDEE")})
         assert_frame_equal(result, expected)
 
-    def test_index_tolerance(self):
+    def test_index_tolerance(self, trades, quotes, tolerance):
         # GH 15135
-        expected = self.tolerance.set_index('time')
-        trades = self.trades.set_index('time')
-        quotes = self.quotes.set_index('time')
+        expected = tolerance.set_index('time')
+        trades = trades.set_index('time')
+        quotes = quotes.set_index('time')
 
         result = pd.merge_asof(trades, quotes,
                                left_index=True,
@@ -654,13 +684,13 @@ def test_index_tolerance(self):
                                tolerance=pd.Timedelta('1day'))
         assert_frame_equal(result, expected)
 
-    def test_allow_exact_matches(self):
+    def test_allow_exact_matches(self, trades, quotes, allow_exact_matches):
 
-        result = merge_asof(self.trades, self.quotes,
+        result = merge_asof(trades, quotes,
                             on='time',
                             by='ticker',
                             allow_exact_matches=False)
-        expected = self.allow_exact_matches
+        expected = allow_exact_matches
         assert_frame_equal(result, expected)
 
     def test_allow_exact_matches_forward(self):
@@ -695,14 +725,16 @@ def test_allow_exact_matches_nearest(self):
                                allow_exact_matches=False)
         assert_frame_equal(result, expected)
 
-    def test_allow_exact_matches_and_tolerance(self):
+    def test_allow_exact_matches_and_tolerance(
+            self, trades, quotes,
+            allow_exact_matches_and_tolerance):
 
-        result = merge_asof(self.trades, self.quotes,
+        result = merge_asof(trades, quotes,
                             on='time',
                             by='ticker',
                             tolerance=Timedelta('100ms'),
                             allow_exact_matches=False)
-        expected = self.allow_exact_matches_and_tolerance
+        expected = allow_exact_matches_and_tolerance
         assert_frame_equal(result, expected)
 
     def test_allow_exact_matches_and_tolerance2(self):
diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py
index ff914273d47b1..195faf8aad039 100644
--- a/pandas/tests/reshape/test_tile.py
+++ b/pandas/tests/reshape/test_tile.py
@@ -287,10 +287,10 @@ def test_round_frac(self):
         result = tmod._round_frac(0.000123456, precision=2)
         assert result == 0.00012
 
-    def test_qcut_binning_issues(self):
+    def test_qcut_binning_issues(self, datapath):
         # #1978, 1979
-        path = os.path.join(tm.get_data_path(), 'cut_data.csv')
-        arr = np.loadtxt(path)
+        cut_file = datapath(os.path.join('reshape', 'data', 'cut_data.csv'))
+        arr = np.loadtxt(cut_file)
 
         result = qcut(arr, 20)
 
diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py
index 1c878604b11a2..a3d91e2dcc75c 100644
--- a/pandas/tests/util/test_testing.py
+++ b/pandas/tests/util/test_testing.py
@@ -739,3 +739,15 @@ def test_locale(self):
         # GH9744
         locales = tm.get_locales()
         assert len(locales) >= 1
+
+
+def test_datapath_missing(datapath, request):
+    if not request.config.getoption("--strict-data-files"):
+        pytest.skip("Need to set '--strict-data-files'")
+
+    with pytest.raises(ValueError):
+        datapath('not_a_file')
+
+    result = datapath('data/iris.csv')
+    expected = 'pandas/tests/data/iris.csv'
+    assert result == expected
diff --git a/setup.cfg b/setup.cfg
index 942b2b0a1a0bf..6f668776d35e3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -32,3 +32,4 @@ markers =
     slow: mark a test as slow
     network: mark a test as network
     high_memory: mark a test as a high-memory only
+addopts = --strict-data-files
\ No newline at end of file

From 70221525cc368377b33205c259faf9dbba0e4840 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sun, 25 Feb 2018 14:30:24 -0600
Subject: [PATCH 04/31] More fixtures

---
 pandas/tests/io/conftest.py    | 30 ++++++------------------------
 pandas/tests/io/test_common.py | 21 +++++++++++----------
 2 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index bcf8471d1af45..896690871b36e 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -11,36 +11,21 @@ def parser_data(request):
 
 
 @pytest.fixture
-def tips_file(request, parser_data):
+def tips_file(datapath):
     """Path to the tips dataset"""
-    path = os.path.join(parser_data, 'tips.csv')
-    if not os.path.exists(path):
-        if request.config.getoption("--strict-data-files"):
-            raise ValueError("Failed.")
-        else:
-            pytest.skip("Data files not included in pandas distribution.")
-
-    return path
+    return datapath(os.path.join('io', 'parser', 'data', 'tips.csv'))
 
 
 @pytest.fixture
-def jsonl_file(parser_data):
+def jsonl_file(datapath):
     """Path a JSONL dataset"""
-    path = os.path.join(parser_data, 'items.jsonl')
-    if not os.path.exists(path):
-        pytest.skip("Data files not included in pandas distribution.")
-
-    return path
+    return datapath(os.path.join('io', 'parser', 'data', 'items.jsonl'))
 
 
 @pytest.fixture
-def salaries_table(parser_data):
+def salaries_table(datapath):
     """DataFrame with the salaries dataset"""
-    path = os.path.join(parser_data, 'salaries.csv')
-    if not os.path.exists(path):
-        pytest.skip("Data files not included in pandas distribution.")
-
-    return read_table(path)
+    return datapath(os.path.join('io', 'parser', 'data', 'salaries.csv'))
 
 
 @pytest.fixture
@@ -71,9 +56,6 @@ def s3_resource(tips_file, jsonl_file):
 
     def add_tips_files(bucket_name):
         for s3_key, file_name in test_s3_files:
-            if not os.path.exists(file_name):
-                pytest.skip("Data files not included in pandas distribution.")
-
             with open(file_name, 'rb') as f:
                 conn.Bucket(bucket_name).put_object(
                     Key=s3_key,
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 515254e43bbb3..7789525182521 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -234,13 +234,14 @@ def test_write_fspath_hdf5(self):
         tm.assert_frame_equal(result, expected)
 
 
-class TestMMapWrapper(object):
+@pytest.fixture
+def mmap_file(datapath):
+    return datapath(os.path.join('io', 'data', 'test_mmap.csv'))
+
 
-    def setup_method(self, method):
-        self.mmap_file = os.path.join(tm.get_data_path(),
-                                      'test_mmap.csv')
+class TestMMapWrapper(object):
 
-    def test_constructor_bad_file(self):
+    def test_constructor_bad_file(self, mmap_file):
         non_file = StringIO('I am not a file')
         non_file.fileno = lambda: -1
 
@@ -254,15 +255,15 @@ def test_constructor_bad_file(self):
 
         tm.assert_raises_regex(err, msg, common.MMapWrapper, non_file)
 
-        target = open(self.mmap_file, 'r')
+        target = open(mmap_file, 'r')
         target.close()
 
         msg = "I/O operation on closed file"
         tm.assert_raises_regex(
             ValueError, msg, common.MMapWrapper, target)
 
-    def test_get_attr(self):
-        with open(self.mmap_file, 'r') as target:
+    def test_get_attr(self, mmap_file):
+        with open(mmap_file, 'r') as target:
             wrapper = common.MMapWrapper(target)
 
         attrs = dir(wrapper.mmap)
@@ -275,8 +276,8 @@ def test_get_attr(self):
 
         assert not hasattr(wrapper, 'foo')
 
-    def test_next(self):
-        with open(self.mmap_file, 'r') as target:
+    def test_next(self, mmap_file):
+        with open(mmap_file, 'r') as target:
             wrapper = common.MMapWrapper(target)
             lines = target.readlines()
 

From 151ffdad453e5f563ba6dd25d986a6c55ef56620 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 26 Mar 2018 15:59:16 -0500
Subject: [PATCH 05/31] Updated html

---
 pandas/tests/conftest.py     |  4 +--
 pandas/tests/io/test_html.py | 69 +++++++++++++++++++-----------------
 2 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py
index 3b49d9f213909..3d83acc078f24 100644
--- a/pandas/tests/conftest.py
+++ b/pandas/tests/conftest.py
@@ -21,8 +21,8 @@ def datapath(request):
     ValueError
         If the path doesn't exist and the --strict-data-files option is set.
     """
-    def deco(path):
-        path = os.path.join('pandas', 'tests', os.path.join(path))
+    def deco(*args):
+        path = os.path.join('pandas', 'tests', *args)
         if not os.path.exists(path):
             if request.config.getoption("--strict-data-files"):
                 raise ValueError("Failed.")
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 79b9a3715efd2..648845038f4a1 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -25,8 +25,7 @@
 import pandas.util._test_decorators as td
 from pandas.util.testing import makeCustomDataframe as mkdf, network
 
-
-DATA_PATH = tm.get_data_path()
+HERE = os.path.dirname(__file__)
 
 
 def assert_framelist_equal(list1, list2, *args, **kwargs):
@@ -44,11 +43,11 @@ def assert_framelist_equal(list1, list2, *args, **kwargs):
 
 
 @td.skip_if_no('bs4')
-def test_bs4_version_fails(monkeypatch):
+def test_bs4_version_fails(monkeypatch, datapath):
     import bs4
     monkeypatch.setattr(bs4, '__version__', '4.2')
     with tm.assert_raises_regex(ValueError, "minimum version"):
-        read_html(os.path.join(DATA_PATH, "spam.html"), flavor='bs4')
+        read_html(datapath("io", "data", "spam.html"), flavor='bs4')
 
 
 def test_invalid_flavor():
@@ -59,8 +58,8 @@ def test_invalid_flavor():
 
 @td.skip_if_no('bs4')
 @td.skip_if_no('lxml')
-def test_same_ordering():
-    filename = os.path.join(DATA_PATH, 'valid_markup.html')
+def test_same_ordering(datapath):
+    filename = datapath('io', 'data', 'valid_markup.html')
     dfs_lxml = read_html(filename, index_col=0, flavor=['lxml'])
     dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4'])
     assert_framelist_equal(dfs_lxml, dfs_bs4)
@@ -72,11 +71,14 @@ def test_same_ordering():
     pytest.param('lxml', marks=pytest.mark.skipif(
         not td.safe_import('lxml'), reason='No lxml'))], scope="class")
 class TestReadHtml(object):
-    spam_data = os.path.join(DATA_PATH, 'spam.html')
-    spam_data_kwargs = {}
-    if PY3:
-        spam_data_kwargs['encoding'] = 'UTF-8'
-    banklist_data = os.path.join(DATA_PATH, 'banklist.html')
+
+    @pytest.fixture(autouse=True)
+    def set_files(self, datapath):
+        self.spam_data = datapath('io', 'data', 'spam.html')
+        self.spam_data_kwargs = {}
+        if PY3:
+            self.spam_data_kwargs['encoding'] = 'UTF-8'
+        self.banklist_data = datapath("io", "data", "banklist.html")
 
     @pytest.fixture(autouse=True, scope="function")
     def set_defaults(self, flavor, request):
@@ -272,7 +274,8 @@ def test_invalid_url(self):
     @pytest.mark.slow
     def test_file_url(self):
         url = self.banklist_data
-        dfs = self.read_html(file_path_to_url(url), 'First',
+        dfs = self.read_html(file_path_to_url(os.path.abspath(url)),
+                             'First',
                              attrs={'id': 'table'})
         assert isinstance(dfs, list)
         for df in dfs:
@@ -326,7 +329,7 @@ def test_multiindex_header_index_skiprows(self):
     @pytest.mark.slow
     def test_regex_idempotency(self):
         url = self.banklist_data
-        dfs = self.read_html(file_path_to_url(url),
+        dfs = self.read_html(file_path_to_url(os.path.abspath(url)),
                              match=re.compile(re.compile('Florida')),
                              attrs={'id': 'table'})
         assert isinstance(dfs, list)
@@ -352,9 +355,9 @@ def test_python_docs_table(self):
         assert sorted(zz) == sorted(['Repo', 'What'])
 
     @pytest.mark.slow
-    def test_thousands_macau_stats(self):
+    def test_thousands_macau_stats(self, datapath):
         all_non_nan_table_index = -2
-        macau_data = os.path.join(DATA_PATH, 'macau.html')
+        macau_data = datapath("io", "data", "macau.html")
         dfs = self.read_html(macau_data, index_col=0,
                              attrs={'class': 'style1'})
         df = dfs[all_non_nan_table_index]
@@ -362,9 +365,9 @@ def test_thousands_macau_stats(self):
         assert not any(s.isna().any() for _, s in df.iteritems())
 
     @pytest.mark.slow
-    def test_thousands_macau_index_col(self):
+    def test_thousands_macau_index_col(self, datapath):
         all_non_nan_table_index = -2
-        macau_data = os.path.join(DATA_PATH, 'macau.html')
+        macau_data = datapath('io', 'data', 'macau.html')
         dfs = self.read_html(macau_data, index_col=0, header=0)
         df = dfs[all_non_nan_table_index]
 
@@ -491,8 +494,8 @@ def test_countries_municipalities(self):
         res2 = self.read_html(data2, header=0)
         assert_framelist_equal(res1, res2)
 
-    def test_nyse_wsj_commas_table(self):
-        data = os.path.join(DATA_PATH, 'nyse_wsj.html')
+    def test_nyse_wsj_commas_table(self, datapath):
+        data = datapath('io', 'data', 'nyse_wsj.html')
         df = self.read_html(data, index_col=0, header=0,
                             attrs={'class': 'mdcTable'})[0]
 
@@ -503,7 +506,7 @@ def test_nyse_wsj_commas_table(self):
         tm.assert_index_equal(df.columns, columns)
 
     @pytest.mark.slow
-    def test_banklist_header(self):
+    def test_banklist_header(self, datapath):
         from pandas.io.html import _remove_whitespace
 
         def try_remove_ws(x):
@@ -514,7 +517,7 @@ def try_remove_ws(x):
 
         df = self.read_html(self.banklist_data, 'Metcalf',
                             attrs={'id': 'table'})[0]
-        ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'),
+        ground_truth = read_csv(datapath('io', 'data', 'banklist.csv'),
                                 converters={'Updated Date': Timestamp,
                                             'Closing Date': Timestamp})
         assert df.shape == ground_truth.shape
@@ -631,19 +634,19 @@ def test_parse_dates_combine(self):
         newdf = DataFrame({'datetime': raw_dates})
         tm.assert_frame_equal(newdf, res[0])
 
-    def test_computer_sales_page(self):
-        data = os.path.join(DATA_PATH, 'computer_sales_page.html')
+    def test_computer_sales_page(self, datapath):
+        data = datapath('io', 'data', 'computer_sales_page.html')
         with tm.assert_raises_regex(ParserError,
                                     r"Passed header=\[0,1\] are "
                                     r"too many rows for this "
                                     r"multi_index of columns"):
             self.read_html(data, header=[0, 1])
 
-        data = os.path.join(DATA_PATH, 'computer_sales_page.html')
+        data = datapath('io', 'data', 'computer_sales_page.html')
         assert self.read_html(data, header=[1, 2])
 
-    def test_wikipedia_states_table(self):
-        data = os.path.join(DATA_PATH, 'wikipedia_states.html')
+    def test_wikipedia_states_table(self, datapath):
+        data = datapath('io', 'data', 'wikipedia_states.html')
         assert os.path.isfile(data), '%r is not a file' % data
         assert os.path.getsize(data), '%r is an empty file' % data
         result = self.read_html(data, 'Arizona', header=1)[0]
@@ -757,15 +760,15 @@ def test_multiple_header_rows(self):
         html_df = read_html(html, )[0]
         tm.assert_frame_equal(expected_df, html_df)
 
-    def test_works_on_valid_markup(self):
-        filename = os.path.join(DATA_PATH, 'valid_markup.html')
+    def test_works_on_valid_markup(self, datapath):
+        filename = datapath('io', 'data', 'valid_markup.html')
         dfs = self.read_html(filename, index_col=0)
         assert isinstance(dfs, list)
         assert isinstance(dfs[0], DataFrame)
 
     @pytest.mark.slow
-    def test_fallback_success(self):
-        banklist_data = os.path.join(DATA_PATH, 'banklist.html')
+    def test_fallback_success(self, datapath):
+        banklist_data = datapath('io', 'data', 'banklist.html')
         self.read_html(banklist_data, '.*Water.*', flavor=['lxml', 'html5lib'])
 
     def test_to_html_timestamp(self):
@@ -809,7 +812,7 @@ def test_displayed_only(self, displayed_only, exp0, exp1):
             assert len(dfs) == 1  # Should not parse hidden table
 
     @pytest.mark.parametrize("f", glob.glob(
-        os.path.join(DATA_PATH, 'html_encoding', '*.html')))
+        os.path.join(HERE, 'data', 'html_encoding', '*.html')))
     def test_encode(self, f):
         _, encoding = os.path.splitext(os.path.basename(f))[0].split('_')
 
@@ -879,7 +882,7 @@ def seekable(self):
         assert self.read_html(bad)
 
     @pytest.mark.slow
-    def test_importcheck_thread_safety(self):
+    def test_importcheck_thread_safety(self, datapath):
         # see gh-16928
 
         class ErrorThread(threading.Thread):
@@ -894,7 +897,7 @@ def run(self):
         # force import check by reinitalising global vars in html.py
         reload(pandas.io.html)
 
-        filename = os.path.join(DATA_PATH, 'valid_markup.html')
+        filename = datapath('io', 'data', 'valid_markup.html')
         helper_thread1 = ErrorThread(target=self.read_html, args=(filename,))
         helper_thread2 = ErrorThread(target=self.read_html, args=(filename,))
 

From d9d65706706d1c1e7b0793f11557abf88570f3a0 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 26 Mar 2018 17:27:12 -0500
Subject: [PATCH 06/31] Remove os.path.joins

---
 pandas/tests/conftest.py                      |  3 +-
 pandas/tests/io/conftest.py                   | 14 ++------
 pandas/tests/io/test_common.py                | 34 +++++++++----------
 pandas/tests/reshape/merge/test_merge_asof.py | 33 +++++++-----------
 pandas/tests/test_base.py                     |  2 --
 5 files changed, 34 insertions(+), 52 deletions(-)

diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py
index 3d83acc078f24..2c246fb9d371d 100644
--- a/pandas/tests/conftest.py
+++ b/pandas/tests/conftest.py
@@ -27,6 +27,7 @@ def deco(*args):
             if request.config.getoption("--strict-data-files"):
                 raise ValueError("Failed.")
             else:
-                pytest.skip("Data files not included in pandas distribution.")
+                pytest.skip("{} not included in pandas distribution."
+                            .format(path))
         return path
     return deco
diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py
index 896690871b36e..7623587803b41 100644
--- a/pandas/tests/io/conftest.py
+++ b/pandas/tests/io/conftest.py
@@ -1,31 +1,23 @@
-import os
-
 import pytest
 from pandas.io.parsers import read_table
-from pandas.util import testing as tm
-
-
-@pytest.fixture
-def parser_data(request):
-    return os.path.join(tm.get_data_path(), '..', 'parser', 'data')
 
 
 @pytest.fixture
 def tips_file(datapath):
     """Path to the tips dataset"""
-    return datapath(os.path.join('io', 'parser', 'data', 'tips.csv'))
+    return datapath('io', 'parser', 'data', 'tips.csv')
 
 
 @pytest.fixture
 def jsonl_file(datapath):
     """Path a JSONL dataset"""
-    return datapath(os.path.join('io', 'parser', 'data', 'items.jsonl'))
+    return datapath('io', 'parser', 'data', 'items.jsonl')
 
 
 @pytest.fixture
 def salaries_table(datapath):
     """DataFrame with the salaries dataset"""
-    return datapath(os.path.join('io', 'parser', 'data', 'salaries.csv'))
+    return read_table(datapath('io', 'parser', 'data', 'salaries.csv'))
 
 
 @pytest.fixture
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 7789525182521..37097033f0c75 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -149,29 +149,27 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext):
             reader(path)
 
     @pytest.mark.parametrize('reader, module, path', [
-        (pd.read_csv, 'os', os.path.join(HERE, 'data', 'iris.csv')),
-        (pd.read_table, 'os', os.path.join(HERE, 'data', 'iris.csv')),
-        (pd.read_fwf, 'os', os.path.join(HERE, 'data',
-                                         'fixed_width_format.txt')),
-        (pd.read_excel, 'xlrd', os.path.join(HERE, 'data', 'test1.xlsx')),
-        (pd.read_feather, 'feather', os.path.join(HERE, 'data',
-                                                  'feather-0_3_1.feather')),
-        (pd.read_hdf, 'tables', os.path.join(HERE, 'data', 'legacy_hdf',
+        (pd.read_csv, 'os', 'iris.csv'),
+        (pd.read_table, 'os', 'iris.csv'),
+        (pd.read_fwf, 'os', 'fixed_width_format.txt'),
+        (pd.read_excel, 'xlrd', 'test1.xlsx'),
+        (pd.read_feather, 'feather', 'feather-0_3_1.feather'),
+        (pd.read_hdf, 'tables', os.path.join('legacy_hdf',
                                              'datetimetz_object.h5')),
-        (pd.read_stata, 'os', os.path.join(HERE, 'data', 'stata10_115.dta')),
-        (pd.read_sas, 'os', os.path.join(HERE, 'sas', 'data',
+        (pd.read_stata, 'os', 'stata10_115.dta'),
+        # in the function, we go down to tests/io/data
+        # so step back up a level before going into sas
+        (pd.read_sas, 'os', os.path.join('..', 'sas', 'data',
                                          'test1.sas7bdat')),
-        (pd.read_json, 'os', os.path.join(HERE, 'json', 'data',
+        (pd.read_json, 'os', os.path.join('..', 'json', 'data',
                                           'tsframe_v012.json')),
-        (pd.read_msgpack, 'os', os.path.join(HERE, 'msgpack', 'data',
+        (pd.read_msgpack, 'os', os.path.join('..', 'msgpack', 'data',
                                              'frame.mp')),
-        (pd.read_pickle, 'os', os.path.join(HERE, 'data',
-                                            'categorical_0_14_1.pickle')),
+        (pd.read_pickle, 'os', 'categorical_0_14_1.pickle'),
     ])
-    def test_read_fspath_all(self, reader, module, path):
+    def test_read_fspath_all(self, reader, module, path, datapath):
         pytest.importorskip(module)
-        if not os.path.exists(path):
-            pytest.skip("Data files not included in pandas distribution.")
+        path = datapath('io', 'data', path)
 
         mypath = CustomFSPath(path)
         result = reader(mypath)
@@ -236,7 +234,7 @@ def test_write_fspath_hdf5(self):
 
 @pytest.fixture
 def mmap_file(datapath):
-    return datapath(os.path.join('io', 'data', 'test_mmap.csv'))
+    return datapath('io', 'data', 'test_mmap.csv')
 
 
 class TestMMapWrapper(object):
diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
index 622bc787c7bf8..bcb27e49f28fe 100644
--- a/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -1,5 +1,3 @@
-import os
-
 import pytest
 
 import pytz
@@ -23,57 +21,52 @@ def read_data(path, dedupe=False):
 
 @pytest.fixture
 def trades(datapath):
-    return read_data(datapath(os.path.join('reshape', 'merge', 'data',
-                                           'trades.csv')))
+    return read_data(datapath('reshape', 'merge', 'data', 'trades.csv'))
 
 
 @pytest.fixture
 def trades2(datapath):
-    return read_data(datapath(os.path.join('reshape', 'merge', 'data',
-                                           'trades2.csv')))
+    return read_data(datapath('reshape', 'merge', 'data', 'trades2.csv'))
 
 
 @pytest.fixture
 def quotes(datapath):
-    return read_data(datapath(os.path.join('reshape', 'merge', 'data',
-                                           'quotes.csv')), dedupe=True)
+    return read_data(datapath('reshape', 'merge', 'data', 'quotes.csv'),
+                     dedupe=True)
 
 
 @pytest.fixture
 def quotes2(datapath):
-    return read_data(datapath(os.path.join('reshape', 'merge', 'data',
-                                           'quotes2.csv')), dedupe=True)
+    return read_data(datapath('reshape', 'merge', 'data', 'quotes2.csv'),
+                     dedupe=True)
 
 
 @pytest.fixture
 def asof(datapath):
-    return read_data(datapath(os.path.join('reshape', 'merge', 'data',
-                                           'asof.csv')))
+    return read_data(datapath('reshape', 'merge', 'data', 'asof.csv'))
 
 
 @pytest.fixture
 def asof2(datapath):
-    return read_data(datapath(os.path.join('reshape', 'merge', 'data',
-                                           'asof2.csv')))
+    return read_data(datapath('reshape', 'merge', 'data', 'asof2.csv'))
 
 
 @pytest.fixture
 def tolerance(datapath):
-    return read_data(datapath(os.path.join('reshape', 'merge', 'data',
-                                           'tolerance.csv')))
+    return read_data(datapath('reshape', 'merge', 'data', 'tolerance.csv'))
 
 
 @pytest.fixture
 def allow_exact_matches(datapath):
-    return read_data(datapath(os.path.join('reshape', 'merge', 'data',
-                                           'allow_exact_matches.csv')))
+    return read_data(datapath('reshape', 'merge', 'data',
+                              'allow_exact_matches.csv'))
 
 
 @pytest.fixture
 def allow_exact_matches_and_tolerance(datapath):
-    return read_data(datapath(os.path.join(
+    return read_data(datapath(
         'reshape', 'merge', 'data', 'allow_exact_matches_and_tolerance.csv'
-    )))
+    ))
 
 
 class TestAsOfMerge(object):
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index 3e6b347991fa4..c4c02c0bf6f17 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -519,8 +519,6 @@ def test_value_counts_unique_nunique_null(self):
                                     index=expected_index[9:1:-1],
                                     dtype='int64', name='a')
 
-                if isinstance(o, pd.PeriodIndex):
-                    import pdb; pdb.set_trace()
                 result_s_na = o.value_counts(dropna=False)
                 tm.assert_series_equal(result_s_na, expected_s_na)
                 assert result_s_na.index.name is None

From 584959114cf02d63b235b5d01928e2ce22d25d4d Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 26 Mar 2018 21:04:38 -0500
Subject: [PATCH 07/31] More modules

---
 pandas/tests/io/test_pytables.py             | 23 ++++++++++----------
 pandas/tests/io/test_sql.py                  |  4 ++--
 pandas/tests/tseries/offsets/test_offsets.py | 16 +++++++-------
 3 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py
index e690b1e302d8b..2db19bf373872 100644
--- a/pandas/tests/io/test_pytables.py
+++ b/pandas/tests/io/test_pytables.py
@@ -4416,28 +4416,27 @@ def f():
                 store.select('df')
             tm.assert_raises_regex(ClosedFileError, 'file is not open', f)
 
-    def test_pytables_native_read(self):
-
+    def test_pytables_native_read(self, datapath):
         with ensure_clean_store(
-                tm.get_data_path('legacy_hdf/pytables_native.h5'),
+                datapath('io', 'data', 'legacy_hdf/pytables_native.h5'),
                 mode='r') as store:
             d2 = store['detector/readout']
             assert isinstance(d2, DataFrame)
 
     @pytest.mark.skipif(PY35 and is_platform_windows(),
                         reason="native2 read fails oddly on windows / 3.5")
-    def test_pytables_native2_read(self):
+    def test_pytables_native2_read(self, datapath):
         with ensure_clean_store(
-                tm.get_data_path('legacy_hdf/pytables_native2.h5'),
+                datapath('io', 'data', 'legacy_hdf', 'pytables_native2.h5'),
                 mode='r') as store:
             str(store)
             d1 = store['detector']
             assert isinstance(d1, DataFrame)
 
-    def test_legacy_table_read(self):
+    def test_legacy_table_read(self, datapath):
         # legacy table types
         with ensure_clean_store(
-                tm.get_data_path('legacy_hdf/legacy_table.h5'),
+                datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5'),
                 mode='r') as store:
 
             with catch_warnings(record=True):
@@ -5082,7 +5081,7 @@ def test_fspath(self):
             with pd.HDFStore(path) as store:
                 assert os.fspath(store) == str(path)
 
-    def test_read_py2_hdf_file_in_py3(self):
+    def test_read_py2_hdf_file_in_py3(self, datapath):
         # GH 16781
 
         # tests reading a PeriodIndex DataFrame written in Python2 in Python3
@@ -5097,8 +5096,8 @@ def test_read_py2_hdf_file_in_py3(self):
             ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
 
         with ensure_clean_store(
-                tm.get_data_path(
-                    'legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5'),
+                datapath('io', 'data', 'legacy_hdf',
+                         'periodindex_0.20.1_x86_64_darwin_2.7.13.h5'),
                 mode='r') as store:
             result = store['p']
             assert_frame_equal(result, expected)
@@ -5495,14 +5494,14 @@ def test_store_timezone(self):
 
             assert_frame_equal(result, df)
 
-    def test_legacy_datetimetz_object(self):
+    def test_legacy_datetimetz_object(self, datapath):
         # legacy from < 0.17.0
         # 8260
         expected = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
                                   B=Timestamp('20130603', tz='CET')),
                              index=range(5))
         with ensure_clean_store(
-                tm.get_data_path('legacy_hdf/datetimetz_object.h5'),
+                datapath('io', 'data', 'legacy_hdf', 'datetimetz_object.h5'),
                 mode='r') as store:
             result = store['df']
             assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 4530cc9d2fba9..ca9b1749ecf8a 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -253,9 +253,9 @@ def _get_exec(self):
         else:
             return self.conn.cursor()
 
-    def _load_iris_data(self):
+    def _load_iris_data(self, datapath):
         import io
-        iris_csv_file = os.path.join(tm.get_data_path(), 'iris.csv')
+        iris_csv_file = datapath('io', 'data', 'iris.csv')
 
         self.drop_table('iris')
         self._get_exec().execute(SQL_STRINGS['create_iris'][self.flavor])
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index d96ebab615d12..03e2b42022967 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -455,14 +455,15 @@ def test_add(self, offset_types, tz):
         assert isinstance(result, Timestamp)
         assert result == expected_localize
 
-    def test_pickle_v0_15_2(self):
+    def test_pickle_v0_15_2(self, datapath):
         offsets = {'DateOffset': DateOffset(years=1),
                    'MonthBegin': MonthBegin(1),
                    'Day': Day(1),
                    'YearBegin': YearBegin(1),
                    'Week': Week(1)}
-        pickle_path = os.path.join(tm.get_data_path(),
-                                   'dateoffset_0_15_2.pickle')
+
+        pickle_path = datapath('tseries', 'offsets', 'data',
+                               'dateoffset_0_15_2.pickle')
         # This code was executed once on v0.15.2 to generate the pickle:
         # with open(pickle_path, 'wb') as f: pickle.dump(offsets, f)
         #
@@ -1848,12 +1849,11 @@ def _check_roundtrip(obj):
         _check_roundtrip(self.offset2)
         _check_roundtrip(self.offset * 2)
 
-    def test_pickle_compat_0_14_1(self):
+    def test_pickle_compat_0_14_1(self, datapath):
+        # /Users/taugspurger/sandbox/pandas-ip/pandas/pandas/tests/tseries/offsets/test_offsets.py
         hdays = [datetime(2013, 1, 1) for ele in range(4)]
-
-        pth = tm.get_data_path()
-
-        cday0_14_1 = read_pickle(os.path.join(pth, 'cday-0.14.1.pickle'))
+        pth = datapath('tseries', 'offsets', 'data', 'cday-0.14.1.pickle')
+        cday0_14_1 = read_pickle(pth)
         cday = CDay(holidays=hdays)
         assert cday == cday0_14_1
 

From 31fb0b634b14e204fe1db87f7ee51065664692f4 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 06:05:36 -0500
Subject: [PATCH 08/31] TST: Use datapath fixture in indexes and io tests

---
 pandas/tests/indexes/test_multi.py        |  8 ++--
 pandas/tests/io/formats/test_format.py    |  4 +-
 pandas/tests/io/json/test_compression.py  |  6 +--
 pandas/tests/io/json/test_pandas.py       |  8 ++--
 pandas/tests/io/parser/common.py          | 23 +++++-----
 pandas/tests/io/parser/compression.py     |  4 +-
 pandas/tests/io/parser/dtypes.py          |  6 +--
 pandas/tests/io/parser/test_network.py    | 53 +++++++++++------------
 pandas/tests/io/parser/test_parsers.py    |  6 ++-
 pandas/tests/io/parser/test_textreader.py |  5 ++-
 pandas/tests/io/sas/test_sas7bdat.py      | 43 ++++++++----------
 pandas/tests/io/sas/test_xport.py         |  6 ++-
 pandas/tests/io/test_excel.py             |  8 ++--
 pandas/tests/io/test_packers.py           |  3 +-
 pandas/tests/io/test_stata.py             |  9 ++--
 15 files changed, 94 insertions(+), 98 deletions(-)

diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 34abf7052da8c..fa450a9262a82 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -1141,12 +1141,12 @@ def test_iter(self):
                     ('baz', 'two'), ('qux', 'one'), ('qux', 'two')]
         assert result == expected
 
-    def test_legacy_pickle(self):
+    def test_legacy_pickle(self, datapath):
         if PY3:
             pytest.skip("testing for legacy pickles not "
                         "support on py3")
 
-        path = tm.get_data_path('multiindex_v1.pickle')
+        path = datapath('indexes', 'data', 'multiindex_v1.pickle')
         obj = pd.read_pickle(path)
 
         obj2 = MultiIndex.from_tuples(obj.values)
@@ -1162,10 +1162,10 @@ def test_legacy_pickle(self):
         assert_almost_equal(res, exp)
         assert_almost_equal(exp, exp2)
 
-    def test_legacy_v2_unpickle(self):
+    def test_legacy_v2_unpickle(self, datapath):
 
         # 0.7.3 -> 0.8.0 format manage
-        path = tm.get_data_path('mindex_073.pickle')
+        path = datapath('indexes', 'data', 'mindex_073.pickle')
         obj = pd.read_pickle(path)
 
         obj2 = MultiIndex.from_tuples(obj.values)
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index 6c3b75cdfa6df..b3a04eeba0161 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -916,8 +916,8 @@ def test_unicode_problem_decoding_as_ascii(self):
         dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})})
         compat.text_type(dm.to_string())
 
-    def test_string_repr_encoding(self):
-        filepath = tm.get_data_path('unicode_series.csv')
+    def test_string_repr_encoding(self, datapath):
+        filepath = datapath('io', 'formats', 'data', 'unicode_series.csv')
         df = pd.read_csv(filepath, header=None, encoding='latin1')
         repr(df)
         repr(df[1])
diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py
index c9074ca49e5be..05ceace20f5a4 100644
--- a/pandas/tests/io/json/test_compression.py
+++ b/pandas/tests/io/json/test_compression.py
@@ -21,11 +21,11 @@ def test_compression_roundtrip(compression):
         assert_frame_equal(df, pd.read_json(result))
 
 
-def test_read_zipped_json():
-    uncompressed_path = tm.get_data_path("tsframe_v012.json")
+def test_read_zipped_json(datapath):
+    uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json")
     uncompressed_df = pd.read_json(uncompressed_path)
 
-    compressed_path = tm.get_data_path("tsframe_v012.json.zip")
+    compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip")
     compressed_df = pd.read_json(compressed_path, compression='zip')
 
     assert_frame_equal(uncompressed_df, compressed_df)
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 7e497c395266f..bcbac4400c953 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -37,8 +37,9 @@
 
 class TestPandasContainer(object):
 
-    def setup_method(self, method):
-        self.dirpath = tm.get_data_path()
+    @pytest.fixture(scope="function", autouse=True)
+    def setup(self, datapath):
+        self.dirpath = datapath("io", "json", "data")
 
         self.ts = tm.makeTimeSeries()
         self.ts.name = 'ts'
@@ -59,7 +60,8 @@ def setup_method(self, method):
         self.mixed_frame = _mixed_frame.copy()
         self.categorical = _cat_frame.copy()
 
-    def teardown_method(self, method):
+        yield
+
         del self.dirpath
 
         del self.ts
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index cf7ec9e2f2652..2c363f5f4adff 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -635,21 +635,19 @@ def test_read_csv_parse_simple_list(self):
         tm.assert_frame_equal(df, expected)
 
     @tm.network
-    def test_url(self):
+    def test_url(self, datapath):
         # HTTP(S)
         url = ('https://raw.github.com/pandas-dev/pandas/master/'
                'pandas/tests/io/parser/data/salaries.csv')
         url_table = self.read_table(url)
-        dirpath = tm.get_data_path()
-        localtable = os.path.join(dirpath, 'salaries.csv')
+        localtable = datapath('io', 'parser', 'data', 'salaries.csv')
         local_table = self.read_table(localtable)
         tm.assert_frame_equal(url_table, local_table)
         # TODO: ftp testing
 
     @pytest.mark.slow
-    def test_file(self):
-        dirpath = tm.get_data_path()
-        localtable = os.path.join(dirpath, 'salaries.csv')
+    def test_file(self, datapath):
+        localtable = datapath('io', 'parser', 'data', 'salaries.csv')
         local_table = self.read_table(localtable)
 
         try:
@@ -739,8 +737,8 @@ def test_utf16_bom_skiprows(self):
 
                     tm.assert_frame_equal(result, expected)
 
-    def test_utf16_example(self):
-        path = tm.get_data_path('utf16_ex.txt')
+    def test_utf16_example(self, datapath):
+        path = datapath('io', 'parser', 'data', 'utf16_ex.txt')
 
         # it works! and is the right length
         result = self.read_table(path, encoding='utf-16')
@@ -751,8 +749,8 @@ def test_utf16_example(self):
             result = self.read_table(buf, encoding='utf-16')
             assert len(result) == 50
 
-    def test_unicode_encoding(self):
-        pth = tm.get_data_path('unicode_series.csv')
+    def test_unicode_encoding(self, datapath):
+        pth = datapath('io', 'parser', 'data', 'unicode_series.csv')
 
         result = self.read_csv(pth, header=None, encoding='latin-1')
         result = result.set_index(0)
@@ -1499,10 +1497,9 @@ def test_internal_eof_byte_to_file(self):
             result = self.read_csv(path)
             tm.assert_frame_equal(result, expected)
 
-    def test_sub_character(self):
+    def test_sub_character(self, datapath):
         # see gh-16893
-        dirpath = tm.get_data_path()
-        filename = os.path.join(dirpath, "sub_char.csv")
+        filename = datapath('io', 'parser', 'data', 'sub_char.csv')
 
         expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"])
         result = self.read_csv(filename)
diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py
index 01c6620e50d37..48b2cedb63811 100644
--- a/pandas/tests/io/parser/compression.py
+++ b/pandas/tests/io/parser/compression.py
@@ -121,9 +121,9 @@ def test_read_csv_infer_compression(self):
 
         inputs[3].close()
 
-    def test_read_csv_compressed_utf16_example(self):
+    def test_read_csv_compressed_utf16_example(self, datapath):
         # GH18071
-        path = tm.get_data_path('utf16_ex_small.zip')
+        path = datapath('io', 'parser', 'data', 'utf16_ex_small.zip')
 
         result = self.read_csv(path, encoding='utf-16',
                                compression='zip', sep='\t')
diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py
index b91ce04673e29..8060ebf2fbcd4 100644
--- a/pandas/tests/io/parser/dtypes.py
+++ b/pandas/tests/io/parser/dtypes.py
@@ -125,9 +125,9 @@ def test_categorical_dtype_high_cardinality_numeric(self):
             np.sort(actual.a.cat.categories), ordered=True)
         tm.assert_frame_equal(actual, expected)
 
-    def test_categorical_dtype_encoding(self):
+    def test_categorical_dtype_encoding(self, datapath):
         # GH 10153
-        pth = tm.get_data_path('unicode_series.csv')
+        pth = datapath('io', 'parser', 'data', 'unicode_series.csv')
         encoding = 'latin-1'
         expected = self.read_csv(pth, header=None, encoding=encoding)
         expected[1] = Categorical(expected[1])
@@ -135,7 +135,7 @@ def test_categorical_dtype_encoding(self):
                                dtype={1: 'category'})
         tm.assert_frame_equal(actual, expected)
 
-        pth = tm.get_data_path('utf16_ex.txt')
+        pth = datapath('io', 'parser', 'data', 'utf16_ex.txt')
         encoding = 'utf-16'
         expected = self.read_table(pth, encoding=encoding)
         expected = expected.apply(Categorical)
diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index fdf45f307e953..e2243b8087a5b 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -48,10 +48,16 @@ def check_compressed_urls(salaries_table, compression, extension, mode,
     tm.assert_frame_equal(url_table, salaries_table)
 
 
+@pytest.fixture
+def tips_df(datapath):
+    """DataFrame with the tips dataset."""
+    return read_csv(datapath('io', 'parser', 'data', 'tips.csv'))
+
+
 @pytest.mark.usefixtures("s3_resource")
 class TestS3(object):
 
-    def test_parse_public_s3_bucket(self):
+    def test_parse_public_s3_bucket(self, tips_df):
         pytest.importorskip('s3fs')
         # more of an integration test due to the not-public contents portion
         # can probably mock this though.
@@ -60,45 +66,40 @@ def test_parse_public_s3_bucket(self):
                           ext, compression=comp)
             assert isinstance(df, DataFrame)
             assert not df.empty
-            tm.assert_frame_equal(read_csv(
-                tm.get_data_path('tips.csv')), df)
+            tm.assert_frame_equal(df, tips_df)
 
         # Read public file from bucket with not-public contents
         df = read_csv('s3://cant_get_it/tips.csv')
         assert isinstance(df, DataFrame)
         assert not df.empty
-        tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df)
+        tm.assert_frame_equal(df, tips_df)
 
-    def test_parse_public_s3n_bucket(self):
+    def test_parse_public_s3n_bucket(self, tips_df):
 
         # Read from AWS s3 as "s3n" URL
         df = read_csv('s3n://pandas-test/tips.csv', nrows=10)
         assert isinstance(df, DataFrame)
         assert not df.empty
-        tm.assert_frame_equal(read_csv(
-            tm.get_data_path('tips.csv')).iloc[:10], df)
+        tm.assert_frame_equal(tips_df.iloc[:10], df)
 
-    def test_parse_public_s3a_bucket(self):
+    def test_parse_public_s3a_bucket(self, tips_df):
         # Read from AWS s3 as "s3a" URL
         df = read_csv('s3a://pandas-test/tips.csv', nrows=10)
         assert isinstance(df, DataFrame)
         assert not df.empty
-        tm.assert_frame_equal(read_csv(
-            tm.get_data_path('tips.csv')).iloc[:10], df)
+        tm.assert_frame_equal(tips_df.iloc[:10], df)
 
-    def test_parse_public_s3_bucket_nrows(self):
+    def test_parse_public_s3_bucket_nrows(self, tips_df):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df = read_csv('s3://pandas-test/tips.csv' +
                           ext, nrows=10, compression=comp)
             assert isinstance(df, DataFrame)
             assert not df.empty
-            tm.assert_frame_equal(read_csv(
-                tm.get_data_path('tips.csv')).iloc[:10], df)
+            tm.assert_frame_equal(tips_df.iloc[:10], df)
 
-    def test_parse_public_s3_bucket_chunked(self):
+    def test_parse_public_s3_bucket_chunked(self, tips_df):
         # Read with a chunksize
         chunksize = 5
-        local_tips = read_csv(tm.get_data_path('tips.csv'))
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                                  chunksize=chunksize, compression=comp)
@@ -109,14 +110,13 @@ def test_parse_public_s3_bucket_chunked(self):
                 df = df_reader.get_chunk()
                 assert isinstance(df, DataFrame)
                 assert not df.empty
-                true_df = local_tips.iloc[
+                true_df = tips_df.iloc[
                     chunksize * i_chunk: chunksize * (i_chunk + 1)]
                 tm.assert_frame_equal(true_df, df)
 
-    def test_parse_public_s3_bucket_chunked_python(self):
+    def test_parse_public_s3_bucket_chunked_python(self, tips_df):
         # Read with a chunksize using the Python parser
         chunksize = 5
-        local_tips = read_csv(tm.get_data_path('tips.csv'))
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
                                  chunksize=chunksize, compression=comp,
@@ -127,36 +127,33 @@ def test_parse_public_s3_bucket_chunked_python(self):
                 df = df_reader.get_chunk()
                 assert isinstance(df, DataFrame)
                 assert not df.empty
-                true_df = local_tips.iloc[
+                true_df = tips_df.iloc[
                     chunksize * i_chunk: chunksize * (i_chunk + 1)]
                 tm.assert_frame_equal(true_df, df)
 
-    def test_parse_public_s3_bucket_python(self):
+    def test_parse_public_s3_bucket_python(self, tips_df):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
                           compression=comp)
             assert isinstance(df, DataFrame)
             assert not df.empty
-            tm.assert_frame_equal(read_csv(
-                tm.get_data_path('tips.csv')), df)
+            tm.assert_frame_equal(df, tips_df)
 
-    def test_infer_s3_compression(self):
+    def test_infer_s3_compression(self, tips_df):
         for ext in ['', '.gz', '.bz2']:
             df = read_csv('s3://pandas-test/tips.csv' + ext,
                           engine='python', compression='infer')
             assert isinstance(df, DataFrame)
             assert not df.empty
-            tm.assert_frame_equal(read_csv(
-                tm.get_data_path('tips.csv')), df)
+            tm.assert_frame_equal(df, tips_df)
 
-    def test_parse_public_s3_bucket_nrows_python(self):
+    def test_parse_public_s3_bucket_nrows_python(self, tips_df):
         for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
             df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python',
                           nrows=10, compression=comp)
             assert isinstance(df, DataFrame)
             assert not df.empty
-            tm.assert_frame_equal(read_csv(
-                tm.get_data_path('tips.csv')).iloc[:10], df)
+            tm.assert_frame_equal(tips_df.iloc[:10], df)
 
     def test_s3_fails(self):
         with pytest.raises(IOError):
diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py
index 7717102b64fc5..b6f13039641a2 100644
--- a/pandas/tests/io/parser/test_parsers.py
+++ b/pandas/tests/io/parser/test_parsers.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import os
+import pytest
 import pandas.util.testing as tm
 
 from pandas import read_csv, read_table, DataFrame
@@ -45,8 +46,9 @@ def read_table(self, *args, **kwargs):
     def float_precision_choices(self):
         raise com.AbstractMethodError(self)
 
-    def setup_method(self, method):
-        self.dirpath = tm.get_data_path()
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
+        self.dirpath = datapath('io', 'parser', 'data')
         self.csv1 = os.path.join(self.dirpath, 'test1.csv')
         self.csv2 = os.path.join(self.dirpath, 'test2.csv')
         self.xls1 = os.path.join(self.dirpath, 'test.xls')
diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py
index ab4c14034cd20..c1e0f1dc753e8 100644
--- a/pandas/tests/io/parser/test_textreader.py
+++ b/pandas/tests/io/parser/test_textreader.py
@@ -28,8 +28,9 @@
 
 class TestTextReader(object):
 
-    def setup_method(self, method):
-        self.dirpath = tm.get_data_path()
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
+        self.dirpath = datapath('io', 'parser', 'data')
         self.csv1 = os.path.join(self.dirpath, 'test1.csv')
         self.csv2 = os.path.join(self.dirpath, 'test2.csv')
         self.xls1 = os.path.join(self.dirpath, 'test.xls')
diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
index 5da347e47957c..ae40653c28f99 100644
--- a/pandas/tests/io/sas/test_sas7bdat.py
+++ b/pandas/tests/io/sas/test_sas7bdat.py
@@ -11,8 +11,9 @@
 
 class TestSAS7BDAT(object):
 
-    def setup_method(self, method):
-        self.dirpath = tm.get_data_path()
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
+        self.dirpath = datapath("io", "sas", "data")
         self.data = []
         self.test_ix = [list(range(1, 16)), [16]]
         for j in 1, 2:
@@ -123,9 +124,8 @@ def test_iterator_read_too_much(self):
         rdr.close()
 
 
-def test_encoding_options():
-    dirpath = tm.get_data_path()
-    fname = os.path.join(dirpath, "test1.sas7bdat")
+def test_encoding_options(datapath):
+    fname = datapath("io", "sas", "data", "test1.sas7bdat")
     df1 = pd.read_sas(fname)
     df2 = pd.read_sas(fname, encoding='utf-8')
     for col in df1.columns:
@@ -143,51 +143,46 @@ def test_encoding_options():
         assert(x == y.decode())
 
 
-def test_productsales():
-    dirpath = tm.get_data_path()
-    fname = os.path.join(dirpath, "productsales.sas7bdat")
+def test_productsales(datapath):
+    fname = datapath("io", "sas", "data", "productsales.sas7bdat")
     df = pd.read_sas(fname, encoding='utf-8')
-    fname = os.path.join(dirpath, "productsales.csv")
+    fname = datapath("io", "sas", "data", "productsales.csv")
     df0 = pd.read_csv(fname, parse_dates=['MONTH'])
     vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"]
     df0[vn] = df0[vn].astype(np.float64)
     tm.assert_frame_equal(df, df0)
 
 
-def test_12659():
-    dirpath = tm.get_data_path()
-    fname = os.path.join(dirpath, "test_12659.sas7bdat")
+def test_12659(datapath):
+    fname = datapath("io", "sas", "data", "test_12659.sas7bdat")
     df = pd.read_sas(fname)
-    fname = os.path.join(dirpath, "test_12659.csv")
+    fname = datapath("io", "sas", "data", "test_12659.csv")
     df0 = pd.read_csv(fname)
     df0 = df0.astype(np.float64)
     tm.assert_frame_equal(df, df0)
 
 
-def test_airline():
-    dirpath = tm.get_data_path()
-    fname = os.path.join(dirpath, "airline.sas7bdat")
+def test_airline(datapath):
+    fname = datapath("io", "sas", "data", "airline.sas7bdat")
     df = pd.read_sas(fname)
-    fname = os.path.join(dirpath, "airline.csv")
+    fname = datapath("io", "sas", "data", "airline.csv")
     df0 = pd.read_csv(fname)
     df0 = df0.astype(np.float64)
     tm.assert_frame_equal(df, df0, check_exact=False)
 
 
-def test_date_time():
+def test_date_time(datapath):
     # Support of different SAS date/datetime formats (PR #15871)
-    dirpath = tm.get_data_path()
-    fname = os.path.join(dirpath, "datetime.sas7bdat")
+    fname = datapath("io", "sas", "data", "datetime.sas7bdat")
     df = pd.read_sas(fname)
-    fname = os.path.join(dirpath, "datetime.csv")
+    fname = datapath("io", "sas", "data", "datetime.csv")
     df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime',
                                           'DateTimeHi', 'Taiw'])
     tm.assert_frame_equal(df, df0)
 
 
-def test_zero_variables():
+def test_zero_variables(datapath):
     # Check if the SAS file has zero variables (PR #18184)
-    dirpath = tm.get_data_path()
-    fname = os.path.join(dirpath, "zero_variables.sas7bdat")
+    fname = datapath("io", "sas", "data", "zero_variables.sas7bdat")
     with pytest.raises(EmptyDataError):
         pd.read_sas(fname)
diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py
index de31c3e36a8d5..6e5b2ab067aa5 100644
--- a/pandas/tests/io/sas/test_xport.py
+++ b/pandas/tests/io/sas/test_xport.py
@@ -1,3 +1,4 @@
+import pytest
 import pandas as pd
 import pandas.util.testing as tm
 from pandas.io.sas.sasreader import read_sas
@@ -18,8 +19,9 @@ def numeric_as_float(data):
 
 class TestXport(object):
 
-    def setup_method(self, method):
-        self.dirpath = tm.get_data_path()
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
+        self.dirpath = datapath("io", "sas", "data")
         self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt")
         self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt")
         self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt")
diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index 6b39717213c0d..5fded9712a52a 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -40,8 +40,9 @@
 @td.skip_if_no('xlrd', '0.9')
 class SharedItems(object):
 
-    def setup_method(self, method):
-        self.dirpath = tm.get_data_path()
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
+        self.dirpath = datapath("io", "data")
         self.frame = _frame.copy()
         self.frame2 = _frame2.copy()
         self.tsframe = _tsframe.copy()
@@ -50,7 +51,6 @@ def setup_method(self, method):
     def get_csv_refdf(self, basename):
         """
         Obtain the reference data from read_csv with the Python engine.
-        Test data path is defined by pandas.util.testing.get_data_path()
 
         Parameters
         ----------
@@ -70,7 +70,6 @@ def get_csv_refdf(self, basename):
     def get_excelfile(self, basename, ext):
         """
         Return test data ExcelFile instance. Test data path is defined by
-        pandas.util.testing.get_data_path()
 
         Parameters
         ----------
@@ -88,7 +87,6 @@ def get_excelfile(self, basename, ext):
     def get_exceldf(self, basename, ext, *args, **kwds):
         """
         Return test data DataFrame. Test data path is defined by
-        pandas.util.testing.get_data_path()
 
         Parameters
         ----------
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 3f3c0bb3b2a94..6e203787aec89 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -843,7 +843,8 @@ def test_default_encoding(self):
             assert_frame_equal(result, frame)
 
 
-def legacy_packers_versions():
+@pytest.fixture
+def legacy_packers_versions(datapath):
     # yield the packers versions
     path = tm.get_data_path('legacy_msgpack')
     if not os.path.exists(path):
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 49ad07b79d111..a4a1460026ee6 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -25,8 +25,8 @@
 
 
 @pytest.fixture
-def dirpath():
-    return tm.get_data_path()
+def dirpath(datapath):
+    return datapath("io", "data")
 
 
 @pytest.fixture
@@ -39,8 +39,9 @@ def parsed_114(dirpath):
 
 class TestStata(object):
 
-    def setup_method(self, method):
-        self.dirpath = tm.get_data_path()
+    @pytest.fixture
+    def setup_method(self, datapath):
+        self.dirpath = datapath("io", "data")
         self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta')
         self.dta1_117 = os.path.join(self.dirpath, 'stata1_117.dta')
 

From e897f11e66c7e1258eafa10bde11b162c5673e90 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 08:47:51 -0500
Subject: [PATCH 09/31] TST: Parametrize legacy msgpack tests over data files

---
 pandas/tests/io/test_packers.py | 53 ++++++++++++++-------------------
 1 file changed, 23 insertions(+), 30 deletions(-)

diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index 6e203787aec89..b5752f13cd62c 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -3,6 +3,7 @@
 from warnings import catch_warnings
 import os
 import datetime
+import glob
 import numpy as np
 import sys
 from distutils.version import LooseVersion
@@ -843,16 +844,13 @@ def test_default_encoding(self):
             assert_frame_equal(result, frame)
 
 
-@pytest.fixture
-def legacy_packers_versions(datapath):
-    # yield the packers versions
-    path = tm.get_data_path('legacy_msgpack')
-    if not os.path.exists(path):
-        raise pytest.skip("Data file {} does not exist.".format(path))
-    for v in os.listdir(path):
-        p = os.path.join(path, v)
-        if os.path.isdir(p):
-            yield v
+files = glob.glob(os.path.join(os.path.dirname(__file__), "data",
+                               "legacy_msgpack", "*", "*.msgpack"))
+
+
+@pytest.fixture(params=files)
+def legacy_packer(request, datapath):
+    return datapath(request.param)
 
 
 class TestMsgpack(object):
@@ -929,24 +927,19 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version):
         else:
             tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize('version', legacy_packers_versions())
     def test_msgpacks_legacy(self, current_packers_data, all_packers_data,
-                             version):
-
-        pth = tm.get_data_path('legacy_msgpack/{0}'.format(version))
-        n = 0
-        for f in os.listdir(pth):
-            # GH12142 0.17 files packed in P2 can't be read in P3
-            if (compat.PY3 and version.startswith('0.17.') and
-                    f.split('.')[-4][-1] == '2'):
-                continue
-            vf = os.path.join(pth, f)
-            try:
-                with catch_warnings(record=True):
-                    self.compare(current_packers_data, all_packers_data,
-                                 vf, version)
-            except ImportError:
-                # blosc not installed
-                continue
-            n += 1
-        assert n > 0, 'Msgpack files are not tested'
+                             legacy_packer, datapath):
+
+        version = os.path.basename(os.path.dirname(legacy_packer))
+
+        # GH12142 0.17 files packed in P2 can't be read in P3
+        if (compat.PY3 and version.startswith('0.17.') and
+                legacy_packer.split('.')[-4][-1] == '2'):
+            pytest.skip("Files packed in Py2 can't be read in Py3.")
+        try:
+            with catch_warnings(record=True):
+                self.compare(current_packers_data, all_packers_data,
+                             legacy_packer, version)
+        except ImportError:
+            # blosc not installed
+            pass

From 9cf30fd6d8c51a7aa8353a6e7449cb480c06288d Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 08:56:14 -0500
Subject: [PATCH 10/31] Pickle

---
 pandas/tests/io/test_pickle.py | 40 +++++++++++++++-------------------
 pandas/util/testing.py         | 15 -------------
 2 files changed, 17 insertions(+), 38 deletions(-)

diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py
index 702ba11c6df13..aed444b1f07e2 100644
--- a/pandas/tests/io/test_pickle.py
+++ b/pandas/tests/io/test_pickle.py
@@ -12,7 +12,7 @@
 
 3. Move the created pickle to "data/legacy_pickle/<version>" directory.
 """
-
+import glob
 import pytest
 from warnings import catch_warnings
 
@@ -185,29 +185,25 @@ def compare_sp_frame_float(result, expected, typ, version):
         tm.assert_sp_frame_equal(result, expected)
 
 
+files = glob.glob(os.path.join(os.path.dirname(__file__), "data",
+                  "legacy_pickle", "*", "*.pickle"))
+
+
+@pytest.fixture(params=files)
+def legacy_pickle(request, datapath):
+    return datapath(request.param)
+
+
 # ---------------------
 # tests
 # ---------------------
-def legacy_pickle_versions():
-    # yield the pickle versions
-    path = tm.get_data_path('legacy_pickle')
-    if not os.path.exists(path):
-        raise pytest.skip("Data path {} does not exists.".format(path))
-    for v in os.listdir(path):
-        p = os.path.join(path, v)
-        if os.path.isdir(p):
-            for f in os.listdir(p):
-                yield (v, f)
-
-
-@pytest.mark.parametrize('version, f', legacy_pickle_versions())
-def test_pickles(current_pickle_data, version, f):
+def test_pickles(current_pickle_data, legacy_pickle):
     if not is_platform_little_endian():
         pytest.skip("known failure on non-little endian")
 
-    vf = tm.get_data_path('legacy_pickle/{}/{}'.format(version, f))
+    version = os.path.basename(os.path.dirname(legacy_pickle))
     with catch_warnings(record=True):
-        compare(current_pickle_data, vf, version)
+        compare(current_pickle_data, legacy_pickle, version)
 
 
 def test_round_trip_current(current_pickle_data):
@@ -263,12 +259,11 @@ def python_unpickler(path):
                     compare_element(result, expected, typ)
 
 
-def test_pickle_v0_14_1():
+def test_pickle_v0_14_1(datapath):
 
     cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False,
                          categories=['a', 'b', 'c', 'd'])
-    pickle_path = os.path.join(tm.get_data_path(),
-                               'categorical_0_14_1.pickle')
+    pickle_path = datapath('io', 'data', 'categorical_0_14_1.pickle')
     # This code was executed once on v0.14.1 to generate the pickle:
     #
     # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
@@ -278,14 +273,13 @@ def test_pickle_v0_14_1():
     tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
 
 
-def test_pickle_v0_15_2():
+def test_pickle_v0_15_2(datapath):
     # ordered -> _ordered
     # GH 9347
 
     cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False,
                          categories=['a', 'b', 'c', 'd'])
-    pickle_path = os.path.join(tm.get_data_path(),
-                               'categorical_0_15_2.pickle')
+    pickle_path = datapath('io', 'data', 'categorical_0_15_2.pickle')
     # This code was executed once on v0.15.2 to generate the pickle:
     #
     # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index ec8a6cf76cb4d..684de95350580 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -729,21 +729,6 @@ def ensure_clean(filename=None, return_filelike=False):
                 print("Exception on removing file: {error}".format(error=e))
 
 
-def get_data_path(f=''):
-    """Return the path of a data file, these are relative to the current test
-    directory.
-    """
-    import pytest
-
-    # get our callers file
-    _, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1]
-    base_dir = os.path.abspath(os.path.dirname(filename))
-    path = os.path.join(base_dir, 'data', f)
-    if not os.path.exists(path):
-        pytest.skip("Data files not included in pandas distribution.")
-
-    return path
-
 # -----------------------------------------------------------------------------
 # Comparators
 

From 95cde7af6b22f8f4405a5e56de4693f28e4984dc Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 09:00:38 -0500
Subject: [PATCH 11/31] Linting

---
 pandas/tests/io/test_sql.py                  | 1 -
 pandas/tests/tseries/offsets/test_offsets.py | 1 -
 pandas/util/testing.py                       | 1 -
 3 files changed, 3 deletions(-)

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index ca9b1749ecf8a..6a87d0dc952df 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -22,7 +22,6 @@
 import pytest
 import sqlite3
 import csv
-import os
 
 import warnings
 import numpy as np
diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index 03e2b42022967..fcb48d5cf801f 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -1,4 +1,3 @@
-import os
 from distutils.version import LooseVersion
 from datetime import date, datetime, timedelta
 
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 684de95350580..b8accd424edcf 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -6,7 +6,6 @@
 import sys
 import tempfile
 import warnings
-import inspect
 import os
 import subprocess
 import locale

From 10ddddcd718d7e77b9ca6b2295fa3aee5eea1f32 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 09:05:41 -0500
Subject: [PATCH 12/31] Autouse stata

---
 pandas/tests/io/test_stata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index a4a1460026ee6..181ed1063f735 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -39,7 +39,7 @@ def parsed_114(dirpath):
 
 class TestStata(object):
 
-    @pytest.fixture
+    @pytest.fixture(autouse=True)
     def setup_method(self, datapath):
         self.dirpath = datapath("io", "data")
         self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta')

From e1ea208b3b48fad6b3a2c4ee7e2116b3aa3a3c0b Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 09:10:12 -0500
Subject: [PATCH 13/31] Remove filename

---
 pandas/tests/tseries/offsets/test_offsets.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
index fcb48d5cf801f..0ec95b56aacab 100644
--- a/pandas/tests/tseries/offsets/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -1849,7 +1849,6 @@ def _check_roundtrip(obj):
         _check_roundtrip(self.offset * 2)
 
     def test_pickle_compat_0_14_1(self, datapath):
-        # /Users/taugspurger/sandbox/pandas-ip/pandas/pandas/tests/tseries/offsets/test_offsets.py
         hdays = [datetime(2013, 1, 1) for ele in range(4)]
         pth = datapath('tseries', 'offsets', 'data', 'cday-0.14.1.pickle')
         cday0_14_1 = read_pickle(pth)

From 861687879e3362c15e10da4b262976610b061625 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 09:13:33 -0500
Subject: [PATCH 14/31] Autouse in merge_asof

---
 pandas/tests/reshape/merge/test_merge_asof.py | 246 ++++++++----------
 1 file changed, 111 insertions(+), 135 deletions(-)

diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py
index bcb27e49f28fe..59b53cd23010e 100644
--- a/pandas/tests/reshape/merge/test_merge_asof.py
+++ b/pandas/tests/reshape/merge/test_merge_asof.py
@@ -10,67 +10,30 @@
 from pandas.util.testing import assert_frame_equal
 
 
-def read_data(path, dedupe=False):
-    x = read_csv(path)
-    if dedupe:
-        x = (x.drop_duplicates(['time', 'ticker'], keep='last')
-              .reset_index(drop=True))
-    x.time = to_datetime(x.time)
-    return x
-
-
-@pytest.fixture
-def trades(datapath):
-    return read_data(datapath('reshape', 'merge', 'data', 'trades.csv'))
-
-
-@pytest.fixture
-def trades2(datapath):
-    return read_data(datapath('reshape', 'merge', 'data', 'trades2.csv'))
-
-
-@pytest.fixture
-def quotes(datapath):
-    return read_data(datapath('reshape', 'merge', 'data', 'quotes.csv'),
-                     dedupe=True)
-
-
-@pytest.fixture
-def quotes2(datapath):
-    return read_data(datapath('reshape', 'merge', 'data', 'quotes2.csv'),
-                     dedupe=True)
-
-
-@pytest.fixture
-def asof(datapath):
-    return read_data(datapath('reshape', 'merge', 'data', 'asof.csv'))
-
-
-@pytest.fixture
-def asof2(datapath):
-    return read_data(datapath('reshape', 'merge', 'data', 'asof2.csv'))
-
-
-@pytest.fixture
-def tolerance(datapath):
-    return read_data(datapath('reshape', 'merge', 'data', 'tolerance.csv'))
-
-
-@pytest.fixture
-def allow_exact_matches(datapath):
-    return read_data(datapath('reshape', 'merge', 'data',
-                              'allow_exact_matches.csv'))
-
-
-@pytest.fixture
-def allow_exact_matches_and_tolerance(datapath):
-    return read_data(datapath(
-        'reshape', 'merge', 'data', 'allow_exact_matches_and_tolerance.csv'
-    ))
-
-
 class TestAsOfMerge(object):
 
+    def read_data(self, datapath, name, dedupe=False):
+        path = datapath('reshape', 'merge', 'data', name)
+        x = read_csv(path)
+        if dedupe:
+            x = (x.drop_duplicates(['time', 'ticker'], keep='last')
+                  .reset_index(drop=True)
+                 )
+        x.time = to_datetime(x.time)
+        return x
+
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
+
+        self.trades = self.read_data(datapath, 'trades.csv')
+        self.quotes = self.read_data(datapath, 'quotes.csv', dedupe=True)
+        self.asof = self.read_data(datapath, 'asof.csv')
+        self.tolerance = self.read_data(datapath, 'tolerance.csv')
+        self.allow_exact_matches = self.read_data(datapath,
+                                                  'allow_exact_matches.csv')
+        self.allow_exact_matches_and_tolerance = self.read_data(
+            datapath, 'allow_exact_matches_and_tolerance.csv')
+
     def test_examples1(self):
         """ doc-string examples """
 
@@ -187,20 +150,23 @@ def test_examples4(self):
         result = pd.merge_asof(left, right, on='a', direction='nearest')
         assert_frame_equal(result, expected)
 
-    def test_basic(self, trades, quotes, asof):
+    def test_basic(self):
+
+        expected = self.asof
+        trades = self.trades
+        quotes = self.quotes
 
-        expected = asof
         result = merge_asof(trades, quotes,
                             on='time',
                             by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_basic_categorical(self, asof, trades, quotes):
+    def test_basic_categorical(self):
 
-        expected = asof
-        trades = trades.copy()
+        expected = self.asof
+        trades = self.trades.copy()
         trades.ticker = trades.ticker.astype('category')
-        quotes = quotes.copy()
+        quotes = self.quotes.copy()
         quotes.ticker = quotes.ticker.astype('category')
         expected.ticker = expected.ticker.astype('category')
 
@@ -209,12 +175,12 @@ def test_basic_categorical(self, asof, trades, quotes):
                             by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_basic_left_index(self, trades, quotes, asof):
+    def test_basic_left_index(self):
 
         # GH14253
-        expected = asof
-        trades = trades.set_index('time')
-        quotes = quotes
+        expected = self.asof
+        trades = self.trades.set_index('time')
+        quotes = self.quotes
 
         result = merge_asof(trades, quotes,
                             left_index=True,
@@ -226,11 +192,11 @@ def test_basic_left_index(self, trades, quotes, asof):
         expected = expected[result.columns]
         assert_frame_equal(result, expected)
 
-    def test_basic_right_index(self, trades, quotes, asof):
+    def test_basic_right_index(self):
 
-        expected = asof
-        trades = trades
-        quotes = quotes.set_index('time')
+        expected = self.asof
+        trades = self.trades
+        quotes = self.quotes.set_index('time')
 
         result = merge_asof(trades, quotes,
                             left_on='time',
@@ -238,11 +204,11 @@ def test_basic_right_index(self, trades, quotes, asof):
                             by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_basic_left_index_right_index(self, trades, quotes, asof):
+    def test_basic_left_index_right_index(self):
 
-        expected = asof.set_index('time')
-        trades = trades.set_index('time')
-        quotes = quotes.set_index('time')
+        expected = self.asof.set_index('time')
+        trades = self.trades.set_index('time')
+        quotes = self.quotes.set_index('time')
 
         result = merge_asof(trades, quotes,
                             left_index=True,
@@ -250,48 +216,48 @@ def test_basic_left_index_right_index(self, trades, quotes, asof):
                             by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_multi_index(self, trades, quotes):
+    def test_multi_index(self):
 
         # MultiIndex is prohibited
-        trades = trades.set_index(['time', 'price'])
-        quotes = quotes.set_index('time')
+        trades = self.trades.set_index(['time', 'price'])
+        quotes = self.quotes.set_index('time')
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
                        left_index=True,
                        right_index=True)
 
-    def test_multi_index2(self, trades, quotes):
-        trades = trades.set_index('time')
-        quotes = quotes.set_index(['time', 'bid'])
+        trades = self.trades.set_index('time')
+        quotes = self.quotes.set_index(['time', 'bid'])
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
                        left_index=True,
                        right_index=True)
 
-    def test_on_and_index_on_price(self, trades, quotes):
+    def test_on_and_index(self):
 
         # 'on' parameter and index together is prohibited
-        trades = trades.set_index('time')
-        quotes = quotes.set_index('time')
+        trades = self.trades.set_index('time')
+        quotes = self.quotes.set_index('time')
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
                        left_on='price',
                        left_index=True,
                        right_index=True)
 
-    def test_on_and_index_on_bid(self, trades, quotes):
-        trades = trades.set_index('time')
-        quotes = quotes.set_index('time')
+        trades = self.trades.set_index('time')
+        quotes = self.quotes.set_index('time')
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
                        right_on='bid',
                        left_index=True,
                        right_index=True)
 
-    def test_basic_left_by_right_by(self, trades, quotes, asof):
+    def test_basic_left_by_right_by(self):
 
         # GH14253
-        expected = asof
+        expected = self.asof
+        trades = self.trades
+        quotes = self.quotes
 
         result = merge_asof(trades, quotes,
                             on='time',
@@ -299,11 +265,11 @@ def test_basic_left_by_right_by(self, trades, quotes, asof):
                             right_by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_missing_right_by(self, trades, quotes, asof):
+    def test_missing_right_by(self):
 
-        expected = asof
-        trades = trades
-        quotes = quotes
+        expected = self.asof
+        trades = self.trades
+        quotes = self.quotes
 
         q = quotes[quotes.ticker != 'MSFT']
         result = merge_asof(trades, q,
@@ -312,7 +278,7 @@ def test_missing_right_by(self, trades, quotes, asof):
         expected.loc[expected.ticker == 'MSFT', ['bid', 'ask']] = np.nan
         assert_frame_equal(result, expected)
 
-    def test_multiby(self, trades, quotes):
+    def test_multiby(self):
         # GH13936
         trades = pd.DataFrame({
             'time': pd.to_datetime(['20160525 13:30:00.023',
@@ -369,7 +335,7 @@ def test_multiby(self, trades, quotes):
                                by=['ticker', 'exch'])
         assert_frame_equal(result, expected)
 
-    def test_multiby_heterogeneous_types(self, trades, quotes):
+    def test_multiby_heterogeneous_types(self):
         # GH13936
         trades = pd.DataFrame({
             'time': pd.to_datetime(['20160525 13:30:00.023',
@@ -458,31 +424,34 @@ def test_multiby_indexed(self):
             pd.merge_asof(left, right, left_index=True, right_index=True,
                           left_by=['k1', 'k2'], right_by=['k1'])
 
-    def test_basic2(self, asof2, trades2, quotes2):
+    def test_basic2(self, datapath):
+
+        expected = self.read_data(datapath, 'asof2.csv')
+        trades = self.read_data(datapath, 'trades2.csv')
+        quotes = self.read_data(datapath, 'quotes2.csv', dedupe=True)
 
-        expected = asof2
-        result = merge_asof(trades2, quotes2,
+        result = merge_asof(trades, quotes,
                             on='time',
                             by='ticker')
         assert_frame_equal(result, expected)
 
-    def test_basic_no_by(self, asof, trades, quotes):
+    def test_basic_no_by(self):
         f = lambda x: x[x.ticker == 'MSFT'].drop('ticker', axis=1) \
             .reset_index(drop=True)
 
         # just use a single ticker
-        expected = f(asof)
-        trades = f(trades)
-        quotes = f(quotes)
+        expected = f(self.asof)
+        trades = f(self.trades)
+        quotes = f(self.quotes)
 
         result = merge_asof(trades, quotes,
                             on='time')
         assert_frame_equal(result, expected)
 
-    def test_valid_join_keys(self, trades, quotes):
+    def test_valid_join_keys(self):
 
-        trades = trades
-        quotes = quotes
+        trades = self.trades
+        quotes = self.quotes
 
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
@@ -499,14 +468,14 @@ def test_valid_join_keys(self, trades, quotes):
             merge_asof(trades, quotes,
                        by='ticker')
 
-    def test_with_duplicates(self, asof, trades, quotes):
+    def test_with_duplicates(self, datapath):
 
-        q = pd.concat([quotes, quotes]).sort_values(
+        q = pd.concat([self.quotes, self.quotes]).sort_values(
             ['time', 'ticker']).reset_index(drop=True)
-        result = merge_asof(trades, q,
+        result = merge_asof(self.trades, q,
                             on='time',
                             by='ticker')
-        expected = asof
+        expected = self.read_data(datapath, 'asof.csv')
         assert_frame_equal(result, expected)
 
     def test_with_duplicates_no_on(self):
@@ -521,14 +490,22 @@ def test_with_duplicates_no_on(self):
                                  'right_val': [1, 1, 3]})
         assert_frame_equal(result, expected)
 
-    def test_valid_allow_exact_matches(self, trades, quotes):
+    def test_valid_allow_exact_matches(self):
+
+        trades = self.trades
+        quotes = self.quotes
+
         with pytest.raises(MergeError):
             merge_asof(trades, quotes,
                        on='time',
                        by='ticker',
                        allow_exact_matches='foo')
 
-    def test_valid_tolerance(self, trades, quotes):
+    def test_valid_tolerance(self):
+
+        trades = self.trades
+        quotes = self.quotes
+
         # dti
         merge_asof(trades, quotes,
                    on='time',
@@ -568,10 +545,10 @@ def test_valid_tolerance(self, trades, quotes):
                        by='ticker',
                        tolerance=-1)
 
-    def test_non_sorted(self, trades, quotes):
+    def test_non_sorted(self):
 
-        trades = trades.sort_values('time', ascending=False)
-        quotes = quotes.sort_values('time', ascending=False)
+        trades = self.trades.sort_values('time', ascending=False)
+        quotes = self.quotes.sort_values('time', ascending=False)
 
         # we require that we are already sorted on time & quotes
         assert not trades.time.is_monotonic
@@ -581,7 +558,7 @@ def test_non_sorted(self, trades, quotes):
                        on='time',
                        by='ticker')
 
-        trades = trades.sort_values('time')
+        trades = self.trades.sort_values('time')
         assert trades.time.is_monotonic
         assert not quotes.time.is_monotonic
         with pytest.raises(ValueError):
@@ -589,25 +566,26 @@ def test_non_sorted(self, trades, quotes):
                        on='time',
                        by='ticker')
 
-        quotes = quotes.sort_values('time')
+        quotes = self.quotes.sort_values('time')
         assert trades.time.is_monotonic
         assert quotes.time.is_monotonic
 
         # ok, though has dupes
-        merge_asof(trades, quotes,
+        merge_asof(trades, self.quotes,
                    on='time',
                    by='ticker')
 
-    def test_tolerance(self, trades, quotes, tolerance):
+    def test_tolerance(self):
 
-        trades = trades
-        quotes = quotes
+        trades = self.trades
+        quotes = self.quotes
 
         result = merge_asof(trades, quotes,
                             on='time',
                             by='ticker',
                             tolerance=Timedelta('1day'))
-        assert_frame_equal(result, tolerance)
+        expected = self.tolerance
+        assert_frame_equal(result, expected)
 
     def test_tolerance_forward(self):
         # GH14887
@@ -664,11 +642,11 @@ def test_tolerance_tz(self):
              'value2': list("BCDEE")})
         assert_frame_equal(result, expected)
 
-    def test_index_tolerance(self, trades, quotes, tolerance):
+    def test_index_tolerance(self):
         # GH 15135
-        expected = tolerance.set_index('time')
-        trades = trades.set_index('time')
-        quotes = quotes.set_index('time')
+        expected = self.tolerance.set_index('time')
+        trades = self.trades.set_index('time')
+        quotes = self.quotes.set_index('time')
 
         result = pd.merge_asof(trades, quotes,
                                left_index=True,
@@ -677,13 +655,13 @@ def test_index_tolerance(self, trades, quotes, tolerance):
                                tolerance=pd.Timedelta('1day'))
         assert_frame_equal(result, expected)
 
-    def test_allow_exact_matches(self, trades, quotes, allow_exact_matches):
+    def test_allow_exact_matches(self):
 
-        result = merge_asof(trades, quotes,
+        result = merge_asof(self.trades, self.quotes,
                             on='time',
                             by='ticker',
                             allow_exact_matches=False)
-        expected = allow_exact_matches
+        expected = self.allow_exact_matches
         assert_frame_equal(result, expected)
 
     def test_allow_exact_matches_forward(self):
@@ -718,16 +696,14 @@ def test_allow_exact_matches_nearest(self):
                                allow_exact_matches=False)
         assert_frame_equal(result, expected)
 
-    def test_allow_exact_matches_and_tolerance(
-            self, trades, quotes,
-            allow_exact_matches_and_tolerance):
+    def test_allow_exact_matches_and_tolerance(self):
 
-        result = merge_asof(trades, quotes,
+        result = merge_asof(self.trades, self.quotes,
                             on='time',
                             by='ticker',
                             tolerance=Timedelta('100ms'),
                             allow_exact_matches=False)
-        expected = allow_exact_matches_and_tolerance
+        expected = self.allow_exact_matches_and_tolerance
         assert_frame_equal(result, expected)
 
     def test_allow_exact_matches_and_tolerance2(self):

From 77bf77c3475c23e42be34eef4926cc51412d7974 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 09:16:00 -0500
Subject: [PATCH 15/31] Cleanup plotting

---
 pandas/tests/plotting/common.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py
index 7657130044e70..70c54cefa3833 100644
--- a/pandas/tests/plotting/common.py
+++ b/pandas/tests/plotting/common.py
@@ -45,7 +45,8 @@ def _ok_for_gaussian_kde(kind):
 @td.skip_if_no_mpl
 class TestPlotBase(object):
 
-    def setup_method(self, method):
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
 
         import matplotlib as mpl
         mpl.rcdefaults()
@@ -76,11 +77,7 @@ def setup_method(self, method):
         self.default_tick_position = 'left' if self.mpl_ge_2_0_0 else 'default'
         # common test data
         from pandas import read_csv
-        base = os.path.join(os.path.dirname(curpath()), os.pardir)
-        path = os.path.join(base, 'tests', 'data', 'iris.csv')
-
-        if not os.path.exists(path):
-            pytest.skip("Data files not included in pandas distribution.")
+        path = datapath('data', 'iris.csv')
 
         self.iris = read_csv(path)
 

From 156e14bf1d28452af49d6eb7fc1721a938fb078a Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 09:26:30 -0500
Subject: [PATCH 16/31] CLN: Simplify fspath

---
 pandas/tests/conftest.py       |  7 ++++---
 pandas/tests/io/test_common.py | 31 +++++++++++++------------------
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py
index 2c246fb9d371d..cdc1248834681 100644
--- a/pandas/tests/conftest.py
+++ b/pandas/tests/conftest.py
@@ -25,9 +25,10 @@ def deco(*args):
         path = os.path.join('pandas', 'tests', *args)
         if not os.path.exists(path):
             if request.config.getoption("--strict-data-files"):
-                raise ValueError("Failed.")
+                msg = "Could not find file {} and --strict-data-files is set."
+                raise ValueError(msg.format(path))
             else:
-                pytest.skip("{} not included in pandas distribution."
-                            .format(path))
+                msg = "Could not find {}."
+                pytest.skip(msg.format(path))
         return path
     return deco
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 37097033f0c75..5c9739be73393 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -149,27 +149,22 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext):
             reader(path)
 
     @pytest.mark.parametrize('reader, module, path', [
-        (pd.read_csv, 'os', 'iris.csv'),
-        (pd.read_table, 'os', 'iris.csv'),
-        (pd.read_fwf, 'os', 'fixed_width_format.txt'),
-        (pd.read_excel, 'xlrd', 'test1.xlsx'),
-        (pd.read_feather, 'feather', 'feather-0_3_1.feather'),
-        (pd.read_hdf, 'tables', os.path.join('legacy_hdf',
-                                             'datetimetz_object.h5')),
-        (pd.read_stata, 'os', 'stata10_115.dta'),
-        # in the function, we go down to tests/io/data
-        # so step back up a level before going into sas
-        (pd.read_sas, 'os', os.path.join('..', 'sas', 'data',
-                                         'test1.sas7bdat')),
-        (pd.read_json, 'os', os.path.join('..', 'json', 'data',
-                                          'tsframe_v012.json')),
-        (pd.read_msgpack, 'os', os.path.join('..', 'msgpack', 'data',
-                                             'frame.mp')),
-        (pd.read_pickle, 'os', 'categorical_0_14_1.pickle'),
+        (pd.read_csv, 'os', ('io', 'data', 'iris.csv')),
+        (pd.read_table, 'os', ('io', 'data', 'iris.csv')),
+        (pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')),
+        (pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')),
+        (pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')),
+        (pd.read_hdf, 'tables', ('io', 'data', 'legacy_hdf',
+                                 'datetimetz_object.h5')),
+        (pd.read_stata, 'os', ('io', 'data', 'stata10_115.dta')),
+        (pd.read_sas, 'os', ('io', 'sas', 'data', 'test1.sas7bdat')),
+        (pd.read_json, 'os', ('io', 'json', 'data', 'tsframe_v012.json')),
+        (pd.read_msgpack, 'os', ('io', 'msgpack', 'data', 'frame.mp')),
+        (pd.read_pickle, 'os', ('io', 'data', 'categorical_0_14_1.pickle')),
     ])
     def test_read_fspath_all(self, reader, module, path, datapath):
         pytest.importorskip(module)
-        path = datapath('io', 'data', path)
+        path = datapath(*path)
 
         mypath = CustomFSPath(path)
         result = reader(mypath)

From f3f3662063241dfcdb7b51459f05b988face58e0 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 11:27:57 -0500
Subject: [PATCH 17/31] Refactor sql tests

---
 pandas/tests/io/test_sql.py | 56 ++++++++++++++++++++++---------------
 1 file changed, 33 insertions(+), 23 deletions(-)

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 6a87d0dc952df..2d857bd24cfc3 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -183,9 +183,11 @@
 class MixInBase(object):
 
     def teardown_method(self, method):
-        for tbl in self._get_all_tables():
-            self.drop_table(tbl)
-        self._close_conn()
+        # if setup fails, there may not be a connection to close.
+        if hasattr(self, 'conn'):
+            for tbl in self._get_all_tables():
+                self.drop_table(tbl)
+            self._close_conn()
 
 
 class MySQLMixIn(MixInBase):
@@ -502,9 +504,10 @@ class _TestSQLApi(PandasSQLTest):
     flavor = 'sqlite'
     mode = None
 
-    def setup_method(self, method):
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
         self.conn = self.connect()
-        self._load_iris_data()
+        self._load_iris_data(datapath)
         self._load_iris_view()
         self._load_test1_data()
         self._load_test2_data()
@@ -1024,8 +1027,9 @@ class _EngineToConnMixin(object):
     A mixin that causes setup_connect to create a conn rather than an engine.
     """
 
-    def setup_method(self, method):
-        super(_EngineToConnMixin, self).setup_method(method)
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
+        super(_EngineToConnMixin, self).setup_method(datapath)
         engine = self.conn
         conn = engine.connect()
         self.__tx = conn.begin()
@@ -1033,12 +1037,14 @@ def setup_method(self, method):
         self.__engine = engine
         self.conn = conn
 
-    def teardown_method(self, method):
+        yield
+
         self.__tx.rollback()
         self.conn.close()
         self.conn = self.__engine
         self.pandasSQL = sql.SQLDatabase(self.__engine)
-        super(_EngineToConnMixin, self).teardown_method(method)
+        # XXX:
+        # super(_EngineToConnMixin, self).teardown_method(method)
 
 
 @pytest.mark.single
@@ -1135,7 +1141,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest):
     """
     flavor = None
 
-    @classmethod
+    @pytest.fixture(autouse=True, scope='class')
     def setup_class(cls):
         cls.setup_import()
         cls.setup_driver()
@@ -1148,10 +1154,11 @@ def setup_class(cls):
             msg = "{0} - can't connect to {1} server".format(cls, cls.flavor)
             pytest.skip(msg)
 
-    def setup_method(self, method):
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
         self.setup_connect()
 
-        self._load_iris_data()
+        self._load_iris_data(datapath)
         self._load_raw_sql()
         self._load_test1_data()
 
@@ -1945,11 +1952,12 @@ class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest):
     def connect(cls):
         return sqlite3.connect(':memory:')
 
-    def setup_method(self, method):
+    @pytest.fixture(autouse=True)
+    def setup_method(self, datapath):
         self.conn = self.connect()
         self.pandasSQL = sql.SQLiteDatabase(self.conn)
 
-        self._load_iris_data()
+        self._load_iris_data(datapath)
 
         self._load_test1_data()
 
@@ -2160,8 +2168,9 @@ def _skip_if_no_pymysql():
 @pytest.mark.single
 class TestXSQLite(SQLiteMixIn):
 
-    def setup_method(self, method):
-        self.method = method
+    @pytest.fixture(autouse=True)
+    def setup_method(self, request, datapath):
+        self.method = request.function
         self.conn = sqlite3.connect(':memory:')
 
     def test_basic(self):
@@ -2241,7 +2250,7 @@ def test_execute_fail(self):
             sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn)
 
     @tm.capture_stdout
-    def test_execute_closed_connection(self):
+    def test_execute_closed_connection(self, request, datapath):
         create_sql = """
         CREATE TABLE test
         (
@@ -2261,7 +2270,7 @@ def test_execute_closed_connection(self):
             tquery("select * from test", con=self.conn)
 
         # Initialize connection again (needed for tearDown)
-        self.setup_method(self.method)
+        self.setup_method(request, datapath)
 
     def test_na_roundtrip(self):
         pass
@@ -2366,7 +2375,7 @@ def clean_up(test_table_to_drop):
                   "if SQLAlchemy is not installed")
 class TestXMySQL(MySQLMixIn):
 
-    @classmethod
+    @pytest.fixture(autouse=True, scope='class')
     def setup_class(cls):
         _skip_if_no_pymysql()
 
@@ -2395,7 +2404,8 @@ def setup_class(cls):
                 "[pandas] in your system's mysql default file, "
                 "typically located at ~/.my.cnf or /etc/.my.cnf. ")
 
-    def setup_method(self, method):
+    @pytest.fixture(autouse=True)
+    def setup_method(self, request, datapath):
         _skip_if_no_pymysql()
         import pymysql
         try:
@@ -2421,7 +2431,7 @@ def setup_method(self, method):
                 "[pandas] in your system's mysql default file, "
                 "typically located at ~/.my.cnf or /etc/.my.cnf. ")
 
-        self.method = method
+        self.method = request.function
 
     def test_basic(self):
         _skip_if_no_pymysql()
@@ -2527,7 +2537,7 @@ def test_execute_fail(self):
             sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn)
 
     @tm.capture_stdout
-    def test_execute_closed_connection(self):
+    def test_execute_closed_connection(self, request, datapath):
         _skip_if_no_pymysql()
         drop_sql = "DROP TABLE IF EXISTS test"
         create_sql = """
@@ -2550,7 +2560,7 @@ def test_execute_closed_connection(self):
             tquery("select * from test", con=self.conn)
 
         # Initialize connection again (needed for tearDown)
-        self.setup_method(self.method)
+        self.setup_method(request, datapath)
 
     def test_na_roundtrip(self):
         _skip_if_no_pymysql()

From aac3606642451f7116797125185e5d5f36a4827f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 12:17:28 -0500
Subject: [PATCH 18/31] Fixed docstrings

---
 pandas/tests/io/test_excel.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py
index 5fded9712a52a..9e6cefe79d5bb 100644
--- a/pandas/tests/io/test_excel.py
+++ b/pandas/tests/io/test_excel.py
@@ -69,7 +69,7 @@ def get_csv_refdf(self, basename):
 
     def get_excelfile(self, basename, ext):
         """
-        Return test data ExcelFile instance. Test data path is defined by
+        Return test data ExcelFile instance.
 
         Parameters
         ----------
@@ -86,7 +86,7 @@ def get_excelfile(self, basename, ext):
 
     def get_exceldf(self, basename, ext, *args, **kwds):
         """
-        Return test data DataFrame. Test data path is defined by
+        Return test data DataFrame.
 
         Parameters
         ----------

From 762a2d14d9101441b7cad82f8675b3553f20f8ad Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 27 Mar 2018 12:33:59 -0500
Subject: [PATCH 19/31] Moved datapath fixture to pandas/conftest.py

---
 pandas/conftest.py       | 35 ++++++++++++++++++++++++++++++++++-
 pandas/tests/conftest.py | 34 ----------------------------------
 2 files changed, 34 insertions(+), 35 deletions(-)
 delete mode 100644 pandas/tests/conftest.py

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 2851921e1804c..e6593f4af21b2 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1,6 +1,8 @@
-import pytest
+import os
 
+import pytest
 import numpy
+
 import pandas
 import pandas.util._test_decorators as td
 
@@ -89,3 +91,34 @@ def join_type(request):
     Fixture for trying all types of join operations
     """
     return request.param
+
+
+@pytest.fixture
+def datapath(request):
+    """Get the path to a data file.
+
+    Parameters
+    ----------
+    path : str
+        Path to the file, relative to ``pandas/tests/``
+
+    Returns
+    -------
+    path : path including ``pandas/tests``.
+
+    Raises
+    ------
+    ValueError
+        If the path doesn't exist and the --strict-data-files option is set.
+    """
+    def deco(*args):
+        path = os.path.join('pandas', 'tests', *args)
+        if not os.path.exists(path):
+            if request.config.getoption("--strict-data-files"):
+                msg = "Could not find file {} and --strict-data-files is set."
+                raise ValueError(msg.format(path))
+            else:
+                msg = "Could not find {}."
+                pytest.skip(msg.format(path))
+        return path
+    return deco
diff --git a/pandas/tests/conftest.py b/pandas/tests/conftest.py
deleted file mode 100644
index cdc1248834681..0000000000000
--- a/pandas/tests/conftest.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import os
-
-import pytest
-
-
-@pytest.fixture
-def datapath(request):
-    """Get the path to a data file.
-
-    Parameters
-    ----------
-    path : str
-        Path to the file, relative to ``pandas/tests/``
-
-    Returns
-    -------
-    path : path including ``pandas/tests``.
-
-    Raises
-    ------
-    ValueError
-        If the path doesn't exist and the --strict-data-files option is set.
-    """
-    def deco(*args):
-        path = os.path.join('pandas', 'tests', *args)
-        if not os.path.exists(path):
-            if request.config.getoption("--strict-data-files"):
-                msg = "Could not find file {} and --strict-data-files is set."
-                raise ValueError(msg.format(path))
-            else:
-                msg = "Could not find {}."
-                pytest.skip(msg.format(path))
-        return path
-    return deco

From 7c44b77711ab65ca2071131a262535430ad99284 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 28 Mar 2018 09:10:02 -0500
Subject: [PATCH 20/31] Use fixture for iris plotting

---
 pandas/conftest.py                       |  6 ++++++
 pandas/tests/plotting/common.py          |  8 +-------
 pandas/tests/plotting/test_deprecated.py |  5 ++---
 pandas/tests/plotting/test_misc.py       | 16 ++++++++--------
 4 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/pandas/conftest.py b/pandas/conftest.py
index e6593f4af21b2..1fa66a41c9243 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -122,3 +122,9 @@ def deco(*args):
                 pytest.skip(msg.format(path))
         return path
     return deco
+
+
+@pytest.fixture
+def iris(datapath):
+    """The iris dataset as a DataFrame."""
+    return pandas.read_csv(datapath('data', 'iris.csv'))
diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py
index 70c54cefa3833..09687dd97bd43 100644
--- a/pandas/tests/plotting/common.py
+++ b/pandas/tests/plotting/common.py
@@ -45,8 +45,7 @@ def _ok_for_gaussian_kde(kind):
 @td.skip_if_no_mpl
 class TestPlotBase(object):
 
-    @pytest.fixture(autouse=True)
-    def setup_method(self, datapath):
+    def setup_method(self, method):
 
         import matplotlib as mpl
         mpl.rcdefaults()
@@ -75,11 +74,6 @@ def setup_method(self, datapath):
         else:
             self.default_figsize = (8.0, 6.0)
         self.default_tick_position = 'left' if self.mpl_ge_2_0_0 else 'default'
-        # common test data
-        from pandas import read_csv
-        path = datapath('data', 'iris.csv')
-
-        self.iris = read_csv(path)
 
         n = 100
         with tm.RNGContext(42):
diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py
index 2c2d371921d2f..a45b17ec98261 100644
--- a/pandas/tests/plotting/test_deprecated.py
+++ b/pandas/tests/plotting/test_deprecated.py
@@ -46,10 +46,9 @@ def test_boxplot_deprecated(self):
                              by='indic')
 
     @pytest.mark.slow
-    def test_radviz_deprecated(self):
-        df = self.iris
+    def test_radviz_deprecated(self, iris):
         with tm.assert_produces_warning(FutureWarning):
-            plotting.radviz(frame=df, class_column='Name')
+            plotting.radviz(frame=iris, class_column='Name')
 
     @pytest.mark.slow
     def test_plot_params(self):
diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py
index c5ce8aba9d80e..0704577f83d50 100644
--- a/pandas/tests/plotting/test_misc.py
+++ b/pandas/tests/plotting/test_misc.py
@@ -91,11 +91,11 @@ def test_scatter_matrix_axis(self):
             axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0)
 
     @pytest.mark.slow
-    def test_andrews_curves(self):
+    def test_andrews_curves(self, iris):
         from pandas.plotting import andrews_curves
         from matplotlib import cm
 
-        df = self.iris
+        df = iris
 
         _check_plot_works(andrews_curves, frame=df, class_column='Name')
 
@@ -156,11 +156,11 @@ def test_andrews_curves(self):
             andrews_curves(data=df, class_column='Name')
 
     @pytest.mark.slow
-    def test_parallel_coordinates(self):
+    def test_parallel_coordinates(self, iris):
         from pandas.plotting import parallel_coordinates
         from matplotlib import cm
 
-        df = self.iris
+        df = iris
 
         ax = _check_plot_works(parallel_coordinates,
                                frame=df, class_column='Name')
@@ -225,11 +225,11 @@ def test_parallel_coordinates_with_sorted_labels(self):
             assert prev[1] < nxt[1] and prev[0] < nxt[0]
 
     @pytest.mark.slow
-    def test_radviz(self):
+    def test_radviz(self, iris):
         from pandas.plotting import radviz
         from matplotlib import cm
 
-        df = self.iris
+        df = iris
         _check_plot_works(radviz, frame=df, class_column='Name')
 
         rgba = ('#556270', '#4ECDC4', '#C7F464')
@@ -263,8 +263,8 @@ def test_radviz(self):
         self._check_colors(handles, facecolors=colors)
 
     @pytest.mark.slow
-    def test_subplot_titles(self):
-        df = self.iris.drop('Name', axis=1).head()
+    def test_subplot_titles(self, iris):
+        df = iris.drop('Name', axis=1).head()
         # Use the column names as the subplot titles
         title = list(df.columns)
 

From ad0995144ce167c84d920fcce0f66a75c34ff059 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 28 Mar 2018 16:56:42 -0500
Subject: [PATCH 21/31] Abs path for file test

---
 pandas/tests/io/parser/common.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index 2c363f5f4adff..e5ceb4d52ba58 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -76,7 +76,7 @@ def test_read_csv(self):
             else:
                 prefix = u("file://")
 
-            fname = prefix + compat.text_type(self.csv1)
+            fname = prefix + compat.text_type(os.path.abspath(self.csv1))
             self.read_csv(fname, index_col=0, parse_dates=True)
 
     def test_1000_sep(self):

From 6f02d6b4a5aad8d9d4e0eea6d82f88f4a774b665 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 29 Mar 2018 08:38:18 -0500
Subject: [PATCH 22/31] Removed stdout capture from sql tests

This was interfering with pytest's dependency injection.
---
 pandas/tests/io/test_sql.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 2d857bd24cfc3..8d4c3ecf8ada9 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -2249,7 +2249,6 @@ def test_execute_fail(self):
         with pytest.raises(Exception):
             sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn)
 
-    @tm.capture_stdout
     def test_execute_closed_connection(self, request, datapath):
         create_sql = """
         CREATE TABLE test
@@ -2536,7 +2535,6 @@ def test_execute_fail(self):
         with pytest.raises(Exception):
             sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn)
 
-    @tm.capture_stdout
     def test_execute_closed_connection(self, request, datapath):
         _skip_if_no_pymysql()
         drop_sql = "DROP TABLE IF EXISTS test"

From bac438cde43ee0ba1a36064f71bef3adedea4236 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 12 Jun 2018 08:22:18 -0500
Subject: [PATCH 23/31] Cleanup Manifest

---
 MANIFEST.in | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 9416da89cc627..b417b8890fa24 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,11 +3,12 @@ include LICENSE
 include RELEASE.md
 include README.md
 include setup.py
-include pyproject.toml
 
 graft doc
 prune doc/build
 
+graft LICENSES
+
 graft pandas
 
 global-exclude *.bz2
@@ -35,8 +36,6 @@ global-exclude .DS_Store
 global-exclude .git*
 global-exclude \#*
 
-recursive-exclude pandas/tests/io/data
-
 include versioneer.py
 include pandas/_version.py
 include pandas/io/formats/templates/*.tpl

From 84ccdbfa3dbebd1a56caf544607162549af5b35d Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 12 Jun 2018 09:08:54 -0500
Subject: [PATCH 24/31] Build expected path with os.path.join in test_datapath_missing

---
 pandas/tests/util/test_testing.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py
index c5e025f051e82..a06e58844b1da 100644
--- a/pandas/tests/util/test_testing.py
+++ b/pandas/tests/util/test_testing.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import os
 import pandas as pd
 import pytest
 import numpy as np
@@ -851,5 +852,5 @@ def test_datapath_missing(datapath, request):
         datapath('not_a_file')
 
     result = datapath('data/iris.csv')
-    expected = 'pandas/tests/data/iris.csv'
+    expected = os.path.join('pandas', 'tests', 'data', 'iris.csv')
     assert result == expected

From 7fd766055cb71ded2f67c7ec58f9fa1fb8174b33 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 20 Jun 2018 08:14:02 -0500
Subject: [PATCH 25/31] Fixed test_datapath_missing on Windows

---
 pandas/tests/util/test_testing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py
index a06e58844b1da..4d34987e14f75 100644
--- a/pandas/tests/util/test_testing.py
+++ b/pandas/tests/util/test_testing.py
@@ -851,6 +851,6 @@ def test_datapath_missing(datapath, request):
     with pytest.raises(ValueError):
         datapath('not_a_file')
 
-    result = datapath('data/iris.csv')
+    result = datapath('data', 'iris.csv')
     expected = os.path.join('pandas', 'tests', 'data', 'iris.csv')
     assert result == expected

From c187f8b106bd55e31eabdb0572d640718f619326 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 20 Jun 2018 08:18:01 -0500
Subject: [PATCH 26/31] whatsnew

---
 doc/source/whatsnew/v0.23.2.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index 5b3e607956f7a..966efa4c2e123 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -37,6 +37,11 @@ Documentation Changes
 -
 -
 
+Build Changes
+-------------
+
+- The source and binary distributions no longer include test files, resulting in smaller download sizes. Tests relying on these files will be skipped when using ``pandas.test()``. (:issue:`19320`)
+
 .. _whatsnew_0232.bug_fixes:
 
 Bug Fixes

From 632a61d7d0ff2607f4a642953aad8b804c2dbff7 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 20 Jun 2018 10:59:44 -0500
Subject: [PATCH 27/31] Clarify note [ci skip]

[ci skip]
---
 doc/source/whatsnew/v0.23.2.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index 966efa4c2e123..45b78fa4a3e4e 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -40,7 +40,7 @@ Documentation Changes
 Build Changes
 -------------
 
-- The source and binary distributions no longer include test files, resulting in smaller download sizes. Tests relying on these files will be skipped when using ``pandas.test()``. (:issue:`19320`)
+- The source and binary distributions no longer include test data files, resulting in smaller download sizes. Tests relying on these data files will be skipped when using ``pandas.test()``. (:issue:`19320`)
 
 .. _whatsnew_0232.bug_fixes:
 

From b5b70c77b182cb27da617c1609db235c6fe8ea0f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Thu, 21 Jun 2018 09:13:30 -0500
Subject: [PATCH 28/31] TST: refactored html tests

---
 pandas/tests/io/test_html.py    | 30 +++++++++++++++++++++++-------
 pandas/tests/io/test_packers.py |  3 ++-
 test_foo.py                     | 22 ++++++++++++++++++++++
 3 files changed, 47 insertions(+), 8 deletions(-)
 create mode 100644 test_foo.py

diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 46e2b718e8343..371f85576812f 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -28,6 +28,22 @@
 HERE = os.path.dirname(__file__)
 
 
+def pytest_generate_tests(metafunc):
+    # Defers evaluation of the fixture until after collection.
+    # https://docs.pytest.org/en/latest/example/parametrize.html\
+    # deferring-the-setup-of-parametrized-resources
+    if 'html_file' in metafunc.fixturenames:
+        paths = glob.glob(
+            os.path.join(HERE, 'data', 'html_encoding', '*.html')
+        )
+        metafunc.parametrize("html_file", paths, indirect=True)
+
+
+@pytest.fixture
+def html_file(request, datapath):
+    return datapath(request.param)
+
+
 def assert_framelist_equal(list1, list2, *args, **kwargs):
     assert len(list1) == len(list2), ('lists are not of equal size '
                                       'len(list1) == {0}, '
@@ -838,22 +854,22 @@ def test_displayed_only(self, displayed_only, exp0, exp1):
         else:
             assert len(dfs) == 1  # Should not parse hidden table
 
-    @pytest.mark.parametrize("f", glob.glob(
-        os.path.join(HERE, 'data', 'html_encoding', '*.html')))
-    def test_encode(self, f):
-        _, encoding = os.path.splitext(os.path.basename(f))[0].split('_')
+    def test_encode(self, html_file):
+        _, encoding = os.path.splitext(
+            os.path.basename(html_file)
+        )[0].split('_')
 
         try:
-            with open(f, 'rb') as fobj:
+            with open(html_file, 'rb') as fobj:
                 from_string = self.read_html(fobj.read(), encoding=encoding,
                                              index_col=0).pop()
 
-            with open(f, 'rb') as fobj:
+            with open(html_file, 'rb') as fobj:
                 from_file_like = self.read_html(BytesIO(fobj.read()),
                                                 encoding=encoding,
                                                 index_col=0).pop()
 
-            from_filename = self.read_html(f, encoding=encoding,
+            from_filename = self.read_html(html_file, encoding=encoding,
                                            index_col=0).pop()
             tm.assert_frame_equal(from_string, from_file_like)
             tm.assert_frame_equal(from_string, from_filename)
diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py
index aca623cb14aae..412e218f95c6f 100644
--- a/pandas/tests/io/test_packers.py
+++ b/pandas/tests/io/test_packers.py
@@ -928,7 +928,8 @@ def test_msgpacks_legacy(self, current_packers_data, all_packers_data,
         # GH12142 0.17 files packed in P2 can't be read in P3
         if (compat.PY3 and version.startswith('0.17.') and
                 legacy_packer.split('.')[-4][-1] == '2'):
-            pytest.skip("Files packed in Py2 can't be read in Py3.")
+            msg = "Files packed in Py2 can't be read in Py3 ({})"
+            pytest.skip(msg.format(version))
         try:
             with catch_warnings(record=True):
                 self.compare(current_packers_data, all_packers_data,
diff --git a/test_foo.py b/test_foo.py
new file mode 100644
index 0000000000000..93623d5fdb045
--- /dev/null
+++ b/test_foo.py
@@ -0,0 +1,22 @@
+import pytest
+
+CALL_COUNT = 0 
+
+
+@pytest.fixture(scope="module")
+def fixture(request, datapath):
+    global CALL_COUNT
+    CALL_COUNT += 1 
+
+    return request.param
+
+
+def pytest_generate_tests(metafunc):
+    if "fixture" in metafunc.fixturenames:
+        metafunc.parametrize("fixture", ["foo"], indirect=True, scope="module")
+
+
+@pytest.mark.parametrize("param", ["bar", "zaz"])
+def test_1(fixture, param):
+    global CALL_COUNT
+    assert CALL_COUNT == 1

From 9954bba5b72e7d31e5e9326d9fe9cedc35af2372 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 22 Jun 2018 15:28:56 -0500
Subject: [PATCH 29/31] Remove auto-generated html fixtures

---
 pandas/tests/io/test_html.py | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 371f85576812f..b05d37a90cff0 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -28,20 +28,15 @@
 HERE = os.path.dirname(__file__)
 
 
-def pytest_generate_tests(metafunc):
-    # Defers evaluation of the fixture until after collection.
-    # https://docs.pytest.org/en/latest/example/parametrize.html\
-    # deferring-the-setup-of-parametrized-resources
-    if 'html_file' in metafunc.fixturenames:
-        paths = glob.glob(
-            os.path.join(HERE, 'data', 'html_encoding', '*.html')
-        )
-        metafunc.parametrize("html_file", paths, indirect=True)
-
-
-@pytest.fixture
-def html_file(request, datapath):
-    return datapath(request.param)
+@pytest.fixture(params=[
+    'chinese_utf-16.html',
+    'chinese_utf-32.html',
+    'chinese_utf-8.html',
+    'letz_latin1.html',
+])
+def html_encoding_file(request, datapath):
+    """Parametrized fixture for HTML encoding test filenames."""
+    return datapath('io', 'data', 'html_encoding', request.param)
 
 
 def assert_framelist_equal(list1, list2, *args, **kwargs):
@@ -854,22 +849,23 @@ def test_displayed_only(self, displayed_only, exp0, exp1):
         else:
             assert len(dfs) == 1  # Should not parse hidden table
 
-    def test_encode(self, html_file):
+    def test_encode(self, html_encoding_file):
         _, encoding = os.path.splitext(
-            os.path.basename(html_file)
+            os.path.basename(html_encoding_file)
         )[0].split('_')
 
         try:
-            with open(html_file, 'rb') as fobj:
+            with open(html_encoding_file, 'rb') as fobj:
                 from_string = self.read_html(fobj.read(), encoding=encoding,
                                              index_col=0).pop()
 
-            with open(html_file, 'rb') as fobj:
+            with open(html_encoding_file, 'rb') as fobj:
                 from_file_like = self.read_html(BytesIO(fobj.read()),
                                                 encoding=encoding,
                                                 index_col=0).pop()
 
-            from_filename = self.read_html(html_file, encoding=encoding,
+            from_filename = self.read_html(html_encoding_file,
+                                           encoding=encoding,
                                            index_col=0).pop()
             tm.assert_frame_equal(from_string, from_file_like)
             tm.assert_frame_equal(from_string, from_filename)

From c7718852e576d55c7352a19dc430fc4fed78dbce Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 22 Jun 2018 17:09:03 -0500
Subject: [PATCH 30/31] linting

---
 pandas/tests/io/test_html.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index b05d37a90cff0..9c6a8de7ed446 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -1,6 +1,5 @@
 from __future__ import print_function
 
-import glob
 import os
 import re
 import threading

From dd752708e42ac989fdfcbf16cb455a62aa74fa2f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 26 Jun 2018 07:15:48 -0500
Subject: [PATCH 31/31] Removed scratch test file test_foo.py

---
 test_foo.py | 22 ----------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 test_foo.py

diff --git a/test_foo.py b/test_foo.py
deleted file mode 100644
index 93623d5fdb045..0000000000000
--- a/test_foo.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import pytest
-
-CALL_COUNT = 0 
-
-
-@pytest.fixture(scope="module")
-def fixture(request, datapath):
-    global CALL_COUNT
-    CALL_COUNT += 1 
-
-    return request.param
-
-
-def pytest_generate_tests(metafunc):
-    if "fixture" in metafunc.fixturenames:
-        metafunc.parametrize("fixture", ["foo"], indirect=True, scope="module")
-
-
-@pytest.mark.parametrize("param", ["bar", "zaz"])
-def test_1(fixture, param):
-    global CALL_COUNT
-    assert CALL_COUNT == 1