diff --git a/MANIFEST.in b/MANIFEST.in index 9773019c6e6e0..b417b8890fa24 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,27 +3,39 @@ include LICENSE include RELEASE.md include README.md include setup.py -include pyproject.toml graft doc prune doc/build +graft LICENSES + graft pandas -global-exclude *.so -global-exclude *.pyd +global-exclude *.bz2 +global-exclude *.csv +global-exclude *.dta +global-exclude *.gz +global-exclude *.h5 +global-exclude *.html +global-exclude *.json +global-exclude *.msgpack +global-exclude *.pickle +global-exclude *.png global-exclude *.pyc +global-exclude *.pyd +global-exclude *.sas7bdat +global-exclude *.so +global-exclude *.xls +global-exclude *.xlsm +global-exclude *.xlsx +global-exclude *.xpt +global-exclude *.xz +global-exclude *.zip global-exclude *~ -global-exclude \#* -global-exclude .git* global-exclude .DS_Store -global-exclude *.png +global-exclude .git* +global-exclude \#* -# include examples/data/* -# recursive-include examples *.py -# recursive-include doc/source * -# recursive-include doc/sphinxext * -# recursive-include LICENSES * include versioneer.py include pandas/_version.py include pandas/io/formats/templates/*.tpl diff --git a/ci/script_single.sh b/ci/script_single.sh index f376c920ac71b..60e2fbb33ee5d 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -25,12 +25,12 @@ if [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" elif [ "$COVERAGE" ]; then - echo pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas - pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + echo pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas else - echo pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas - pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest + echo pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas + pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest fi diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index a603bf9f7e9e0..a41a6c31b0678 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -76,6 +76,11 @@ Documentation Changes - - +Build Changes +------------- + +- The source and binary distributions no longer include test data files, resulting in smaller download sizes. Tests relying on these data files will be skipped when using ``pandas.test()``. (:issue:`19320`) + .. 
_whatsnew_0232.bug_fixes: Bug Fixes diff --git a/pandas/conftest.py b/pandas/conftest.py index b4a599758417c..82d860b091b82 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,7 +1,9 @@ +import os import importlib import pytest +import pandas import numpy as np import pandas as pd from pandas.compat import PY3 @@ -17,6 +19,8 @@ def pytest_addoption(parser): help="run high memory tests") parser.addoption("--only-slow", action="store_true", help="run only slow tests") + parser.addoption("--strict-data-files", action="store_true", + help="Fail if a test is skipped due to a missing data file.") def pytest_runtest_setup(item): @@ -131,6 +135,43 @@ def join_type(request): return request.param +@pytest.fixture +def datapath(request): + """Get the path to a data file. + + Parameters + ---------- + path : str + Path to the file, relative to ``pandas/tests/`` + + Returns + ------- + path : str + Path including ``pandas/tests``. + + Raises + ------ + ValueError + If the path doesn't exist and the --strict-data-files option is set. + """ + def deco(*args): + path = os.path.join('pandas', 'tests', *args) + if not os.path.exists(path): + if request.config.getoption("--strict-data-files"): + msg = "Could not find file {} and --strict-data-files is set." + raise ValueError(msg.format(path)) + else: + msg = "Could not find {}." + pytest.skip(msg.format(path)) + return path + return deco + + +@pytest.fixture +def iris(datapath): + """The iris dataset as a DataFrame.""" + return pandas.read_csv(datapath('data', 'iris.csv')) + + @pytest.fixture(params=['nlargest', 'nsmallest']) def nselect_method(request): """ diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 362f917e74972..c925c4c403960 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1182,12 +1182,12 @@ def test_iter(self): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] assert result == expected - def test_legacy_pickle(self): + def test_legacy_pickle(self, datapath): if PY3: pytest.skip("testing for legacy pickles not " "supported on py3") - path = tm.get_data_path('multiindex_v1.pickle') + path = datapath('indexes', 'data', 'multiindex_v1.pickle') obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) @@ -1203,10 +1203,10 @@ def test_legacy_pickle(self): assert_almost_equal(res, exp) assert_almost_equal(exp, exp2) - def test_legacy_v2_unpickle(self): + def test_legacy_v2_unpickle(self, datapath): # 0.7.3 -> 0.8.0 format manage - path = tm.get_data_path('mindex_073.pickle') + path = datapath('indexes', 'data', 'mindex_073.pickle') obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 8deb51e190bab..7623587803b41 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -1,32 +1,23 @@ -import os - import pytest from pandas.io.parsers import read_table -from pandas.util import testing as tm - - -@pytest.fixture -def parser_data(request): - return os.path.join(tm.get_data_path(), '..', 'parser', 'data') @pytest.fixture -def tips_file(parser_data): +def tips_file(datapath): """Path to the tips dataset""" - return os.path.join(parser_data, 'tips.csv') + return datapath('io', 'parser', 'data', 'tips.csv') @pytest.fixture -def jsonl_file(parser_data): +def jsonl_file(datapath): """Path to a JSONL dataset""" - return os.path.join(parser_data, 'items.jsonl') + return datapath('io', 'parser', 'data', 'items.jsonl') @pytest.fixture -def 
salaries_table(parser_data): +def salaries_table(datapath): """DataFrame with the salaries dataset""" - path = os.path.join(parser_data, 'salaries.csv') - return read_table(path) + return read_table(datapath('io', 'parser', 'data', 'salaries.csv')) @pytest.fixture diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index f221df93dd412..63b7cb3459069 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -916,8 +916,8 @@ def test_unicode_problem_decoding_as_ascii(self): dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})}) compat.text_type(dm.to_string()) - def test_string_repr_encoding(self): - filepath = tm.get_data_path('unicode_series.csv') + def test_string_repr_encoding(self, datapath): + filepath = datapath('io', 'formats', 'data', 'unicode_series.csv') df = pd.read_csv(filepath, header=None, encoding='latin1') repr(df) repr(df[1]) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index c9074ca49e5be..05ceace20f5a4 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -21,11 +21,11 @@ def test_compression_roundtrip(compression): assert_frame_equal(df, pd.read_json(result)) -def test_read_zipped_json(): - uncompressed_path = tm.get_data_path("tsframe_v012.json") +def test_read_zipped_json(datapath): + uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json") uncompressed_df = pd.read_json(uncompressed_path) - compressed_path = tm.get_data_path("tsframe_v012.json.zip") + compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip") compressed_df = pd.read_json(compressed_path, compression='zip') assert_frame_equal(uncompressed_df, compressed_df) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7e497c395266f..bcbac4400c953 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -37,8 +37,9 @@ class TestPandasContainer(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(scope="function", autouse=True) + def setup(self, datapath): + self.dirpath = datapath("io", "json", "data") self.ts = tm.makeTimeSeries() self.ts.name = 'ts' @@ -59,7 +60,8 @@ def setup_method(self, method): self.mixed_frame = _mixed_frame.copy() self.categorical = _cat_frame.copy() - def teardown_method(self, method): + yield + del self.dirpath del self.ts diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 6e1d3575a1481..9e871d27f0ce8 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -77,7 +77,7 @@ def test_read_csv(self): else: prefix = u("file://") - fname = prefix + compat.text_type(self.csv1) + fname = prefix + compat.text_type(os.path.abspath(self.csv1)) self.read_csv(fname, index_col=0, parse_dates=True) def test_1000_sep(self): @@ -651,21 +651,19 @@ def test_read_csv_parse_simple_list(self): tm.assert_frame_equal(df, expected) @tm.network - def test_url(self): + def test_url(self, datapath): # HTTP(S) url = ('https://raw.github.com/pandas-dev/pandas/master/' 'pandas/tests/io/parser/data/salaries.csv') url_table = self.read_table(url) - dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.csv') + localtable = datapath('io', 'parser', 'data', 'salaries.csv') local_table = self.read_table(localtable) tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing @pytest.mark.slow - 
def test_file(self): - dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.csv') + def test_file(self, datapath): + localtable = datapath('io', 'parser', 'data', 'salaries.csv') local_table = self.read_table(localtable) try: @@ -755,8 +753,8 @@ def test_utf16_bom_skiprows(self): tm.assert_frame_equal(result, expected) - def test_utf16_example(self): - path = tm.get_data_path('utf16_ex.txt') + def test_utf16_example(self, datapath): + path = datapath('io', 'parser', 'data', 'utf16_ex.txt') # it works! and is the right length result = self.read_table(path, encoding='utf-16') @@ -767,8 +765,8 @@ def test_utf16_example(self): result = self.read_table(buf, encoding='utf-16') assert len(result) == 50 - def test_unicode_encoding(self): - pth = tm.get_data_path('unicode_series.csv') + def test_unicode_encoding(self, datapath): + pth = datapath('io', 'parser', 'data', 'unicode_series.csv') result = self.read_csv(pth, header=None, encoding='latin-1') result = result.set_index(0) @@ -1513,10 +1511,9 @@ def test_internal_eof_byte_to_file(self): result = self.read_csv(path) tm.assert_frame_equal(result, expected) - def test_sub_character(self): + def test_sub_character(self, datapath): # see gh-16893 - dirpath = tm.get_data_path() - filename = os.path.join(dirpath, "sub_char.csv") + filename = datapath('io', 'parser', 'data', 'sub_char.csv') expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"]) result = self.read_csv(filename) diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py index e84db66561c49..e4950af19ea95 100644 --- a/pandas/tests/io/parser/compression.py +++ b/pandas/tests/io/parser/compression.py @@ -120,9 +120,9 @@ def test_read_csv_infer_compression(self): tm.assert_frame_equal(expected, df) - def test_read_csv_compressed_utf16_example(self): + def test_read_csv_compressed_utf16_example(self, datapath): # GH18071 - path = tm.get_data_path('utf16_ex_small.zip') + path = datapath('io', 'parser', 'data', 'utf16_ex_small.zip') result = self.read_csv(path, encoding='utf-16', compression='zip', sep='\t') diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py index b91ce04673e29..8060ebf2fbcd4 100644 --- a/pandas/tests/io/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -125,9 +125,9 @@ def test_categorical_dtype_high_cardinality_numeric(self): np.sort(actual.a.cat.categories), ordered=True) tm.assert_frame_equal(actual, expected) - def test_categorical_dtype_encoding(self): + def test_categorical_dtype_encoding(self, datapath): # GH 10153 - pth = tm.get_data_path('unicode_series.csv') + pth = datapath('io', 'parser', 'data', 'unicode_series.csv') encoding = 'latin-1' expected = self.read_csv(pth, header=None, encoding=encoding) expected[1] = Categorical(expected[1]) @@ -135,7 +135,7 @@ def test_categorical_dtype_encoding(self): dtype={1: 'category'}) tm.assert_frame_equal(actual, expected) - pth = tm.get_data_path('utf16_ex.txt') + pth = datapath('io', 'parser', 'data', 'utf16_ex.txt') encoding = 'utf-16' expected = self.read_table(pth, encoding=encoding) expected = expected.apply(Categorical) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index fdf45f307e953..e2243b8087a5b 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -48,10 +48,16 @@ def check_compressed_urls(salaries_table, compression, extension, mode, tm.assert_frame_equal(url_table, salaries_table) +@pytest.fixture +def 
tips_df(datapath): + """DataFrame with the tips dataset.""" + return read_csv(datapath('io', 'parser', 'data', 'tips.csv')) + + @pytest.mark.usefixtures("s3_resource") class TestS3(object): - def test_parse_public_s3_bucket(self): + def test_parse_public_s3_bucket(self, tips_df): pytest.importorskip('s3fs') # more of an integration test due to the not-public contents portion # can probably mock this though. @@ -60,45 +66,40 @@ def test_parse_public_s3_bucket(self): ext, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) # Read public file from bucket with not-public contents df = read_csv('s3://cant_get_it/tips.csv') assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_parse_public_s3n_bucket(self): + def test_parse_public_s3n_bucket(self, tips_df): # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3a_bucket(self): + def test_parse_public_s3a_bucket(self, tips_df): # Read from AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3_bucket_nrows(self): + def test_parse_public_s3_bucket_nrows(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3_bucket_chunked(self): + def test_parse_public_s3_bucket_chunked(self, tips_df): # Read with a chunksize chunksize = 5 - local_tips = read_csv(tm.get_data_path('tips.csv')) for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df_reader = read_csv('s3://pandas-test/tips.csv' + ext, chunksize=chunksize, compression=comp) @@ -109,14 +110,13 @@ def test_parse_public_s3_bucket_chunked(self): df = df_reader.get_chunk() assert isinstance(df, DataFrame) assert not df.empty - true_df = local_tips.iloc[ + true_df = tips_df.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - def test_parse_public_s3_bucket_chunked_python(self): + def test_parse_public_s3_bucket_chunked_python(self, tips_df): # Read with a chunksize using the Python parser chunksize = 5 - local_tips = read_csv(tm.get_data_path('tips.csv')) for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df_reader = read_csv('s3://pandas-test/tips.csv' + ext, chunksize=chunksize, compression=comp, @@ -127,36 +127,33 @@ def test_parse_public_s3_bucket_chunked_python(self): df = df_reader.get_chunk() assert isinstance(df, DataFrame) assert not df.empty - true_df = local_tips.iloc[ + true_df = tips_df.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - def test_parse_public_s3_bucket_python(self): + def test_parse_public_s3_bucket_python(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 
'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_infer_s3_compression(self): + def test_infer_s3_compression(self, tips_df): for ext in ['', '.gz', '.bz2']: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_parse_public_s3_bucket_nrows_python(self): + def test_parse_public_s3_bucket_nrows_python(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) def test_s3_fails(self): with pytest.raises(IOError): diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 7717102b64fc5..b6f13039641a2 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import os +import pytest import pandas.util.testing as tm from pandas import read_csv, read_table, DataFrame @@ -45,8 +46,9 @@ def read_table(self, *args, **kwargs): def float_precision_choices(self): raise com.AbstractMethodError(self) - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath('io', 'parser', 'data') self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') self.xls1 = os.path.join(self.dirpath, 'test.xls') diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index e8d9d8b52164b..c7026e3e0fc88 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -28,8 +28,9 @@ class TestTextReader(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath('io', 'parser', 'data') self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') self.xls1 = os.path.join(self.dirpath, 'test.xls') diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index b80263021c269..101ee3e619f5b 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -11,8 +11,9 @@ class TestSAS7BDAT(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") self.data = [] self.test_ix = [list(range(1, 16)), [16]] for j in 1, 2: @@ -123,9 +124,8 @@ def test_iterator_read_too_much(self): rdr.close() -def test_encoding_options(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "test1.sas7bdat") +def test_encoding_options(datapath): + fname = datapath("io", "sas", "data", "test1.sas7bdat") df1 = pd.read_sas(fname) df2 = pd.read_sas(fname, encoding='utf-8') for col in df1.columns: @@ -143,43 +143,39 @@ def test_encoding_options(): assert(x 
== y.decode()) -def test_productsales(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "productsales.sas7bdat") +def test_productsales(datapath): + fname = datapath("io", "sas", "data", "productsales.sas7bdat") df = pd.read_sas(fname, encoding='utf-8') - fname = os.path.join(dirpath, "productsales.csv") + fname = datapath("io", "sas", "data", "productsales.csv") df0 = pd.read_csv(fname, parse_dates=['MONTH']) vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"] df0[vn] = df0[vn].astype(np.float64) tm.assert_frame_equal(df, df0) -def test_12659(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "test_12659.sas7bdat") +def test_12659(datapath): + fname = datapath("io", "sas", "data", "test_12659.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "test_12659.csv") + fname = datapath("io", "sas", "data", "test_12659.csv") df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) tm.assert_frame_equal(df, df0) -def test_airline(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "airline.sas7bdat") +def test_airline(datapath): + fname = datapath("io", "sas", "data", "airline.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "airline.csv") + fname = datapath("io", "sas", "data", "airline.csv") df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) tm.assert_frame_equal(df, df0, check_exact=False) -def test_date_time(): +def test_date_time(datapath): # Support of different SAS date/datetime formats (PR #15871) - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "datetime.sas7bdat") + fname = datapath("io", "sas", "data", "datetime.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "datetime.csv") + fname = datapath("io", "sas", "data", "datetime.csv") df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime', 'DateTimeHi', 'Taiw']) # GH 19732: Timestamps imported from sas will incur floating point errors @@ -187,9 +183,8 @@ def test_date_time(): tm.assert_frame_equal(df, df0) -def test_zero_variables(): +def test_zero_variables(datapath): # Check if the SAS file has zero variables (PR #18184) - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "zero_variables.sas7bdat") + fname = datapath("io", "sas", "data", "zero_variables.sas7bdat") with pytest.raises(EmptyDataError): pd.read_sas(fname) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index de31c3e36a8d5..6e5b2ab067aa5 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -1,3 +1,4 @@ +import pytest import pandas as pd import pandas.util.testing as tm from pandas.io.sas.sasreader import read_sas @@ -18,8 +19,9 @@ def numeric_as_float(data): class TestXport(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a89156db38ae3..5c9739be73393 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -149,27 +149,22 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): reader(path) @pytest.mark.parametrize('reader, module, path', [ - (pd.read_csv, 'os', os.path.join(HERE, 'data', 'iris.csv')), - (pd.read_table, 'os', os.path.join(HERE, 
'data', 'iris.csv')), - (pd.read_fwf, 'os', os.path.join(HERE, 'data', - 'fixed_width_format.txt')), - (pd.read_excel, 'xlrd', os.path.join(HERE, 'data', 'test1.xlsx')), - (pd.read_feather, 'feather', os.path.join(HERE, 'data', - 'feather-0_3_1.feather')), - (pd.read_hdf, 'tables', os.path.join(HERE, 'data', 'legacy_hdf', - 'datetimetz_object.h5')), - (pd.read_stata, 'os', os.path.join(HERE, 'data', 'stata10_115.dta')), - (pd.read_sas, 'os', os.path.join(HERE, 'sas', 'data', - 'test1.sas7bdat')), - (pd.read_json, 'os', os.path.join(HERE, 'json', 'data', - 'tsframe_v012.json')), - (pd.read_msgpack, 'os', os.path.join(HERE, 'msgpack', 'data', - 'frame.mp')), - (pd.read_pickle, 'os', os.path.join(HERE, 'data', - 'categorical_0_14_1.pickle')), + (pd.read_csv, 'os', ('io', 'data', 'iris.csv')), + (pd.read_table, 'os', ('io', 'data', 'iris.csv')), + (pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')), + (pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')), + (pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')), + (pd.read_hdf, 'tables', ('io', 'data', 'legacy_hdf', + 'datetimetz_object.h5')), + (pd.read_stata, 'os', ('io', 'data', 'stata10_115.dta')), + (pd.read_sas, 'os', ('io', 'sas', 'data', 'test1.sas7bdat')), + (pd.read_json, 'os', ('io', 'json', 'data', 'tsframe_v012.json')), + (pd.read_msgpack, 'os', ('io', 'msgpack', 'data', 'frame.mp')), + (pd.read_pickle, 'os', ('io', 'data', 'categorical_0_14_1.pickle')), ]) - def test_read_fspath_all(self, reader, module, path): + def test_read_fspath_all(self, reader, module, path, datapath): pytest.importorskip(module) + path = datapath(*path) mypath = CustomFSPath(path) result = reader(mypath) @@ -232,13 +227,14 @@ def test_write_fspath_hdf5(self): tm.assert_frame_equal(result, expected) -class TestMMapWrapper(object): +@pytest.fixture +def mmap_file(datapath): + return datapath('io', 'data', 'test_mmap.csv') + - def setup_method(self, method): - self.mmap_file = os.path.join(tm.get_data_path(), - 'test_mmap.csv') +class TestMMapWrapper(object): - def test_constructor_bad_file(self): + def test_constructor_bad_file(self, mmap_file): non_file = StringIO('I am not a file') non_file.fileno = lambda: -1 @@ -252,15 +248,15 @@ def test_constructor_bad_file(self): tm.assert_raises_regex(err, msg, common.MMapWrapper, non_file) - target = open(self.mmap_file, 'r') + target = open(mmap_file, 'r') target.close() msg = "I/O operation on closed file" tm.assert_raises_regex( ValueError, msg, common.MMapWrapper, target) - def test_get_attr(self): - with open(self.mmap_file, 'r') as target: + def test_get_attr(self, mmap_file): + with open(mmap_file, 'r') as target: wrapper = common.MMapWrapper(target) attrs = dir(wrapper.mmap) @@ -273,8 +269,8 @@ def test_get_attr(self): assert not hasattr(wrapper, 'foo') - def test_next(self): - with open(self.mmap_file, 'r') as target: + def test_next(self, mmap_file): + with open(mmap_file, 'r') as target: wrapper = common.MMapWrapper(target) lines = target.readlines() diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 2a225e6fe6a45..1fda56dbff772 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -39,8 +39,9 @@ @td.skip_if_no('xlrd', '0.9') class SharedItems(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "data") self.frame = _frame.copy() self.frame2 = _frame2.copy() self.tsframe = _tsframe.copy() @@ -49,7 
+50,6 @@ def setup_method(self, method): def get_csv_refdf(self, basename): """ Obtain the reference data from read_csv with the Python engine. - Test data path is defined by pandas.util.testing.get_data_path() Parameters ---------- @@ -68,8 +68,7 @@ def get_csv_refdf(self, basename): def get_excelfile(self, basename, ext): """ - Return test data ExcelFile instance. Test data path is defined by - pandas.util.testing.get_data_path() + Return test data ExcelFile instance. Parameters ---------- @@ -86,8 +85,7 @@ def get_excelfile(self, basename, ext): def get_exceldf(self, basename, ext, *args, **kwds): """ - Return test data DataFrame. Test data path is defined by - pandas.util.testing.get_data_path() + Return test data DataFrame. Parameters ---------- diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index a56946b82b027..9c6a8de7ed446 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1,6 +1,5 @@ from __future__ import print_function -import glob import os import re import threading @@ -25,8 +24,18 @@ import pandas.util._test_decorators as td from pandas.util.testing import makeCustomDataframe as mkdf, network +HERE = os.path.dirname(__file__) -DATA_PATH = tm.get_data_path() + +@pytest.fixture(params=[ + 'chinese_utf-16.html', + 'chinese_utf-32.html', + 'chinese_utf-8.html', + 'letz_latin1.html', +]) +def html_encoding_file(request, datapath): + """Parametrized fixture for HTML encoding test filenames.""" + return datapath('io', 'data', 'html_encoding', request.param) def assert_framelist_equal(list1, list2, *args, **kwargs): @@ -44,11 +53,11 @@ def assert_framelist_equal(list1, list2, *args, **kwargs): @td.skip_if_no('bs4') -def test_bs4_version_fails(monkeypatch): +def test_bs4_version_fails(monkeypatch, datapath): import bs4 monkeypatch.setattr(bs4, '__version__', '4.2') with tm.assert_raises_regex(ValueError, "minimum version"): - read_html(os.path.join(DATA_PATH, "spam.html"), flavor='bs4') + read_html(datapath("io", "data", "spam.html"), flavor='bs4') def test_invalid_flavor(): @@ -59,8 +68,8 @@ def test_invalid_flavor(): @td.skip_if_no('bs4') @td.skip_if_no('lxml') -def test_same_ordering(): - filename = os.path.join(DATA_PATH, 'valid_markup.html') +def test_same_ordering(datapath): + filename = datapath('io', 'data', 'valid_markup.html') dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) @@ -72,11 +81,14 @@ def test_same_ordering(): pytest.param('lxml', marks=pytest.mark.skipif( not td.safe_import('lxml'), reason='No lxml'))], scope="class") class TestReadHtml(object): - spam_data = os.path.join(DATA_PATH, 'spam.html') - spam_data_kwargs = {} - if PY3: - spam_data_kwargs['encoding'] = 'UTF-8' - banklist_data = os.path.join(DATA_PATH, 'banklist.html') + + @pytest.fixture(autouse=True) + def set_files(self, datapath): + self.spam_data = datapath('io', 'data', 'spam.html') + self.spam_data_kwargs = {} + if PY3: + self.spam_data_kwargs['encoding'] = 'UTF-8' + self.banklist_data = datapath("io", "data", "banklist.html") @pytest.fixture(autouse=True, scope="function") def set_defaults(self, flavor, request): @@ -272,7 +284,8 @@ def test_invalid_url(self): @pytest.mark.slow def test_file_url(self): url = self.banklist_data - dfs = self.read_html(file_path_to_url(url), 'First', + dfs = self.read_html(file_path_to_url(os.path.abspath(url)), + 'First', attrs={'id': 'table'}) assert isinstance(dfs, list) for df in dfs: @@ -326,7 
+339,7 @@ def test_multiindex_header_index_skiprows(self): @pytest.mark.slow def test_regex_idempotency(self): url = self.banklist_data - dfs = self.read_html(file_path_to_url(url), + dfs = self.read_html(file_path_to_url(os.path.abspath(url)), match=re.compile(re.compile('Florida')), attrs={'id': 'table'}) assert isinstance(dfs, list) @@ -352,9 +365,9 @@ def test_python_docs_table(self): assert sorted(zz) == sorted(['Repo', 'What']) @pytest.mark.slow - def test_thousands_macau_stats(self): + def test_thousands_macau_stats(self, datapath): all_non_nan_table_index = -2 - macau_data = os.path.join(DATA_PATH, 'macau.html') + macau_data = datapath("io", "data", "macau.html") dfs = self.read_html(macau_data, index_col=0, attrs={'class': 'style1'}) df = dfs[all_non_nan_table_index] @@ -362,9 +375,9 @@ def test_thousands_macau_stats(self): assert not any(s.isna().any() for _, s in df.iteritems()) @pytest.mark.slow - def test_thousands_macau_index_col(self): + def test_thousands_macau_index_col(self, datapath): all_non_nan_table_index = -2 - macau_data = os.path.join(DATA_PATH, 'macau.html') + macau_data = datapath('io', 'data', 'macau.html') dfs = self.read_html(macau_data, index_col=0, header=0) df = dfs[all_non_nan_table_index] @@ -518,8 +531,8 @@ def test_countries_municipalities(self): res2 = self.read_html(data2, header=0) assert_framelist_equal(res1, res2) - def test_nyse_wsj_commas_table(self): - data = os.path.join(DATA_PATH, 'nyse_wsj.html') + def test_nyse_wsj_commas_table(self, datapath): + data = datapath('io', 'data', 'nyse_wsj.html') df = self.read_html(data, index_col=0, header=0, attrs={'class': 'mdcTable'})[0] @@ -530,7 +543,7 @@ def test_nyse_wsj_commas_table(self): tm.assert_index_equal(df.columns, columns) @pytest.mark.slow - def test_banklist_header(self): + def test_banklist_header(self, datapath): from pandas.io.html import _remove_whitespace def try_remove_ws(x): @@ -541,7 +554,7 @@ def try_remove_ws(x): df = self.read_html(self.banklist_data, 'Metcalf', attrs={'id': 'table'})[0] - ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), + ground_truth = read_csv(datapath('io', 'data', 'banklist.csv'), converters={'Updated Date': Timestamp, 'Closing Date': Timestamp}) assert df.shape == ground_truth.shape @@ -658,19 +671,19 @@ def test_parse_dates_combine(self): newdf = DataFrame({'datetime': raw_dates}) tm.assert_frame_equal(newdf, res[0]) - def test_computer_sales_page(self): - data = os.path.join(DATA_PATH, 'computer_sales_page.html') + def test_computer_sales_page(self, datapath): + data = datapath('io', 'data', 'computer_sales_page.html') with tm.assert_raises_regex(ParserError, r"Passed header=\[0,1\] are " r"too many rows for this " r"multi_index of columns"): self.read_html(data, header=[0, 1]) - data = os.path.join(DATA_PATH, 'computer_sales_page.html') + data = datapath('io', 'data', 'computer_sales_page.html') assert self.read_html(data, header=[1, 2]) - def test_wikipedia_states_table(self): - data = os.path.join(DATA_PATH, 'wikipedia_states.html') + def test_wikipedia_states_table(self, datapath): + data = datapath('io', 'data', 'wikipedia_states.html') assert os.path.isfile(data), '%r is not a file' % data assert os.path.getsize(data), '%r is an empty file' % data result = self.read_html(data, 'Arizona', header=1)[0] @@ -784,15 +797,15 @@ def test_multiple_header_rows(self): html_df = read_html(html, )[0] tm.assert_frame_equal(expected_df, html_df) - def test_works_on_valid_markup(self): - filename = os.path.join(DATA_PATH, 'valid_markup.html') + def 
test_works_on_valid_markup(self, datapath): + filename = datapath('io', 'data', 'valid_markup.html') dfs = self.read_html(filename, index_col=0) assert isinstance(dfs, list) assert isinstance(dfs[0], DataFrame) @pytest.mark.slow - def test_fallback_success(self): - banklist_data = os.path.join(DATA_PATH, 'banklist.html') + def test_fallback_success(self, datapath): + banklist_data = datapath('io', 'data', 'banklist.html') self.read_html(banklist_data, '.*Water.*', flavor=['lxml', 'html5lib']) def test_to_html_timestamp(self): @@ -835,22 +848,23 @@ def test_displayed_only(self, displayed_only, exp0, exp1): else: assert len(dfs) == 1 # Should not parse hidden table - @pytest.mark.parametrize("f", glob.glob( - os.path.join(DATA_PATH, 'html_encoding', '*.html'))) - def test_encode(self, f): - _, encoding = os.path.splitext(os.path.basename(f))[0].split('_') + def test_encode(self, html_encoding_file): + _, encoding = os.path.splitext( + os.path.basename(html_encoding_file) + )[0].split('_') try: - with open(f, 'rb') as fobj: + with open(html_encoding_file, 'rb') as fobj: from_string = self.read_html(fobj.read(), encoding=encoding, index_col=0).pop() - with open(f, 'rb') as fobj: + with open(html_encoding_file, 'rb') as fobj: from_file_like = self.read_html(BytesIO(fobj.read()), encoding=encoding, index_col=0).pop() - from_filename = self.read_html(f, encoding=encoding, + from_filename = self.read_html(html_encoding_file, + encoding=encoding, index_col=0).pop() tm.assert_frame_equal(from_string, from_file_like) tm.assert_frame_equal(from_string, from_filename) @@ -906,7 +920,7 @@ def seekable(self): assert self.read_html(bad) @pytest.mark.slow - def test_importcheck_thread_safety(self): + def test_importcheck_thread_safety(self, datapath): # see gh-16928 class ErrorThread(threading.Thread): @@ -921,7 +935,7 @@ def run(self): # force import check by reinitalising global vars in html.py reload(pandas.io.html) - filename = os.path.join(DATA_PATH, 'valid_markup.html') + filename = datapath('io', 'data', 'valid_markup.html') helper_thread1 = ErrorThread(target=self.read_html, args=(filename,)) helper_thread2 = ErrorThread(target=self.read_html, args=(filename,)) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 0b1c1ca178762..412e218f95c6f 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -3,6 +3,7 @@ from warnings import catch_warnings import os import datetime +import glob import numpy as np from distutils.version import LooseVersion @@ -836,13 +837,13 @@ def test_default_encoding(self): assert_frame_equal(result, frame) -def legacy_packers_versions(): - # yield the packers versions - path = tm.get_data_path('legacy_msgpack') - for v in os.listdir(path): - p = os.path.join(path, v) - if os.path.isdir(p): - yield v +files = glob.glob(os.path.join(os.path.dirname(__file__), "data", + "legacy_msgpack", "*", "*.msgpack")) + + +@pytest.fixture(params=files) +def legacy_packer(request, datapath): + return datapath(request.param) class TestMsgpack(object): @@ -919,24 +920,20 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): else: tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize('version', legacy_packers_versions()) def test_msgpacks_legacy(self, current_packers_data, all_packers_data, - version): - - pth = tm.get_data_path('legacy_msgpack/{0}'.format(version)) - n = 0 - for f in os.listdir(pth): - # GH12142 0.17 files packed in P2 can't be read in P3 - if (compat.PY3 and 
version.startswith('0.17.') and - f.split('.')[-4][-1] == '2'): - continue - vf = os.path.join(pth, f) - try: - with catch_warnings(record=True): - self.compare(current_packers_data, all_packers_data, - vf, version) - except ImportError: - # blosc not installed - continue - n += 1 - assert n > 0, 'Msgpack files are not tested' + legacy_packer, datapath): + + version = os.path.basename(os.path.dirname(legacy_packer)) + + # GH12142 0.17 files packed in P2 can't be read in P3 + if (compat.PY3 and version.startswith('0.17.') and + legacy_packer.split('.')[-4][-1] == '2'): + msg = "Files packed in Py2 can't be read in Py3 ({})" + pytest.skip(msg.format(version)) + try: + with catch_warnings(record=True): + self.compare(current_packers_data, all_packers_data, + legacy_packer, version) + except ImportError: + # blosc not installed + pass diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index fbe2174e603e2..45cbbd43cd6a8 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -12,7 +12,7 @@ 3. Move the created pickle to "data/legacy_pickle/" directory. """ - +import glob import pytest from warnings import catch_warnings @@ -184,27 +184,25 @@ def compare_sp_frame_float(result, expected, typ, version): tm.assert_sp_frame_equal(result, expected) +files = glob.glob(os.path.join(os.path.dirname(__file__), "data", + "legacy_pickle", "*", "*.pickle")) + + +@pytest.fixture(params=files) +def legacy_pickle(request, datapath): + return datapath(request.param) + + # --------------------- # tests # --------------------- -def legacy_pickle_versions(): - # yield the pickle versions - path = tm.get_data_path('legacy_pickle') - for v in os.listdir(path): - p = os.path.join(path, v) - if os.path.isdir(p): - for f in os.listdir(p): - yield (v, f) - - -@pytest.mark.parametrize('version, f', legacy_pickle_versions()) -def test_pickles(current_pickle_data, version, f): +def test_pickles(current_pickle_data, legacy_pickle): if not is_platform_little_endian(): pytest.skip("known failure on non-little endian") - vf = tm.get_data_path('legacy_pickle/{}/{}'.format(version, f)) + version = os.path.basename(os.path.dirname(legacy_pickle)) with catch_warnings(record=True): - compare(current_pickle_data, vf, version) + compare(current_pickle_data, legacy_pickle, version) def test_round_trip_current(current_pickle_data): @@ -260,12 +258,11 @@ def python_unpickler(path): compare_element(result, expected, typ) -def test_pickle_v0_14_1(): +def test_pickle_v0_14_1(datapath): cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, categories=['a', 'b', 'c', 'd']) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_14_1.pickle') + pickle_path = datapath('io', 'data', 'categorical_0_14_1.pickle') # This code was executed once on v0.14.1 to generate the pickle: # # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], @@ -275,14 +272,13 @@ def test_pickle_v0_14_1(): tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) -def test_pickle_v0_15_2(): +def test_pickle_v0_15_2(datapath): # ordered -> _ordered # GH 9347 cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, categories=['a', 'b', 'c', 'd']) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_15_2.pickle') + pickle_path = datapath('io', 'data', 'categorical_0_15_2.pickle') # This code was executed once on v0.15.2 to generate the pickle: # # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], diff --git a/pandas/tests/io/test_pytables.py 
b/pandas/tests/io/test_pytables.py index f96e7eeb40ea2..b95df3840b6c5 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4449,28 +4449,27 @@ def f(): store.select('df') tm.assert_raises_regex(ClosedFileError, 'file is not open', f) - def test_pytables_native_read(self): - + def test_pytables_native_read(self, datapath): with ensure_clean_store( - tm.get_data_path('legacy_hdf/pytables_native.h5'), + datapath('io', 'data', 'legacy_hdf/pytables_native.h5'), mode='r') as store: d2 = store['detector/readout'] assert isinstance(d2, DataFrame) @pytest.mark.skipif(PY35 and is_platform_windows(), reason="native2 read fails oddly on windows / 3.5") - def test_pytables_native2_read(self): + def test_pytables_native2_read(self, datapath): with ensure_clean_store( - tm.get_data_path('legacy_hdf/pytables_native2.h5'), + datapath('io', 'data', 'legacy_hdf', 'pytables_native2.h5'), mode='r') as store: str(store) d1 = store['detector'] assert isinstance(d1, DataFrame) - def test_legacy_table_read(self): + def test_legacy_table_read(self, datapath): # legacy table types with ensure_clean_store( - tm.get_data_path('legacy_hdf/legacy_table.h5'), + datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5'), mode='r') as store: with catch_warnings(record=True): @@ -5117,7 +5116,7 @@ def test_fspath(self): with pd.HDFStore(path) as store: assert os.fspath(store) == str(path) - def test_read_py2_hdf_file_in_py3(self): + def test_read_py2_hdf_file_in_py3(self, datapath): # GH 16781 # tests reading a PeriodIndex DataFrame written in Python2 in Python3 @@ -5132,8 +5131,8 @@ def test_read_py2_hdf_file_in_py3(self): ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) with ensure_clean_store( - tm.get_data_path( - 'legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), + datapath('io', 'data', 'legacy_hdf', + 'periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), mode='r') as store: result = store['p'] assert_frame_equal(result, expected) @@ -5530,14 +5529,14 @@ def test_store_timezone(self): assert_frame_equal(result, df) - def test_legacy_datetimetz_object(self): + def test_legacy_datetimetz_object(self, datapath): # legacy from < 0.17.0 # 8260 expected = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), B=Timestamp('20130603', tz='CET')), index=range(5)) with ensure_clean_store( - tm.get_data_path('legacy_hdf/datetimetz_object.h5'), + datapath('io', 'data', 'legacy_hdf', 'datetimetz_object.h5'), mode='r') as store: result = store['df'] assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index f3ab74d37a2bc..f8f742c5980ac 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -22,7 +22,6 @@ import pytest import sqlite3 import csv -import os import warnings import numpy as np @@ -184,9 +183,11 @@ class MixInBase(object): def teardown_method(self, method): - for tbl in self._get_all_tables(): - self.drop_table(tbl) - self._close_conn() + # if setup fails, there may not be a connection to close. 
+ if hasattr(self, 'conn'): + for tbl in self._get_all_tables(): + self.drop_table(tbl) + self._close_conn() class MySQLMixIn(MixInBase): @@ -253,9 +254,9 @@ def _get_exec(self): else: return self.conn.cursor() - def _load_iris_data(self): + def _load_iris_data(self, datapath): import io - iris_csv_file = os.path.join(tm.get_data_path(), 'iris.csv') + iris_csv_file = datapath('io', 'data', 'iris.csv') self.drop_table('iris') self._get_exec().execute(SQL_STRINGS['create_iris'][self.flavor]) @@ -503,9 +504,10 @@ class _TestSQLApi(PandasSQLTest): flavor = 'sqlite' mode = None - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.conn = self.connect() - self._load_iris_data() + self._load_iris_data(datapath) self._load_iris_view() self._load_test1_data() self._load_test2_data() @@ -1025,8 +1027,9 @@ class _EngineToConnMixin(object): A mixin that causes setup_connect to create a conn rather than an engine. """ - def setup_method(self, method): - super(_EngineToConnMixin, self).setup_method(method) + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + super(_EngineToConnMixin, self).setup_method(datapath) engine = self.conn conn = engine.connect() self.__tx = conn.begin() @@ -1034,12 +1037,14 @@ def setup_method(self, method): self.__engine = engine self.conn = conn - def teardown_method(self, method): + yield + self.__tx.rollback() self.conn.close() self.conn = self.__engine self.pandasSQL = sql.SQLDatabase(self.__engine) - super(_EngineToConnMixin, self).teardown_method(method) + # XXX: + # super(_EngineToConnMixin, self).teardown_method(method) @pytest.mark.single @@ -1136,7 +1141,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): """ flavor = None - @classmethod + @pytest.fixture(autouse=True, scope='class') def setup_class(cls): cls.setup_import() cls.setup_driver() @@ -1149,10 +1154,11 @@ def setup_class(cls): msg = "{0} - can't connect to {1} server".format(cls, cls.flavor) pytest.skip(msg) - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.setup_connect() - self._load_iris_data() + self._load_iris_data(datapath) self._load_raw_sql() self._load_test1_data() @@ -1920,11 +1926,12 @@ class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest): def connect(cls): return sqlite3.connect(':memory:') - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.conn = self.connect() self.pandasSQL = sql.SQLiteDatabase(self.conn) - self._load_iris_data() + self._load_iris_data(datapath) self._load_test1_data() @@ -2135,8 +2142,9 @@ def _skip_if_no_pymysql(): @pytest.mark.single class TestXSQLite(SQLiteMixIn): - def setup_method(self, method): - self.method = method + @pytest.fixture(autouse=True) + def setup_method(self, request, datapath): + self.method = request.function self.conn = sqlite3.connect(':memory:') def test_basic(self): @@ -2215,8 +2223,7 @@ def test_execute_fail(self): with pytest.raises(Exception): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) - @tm.capture_stdout - def test_execute_closed_connection(self): + def test_execute_closed_connection(self, request, datapath): create_sql = """ CREATE TABLE test ( @@ -2236,7 +2243,7 @@ def test_execute_closed_connection(self): tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setup_method(self.method) + self.setup_method(request, datapath) def test_na_roundtrip(self): pass @@ -2341,7 +2348,7 @@ def 
clean_up(test_table_to_drop): "if SQLAlchemy is not installed") class TestXMySQL(MySQLMixIn): - @classmethod + @pytest.fixture(autouse=True, scope='class') def setup_class(cls): _skip_if_no_pymysql() @@ -2370,7 +2377,8 @@ def setup_class(cls): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, request, datapath): _skip_if_no_pymysql() import pymysql try: @@ -2396,7 +2404,7 @@ def setup_method(self, method): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - self.method = method + self.method = request.function def test_basic(self): _skip_if_no_pymysql() @@ -2501,8 +2509,7 @@ def test_execute_fail(self): with pytest.raises(Exception): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) - @tm.capture_stdout - def test_execute_closed_connection(self): + def test_execute_closed_connection(self, request, datapath): _skip_if_no_pymysql() drop_sql = "DROP TABLE IF EXISTS test" create_sql = """ @@ -2525,7 +2532,7 @@ def test_execute_closed_connection(self): tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setup_method(self.method) + self.setup_method(request, datapath) def test_na_roundtrip(self): _skip_if_no_pymysql() diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index bfb72be80400e..cfe47cae7e5e1 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -25,8 +25,8 @@ @pytest.fixture -def dirpath(): - return tm.get_data_path() +def dirpath(datapath): + return datapath("io", "data") @pytest.fixture @@ -39,8 +39,9 @@ def parsed_114(dirpath): class TestStata(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "data") self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta') self.dta1_117 = os.path.join(self.dirpath, 'stata1_117.dta') diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index f65791329f2f1..09687dd97bd43 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -74,11 +74,6 @@ def setup_method(self, method): else: self.default_figsize = (8.0, 6.0) self.default_tick_position = 'left' if self.mpl_ge_2_0_0 else 'default' - # common test data - from pandas import read_csv - base = os.path.join(os.path.dirname(curpath()), os.pardir) - path = os.path.join(base, 'tests', 'data', 'iris.csv') - self.iris = read_csv(path) n = 100 with tm.RNGContext(42): diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py index 2c2d371921d2f..a45b17ec98261 100644 --- a/pandas/tests/plotting/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -46,10 +46,9 @@ def test_boxplot_deprecated(self): by='indic') @pytest.mark.slow - def test_radviz_deprecated(self): - df = self.iris + def test_radviz_deprecated(self, iris): with tm.assert_produces_warning(FutureWarning): - plotting.radviz(frame=df, class_column='Name') + plotting.radviz(frame=iris, class_column='Name') @pytest.mark.slow def test_plot_params(self): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index c82c939584dc7..0473610ea2f8f 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -100,11 +100,11 @@ def 
test_scatter_matrix_axis(self): axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) @pytest.mark.slow - def test_andrews_curves(self): + def test_andrews_curves(self, iris): from pandas.plotting import andrews_curves from matplotlib import cm - df = self.iris + df = iris _check_plot_works(andrews_curves, frame=df, class_column='Name') @@ -165,11 +165,11 @@ def test_andrews_curves(self): andrews_curves(data=df, class_column='Name') @pytest.mark.slow - def test_parallel_coordinates(self): + def test_parallel_coordinates(self, iris): from pandas.plotting import parallel_coordinates from matplotlib import cm - df = self.iris + df = iris ax = _check_plot_works(parallel_coordinates, frame=df, class_column='Name') @@ -234,11 +234,11 @@ def test_parallel_coordinates_with_sorted_labels(self): assert prev[1] < nxt[1] and prev[0] < nxt[0] @pytest.mark.slow - def test_radviz(self): + def test_radviz(self, iris): from pandas.plotting import radviz from matplotlib import cm - df = self.iris + df = iris _check_plot_works(radviz, frame=df, class_column='Name') rgba = ('#556270', '#4ECDC4', '#C7F464') @@ -272,8 +272,8 @@ def test_radviz(self): self._check_colors(handles, facecolors=colors) @pytest.mark.slow - def test_subplot_titles(self): - df = self.iris.drop('Name', axis=1).head() + def test_subplot_titles(self, iris): + df = iris.drop('Name', axis=1).head() # Use the column names as the subplot titles title = list(df.columns) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index cebbcc41c3e17..59b53cd23010e 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1,4 +1,3 @@ -import os import pytest import pytz @@ -13,8 +12,8 @@ class TestAsOfMerge(object): - def read_data(self, name, dedupe=False): - path = os.path.join(tm.get_data_path(), name) + def read_data(self, datapath, name, dedupe=False): + path = datapath('reshape', 'merge', 'data', name) x = read_csv(path) if dedupe: x = (x.drop_duplicates(['time', 'ticker'], keep='last') @@ -23,15 +22,17 @@ def read_data(self, name, dedupe=False): x.time = to_datetime(x.time) return x - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): - self.trades = self.read_data('trades.csv') - self.quotes = self.read_data('quotes.csv', dedupe=True) - self.asof = self.read_data('asof.csv') - self.tolerance = self.read_data('tolerance.csv') - self.allow_exact_matches = self.read_data('allow_exact_matches.csv') + self.trades = self.read_data(datapath, 'trades.csv') + self.quotes = self.read_data(datapath, 'quotes.csv', dedupe=True) + self.asof = self.read_data(datapath, 'asof.csv') + self.tolerance = self.read_data(datapath, 'tolerance.csv') + self.allow_exact_matches = self.read_data(datapath, + 'allow_exact_matches.csv') self.allow_exact_matches_and_tolerance = self.read_data( - 'allow_exact_matches_and_tolerance.csv') + datapath, 'allow_exact_matches_and_tolerance.csv') def test_examples1(self): """ doc-string examples """ @@ -423,11 +424,11 @@ def test_multiby_indexed(self): pd.merge_asof(left, right, left_index=True, right_index=True, left_by=['k1', 'k2'], right_by=['k1']) - def test_basic2(self): + def test_basic2(self, datapath): - expected = self.read_data('asof2.csv') - trades = self.read_data('trades2.csv') - quotes = self.read_data('quotes2.csv', dedupe=True) + expected = self.read_data(datapath, 'asof2.csv') + trades = self.read_data(datapath, 'trades2.csv') + quotes = self.read_data(datapath, 
'quotes2.csv', dedupe=True) result = merge_asof(trades, quotes, on='time', @@ -467,14 +468,14 @@ def test_valid_join_keys(self): merge_asof(trades, quotes, by='ticker') - def test_with_duplicates(self): + def test_with_duplicates(self, datapath): q = pd.concat([self.quotes, self.quotes]).sort_values( ['time', 'ticker']).reset_index(drop=True) result = merge_asof(self.trades, q, on='time', by='ticker') - expected = self.read_data('asof.csv') + expected = self.read_data(datapath, 'asof.csv') assert_frame_equal(result, expected) def test_with_duplicates_no_on(self): diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 5ea27f9e34e1c..807fb2530603a 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -282,10 +282,10 @@ def test_round_frac(self): result = tmod._round_frac(0.000123456, precision=2) assert result == 0.00012 - def test_qcut_binning_issues(self): + def test_qcut_binning_issues(self, datapath): # #1978, 1979 - path = os.path.join(tm.get_data_path(), 'cut_data.csv') - arr = np.loadtxt(path) + cut_file = datapath(os.path.join('reshape', 'data', 'cut_data.csv')) + arr = np.loadtxt(cut_file) result = qcut(arr, 20) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 74bc08ee9649b..b93a0206479ca 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1,4 +1,3 @@ -import os from distutils.version import LooseVersion from datetime import date, datetime, timedelta @@ -518,14 +517,15 @@ def test_add(self, offset_types, tz): assert isinstance(result, Timestamp) assert result == expected_localize - def test_pickle_v0_15_2(self): + def test_pickle_v0_15_2(self, datapath): offsets = {'DateOffset': DateOffset(years=1), 'MonthBegin': MonthBegin(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'Week': Week(1)} - pickle_path = os.path.join(tm.get_data_path(), - 'dateoffset_0_15_2.pickle') + + pickle_path = datapath('tseries', 'offsets', 'data', + 'dateoffset_0_15_2.pickle') # This code was executed once on v0.15.2 to generate the pickle: # with open(pickle_path, 'wb') as f: pickle.dump(offsets, f) # @@ -1838,12 +1838,10 @@ def _check_roundtrip(obj): _check_roundtrip(self.offset2) _check_roundtrip(self.offset * 2) - def test_pickle_compat_0_14_1(self): + def test_pickle_compat_0_14_1(self, datapath): hdays = [datetime(2013, 1, 1) for ele in range(4)] - - pth = tm.get_data_path() - - cday0_14_1 = read_pickle(os.path.join(pth, 'cday-0.14.1.pickle')) + pth = datapath('tseries', 'offsets', 'data', 'cday-0.14.1.pickle') + cday0_14_1 = read_pickle(pth) cday = CDay(holidays=hdays) assert cday == cday0_14_1 diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index ab7c4fb528452..4d34987e14f75 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import os import pandas as pd import pytest import numpy as np @@ -841,3 +842,15 @@ def test_locale(self): # GH9744 locales = tm.get_locales() assert len(locales) >= 1 + + +def test_datapath_missing(datapath, request): + if not request.config.getoption("--strict-data-files"): + pytest.skip("Need to set '--strict-data-files'") + + with pytest.raises(ValueError): + datapath('not_a_file') + + result = datapath('data', 'iris.csv') + expected = os.path.join('pandas', 'tests', 'data', 'iris.csv') + assert result == expected diff --git a/pandas/util/_test_decorators.py 
b/pandas/util/_test_decorators.py index 27c24e3a68079..c6ab24403d58d 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -23,7 +23,6 @@ def test_foo(): For more information, refer to the ``pytest`` documentation on ``skipif``. """ - import pytest import locale from distutils.version import LooseVersion diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 675dd94d49750..a5afcb6915034 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -6,7 +6,6 @@ import sys import tempfile import warnings -import inspect import os import subprocess import locale @@ -757,15 +756,6 @@ def ensure_clean(filename=None, return_filelike=False): print("Exception on removing file: {error}".format(error=e)) -def get_data_path(f=''): - """Return the path of a data file, these are relative to the current test - directory. - """ - # get our callers file - _, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1] - base_dir = os.path.abspath(os.path.dirname(filename)) - return os.path.join(base_dir, 'data', f) - # ----------------------------------------------------------------------------- # Comparators diff --git a/setup.cfg b/setup.cfg index 6d9657737a8bd..9ec967c25e225 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,4 +32,5 @@ markers = slow: mark a test as slow network: mark a test as network high_memory: mark a test as a high-memory only -doctest_optionflags= NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL +addopts = --strict-data-files +doctest_optionflags= NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL \ No newline at end of file diff --git a/setup.py b/setup.py index dd026bd611727..0fd008612b5bd 100755 --- a/setup.py +++ b/setup.py @@ -735,11 +735,7 @@ def pxd(name): maintainer=AUTHOR, version=versioneer.get_version(), packages=find_packages(include=['pandas', 'pandas.*']), - package_data={'': ['data/*', 'templates/*', '_libs/*.dll'], - 'pandas.tests.io': ['data/legacy_hdf/*.h5', - 'data/legacy_pickle/*/*.pickle', - 'data/legacy_msgpack/*/*.msgpack', - 'data/html_encoding/*.html']}, + package_data={'': ['templates/*', '_libs/*.dll']}, ext_modules=extensions, maintainer_email=EMAIL, description=DESCRIPTION,
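
Reviewer note (not part of the diff): a minimal sketch of how a test consumes the new ``datapath`` fixture from pandas/conftest.py. The test and the file name 'example.csv' are hypothetical placeholders; the behavior described in the comments (joining onto pandas/tests, skipping on a missing file, raising under --strict-data-files) is taken from the fixture added above.

    import pandas as pd

    def test_roundtrip_example(datapath):
        # datapath(*args) returns os.path.join('pandas', 'tests', *args).
        # If the file is absent -- e.g. in a wheel built from the new
        # MANIFEST.in/setup.py, which exclude test data -- the fixture
        # calls pytest.skip(); when pytest runs with --strict-data-files
        # it raises ValueError instead.
        path = datapath('io', 'parser', 'data', 'example.csv')  # hypothetical file
        df = pd.read_csv(path)
        assert not df.empty

Since setup.cfg now sets ``addopts = --strict-data-files``, a development checkout fails loudly when a data file goes missing, while ``pandas.test()`` against a trimmed install only reports skips; the ``-r xXs`` flag added in ci/script_single.sh makes those skip reasons visible in the CI logs.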
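
A second pattern worth calling out: plain ``setup_method(self, method)`` hooks cannot request fixtures, which is why the diff converts them to autouse fixtures wherever setup needs ``datapath``. A minimal sketch of the conversion, with a hypothetical class and directory:

    import pytest

    class TestExample(object):

        @pytest.fixture(autouse=True)
        def setup(self, datapath):
            # Runs before every test in the class and, unlike
            # setup_method, may request other fixtures such as datapath.
            self.dirpath = datapath('io', 'data')  # hypothetical directory
            yield
            # Anything after the yield runs as teardown, replacing
            # teardown_method (see the test_pandas.py hunk above).
            del self.dirpath

        def test_uses_setup(self):
            assert self.dirpath.endswith('data')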
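
Finally, on the legacy pickle/msgpack parametrization: the old version-walking generators are replaced by module-level globs feeding ``@pytest.fixture(params=files)``, so each data file becomes its own test case. One subtlety makes ``datapath(request.param)`` work even though the glob typically yields absolute paths: ``os.path.join`` discards earlier components when a later one is absolute (POSIX behavior), so the fixture passes such paths through unchanged while still applying its existence check. A one-line illustration:

    import os

    # POSIX example: the absolute glob result wins over the
    # 'pandas/tests' prefix inside the datapath fixture.
    assert os.path.join('pandas', 'tests', '/tmp/x.pickle') == '/tmp/x.pickle'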