Make subdirs in tests/io/data #29513

Merged: 8 commits, Nov 13, 2019
2 changes: 1 addition & 1 deletion pandas/plotting/_misc.py
@@ -364,7 +364,7 @@ def parallel_coordinates(
--------
>>> from matplotlib import pyplot as plt
>>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master'
'/pandas/tests/data/iris.csv')
'/pandas/tests/data/csv/iris.csv')
>>> pd.plotting.parallel_coordinates(
df, 'Name',
color=('#556270', '#4ECDC4', '#C7F464'))
33 files renamed without changes.
5 changes: 3 additions & 2 deletions pandas/tests/io/excel/conftest.py
@@ -24,11 +24,12 @@ def merge_cells(request):


@pytest.fixture
def df_ref():
def df_ref(datapath):
"""
Obtain the reference data from read_csv with the Python engine.
"""
df_ref = read_csv("test1.csv", index_col=0, parse_dates=True, engine="python")
filepath = datapath("io", "data", "csv", "test1.csv")
df_ref = read_csv(filepath, index_col=0, parse_dates=True, engine="python")
return df_ref
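
Passing datapath into df_ref means the reference CSV is resolved from the repository layout instead of from whatever working directory pytest was launched in. For orientation, a minimal sketch of how a datapath-style fixture can be wired up (an illustration only, not pandas' actual top-level conftest; the base-path computation and skip message are assumptions):

import os

import pytest


@pytest.fixture
def datapath():
    """Resolve path components under pandas/tests, skipping if the file is missing."""
    # Assumed layout: this conftest sits two directories below pandas/tests.
    base = os.path.join(os.path.dirname(__file__), "..", "..")

    def resolver(*parts):
        path = os.path.join(base, *parts)
        if not os.path.exists(path):
            pytest.skip("data file not found: {}".format(path))
        return path

    return resolver

With a fixture like this, datapath("io", "data", "csv", "test1.csv") yields an absolute path into the new csv/ subdirectory.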


2 changes: 1 addition & 1 deletion pandas/tests/io/excel/test_odf.py
@@ -13,7 +13,7 @@
def cd_and_set_engine(monkeypatch, datapath):
func = functools.partial(pd.read_excel, engine="odf")
monkeypatch.setattr(pd, "read_excel", func)
monkeypatch.chdir(datapath("io", "data"))
monkeypatch.chdir(datapath("io", "data", "excel"))


def test_read_invalid_types_raises():
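
The cd_and_set_engine fixture above binds the odf engine with functools.partial and chdirs into the new excel/ data directory, so tests in this module can open workbooks by bare filename. A short usage sketch under those assumptions (the test name is hypothetical, and it assumes a test1.ods workbook exists in that directory):

import pandas as pd


def test_read_by_bare_filename(cd_and_set_engine):
    # The fixture has already chdir'ed into pandas/tests/io/data/excel and
    # patched pd.read_excel to default to engine="odf", so a relative
    # filename resolves correctly here.
    df = pd.read_excel("test1.ods", index_col=0)
    assert not df.empty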
12 changes: 7 additions & 5 deletions pandas/tests/io/excel/test_readers.py
@@ -81,7 +81,7 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch, read_ext):
pytest.skip()

func = partial(pd.read_excel, engine=engine)
monkeypatch.chdir(datapath("io", "data"))
monkeypatch.chdir(datapath("io", "data", "excel"))
monkeypatch.setattr(pd, "read_excel", func)

def test_usecols_int(self, read_ext, df_ref):
@@ -502,9 +502,11 @@ def test_read_from_http_url(self, read_ext):
if read_ext == ".ods": # TODO: remove once on master
pytest.skip()

# TODO: alimcmaster1 - revert to master
url = (
"https://raw.github.com/pandas-dev/pandas/master/"
"pandas/tests/io/data/test1" + read_ext
"https://raw.githubusercontent.com/alimcmaster1"
"/pandas/mcmali-tests-dir-struct/"
"pandas/tests/io/data/excel/test1" + read_ext
)
url_table = pd.read_excel(url)
local_table = pd.read_excel("test1" + read_ext)
@@ -527,7 +529,7 @@ def test_read_from_s3_url(self, read_ext, s3_resource):
def test_read_from_file_url(self, read_ext, datapath):

# FILE
localtable = os.path.join(datapath("io", "data"), "test1" + read_ext)
localtable = os.path.join(datapath("io", "data", "excel"), "test1" + read_ext)
local_table = pd.read_excel(localtable)

try:
@@ -828,7 +830,7 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch, read_ext):
pytest.skip()

func = partial(pd.ExcelFile, engine=engine)
monkeypatch.chdir(datapath("io", "data"))
monkeypatch.chdir(datapath("io", "data", "excel"))
monkeypatch.setattr(pd, "ExcelFile", func)

def test_excel_passes_na(self, read_ext):
2 changes: 1 addition & 1 deletion pandas/tests/io/excel/test_xlrd.py
@@ -35,7 +35,7 @@ def test_read_xlrd_book(read_ext, frame):

# TODO: test for openpyxl as well
def test_excel_table_sheet_by_index(datapath, read_ext):
path = datapath("io", "data", "test1{}".format(read_ext))
path = datapath("io", "data", "excel", "test1{}".format(read_ext))
with pd.ExcelFile(path) as excel:
with pytest.raises(xlrd.XLRDError):
pd.read_excel(excel, "asdf")
28 changes: 20 additions & 8 deletions pandas/tests/io/test_common.py
@@ -208,21 +208,33 @@ def test_read_expands_user_home_dir(
@pytest.mark.parametrize(
"reader, module, path",
[
(pd.read_csv, "os", ("io", "data", "iris.csv")),
(pd.read_table, "os", ("io", "data", "iris.csv")),
(pd.read_fwf, "os", ("io", "data", "fixed_width_format.txt")),
(pd.read_excel, "xlrd", ("io", "data", "test1.xlsx")),
(pd.read_feather, "feather", ("io", "data", "feather-0_3_1.feather")),
(pd.read_csv, "os", ("data", "iris.csv")),
(pd.read_table, "os", ("data", "iris.csv")),
(
pd.read_fwf,
"os",
("io", "data", "fixed_width", "fixed_width_format.txt"),
),
(pd.read_excel, "xlrd", ("io", "data", "excel", "test1.xlsx")),
(
pd.read_feather,
"feather",
("io", "data", "feather", "feather-0_3_1.feather"),
),
(
pd.read_hdf,
"tables",
("io", "data", "legacy_hdf", "datetimetz_object.h5"),
),
(pd.read_stata, "os", ("io", "data", "stata10_115.dta")),
(pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")),
(pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")),
(pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")),
(pd.read_msgpack, "os", ("io", "msgpack", "data", "frame.mp")),
(pd.read_pickle, "os", ("io", "data", "categorical.0.25.0.pickle")),
(
pd.read_pickle,
"os",
("io", "data", "pickle", "categorical.0.25.0.pickle"),
),
],
)
def test_read_fspath_all(self, reader, module, path, datapath):
@@ -296,7 +308,7 @@ def test_write_fspath_hdf5(self):

@pytest.fixture
def mmap_file(datapath):
return datapath("io", "data", "test_mmap.csv")
return datapath("io", "data", "csv", "test_mmap.csv")
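
The fspath parametrization above now spells out each format's subdirectory (csv/, excel/, feather/, pickle/, and so on). test_read_fspath_all presumably resolves each tuple through datapath and checks that the reader accepts an os.PathLike object as well as a plain string; a simplified sketch of that kind of check (illustrative helper names, not the exact test body):

import pandas.util.testing as tm


class FSPath:
    """Minimal os.PathLike wrapper, just enough to exercise __fspath__ handling."""

    def __init__(self, path):
        self._path = path

    def __fspath__(self):
        return self._path


def check_reader_accepts_pathlike(reader, path_parts, datapath):
    # e.g. path_parts == ("io", "data", "excel", "test1.xlsx")
    path = datapath(*path_parts)
    result = reader(FSPath(path))   # reader given a PathLike object
    expected = reader(path)         # reader given a plain string
    tm.assert_frame_equal(result, expected)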


class TestMMapWrapper:
34 changes: 18 additions & 16 deletions pandas/tests/io/test_html.py
@@ -63,7 +63,7 @@ def test_bs4_version_fails(monkeypatch, datapath):

monkeypatch.setattr(bs4, "__version__", "4.2")
with pytest.raises(ImportError, match="Pandas requires version"):
read_html(datapath("io", "data", "spam.html"), flavor="bs4")
read_html(datapath("io", "data", "html", "spam.html"), flavor="bs4")


def test_invalid_flavor():
@@ -78,7 +78,7 @@ def test_invalid_flavor():
@td.skip_if_no("bs4")
@td.skip_if_no("lxml")
def test_same_ordering(datapath):
filename = datapath("io", "data", "valid_markup.html")
filename = datapath("io", "data", "html", "valid_markup.html")
dfs_lxml = read_html(filename, index_col=0, flavor=["lxml"])
dfs_bs4 = read_html(filename, index_col=0, flavor=["bs4"])
assert_framelist_equal(dfs_lxml, dfs_bs4)
@@ -95,10 +95,10 @@ def test_same_ordering(datapath):
class TestReadHtml:
@pytest.fixture(autouse=True)
def set_files(self, datapath):
self.spam_data = datapath("io", "data", "spam.html")
self.spam_data = datapath("io", "data", "html", "spam.html")
self.spam_data_kwargs = {}
self.spam_data_kwargs["encoding"] = "UTF-8"
self.banklist_data = datapath("io", "data", "banklist.html")
self.banklist_data = datapath("io", "data", "html", "banklist.html")

@pytest.fixture(autouse=True, scope="function")
def set_defaults(self, flavor, request):
@@ -133,9 +133,11 @@ def test_banklist_url(self):

@tm.network
def test_spam_url(self):
# TODO: alimcmaster1 - revert to master
url = (
"https://raw.githubusercontent.com/pandas-dev/pandas/master/"
"pandas/tests/io/data/spam.html"
"https://raw.githubusercontent.com/alimcmaster1/"
"pandas/mcmali-tests-dir-struct/"
"pandas/tests/io/data/html/spam.html"
)
df1 = self.read_html(url, ".*Water.*")
df2 = self.read_html(url, "Unit")
@@ -376,7 +378,7 @@ def test_python_docs_table(self):
@pytest.mark.slow
def test_thousands_macau_stats(self, datapath):
all_non_nan_table_index = -2
macau_data = datapath("io", "data", "macau.html")
macau_data = datapath("io", "data", "html", "macau.html")
dfs = self.read_html(macau_data, index_col=0, attrs={"class": "style1"})
df = dfs[all_non_nan_table_index]

@@ -385,7 +387,7 @@ def test_thousands_macau_stats(self, datapath):
@pytest.mark.slow
def test_thousands_macau_index_col(self, datapath):
all_non_nan_table_index = -2
macau_data = datapath("io", "data", "macau.html")
macau_data = datapath("io", "data", "html", "macau.html")
dfs = self.read_html(macau_data, index_col=0, header=0)
df = dfs[all_non_nan_table_index]

@@ -566,7 +568,7 @@ def test_parse_header_of_non_string_column(self):
tm.assert_frame_equal(result, expected)

def test_nyse_wsj_commas_table(self, datapath):
data = datapath("io", "data", "nyse_wsj.html")
data = datapath("io", "data", "html", "nyse_wsj.html")
df = self.read_html(data, index_col=0, header=0, attrs={"class": "mdcTable"})[0]

expected = Index(
@@ -594,7 +596,7 @@ def try_remove_ws(x):

df = self.read_html(self.banklist_data, "Metcalf", attrs={"id": "table"})[0]
ground_truth = read_csv(
datapath("io", "data", "banklist.csv"),
datapath("io", "data", "csv", "banklist.csv"),
converters={"Updated Date": Timestamp, "Closing Date": Timestamp},
)
assert df.shape == ground_truth.shape
@@ -889,19 +891,19 @@ def test_parse_dates_combine(self):
tm.assert_frame_equal(newdf, res[0])

def test_computer_sales_page(self, datapath):
data = datapath("io", "data", "computer_sales_page.html")
data = datapath("io", "data", "html", "computer_sales_page.html")
msg = (
r"Passed header=\[0,1\] are too many "
r"rows for this multi_index of columns"
)
with pytest.raises(ParserError, match=msg):
self.read_html(data, header=[0, 1])

data = datapath("io", "data", "computer_sales_page.html")
data = datapath("io", "data", "html", "computer_sales_page.html")
assert self.read_html(data, header=[1, 2])

def test_wikipedia_states_table(self, datapath):
data = datapath("io", "data", "wikipedia_states.html")
data = datapath("io", "data", "html", "wikipedia_states.html")
assert os.path.isfile(data), "{data!r} is not a file".format(data=data)
assert os.path.getsize(data), "{data!r} is an empty file".format(data=data)
result = self.read_html(data, "Arizona", header=1)[0]
@@ -1095,14 +1097,14 @@ def test_multiple_header_rows(self):
tm.assert_frame_equal(expected_df, html_df)

def test_works_on_valid_markup(self, datapath):
filename = datapath("io", "data", "valid_markup.html")
filename = datapath("io", "data", "html", "valid_markup.html")
dfs = self.read_html(filename, index_col=0)
assert isinstance(dfs, list)
assert isinstance(dfs[0], DataFrame)

@pytest.mark.slow
def test_fallback_success(self, datapath):
banklist_data = datapath("io", "data", "banklist.html")
banklist_data = datapath("io", "data", "html", "banklist.html")
self.read_html(banklist_data, ".*Water.*", flavor=["lxml", "html5lib"])

def test_to_html_timestamp(self):
@@ -1240,7 +1242,7 @@ def run(self):
# force import check by reinitalising global vars in html.py
reload(pandas.io.html)

filename = datapath("io", "data", "valid_markup.html")
filename = datapath("io", "data", "html", "valid_markup.html")
helper_thread1 = ErrorThread(target=self.read_html, args=(filename,))
helper_thread2 = ErrorThread(target=self.read_html, args=(filename,))

12 changes: 7 additions & 5 deletions pandas/tests/io/test_pickle.py
@@ -202,23 +202,25 @@ def test_legacy_sparse_warning(datapath):
Generated with

>>> df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [0, 0, 1, 1]}).to_sparse()
>>> df.to_pickle("pandas/tests/io/data/sparseframe-0.20.3.pickle.gz",
>>> df.to_pickle("pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz",
... compression="gzip")

>>> s = df['B']
>>> s.to_pickle("pandas/tests/io/data/sparseseries-0.20.3.pickle.gz",
>>> s.to_pickle("pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz",
... compression="gzip")
"""
with tm.assert_produces_warning(FutureWarning):
simplefilter("ignore", DeprecationWarning) # from boto
pd.read_pickle(
datapath("io", "data", "sparseseries-0.20.3.pickle.gz"), compression="gzip"
datapath("io", "data", "pickle", "sparseseries-0.20.3.pickle.gz"),
compression="gzip",
)

with tm.assert_produces_warning(FutureWarning):
simplefilter("ignore", DeprecationWarning) # from boto
pd.read_pickle(
datapath("io", "data", "sparseframe-0.20.3.pickle.gz"), compression="gzip"
datapath("io", "data", "pickle", "sparseframe-0.20.3.pickle.gz"),
compression="gzip",
)


@@ -382,7 +384,7 @@ def test_read(self, protocol, get_random_path):
def test_unicode_decode_error():
# pickle file written with py27, should be readable without raising
# UnicodeDecodeError, see GH#28645
path = os.path.join(os.path.dirname(__file__), "data", "test_py27.pkl")
path = os.path.join(os.path.dirname(__file__), "data", "pickle", "test_py27.pkl")
df = pd.read_pickle(path)

# just test the columns are correct since the values are random
10 changes: 5 additions & 5 deletions pandas/tests/io/test_spss.py
@@ -9,7 +9,7 @@

def test_spss_labelled_num(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
fname = datapath("io", "data", "labelled-num.sav")
fname = datapath("io", "data", "spss", "labelled-num.sav")

df = pd.read_spss(fname, convert_categoricals=True)
expected = pd.DataFrame({"VAR00002": "This is one"}, index=[0])
@@ -23,7 +23,7 @@ def test_spss_labelled_num(datapath):

def test_spss_labelled_num_na(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
fname = datapath("io", "data", "labelled-num-na.sav")
fname = datapath("io", "data", "spss", "labelled-num-na.sav")

df = pd.read_spss(fname, convert_categoricals=True)
expected = pd.DataFrame({"VAR00002": ["This is one", None]})
@@ -37,7 +37,7 @@ def test_spss_labelled_num_na(datapath):

def test_spss_labelled_str(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
fname = datapath("io", "data", "labelled-str.sav")
fname = datapath("io", "data", "spss", "labelled-str.sav")

df = pd.read_spss(fname, convert_categoricals=True)
expected = pd.DataFrame({"gender": ["Male", "Female"]})
@@ -51,7 +51,7 @@ def test_spss_labelled_str(datapath):

def test_spss_umlauts(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
fname = datapath("io", "data", "umlauts.sav")
fname = datapath("io", "data", "spss", "umlauts.sav")

df = pd.read_spss(fname, convert_categoricals=True)
expected = pd.DataFrame(
@@ -67,7 +67,7 @@ def test_spss_umlauts(datapath):

def test_spss_usecols(datapath):
# usecols must be list-like
fname = datapath("io", "data", "labelled-num.sav")
fname = datapath("io", "data", "spss", "labelled-num.sav")

with pytest.raises(TypeError, match="usecols must be list-like."):
pd.read_spss(fname, usecols="VAR00002")
2 changes: 1 addition & 1 deletion pandas/tests/io/test_sql.py
@@ -275,7 +275,7 @@ def _get_exec(self):
else:
return self.conn.cursor()

@pytest.fixture(params=[("io", "data", "iris.csv")])
@pytest.fixture(params=[("data", "iris.csv")])
def load_iris_data(self, datapath, request):
import io

4 changes: 2 additions & 2 deletions pandas/tests/io/test_stata.py
@@ -28,7 +28,7 @@

@pytest.fixture
def dirpath(datapath):
return datapath("io", "data")
return datapath("io", "data", "stata")


@pytest.fixture
@@ -42,7 +42,7 @@ def parsed_114(dirpath):
class TestStata:
@pytest.fixture(autouse=True)
def setup_method(self, datapath):
self.dirpath = datapath("io", "data")
self.dirpath = datapath("io", "data", "stata")
self.dta1_114 = os.path.join(self.dirpath, "stata1_114.dta")
self.dta1_117 = os.path.join(self.dirpath, "stata1_117.dta")
