
Commit 33181d9

alimcmaster1 authored and jreback committed
Make subdirs in tests/io/data (#29513)
1 parent b68899f commit 33181d9

File tree

153 files changed (+66 -47 lines)

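This commit moves the flat test-data files under pandas/tests/io/data into per-format subdirectories (csv, excel, feather, fixed_width, html, pickle, spss, stata) and updates every datapath(...) call to include the new path segment. As a rough mental model only — the real fixture lives in pandas/conftest.py and does more (e.g. missing-file handling) — datapath simply joins its arguments onto the pandas/tests root:

import os

PANDAS_TESTS_ROOT = "pandas/tests"  # illustrative root; the real fixture resolves the installed test dir


def datapath(*args):
    # datapath("io", "data", "excel", "test1.xlsx")
    # -> "pandas/tests/io/data/excel/test1.xlsx"
    return os.path.join(PANDAS_TESTS_ROOT, *args)


# Before this commit:  datapath("io", "data", "test1.xlsx")
# After this commit:   datapath("io", "data", "excel", "test1.xlsx")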

pandas/plotting/_misc.py

+1 -1

@@ -364,7 +364,7 @@ def parallel_coordinates(
     --------
     >>> from matplotlib import pyplot as plt
     >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master'
-                        '/pandas/tests/data/iris.csv')
+                        '/pandas/tests/data/csv/iris.csv')
     >>> pd.plotting.parallel_coordinates(
             df, 'Name',
             color=('#556270', '#4ECDC4', '#C7F464'))
16 files renamed without changes.

pandas/tests/io/excel/conftest.py

+3 -2

@@ -24,11 +24,12 @@ def merge_cells(request):
 
 
 @pytest.fixture
-def df_ref():
+def df_ref(datapath):
     """
     Obtain the reference data from read_csv with the Python engine.
     """
-    df_ref = read_csv("test1.csv", index_col=0, parse_dates=True, engine="python")
+    filepath = datapath("io", "data", "csv", "test1.csv")
+    df_ref = read_csv(filepath, index_col=0, parse_dates=True, engine="python")
     return df_ref
 
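For context, a sketch of how the reworked df_ref fixture gets consumed; the sheet name and the relaxed name check are assumptions modelled on the existing excel tests, not part of this diff:

import pandas as pd


def test_excel_matches_csv_reference(df_ref, datapath):
    # read the relocated Excel file and compare against the CSV reference
    parsed = pd.read_excel(
        datapath("io", "data", "excel", "test1.xlsx"),
        sheet_name="Sheet1",
        index_col=0,
    )
    pd.testing.assert_frame_equal(parsed, df_ref, check_names=False)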

pandas/tests/io/excel/test_odf.py

+1 -1

@@ -13,7 +13,7 @@
 def cd_and_set_engine(monkeypatch, datapath):
     func = functools.partial(pd.read_excel, engine="odf")
     monkeypatch.setattr(pd, "read_excel", func)
-    monkeypatch.chdir(datapath("io", "data"))
+    monkeypatch.chdir(datapath("io", "data", "excel"))
 
 
 def test_read_invalid_types_raises():

pandas/tests/io/excel/test_readers.py

+7 -5

@@ -81,7 +81,7 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch, read_ext):
             pytest.skip()
 
         func = partial(pd.read_excel, engine=engine)
-        monkeypatch.chdir(datapath("io", "data"))
+        monkeypatch.chdir(datapath("io", "data", "excel"))
         monkeypatch.setattr(pd, "read_excel", func)
 
     def test_usecols_int(self, read_ext, df_ref):
@@ -502,9 +502,11 @@ def test_read_from_http_url(self, read_ext):
         if read_ext == ".ods":  # TODO: remove once on master
             pytest.skip()
 
+        # TODO: alimcmaster1 - revert to master
         url = (
-            "https://raw.github.com/pandas-dev/pandas/master/"
-            "pandas/tests/io/data/test1" + read_ext
+            "https://raw.githubusercontent.com/alimcmaster1"
+            "/pandas/mcmali-tests-dir-struct/"
+            "pandas/tests/io/data/excel/test1" + read_ext
         )
         url_table = pd.read_excel(url)
         local_table = pd.read_excel("test1" + read_ext)
@@ -527,7 +529,7 @@ def test_read_from_s3_url(self, read_ext, s3_resource):
     def test_read_from_file_url(self, read_ext, datapath):
 
         # FILE
-        localtable = os.path.join(datapath("io", "data"), "test1" + read_ext)
+        localtable = os.path.join(datapath("io", "data", "excel"), "test1" + read_ext)
         local_table = pd.read_excel(localtable)
 
         try:
@@ -828,7 +830,7 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch, read_ext):
             pytest.skip()
 
         func = partial(pd.ExcelFile, engine=engine)
-        monkeypatch.chdir(datapath("io", "data"))
+        monkeypatch.chdir(datapath("io", "data", "excel"))
         monkeypatch.setattr(pd, "ExcelFile", func)
 
     def test_excel_passes_na(self, read_ext):
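The cd_and_set_engine fixtures above all follow the same pattern: pin the engine with functools.partial, then chdir into the data directory so tests can pass bare filenames. A self-contained sketch, with the engine choice here being only an example:

from functools import partial

import pandas as pd
import pytest


@pytest.fixture
def cd_and_set_engine(monkeypatch, datapath):
    # partial() captures the original reader before it is monkeypatched
    monkeypatch.setattr(pd, "read_excel", partial(pd.read_excel, engine="xlrd"))
    monkeypatch.chdir(datapath("io", "data", "excel"))


def test_bare_filename(cd_and_set_engine):
    # "test1.xlsx" resolves relative to pandas/tests/io/data/excel after the chdir
    df = pd.read_excel("test1.xlsx", index_col=0)
    assert not df.empty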

pandas/tests/io/excel/test_xlrd.py

+1 -1

@@ -35,7 +35,7 @@ def test_read_xlrd_book(read_ext, frame):
 
 # TODO: test for openpyxl as well
 def test_excel_table_sheet_by_index(datapath, read_ext):
-    path = datapath("io", "data", "test1{}".format(read_ext))
+    path = datapath("io", "data", "excel", "test1{}".format(read_ext))
     with pd.ExcelFile(path) as excel:
         with pytest.raises(xlrd.XLRDError):
             pd.read_excel(excel, "asdf")

pandas/tests/io/test_common.py

+20 -8

@@ -208,21 +208,33 @@ def test_read_expands_user_home_dir(
     @pytest.mark.parametrize(
         "reader, module, path",
         [
-            (pd.read_csv, "os", ("io", "data", "iris.csv")),
-            (pd.read_table, "os", ("io", "data", "iris.csv")),
-            (pd.read_fwf, "os", ("io", "data", "fixed_width_format.txt")),
-            (pd.read_excel, "xlrd", ("io", "data", "test1.xlsx")),
-            (pd.read_feather, "feather", ("io", "data", "feather-0_3_1.feather")),
+            (pd.read_csv, "os", ("data", "iris.csv")),
+            (pd.read_table, "os", ("data", "iris.csv")),
+            (
+                pd.read_fwf,
+                "os",
+                ("io", "data", "fixed_width", "fixed_width_format.txt"),
+            ),
+            (pd.read_excel, "xlrd", ("io", "data", "excel", "test1.xlsx")),
+            (
+                pd.read_feather,
+                "feather",
+                ("io", "data", "feather", "feather-0_3_1.feather"),
+            ),
             (
                 pd.read_hdf,
                 "tables",
                 ("io", "data", "legacy_hdf", "datetimetz_object.h5"),
             ),
-            (pd.read_stata, "os", ("io", "data", "stata10_115.dta")),
+            (pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")),
             (pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")),
             (pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")),
             (pd.read_msgpack, "os", ("io", "msgpack", "data", "frame.mp")),
-            (pd.read_pickle, "os", ("io", "data", "categorical.0.25.0.pickle")),
+            (
+                pd.read_pickle,
+                "os",
+                ("io", "data", "pickle", "categorical.0.25.0.pickle"),
+            ),
         ],
     )
     def test_read_fspath_all(self, reader, module, path, datapath):
@@ -296,7 +308,7 @@ def test_write_fspath_hdf5(self):
 
 @pytest.fixture
 def mmap_file(datapath):
-    return datapath("io", "data", "test_mmap.csv")
+    return datapath("io", "data", "csv", "test_mmap.csv")
 
 
 class TestMMapWrapper:
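The widened parametrization above still feeds test_read_fspath_all, which checks that each reader accepts path-like objects. A minimal, self-contained illustration of the os.fspath protocol it exercises (the file name and data here are invented for the example):

import os

import pandas as pd


class FakePath:
    # minimal object implementing the os.fspath protocol
    def __init__(self, path):
        self._path = path

    def __fspath__(self):
        return self._path


with open("example.csv", "w") as fh:
    fh.write("a,b\n1,2\n")

df = pd.read_csv(FakePath("example.csv"))
assert df.loc[0, "b"] == 2
os.remove("example.csv")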

pandas/tests/io/test_html.py

+18 -16

@@ -63,7 +63,7 @@ def test_bs4_version_fails(monkeypatch, datapath):
 
     monkeypatch.setattr(bs4, "__version__", "4.2")
     with pytest.raises(ImportError, match="Pandas requires version"):
-        read_html(datapath("io", "data", "spam.html"), flavor="bs4")
+        read_html(datapath("io", "data", "html", "spam.html"), flavor="bs4")
 
 
 def test_invalid_flavor():
@@ -78,7 +78,7 @@ def test_invalid_flavor():
 @td.skip_if_no("bs4")
 @td.skip_if_no("lxml")
 def test_same_ordering(datapath):
-    filename = datapath("io", "data", "valid_markup.html")
+    filename = datapath("io", "data", "html", "valid_markup.html")
     dfs_lxml = read_html(filename, index_col=0, flavor=["lxml"])
     dfs_bs4 = read_html(filename, index_col=0, flavor=["bs4"])
     assert_framelist_equal(dfs_lxml, dfs_bs4)
@@ -95,10 +95,10 @@ def test_same_ordering(datapath):
 class TestReadHtml:
     @pytest.fixture(autouse=True)
     def set_files(self, datapath):
-        self.spam_data = datapath("io", "data", "spam.html")
+        self.spam_data = datapath("io", "data", "html", "spam.html")
         self.spam_data_kwargs = {}
         self.spam_data_kwargs["encoding"] = "UTF-8"
-        self.banklist_data = datapath("io", "data", "banklist.html")
+        self.banklist_data = datapath("io", "data", "html", "banklist.html")
 
     @pytest.fixture(autouse=True, scope="function")
     def set_defaults(self, flavor, request):
@@ -133,9 +133,11 @@ def test_banklist_url(self):
 
     @tm.network
     def test_spam_url(self):
+        # TODO: alimcmaster1 - revert to master
         url = (
-            "https://raw.githubusercontent.com/pandas-dev/pandas/master/"
-            "pandas/tests/io/data/spam.html"
+            "https://raw.githubusercontent.com/alimcmaster1/"
+            "pandas/mcmali-tests-dir-struct/"
+            "pandas/tests/io/data/html/spam.html"
         )
         df1 = self.read_html(url, ".*Water.*")
         df2 = self.read_html(url, "Unit")
@@ -376,7 +378,7 @@ def test_python_docs_table(self):
     @pytest.mark.slow
     def test_thousands_macau_stats(self, datapath):
         all_non_nan_table_index = -2
-        macau_data = datapath("io", "data", "macau.html")
+        macau_data = datapath("io", "data", "html", "macau.html")
         dfs = self.read_html(macau_data, index_col=0, attrs={"class": "style1"})
         df = dfs[all_non_nan_table_index]
 
@@ -385,7 +387,7 @@ def test_thousands_macau_stats(self, datapath):
     @pytest.mark.slow
     def test_thousands_macau_index_col(self, datapath):
         all_non_nan_table_index = -2
-        macau_data = datapath("io", "data", "macau.html")
+        macau_data = datapath("io", "data", "html", "macau.html")
         dfs = self.read_html(macau_data, index_col=0, header=0)
         df = dfs[all_non_nan_table_index]
 
@@ -566,7 +568,7 @@ def test_parse_header_of_non_string_column(self):
         tm.assert_frame_equal(result, expected)
 
     def test_nyse_wsj_commas_table(self, datapath):
-        data = datapath("io", "data", "nyse_wsj.html")
+        data = datapath("io", "data", "html", "nyse_wsj.html")
         df = self.read_html(data, index_col=0, header=0, attrs={"class": "mdcTable"})[0]
 
         expected = Index(
@@ -594,7 +596,7 @@ def try_remove_ws(x):
 
         df = self.read_html(self.banklist_data, "Metcalf", attrs={"id": "table"})[0]
         ground_truth = read_csv(
-            datapath("io", "data", "banklist.csv"),
+            datapath("io", "data", "csv", "banklist.csv"),
            converters={"Updated Date": Timestamp, "Closing Date": Timestamp},
         )
         assert df.shape == ground_truth.shape
@@ -889,19 +891,19 @@ def test_parse_dates_combine(self):
         tm.assert_frame_equal(newdf, res[0])
 
     def test_computer_sales_page(self, datapath):
-        data = datapath("io", "data", "computer_sales_page.html")
+        data = datapath("io", "data", "html", "computer_sales_page.html")
         msg = (
             r"Passed header=\[0,1\] are too many "
             r"rows for this multi_index of columns"
         )
         with pytest.raises(ParserError, match=msg):
             self.read_html(data, header=[0, 1])
 
-        data = datapath("io", "data", "computer_sales_page.html")
+        data = datapath("io", "data", "html", "computer_sales_page.html")
         assert self.read_html(data, header=[1, 2])
 
     def test_wikipedia_states_table(self, datapath):
-        data = datapath("io", "data", "wikipedia_states.html")
+        data = datapath("io", "data", "html", "wikipedia_states.html")
         assert os.path.isfile(data), "{data!r} is not a file".format(data=data)
         assert os.path.getsize(data), "{data!r} is an empty file".format(data=data)
         result = self.read_html(data, "Arizona", header=1)[0]
@@ -1095,14 +1097,14 @@ def test_multiple_header_rows(self):
         tm.assert_frame_equal(expected_df, html_df)
 
     def test_works_on_valid_markup(self, datapath):
-        filename = datapath("io", "data", "valid_markup.html")
+        filename = datapath("io", "data", "html", "valid_markup.html")
         dfs = self.read_html(filename, index_col=0)
         assert isinstance(dfs, list)
         assert isinstance(dfs[0], DataFrame)
 
     @pytest.mark.slow
     def test_fallback_success(self, datapath):
-        banklist_data = datapath("io", "data", "banklist.html")
+        banklist_data = datapath("io", "data", "html", "banklist.html")
         self.read_html(banklist_data, ".*Water.*", flavor=["lxml", "html5lib"])
 
     def test_to_html_timestamp(self):
@@ -1240,7 +1242,7 @@ def run(self):
     # force import check by reinitalising global vars in html.py
     reload(pandas.io.html)
 
-    filename = datapath("io", "data", "valid_markup.html")
+    filename = datapath("io", "data", "html", "valid_markup.html")
     helper_thread1 = ErrorThread(target=self.read_html, args=(filename,))
     helper_thread2 = ErrorThread(target=self.read_html, args=(filename,))
 
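All of the changes above only repoint datapath at io/data/html; the read_html call pattern being tested is unchanged. A small sketch of that pattern, assuming lxml (or bs4 plus html5lib) is installed and using a made-up table:

import pandas as pd

html = """
<table>
  <tr><th>Bank Name</th><th>Closing Date</th></tr>
  <tr><td>Metcalf Bank</td><td>2011-01-01</td></tr>
</table>
"""

# match selects tables whose text matches the regex; flavor picks the parser backends
tables = pd.read_html(html, match="Metcalf", flavor=["lxml", "bs4"])
assert tables[0].shape == (1, 2)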

pandas/tests/io/test_pickle.py

+7 -5

@@ -202,23 +202,25 @@ def test_legacy_sparse_warning(datapath):
     Generated with
 
     >>> df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [0, 0, 1, 1]}).to_sparse()
-    >>> df.to_pickle("pandas/tests/io/data/sparseframe-0.20.3.pickle.gz",
+    >>> df.to_pickle("pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz",
     ...              compression="gzip")
 
     >>> s = df['B']
-    >>> s.to_pickle("pandas/tests/io/data/sparseseries-0.20.3.pickle.gz",
+    >>> s.to_pickle("pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz",
     ...             compression="gzip")
     """
     with tm.assert_produces_warning(FutureWarning):
         simplefilter("ignore", DeprecationWarning)  # from boto
         pd.read_pickle(
-            datapath("io", "data", "sparseseries-0.20.3.pickle.gz"), compression="gzip"
+            datapath("io", "data", "pickle", "sparseseries-0.20.3.pickle.gz"),
+            compression="gzip",
         )
 
     with tm.assert_produces_warning(FutureWarning):
         simplefilter("ignore", DeprecationWarning)  # from boto
         pd.read_pickle(
-            datapath("io", "data", "sparseframe-0.20.3.pickle.gz"), compression="gzip"
+            datapath("io", "data", "pickle", "sparseframe-0.20.3.pickle.gz"),
+            compression="gzip",
        )
 
 
@@ -382,7 +384,7 @@ def test_read(self, protocol, get_random_path):
 def test_unicode_decode_error():
     # pickle file written with py27, should be readable without raising
     # UnicodeDecodeError, see GH#28645
-    path = os.path.join(os.path.dirname(__file__), "data", "test_py27.pkl")
+    path = os.path.join(os.path.dirname(__file__), "data", "pickle", "test_py27.pkl")
    df = pd.read_pickle(path)
 
     # just test the columns are correct since the values are random
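The docstring and read calls above only gain the pickle/ path segment; the gzip round trip itself is unchanged. A runnable sketch of that round trip with an invented file name:

import os

import pandas as pd

df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [0, 0, 1, 1]})
df.to_pickle("example-frame.pickle.gz", compression="gzip")
roundtripped = pd.read_pickle("example-frame.pickle.gz", compression="gzip")
pd.testing.assert_frame_equal(df, roundtripped)
os.remove("example-frame.pickle.gz")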

pandas/tests/io/test_spss.py

+5 -5

@@ -9,7 +9,7 @@
 
 def test_spss_labelled_num(datapath):
     # test file from the Haven project (https://haven.tidyverse.org/)
-    fname = datapath("io", "data", "labelled-num.sav")
+    fname = datapath("io", "data", "spss", "labelled-num.sav")
 
     df = pd.read_spss(fname, convert_categoricals=True)
     expected = pd.DataFrame({"VAR00002": "This is one"}, index=[0])
@@ -23,7 +23,7 @@ def test_spss_labelled_num(datapath):
 
 def test_spss_labelled_num_na(datapath):
     # test file from the Haven project (https://haven.tidyverse.org/)
-    fname = datapath("io", "data", "labelled-num-na.sav")
+    fname = datapath("io", "data", "spss", "labelled-num-na.sav")
 
     df = pd.read_spss(fname, convert_categoricals=True)
     expected = pd.DataFrame({"VAR00002": ["This is one", None]})
@@ -37,7 +37,7 @@ def test_spss_labelled_num_na(datapath):
 
 def test_spss_labelled_str(datapath):
     # test file from the Haven project (https://haven.tidyverse.org/)
-    fname = datapath("io", "data", "labelled-str.sav")
+    fname = datapath("io", "data", "spss", "labelled-str.sav")
 
     df = pd.read_spss(fname, convert_categoricals=True)
     expected = pd.DataFrame({"gender": ["Male", "Female"]})
@@ -51,7 +51,7 @@ def test_spss_labelled_str(datapath):
 
 def test_spss_umlauts(datapath):
     # test file from the Haven project (https://haven.tidyverse.org/)
-    fname = datapath("io", "data", "umlauts.sav")
+    fname = datapath("io", "data", "spss", "umlauts.sav")
 
     df = pd.read_spss(fname, convert_categoricals=True)
     expected = pd.DataFrame(
@@ -67,7 +67,7 @@ def test_spss_umlauts(datapath):
 
 def test_spss_usecols(datapath):
     # usecols must be list-like
-    fname = datapath("io", "data", "labelled-num.sav")
+    fname = datapath("io", "data", "spss", "labelled-num.sav")
 
     with pytest.raises(TypeError, match="usecols must be list-like."):
         pd.read_spss(fname, usecols="VAR00002")
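Each SPSS test only gains the spss/ path segment. For reference, a hedged sketch of the read_spss call they wrap; it requires the pyreadstat package, and the usecols value is taken from the expected frames above:

import pandas as pd


def test_read_spss_subset(datapath):
    fname = datapath("io", "data", "spss", "labelled-num.sav")
    df = pd.read_spss(fname, usecols=["VAR00002"], convert_categoricals=False)
    assert list(df.columns) == ["VAR00002"]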

pandas/tests/io/test_sql.py

+1 -1

@@ -275,7 +275,7 @@ def _get_exec(self):
         else:
             return self.conn.cursor()
 
-    @pytest.fixture(params=[("io", "data", "iris.csv")])
+    @pytest.fixture(params=[("data", "iris.csv")])
     def load_iris_data(self, datapath, request):
         import io
 
pandas/tests/io/test_stata.py

+2 -2

@@ -28,7 +28,7 @@
 
 @pytest.fixture
 def dirpath(datapath):
-    return datapath("io", "data")
+    return datapath("io", "data", "stata")
 
 
 @pytest.fixture
@@ -42,7 +42,7 @@ def parsed_114(dirpath):
 class TestStata:
     @pytest.fixture(autouse=True)
     def setup_method(self, datapath):
-        self.dirpath = datapath("io", "data")
+        self.dirpath = datapath("io", "data", "stata")
         self.dta1_114 = os.path.join(self.dirpath, "stata1_114.dta")
         self.dta1_117 = os.path.join(self.dirpath, "stata1_117.dta")
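The Stata fixtures now point dirpath at io/data/stata and build file paths from it. A minimal sketch of that pattern (the assertion is illustrative only):

import os

import pandas as pd


def test_read_relocated_stata_file(datapath):
    dirpath = datapath("io", "data", "stata")
    df = pd.read_stata(os.path.join(dirpath, "stata1_114.dta"))
    assert isinstance(df, pd.DataFrame)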

0 commit comments
