-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
PKG: Exclude data test files. #19535
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
4d77cd8
270e442
26e9b4b
1804bcc
7022152
080f000
151ffda
d9d6570
5849591
31fb0b6
9193f15
e897f11
9cf30fd
95cde7a
10ddddc
e1ea208
8616878
77bf77c
156e14b
f3f3662
2cd9706
aac3606
762a2d1
7c44b77
ad09951
6f02d6b
489f540
8b42d1c
ee4fefd
bac438c
84ccdbf
c4802db
7fd7660
c187f8b
632a61d
b5b70c7
9954bba
c771885
dd75270
dbe0c57
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import os | ||
|
||
import pytest | ||
|
||
|
||
@pytest.fixture | ||
def datapath(request): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jorisvandenbossche @jreback thoughts on this approach? The idea would be to replace all instances of hard-coded paths with a call to this. It inspects the configuration and skips if the file isn't there and skips are allowed (controlled by the `--strict-data-files` option). I'm not sure if I could do this with Do we know if any tests actually use the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I suppose this is just to figure out the "current file" of the test? So we can use relative paths instead of absolute (eg you changed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I should have been clearer. I was wondering if there was a base class somewhere, and two children classes inheriting from that base class, so that But indeed, when we move test files to different directories, we'll need to update the paths. I think that's fine since it'll be a simple find / replace. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should go in the conftest 1 level up |
||
"""Get the path to a data file. | ||
|
||
Parameters | ||
---------- | ||
path : str | ||
Path to the file, relative to ``pandas/tests/`` | ||
|
||
Returns | ||
------- | ||
path : path including ``pandas/tests``. | ||
|
||
Raises | ||
------ | ||
ValueError | ||
If the path doesn't exist and the --strict-data-files option is set. | ||
""" | ||
def deco(path): | ||
path = os.path.join('pandas', 'tests', os.path.join(path)) | ||
if not os.path.exists(path): | ||
if request.config.getoption("--strict-data-files"): | ||
raise ValueError("Failed.") | ||
else: | ||
pytest.skip("Data files not included in pandas distribution.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe add the path name in the message |
||
return path | ||
return deco |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,6 +170,8 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): | |
]) | ||
def test_read_fspath_all(self, reader, module, path): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can generally just autouse the datapath fixture |
||
pytest.importorskip(module) | ||
if not os.path.exists(path): | ||
pytest.skip("Data files not included in pandas distribution.") | ||
|
||
mypath = CustomFSPath(path) | ||
result = reader(mypath) | ||
|
@@ -232,13 +234,14 @@ def test_write_fspath_hdf5(self): | |
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
class TestMMapWrapper(object): | ||
@pytest.fixture | ||
def mmap_file(datapath): | ||
return datapath(os.path.join('io', 'data', 'test_mmap.csv')) | ||
|
||
|
||
def setup_method(self, method): | ||
self.mmap_file = os.path.join(tm.get_data_path(), | ||
'test_mmap.csv') | ||
class TestMMapWrapper(object): | ||
|
||
def test_constructor_bad_file(self): | ||
def test_constructor_bad_file(self, mmap_file): | ||
non_file = StringIO('I am not a file') | ||
non_file.fileno = lambda: -1 | ||
|
||
|
@@ -252,15 +255,15 @@ def test_constructor_bad_file(self): | |
|
||
tm.assert_raises_regex(err, msg, common.MMapWrapper, non_file) | ||
|
||
target = open(self.mmap_file, 'r') | ||
target = open(mmap_file, 'r') | ||
target.close() | ||
|
||
msg = "I/O operation on closed file" | ||
tm.assert_raises_regex( | ||
ValueError, msg, common.MMapWrapper, target) | ||
|
||
def test_get_attr(self): | ||
with open(self.mmap_file, 'r') as target: | ||
def test_get_attr(self, mmap_file): | ||
with open(mmap_file, 'r') as target: | ||
wrapper = common.MMapWrapper(target) | ||
|
||
attrs = dir(wrapper.mmap) | ||
|
@@ -273,8 +276,8 @@ def test_get_attr(self): | |
|
||
assert not hasattr(wrapper, 'foo') | ||
|
||
def test_next(self): | ||
with open(self.mmap_file, 'r') as target: | ||
def test_next(self, mmap_file): | ||
with open(mmap_file, 'r') as target: | ||
wrapper = common.MMapWrapper(target) | ||
lines = target.readlines() | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,9 +65,6 @@ def _skip_if_none_of(module_names): | |
pytest.skip("Bad version of bs4: 4.2.0") | ||
|
||
|
||
DATA_PATH = tm.get_data_path() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These types of changes were because |
||
|
||
|
||
def assert_framelist_equal(list1, list2, *args, **kwargs): | ||
assert len(list1) == len(list2), ('lists are not of equal size ' | ||
'len(list1) == {0}, ' | ||
|
@@ -86,8 +83,8 @@ def test_bs4_version_fails(): | |
_skip_if_none_of(('bs4', 'html5lib')) | ||
import bs4 | ||
if LooseVersion(bs4.__version__) == LooseVersion('4.2.0'): | ||
tm.assert_raises(AssertionError, read_html, os.path.join(DATA_PATH, | ||
"spam.html"), | ||
tm.assert_raises(AssertionError, read_html, | ||
os.path.join(tm.get_data_path(), "spam.html"), | ||
flavor='bs4') | ||
|
||
|
||
|
@@ -100,16 +97,17 @@ def read_html(self, *args, **kwargs): | |
|
||
class TestReadHtml(ReadHtmlMixin): | ||
flavor = 'bs4' | ||
spam_data = os.path.join(DATA_PATH, 'spam.html') | ||
spam_data_kwargs = {} | ||
if PY3: | ||
spam_data_kwargs['encoding'] = 'UTF-8' | ||
banklist_data = os.path.join(DATA_PATH, 'banklist.html') | ||
|
||
@classmethod | ||
def setup_class(cls): | ||
_skip_if_none_of(('bs4', 'html5lib')) | ||
|
||
cls.spam_data = os.path.join(tm.get_data_path(), 'spam.html') | ||
cls.spam_data_kwargs = {} | ||
if PY3: | ||
cls.spam_data_kwargs['encoding'] = 'UTF-8' | ||
cls.banklist_data = os.path.join(tm.get_data_path(), 'banklist.html') | ||
|
||
def test_to_html_compat(self): | ||
df = mkdf(4, 3, data_gen_f=lambda *args: rand(), c_idx_names=False, | ||
r_idx_names=False).applymap('{0:.3f}'.format).astype(float) | ||
|
@@ -382,7 +380,7 @@ def test_python_docs_table(self): | |
@pytest.mark.slow | ||
def test_thousands_macau_stats(self): | ||
all_non_nan_table_index = -2 | ||
macau_data = os.path.join(DATA_PATH, 'macau.html') | ||
macau_data = os.path.join(tm.get_data_path(), 'macau.html') | ||
dfs = self.read_html(macau_data, index_col=0, | ||
attrs={'class': 'style1'}) | ||
df = dfs[all_non_nan_table_index] | ||
|
@@ -392,7 +390,7 @@ def test_thousands_macau_stats(self): | |
@pytest.mark.slow | ||
def test_thousands_macau_index_col(self): | ||
all_non_nan_table_index = -2 | ||
macau_data = os.path.join(DATA_PATH, 'macau.html') | ||
macau_data = os.path.join(tm.get_data_path(), 'macau.html') | ||
dfs = self.read_html(macau_data, index_col=0, header=0) | ||
df = dfs[all_non_nan_table_index] | ||
|
||
|
@@ -520,7 +518,7 @@ def test_countries_municipalities(self): | |
assert_framelist_equal(res1, res2) | ||
|
||
def test_nyse_wsj_commas_table(self): | ||
data = os.path.join(DATA_PATH, 'nyse_wsj.html') | ||
data = os.path.join(tm.get_data_path(), 'nyse_wsj.html') | ||
df = self.read_html(data, index_col=0, header=0, | ||
attrs={'class': 'mdcTable'})[0] | ||
|
||
|
@@ -542,7 +540,8 @@ def try_remove_ws(x): | |
|
||
df = self.read_html(self.banklist_data, 'Metcalf', | ||
attrs={'id': 'table'})[0] | ||
ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), | ||
ground_truth = read_csv(os.path.join(tm.get_data_path(), | ||
'banklist.csv'), | ||
converters={'Updated Date': Timestamp, | ||
'Closing Date': Timestamp}) | ||
assert df.shape == ground_truth.shape | ||
|
@@ -660,15 +659,15 @@ def test_parse_dates_combine(self): | |
tm.assert_frame_equal(newdf, res[0]) | ||
|
||
def test_computer_sales_page(self): | ||
data = os.path.join(DATA_PATH, 'computer_sales_page.html') | ||
data = os.path.join(tm.get_data_path(), 'computer_sales_page.html') | ||
with tm.assert_raises_regex(ParserError, | ||
r"Passed header=\[0,1\] are " | ||
r"too many rows for this " | ||
r"multi_index of columns"): | ||
self.read_html(data, header=[0, 1]) | ||
|
||
def test_wikipedia_states_table(self): | ||
data = os.path.join(DATA_PATH, 'wikipedia_states.html') | ||
data = os.path.join(tm.get_data_path(), 'wikipedia_states.html') | ||
assert os.path.isfile(data), '%r is not a file' % data | ||
assert os.path.getsize(data), '%r is an empty file' % data | ||
result = self.read_html(data, 'Arizona', header=1)[0] | ||
|
@@ -788,11 +787,14 @@ def _lang_enc(filename): | |
|
||
|
||
class TestReadHtmlEncoding(object): | ||
files = glob.glob(os.path.join(DATA_PATH, 'html_encoding', '*.html')) | ||
flavor = 'bs4' | ||
|
||
@classmethod | ||
def setup_class(cls): | ||
cls.files = glob.glob(os.path.join(tm.get_data_path(), | ||
'html_encoding', | ||
'*.html')) | ||
|
||
_skip_if_none_of((cls.flavor, 'html5lib')) | ||
|
||
def read_html(self, *args, **kwargs): | ||
|
@@ -847,8 +849,8 @@ def setup_class(cls): | |
|
||
def test_data_fail(self): | ||
from lxml.etree import XMLSyntaxError | ||
spam_data = os.path.join(DATA_PATH, 'spam.html') | ||
banklist_data = os.path.join(DATA_PATH, 'banklist.html') | ||
spam_data = os.path.join(tm.get_data_path(), 'spam.html') | ||
banklist_data = os.path.join(tm.get_data_path(), 'banklist.html') | ||
|
||
with pytest.raises(XMLSyntaxError): | ||
self.read_html(spam_data) | ||
|
@@ -857,15 +859,15 @@ def test_data_fail(self): | |
self.read_html(banklist_data) | ||
|
||
def test_works_on_valid_markup(self): | ||
filename = os.path.join(DATA_PATH, 'valid_markup.html') | ||
filename = os.path.join(tm.get_data_path(), 'valid_markup.html') | ||
dfs = self.read_html(filename, index_col=0) | ||
assert isinstance(dfs, list) | ||
assert isinstance(dfs[0], DataFrame) | ||
|
||
@pytest.mark.slow | ||
def test_fallback_success(self): | ||
_skip_if_none_of(('bs4', 'html5lib')) | ||
banklist_data = os.path.join(DATA_PATH, 'banklist.html') | ||
banklist_data = os.path.join(tm.get_data_path(), 'banklist.html') | ||
self.read_html(banklist_data, '.*Water.*', flavor=['lxml', 'html5lib']) | ||
|
||
def test_to_html_timestamp(self): | ||
|
@@ -893,7 +895,7 @@ def test_parse_dates_combine(self): | |
tm.assert_frame_equal(newdf, res[0]) | ||
|
||
def test_computer_sales_page(self): | ||
data = os.path.join(DATA_PATH, 'computer_sales_page.html') | ||
data = os.path.join(tm.get_data_path(), 'computer_sales_page.html') | ||
self.read_html(data, header=[0, 1]) | ||
|
||
|
||
|
@@ -914,7 +916,7 @@ def get_elements_from_file(url, element='table'): | |
|
||
@pytest.mark.slow | ||
def test_bs4_finds_tables(): | ||
filepath = os.path.join(DATA_PATH, "spam.html") | ||
filepath = os.path.join(tm.get_data_path(), "spam.html") | ||
with warnings.catch_warnings(): | ||
warnings.filterwarnings('ignore') | ||
assert get_elements_from_file(filepath, 'table') | ||
|
@@ -929,19 +931,19 @@ def get_lxml_elements(url, element): | |
|
||
@pytest.mark.slow | ||
def test_lxml_finds_tables(): | ||
filepath = os.path.join(DATA_PATH, "spam.html") | ||
filepath = os.path.join(tm.get_data_path(), "spam.html") | ||
assert get_lxml_elements(filepath, 'table') | ||
|
||
|
||
@pytest.mark.slow | ||
def test_lxml_finds_tbody(): | ||
filepath = os.path.join(DATA_PATH, "spam.html") | ||
filepath = os.path.join(tm.get_data_path(), "spam.html") | ||
assert get_lxml_elements(filepath, 'tbody') | ||
|
||
|
||
def test_same_ordering(): | ||
_skip_if_none_of(['bs4', 'lxml', 'html5lib']) | ||
filename = os.path.join(DATA_PATH, 'valid_markup.html') | ||
filename = os.path.join(tm.get_data_path(), 'valid_markup.html') | ||
dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) | ||
dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) | ||
assert_framelist_equal(dfs_lxml, dfs_bs4) | ||
|
@@ -965,7 +967,7 @@ def test_importcheck_thread_safety(): | |
pytest.importorskip('lxml') | ||
reload(pandas.io.html) | ||
|
||
filename = os.path.join(DATA_PATH, 'valid_markup.html') | ||
filename = os.path.join(tm.get_data_path(), 'valid_markup.html') | ||
helper_thread1 = ErrorThread(target=read_html, args=(filename,)) | ||
helper_thread2 = ErrorThread(target=read_html, args=(filename,)) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -830,6 +830,8 @@ def test_default_encoding(self): | |
def legacy_packers_versions(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Two main changes: This used to be called in a
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's a local run:
err sorry that was for pickle, but they had the same refactoring. |
||
# yield the packers versions | ||
path = tm.get_data_path('legacy_msgpack') | ||
if not os.path.exists(path): | ||
raise pytest.skip("Data file {} does not exist.".format(path)) | ||
for v in os.listdir(path): | ||
p = os.path.join(path, v) | ||
if os.path.isdir(p): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe should make a variable that holds all of these options for both the echo and the run to avoid duplication