-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
PKG: Exclude data test files. #19535
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
4d77cd8
270e442
26e9b4b
1804bcc
7022152
080f000
151ffda
d9d6570
5849591
31fb0b6
9193f15
e897f11
9cf30fd
95cde7a
10ddddc
e1ea208
8616878
77bf77c
156e14b
f3f3662
2cd9706
aac3606
762a2d1
7c44b77
ad09951
6f02d6b
489f540
8b42d1c
ee4fefd
bac438c
84ccdbf
c4802db
7fd7660
c187f8b
632a61d
b5b70c7
9954bba
c771885
dd75270
dbe0c57
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -166,6 +166,8 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): | |
]) | ||
def test_read_fspath_all(self, reader, module, path): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you can generally just autouse the `datapath` fixture. |
||
pytest.importorskip(module) | ||
if not os.path.exists(path): | ||
pytest.skip("Data files not included in pandas distribution.") | ||
|
||
mypath = CustomFSPath(path) | ||
result = reader(mypath) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,9 +65,6 @@ def _skip_if_none_of(module_names): | |
pytest.skip("Bad version of bs4: 4.2.0") | ||
|
||
|
||
DATA_PATH = tm.get_data_path() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These types of changes were because |
||
|
||
|
||
def assert_framelist_equal(list1, list2, *args, **kwargs): | ||
assert len(list1) == len(list2), ('lists are not of equal size ' | ||
'len(list1) == {0}, ' | ||
|
@@ -86,8 +83,8 @@ def test_bs4_version_fails(): | |
_skip_if_none_of(('bs4', 'html5lib')) | ||
import bs4 | ||
if LooseVersion(bs4.__version__) == LooseVersion('4.2.0'): | ||
tm.assert_raises(AssertionError, read_html, os.path.join(DATA_PATH, | ||
"spam.html"), | ||
tm.assert_raises(AssertionError, read_html, | ||
os.path.join(tm.get_data_path(), "spam.html"), | ||
flavor='bs4') | ||
|
||
|
||
|
@@ -100,16 +97,17 @@ def read_html(self, *args, **kwargs): | |
|
||
class TestReadHtml(ReadHtmlMixin): | ||
flavor = 'bs4' | ||
spam_data = os.path.join(DATA_PATH, 'spam.html') | ||
spam_data_kwargs = {} | ||
if PY3: | ||
spam_data_kwargs['encoding'] = 'UTF-8' | ||
banklist_data = os.path.join(DATA_PATH, 'banklist.html') | ||
|
||
@classmethod | ||
def setup_class(cls): | ||
_skip_if_none_of(('bs4', 'html5lib')) | ||
|
||
cls.spam_data = os.path.join(tm.get_data_path(), 'spam.html') | ||
cls.spam_data_kwargs = {} | ||
if PY3: | ||
cls.spam_data_kwargs['encoding'] = 'UTF-8' | ||
cls.banklist_data = os.path.join(tm.get_data_path(), 'banklist.html') | ||
|
||
def test_to_html_compat(self): | ||
df = mkdf(4, 3, data_gen_f=lambda *args: rand(), c_idx_names=False, | ||
r_idx_names=False).applymap('{0:.3f}'.format).astype(float) | ||
|
@@ -382,7 +380,7 @@ def test_python_docs_table(self): | |
@pytest.mark.slow | ||
def test_thousands_macau_stats(self): | ||
all_non_nan_table_index = -2 | ||
macau_data = os.path.join(DATA_PATH, 'macau.html') | ||
macau_data = os.path.join(tm.get_data_path(), 'macau.html') | ||
dfs = self.read_html(macau_data, index_col=0, | ||
attrs={'class': 'style1'}) | ||
df = dfs[all_non_nan_table_index] | ||
|
@@ -392,7 +390,7 @@ def test_thousands_macau_stats(self): | |
@pytest.mark.slow | ||
def test_thousands_macau_index_col(self): | ||
all_non_nan_table_index = -2 | ||
macau_data = os.path.join(DATA_PATH, 'macau.html') | ||
macau_data = os.path.join(tm.get_data_path(), 'macau.html') | ||
dfs = self.read_html(macau_data, index_col=0, header=0) | ||
df = dfs[all_non_nan_table_index] | ||
|
||
|
@@ -520,7 +518,7 @@ def test_countries_municipalities(self): | |
assert_framelist_equal(res1, res2) | ||
|
||
def test_nyse_wsj_commas_table(self): | ||
data = os.path.join(DATA_PATH, 'nyse_wsj.html') | ||
data = os.path.join(tm.get_data_path(), 'nyse_wsj.html') | ||
df = self.read_html(data, index_col=0, header=0, | ||
attrs={'class': 'mdcTable'})[0] | ||
|
||
|
@@ -542,7 +540,8 @@ def try_remove_ws(x): | |
|
||
df = self.read_html(self.banklist_data, 'Metcalf', | ||
attrs={'id': 'table'})[0] | ||
ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), | ||
ground_truth = read_csv(os.path.join(tm.get_data_path(), | ||
'banklist.csv'), | ||
converters={'Updated Date': Timestamp, | ||
'Closing Date': Timestamp}) | ||
assert df.shape == ground_truth.shape | ||
|
@@ -660,15 +659,15 @@ def test_parse_dates_combine(self): | |
tm.assert_frame_equal(newdf, res[0]) | ||
|
||
def test_computer_sales_page(self): | ||
data = os.path.join(DATA_PATH, 'computer_sales_page.html') | ||
data = os.path.join(tm.get_data_path(), 'computer_sales_page.html') | ||
with tm.assert_raises_regex(ParserError, | ||
r"Passed header=\[0,1\] are " | ||
r"too many rows for this " | ||
r"multi_index of columns"): | ||
self.read_html(data, header=[0, 1]) | ||
|
||
def test_wikipedia_states_table(self): | ||
data = os.path.join(DATA_PATH, 'wikipedia_states.html') | ||
data = os.path.join(tm.get_data_path(), 'wikipedia_states.html') | ||
assert os.path.isfile(data), '%r is not a file' % data | ||
assert os.path.getsize(data), '%r is an empty file' % data | ||
result = self.read_html(data, 'Arizona', header=1)[0] | ||
|
@@ -788,11 +787,14 @@ def _lang_enc(filename): | |
|
||
|
||
class TestReadHtmlEncoding(object): | ||
files = glob.glob(os.path.join(DATA_PATH, 'html_encoding', '*.html')) | ||
flavor = 'bs4' | ||
|
||
@classmethod | ||
def setup_class(cls): | ||
cls.files = glob.glob(os.path.join(tm.get_data_path(), | ||
'html_encoding', | ||
'*.html')) | ||
|
||
_skip_if_none_of((cls.flavor, 'html5lib')) | ||
|
||
def read_html(self, *args, **kwargs): | ||
|
@@ -847,8 +849,8 @@ def setup_class(cls): | |
|
||
def test_data_fail(self): | ||
from lxml.etree import XMLSyntaxError | ||
spam_data = os.path.join(DATA_PATH, 'spam.html') | ||
banklist_data = os.path.join(DATA_PATH, 'banklist.html') | ||
spam_data = os.path.join(tm.get_data_path(), 'spam.html') | ||
banklist_data = os.path.join(tm.get_data_path(), 'banklist.html') | ||
|
||
with pytest.raises(XMLSyntaxError): | ||
self.read_html(spam_data) | ||
|
@@ -857,15 +859,15 @@ def test_data_fail(self): | |
self.read_html(banklist_data) | ||
|
||
def test_works_on_valid_markup(self): | ||
filename = os.path.join(DATA_PATH, 'valid_markup.html') | ||
filename = os.path.join(tm.get_data_path(), 'valid_markup.html') | ||
dfs = self.read_html(filename, index_col=0) | ||
assert isinstance(dfs, list) | ||
assert isinstance(dfs[0], DataFrame) | ||
|
||
@pytest.mark.slow | ||
def test_fallback_success(self): | ||
_skip_if_none_of(('bs4', 'html5lib')) | ||
banklist_data = os.path.join(DATA_PATH, 'banklist.html') | ||
banklist_data = os.path.join(tm.get_data_path(), 'banklist.html') | ||
self.read_html(banklist_data, '.*Water.*', flavor=['lxml', 'html5lib']) | ||
|
||
def test_to_html_timestamp(self): | ||
|
@@ -893,7 +895,7 @@ def test_parse_dates_combine(self): | |
tm.assert_frame_equal(newdf, res[0]) | ||
|
||
def test_computer_sales_page(self): | ||
data = os.path.join(DATA_PATH, 'computer_sales_page.html') | ||
data = os.path.join(tm.get_data_path(), 'computer_sales_page.html') | ||
self.read_html(data, header=[0, 1]) | ||
|
||
|
||
|
@@ -914,7 +916,7 @@ def get_elements_from_file(url, element='table'): | |
|
||
@pytest.mark.slow | ||
def test_bs4_finds_tables(): | ||
filepath = os.path.join(DATA_PATH, "spam.html") | ||
filepath = os.path.join(tm.get_data_path(), "spam.html") | ||
with warnings.catch_warnings(): | ||
warnings.filterwarnings('ignore') | ||
assert get_elements_from_file(filepath, 'table') | ||
|
@@ -929,19 +931,19 @@ def get_lxml_elements(url, element): | |
|
||
@pytest.mark.slow | ||
def test_lxml_finds_tables(): | ||
filepath = os.path.join(DATA_PATH, "spam.html") | ||
filepath = os.path.join(tm.get_data_path(), "spam.html") | ||
assert get_lxml_elements(filepath, 'table') | ||
|
||
|
||
@pytest.mark.slow | ||
def test_lxml_finds_tbody(): | ||
filepath = os.path.join(DATA_PATH, "spam.html") | ||
filepath = os.path.join(tm.get_data_path(), "spam.html") | ||
assert get_lxml_elements(filepath, 'tbody') | ||
|
||
|
||
def test_same_ordering(): | ||
_skip_if_none_of(['bs4', 'lxml', 'html5lib']) | ||
filename = os.path.join(DATA_PATH, 'valid_markup.html') | ||
filename = os.path.join(tm.get_data_path(), 'valid_markup.html') | ||
dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) | ||
dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) | ||
assert_framelist_equal(dfs_lxml, dfs_bs4) | ||
|
@@ -965,7 +967,7 @@ def test_importcheck_thread_safety(): | |
pytest.importorskip('lxml') | ||
reload(pandas.io.html) | ||
|
||
filename = os.path.join(DATA_PATH, 'valid_markup.html') | ||
filename = os.path.join(tm.get_data_path(), 'valid_markup.html') | ||
helper_thread1 = ErrorThread(target=read_html, args=(filename,)) | ||
helper_thread2 = ErrorThread(target=read_html, args=(filename,)) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -830,6 +830,8 @@ def test_default_encoding(self): | |
def legacy_packers_versions(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Two main changes: This used to be called in a
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's a local run:
Err, sorry, that was for pickle, but they had the same refactoring. |
||
# yield the packers versions | ||
path = tm.get_data_path('legacy_msgpack') | ||
if not os.path.exists(path): | ||
raise pytest.skip("Data file {} does not exist.".format(path)) | ||
for v in os.listdir(path): | ||
p = os.path.join(path, v) | ||
if os.path.isdir(p): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -191,6 +191,8 @@ def compare_sp_frame_float(result, expected, typ, version): | |
def legacy_pickle_versions(): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comments as for packers. |
||
# yield the pickle versions | ||
path = tm.get_data_path('legacy_pickle') | ||
if not os.path.exists(path): | ||
raise pytest.skip("Data path {} does not exists.".format(path)) | ||
for v in os.listdir(path): | ||
p = os.path.join(path, v) | ||
if os.path.isdir(p): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -722,11 +722,7 @@ def pxd(name): | |
maintainer=AUTHOR, | ||
version=versioneer.get_version(), | ||
packages=find_packages(include=['pandas', 'pandas.*']), | ||
package_data={'': ['data/*', 'templates/*'], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Anyone know what the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IIRC this looks for a `data` and a `templates` subdirectory in any of the packages; it doesn’t refer to just one directory. FWIW, I think if those subdirectories had an `__init__.py` they would automatically be included per the line above this, but given that’s not the case, this helps include those folders relative to any of the packages that are found. |
||
'pandas.tests.io': ['data/legacy_hdf/*.h5', | ||
'data/legacy_pickle/*/*.pickle', | ||
'data/legacy_msgpack/*/*.msgpack', | ||
'data/html_encoding/*.html']}, | ||
package_data={'': ['templates/*']}, | ||
ext_modules=extensions, | ||
maintainer_email=EMAIL, | ||
description=DESCRIPTION, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should just make this a function (or maybe a decorator).