Skip to content

Commit 31f44e5

Browse files
author
Brendan Boerner
committed
* 'master' of https://github.com/pydata/pandas: DOC: release notes [ci skip]; TST: add assertion that we will actually read a file; BUG: fix html reading for bytes types in py3; CLN: add html_encoding path to setup.py data
2 parents a6f27ff + 441c585 commit 31f44e5

File tree

4 files changed

+13
-4
lines changed

4 files changed

+13
-4
lines changed

doc/source/v0.15.0.txt

+4
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,11 @@ Bug Fixes
533533

534534

535535

536+
- Bug in installation where ``html_encoding/*.html`` wasn't installed and
537+
therefore some tests were not running correctly (:issue:`7927`).
536538

539+
- Bug in ``read_html`` where ``bytes`` objects were not checked for in
540+
``_read`` (:issue:`7927`).
537541

538542

539543

pandas/io/html.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515

1616
from pandas.io.common import _is_url, urlopen, parse_url
1717
from pandas.io.parsers import TextParser
18-
from pandas.compat import (lrange, lmap, u, string_types, iteritems, text_type,
19-
raise_with_traceback)
18+
from pandas.compat import (lrange, lmap, u, string_types, iteritems,
19+
raise_with_traceback, binary_type)
2020
from pandas.core import common as com
2121
from pandas import Series
2222

@@ -51,6 +51,9 @@
5151
_RE_WHITESPACE = re.compile(r'[\r\n]+|\s{2,}')
5252

5353

54+
char_types = string_types + (binary_type,)
55+
56+
5457
def _remove_whitespace(s, regex=_RE_WHITESPACE):
5558
"""Replace extra whitespace inside of a string with a single space.
5659
@@ -114,13 +117,13 @@ def _read(obj):
114117
text = url.read()
115118
elif hasattr(obj, 'read'):
116119
text = obj.read()
117-
elif isinstance(obj, string_types):
120+
elif isinstance(obj, char_types):
118121
text = obj
119122
try:
120123
if os.path.isfile(text):
121124
with open(text, 'rb') as f:
122125
return f.read()
123-
except TypeError:
126+
except (TypeError, ValueError):
124127
pass
125128
else:
126129
raise TypeError("Cannot read object of type %r" % type(obj).__name__)

pandas/io/tests/test_html.py

+1
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,7 @@ def read_string(self, f, encoding):
622622
return self.read_html(fobj.read(), encoding=encoding, index_col=0)
623623

624624
def test_encode(self):
625+
assert self.files, 'no files read from the data folder'
625626
for f in self.files:
626627
_, encoding = _lang_enc(f)
627628
from_string = self.read_string(f, encoding).pop()

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,7 @@ def pxd(name):
583583
'tests/data/*.xlsm',
584584
'tests/data/*.table',
585585
'tests/data/*.html',
586+
'tests/data/html_encoding/*.html',
586587
'tests/test_json/data/*.json'],
587588
'pandas.tools': ['tests/*.csv'],
588589
'pandas.tests': ['data/*.pickle',

0 commit comments

Comments
 (0)