BUG: fix html reading for bytes types in py3

cpcloud · cpcloud · commit 2e3c4ea35217 · 2014-08-14T13:46:13.000-04:00
diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -15,8 +15,8 @@
 
 from pandas.io.common import _is_url, urlopen, parse_url
 from pandas.io.parsers import TextParser
-from pandas.compat import (lrange, lmap, u, string_types, iteritems, text_type,
-                           raise_with_traceback)
+from pandas.compat import (lrange, lmap, u, string_types, iteritems,
+                           raise_with_traceback, binary_type)
 from pandas.core import common as com
 from pandas import Series
 
@@ -51,6 +51,9 @@
 _RE_WHITESPACE = re.compile(r'[\r\n]+|\s{2,}')
 
 
+char_types = string_types + (binary_type,)
+
+
 def _remove_whitespace(s, regex=_RE_WHITESPACE):
     """Replace extra whitespace inside of a string with a single space.
 
@@ -114,13 +117,13 @@ def _read(obj):
             text = url.read()
     elif hasattr(obj, 'read'):
         text = obj.read()
-    elif isinstance(obj, string_types):
+    elif isinstance(obj, char_types):
         text = obj
         try:
             if os.path.isfile(text):
                 with open(text, 'rb') as f:
                     return f.read()
-        except TypeError:
+        except (TypeError, ValueError):
             pass
     else:
         raise TypeError("Cannot read object of type %r" % type(obj).__name__)