20
20
from pandas import (DataFrame , MultiIndex , read_csv , Timestamp , Index ,
21
21
date_range , Series )
22
22
from pandas .compat import (map , zip , StringIO , string_types , BytesIO ,
23
- is_platform_windows )
23
+ is_platform_windows , PY3 )
24
24
from pandas .io .common import URLError , urlopen , file_path_to_url
25
25
from pandas .io .html import read_html
26
26
from pandas ._libs .parsers import ParserError
@@ -96,6 +96,9 @@ def read_html(self, *args, **kwargs):
96
96
class TestReadHtml (ReadHtmlMixin ):
97
97
flavor = 'bs4'
98
98
spam_data = os .path .join (DATA_PATH , 'spam.html' )
99
+ spam_data_kwargs = {}
100
+ if PY3 :
101
+ spam_data_kwargs ['encoding' ] = 'UTF-8'
99
102
banklist_data = os .path .join (DATA_PATH , 'banklist.html' )
100
103
101
104
@classmethod
@@ -247,18 +250,18 @@ def test_infer_types(self):
247
250
assert_framelist_equal (df1 , df2 )
248
251
249
252
def test_string_io (self ):
250
- with open (self .spam_data ) as f :
253
+ with open (self .spam_data , ** self . spam_data_kwargs ) as f :
251
254
data1 = StringIO (f .read ())
252
255
253
- with open (self .spam_data ) as f :
256
+ with open (self .spam_data , ** self . spam_data_kwargs ) as f :
254
257
data2 = StringIO (f .read ())
255
258
256
259
df1 = self .read_html (data1 , '.*Water.*' )
257
260
df2 = self .read_html (data2 , 'Unit' )
258
261
assert_framelist_equal (df1 , df2 )
259
262
260
263
def test_string (self ):
261
- with open (self .spam_data ) as f :
264
+ with open (self .spam_data , ** self . spam_data_kwargs ) as f :
262
265
data = f .read ()
263
266
264
267
df1 = self .read_html (data , '.*Water.*' )
@@ -267,10 +270,10 @@ def test_string(self):
267
270
assert_framelist_equal (df1 , df2 )
268
271
269
272
def test_file_like (self ):
270
- with open (self .spam_data ) as f :
273
+ with open (self .spam_data , ** self . spam_data_kwargs ) as f :
271
274
df1 = self .read_html (f , '.*Water.*' )
272
275
273
- with open (self .spam_data ) as f :
276
+ with open (self .spam_data , ** self . spam_data_kwargs ) as f :
274
277
df2 = self .read_html (f , 'Unit' )
275
278
276
279
assert_framelist_equal (df1 , df2 )
0 commit comments