Skip to content

Commit d5d98fa

Browse files
0x0L authored and 0x0L committed
use lxml instead of bs4
1 parent 11712b4 commit d5d98fa

File tree

2 files changed: 9 additions (+9) and 13 deletions (-13).

pandas_datareader/data.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -595,15 +595,18 @@ def get_datasets_famafrench():
595595
-------
596596
A list of valid inputs for get_data_famafrench.
597597
"""
598-
from bs4 import BeautifulSoup
598+
try:
599+
from lxml.html import parse
600+
except ImportError:
601+
raise ImportError("Please install lxml if you want to use the "
602+
"get_datasets_famafrench function")
599603

600-
with urlopen(_FAMAFRENCH_URL + 'data_library.html') as socket:
601-
root = BeautifulSoup(socket.read(), 'html.parser')
604+
root = parse(_FAMAFRENCH_URL + 'data_library.html')
602605

603606
l = filter(lambda x: x.startswith(_FF_PREFIX) and x.endswith(_FF_SUFFIX),
604-
[e.attrs['href'] for e in root.findAll('a') if 'href' in e.attrs])
607+
[e.attrib['href'] for e in root.findall('.//a') if 'href' in e.attrib])
605608

606-
return list(map(lambda x: x[len(_FF_PREFIX):-len(_FF_SUFFIX)], l))
609+
return lmap(lambda x: x[len(_FF_PREFIX):-len(_FF_SUFFIX)], l)
607610

608611

609612
def _download_data_famafrench(name):
@@ -621,7 +624,6 @@ def _download_data_famafrench(name):
621624

622625

623626
def _parse_date_famafrench(x):
624-
# what's the correct python way to do that ??
625627
x = x.strip()
626628
try: return dt.datetime.strptime(x, '%Y')
627629
except: pass

pandas_datareader/tests/test_famafrench.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,6 @@
33

44
import pandas_datareader.data as web
55

6-
def _skip_if_no_bs4():
7-
try:
8-
import bs4
9-
except ImportError:
10-
raise nose.SkipTest('no bs4')
11-
126

137
class TestFamaFrench(tm.TestCase):
148
def test_read_famafrench(self):
@@ -23,7 +17,7 @@ def test_read_famafrench(self):
2317
assert len(ff) > 1
2418

2519
def test_get_datasets(self):
26-
_skip_if_no_bs4()
20+
# _skip_if_no_lxml()
2721
l = web.get_datasets_famafrench()
2822
assert len(l) > 100
2923

0 commit comments

Comments
 (0)