Skip to content

Commit d3391eb

Browse files
committed
BUG: fix data.py regression
1 parent f445088 commit d3391eb

File tree

4 files changed

+16
-10
lines changed

4 files changed

+16
-10
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,8 @@ pandas 0.12
342342
- Fixed bug in initializing ``DatetimeIndex`` with an array of strings
343343
in a certain time zone (:issue:`4229`)
344344
- Fixed bug where html5lib wasn't being properly skipped (:issue:`4265`)
345+
- Fixed bug where ``get_data_famafrench`` wasn't using the correct file edges
346+
(:issue:`4281`)
345347

346348
pandas 0.11.0
347349
=============

doc/source/v0.12.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,8 @@ Bug Fixes
475475
- Fixed bug in initializing ``DatetimeIndex`` with an array of strings
476476
in a certain time zone (:issue:`4229`)
477477
- Fixed bug where html5lib wasn't being properly skipped (:issue:`4265`)
478+
- Fixed bug where ``get_data_famafrench`` wasn't using the correct file edges
479+
(:issue:`4281`)
478480

479481
See the :ref:`full release notes
480482
<release>` or issue tracker

pandas/io/data.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,8 @@ def get_data_fred(name, start=dt.datetime(2010, 1, 1),
453453
def get_data_famafrench(name):
454454
# path of zip files
455455
zip_file_url = ('http://mba.tuck.dartmouth.edu/pages/faculty/'
456-
'ken.french/ftp/')
457-
zip_file_path = '{0}{1}.zip'.format(zip_file_url, name)
456+
'ken.french/ftp')
457+
zip_file_path = '{0}/{1}.zip'.format(zip_file_url, name)
458458

459459
with urlopen(zip_file_path) as url:
460460
raw = url.read()
@@ -463,13 +463,13 @@ def get_data_famafrench(name):
463463
tmpf.write(raw)
464464

465465
with ZipFile(tmpf, 'r') as zf:
466-
data = zf.read(name + '.txt').splitlines()
466+
data = zf.open(name + '.txt').readlines()
467467

468468
line_lengths = np.array(map(len, data))
469-
file_edges = np.where(line_lengths)[0]
469+
file_edges = np.where(line_lengths == 2)[0]
470470

471471
datasets = {}
472-
edges = itertools.izip(file_edges[:-1], file_edges[1:])
472+
edges = itertools.izip(file_edges + 1, file_edges[1:])
473473
for i, (left_edge, right_edge) in enumerate(edges):
474474
dataset = [d.split() for d in data[left_edge:right_edge]]
475475
if len(dataset) > 10:
@@ -479,14 +479,15 @@ def get_data_famafrench(name):
479479
header = dataset[header_index]
480480
ds_header = dataset[header_index + 1:]
481481
# to ensure the header is unique
482-
header = ['{0} {1}'.format(*items) for items in enumerate(header,
483-
start=1)]
484-
index = np.fromiter((d[0] for d in ds_header), dtype=int)
485-
dataset = np.fromiter((d[1:] for d in ds_header), dtype=float)
482+
header = ['{0} {1}'.format(j, hj) for j, hj in enumerate(header,
483+
start=1)]
484+
index = np.array([d[0] for d in ds_header], dtype=int)
485+
dataset = np.array([d[1:] for d in ds_header], dtype=float)
486486
datasets[i] = DataFrame(dataset, index, columns=header)
487487

488488
return datasets
489489

490+
490491
# Items needed for options class
491492
CUR_MONTH = dt.datetime.now().month
492493
CUR_YEAR = dt.datetime.now().year

pandas/io/tests/test_data.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from pandas.io import data as web
1111
from pandas.io.data import DataReader, SymbolWarning
1212
from pandas.util.testing import (assert_series_equal, assert_produces_warning,
13-
assert_frame_equal, network)
13+
network)
1414
from numpy.testing import assert_array_equal
1515

1616

@@ -343,6 +343,7 @@ def test_read_famafrench(self):
343343
"F-F_Research_Data_Factors_weekly", "6_Portfolios_2x3",
344344
"F-F_ST_Reversal_Factor"):
345345
ff = DataReader(name, "famafrench")
346+
assert ff
346347
assert isinstance(ff, dict)
347348

348349

0 commit comments

Comments
 (0)