Skip to content

BUG: fix data.py regression #4281

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 18, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,8 @@ pandas 0.12
- Fixed bug in initializing ``DatetimeIndex`` with an array of strings
in a certain time zone (:issue:`4229`)
- Fixed bug where html5lib wasn't being properly skipped (:issue:`4265`)
- Fixed bug where ``get_data_famafrench`` wasn't using the correct file edges
(:issue:`4281`)

pandas 0.11.0
=============
Expand Down
2 changes: 2 additions & 0 deletions doc/source/v0.12.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,8 @@ Bug Fixes
- Fixed bug in initializing ``DatetimeIndex`` with an array of strings
in a certain time zone (:issue:`4229`)
- Fixed bug where html5lib wasn't being properly skipped (:issue:`4265`)
- Fixed bug where ``get_data_famafrench`` wasn't using the correct file edges
(:issue:`4281`)

See the :ref:`full release notes
<release>` or issue tracker
Expand Down
19 changes: 10 additions & 9 deletions pandas/io/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,8 +453,8 @@ def get_data_fred(name, start=dt.datetime(2010, 1, 1),
def get_data_famafrench(name):
# path of zip files
zip_file_url = ('http://mba.tuck.dartmouth.edu/pages/faculty/'
'ken.french/ftp/')
zip_file_path = '{0}{1}.zip'.format(zip_file_url, name)
'ken.french/ftp')
zip_file_path = '{0}/{1}.zip'.format(zip_file_url, name)

with urlopen(zip_file_path) as url:
raw = url.read()
Expand All @@ -463,13 +463,13 @@ def get_data_famafrench(name):
tmpf.write(raw)

with ZipFile(tmpf, 'r') as zf:
data = zf.read(name + '.txt').splitlines()
data = zf.open(name + '.txt').readlines()

line_lengths = np.array(map(len, data))
file_edges = np.where(line_lengths)[0]
file_edges = np.where(line_lengths == 2)[0]

datasets = {}
edges = itertools.izip(file_edges[:-1], file_edges[1:])
edges = itertools.izip(file_edges + 1, file_edges[1:])
for i, (left_edge, right_edge) in enumerate(edges):
dataset = [d.split() for d in data[left_edge:right_edge]]
if len(dataset) > 10:
Expand All @@ -479,14 +479,15 @@ def get_data_famafrench(name):
header = dataset[header_index]
ds_header = dataset[header_index + 1:]
# to ensure the header is unique
header = ['{0} {1}'.format(*items) for items in enumerate(header,
start=1)]
index = np.fromiter((d[0] for d in ds_header), dtype=int)
dataset = np.fromiter((d[1:] for d in ds_header), dtype=float)
header = ['{0} {1}'.format(j, hj) for j, hj in enumerate(header,
start=1)]
index = np.array([d[0] for d in ds_header], dtype=int)
dataset = np.array([d[1:] for d in ds_header], dtype=float)
datasets[i] = DataFrame(dataset, index, columns=header)

return datasets


# Items needed for options class
CUR_MONTH = dt.datetime.now().month
CUR_YEAR = dt.datetime.now().year
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from pandas.io import data as web
from pandas.io.data import DataReader, SymbolWarning
from pandas.util.testing import (assert_series_equal, assert_produces_warning,
assert_frame_equal, network)
network)
from numpy.testing import assert_array_equal


Expand Down Expand Up @@ -343,6 +343,7 @@ def test_read_famafrench(self):
"F-F_Research_Data_Factors_weekly", "6_Portfolios_2x3",
"F-F_ST_Reversal_Factor"):
ff = DataReader(name, "famafrench")
assert ff
assert isinstance(ff, dict)


Expand Down