Skip to content

Commit 70e008a

Browse files
committed
Merge pull request #5492 from TomAugspurger/fred_multi_series
ENH: accept multiple series for FRED DataReader
2 parents d3be761 + 33433e0 commit 70e008a

File tree

4 files changed

+55
-16
lines changed

4 files changed

+55
-16
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ Improvements to existing features
220220
option it is no longer possible to round trip Excel files with merged
221221
MultiIndex and Hierarchical Rows. Set the ``merge_cells`` to ``False`` to
222222
restore the previous behaviour. (:issue:`5254`)
223+
- The FRED DataReader now accepts multiple series (:issue:`3413`)
223224

224225
API Changes
225226
~~~~~~~~~~~

doc/source/remote_data.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,9 @@ FRED
8181
gdp=web.DataReader("GDP", "fred", start, end)
8282
gdp.ix['2013-01-01']
8383
84-
84+
# Multiple series:
85+
inflation = web.DataReader(["CPIAUCSL", "CPILFESL"], "fred", start, end)
86+
inflation.head()
8587
.. _remote_data.ff:
8688

8789
Fama/French

pandas/io/data.py

+29-15
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
)
1818
import pandas.compat as compat
1919
from pandas import Panel, DataFrame, Series, read_csv, concat
20-
from pandas.core.common import PandasError
20+
from pandas.core.common import is_list_like, PandasError
2121
from pandas.io.parsers import TextParser
2222
from pandas.io.common import urlopen, ZipFile, urlencode
2323
from pandas.util.testing import _network_error_classes
@@ -41,8 +41,9 @@ def DataReader(name, data_source=None, start=None, end=None,
4141
4242
Parameters
4343
----------
44-
name : str
45-
the name of the dataset
44+
name : str or list of strs
45+
the name of the dataset. Some data sources (yahoo, google, fred) will
46+
accept a list of names.
4647
data_source: str
4748
the data source ("yahoo", "google", "fred", or "ff")
4849
start : {datetime, None}
@@ -436,24 +437,37 @@ def get_data_fred(name, start=dt.datetime(2010, 1, 1),
436437
Date format is datetime
437438
438439
Returns a DataFrame.
440+
441+
If multiple names are passed for "name" then the index of the
442+
DataFrame is the outer join of the indices of each series.
439443
"""
440444
start, end = _sanitize_dates(start, end)
441445

442446
fred_URL = "http://research.stlouisfed.org/fred2/series/"
443447

444-
url = fred_URL + '%s' % name + '/downloaddata/%s' % name + '.csv'
445-
with urlopen(url) as resp:
446-
data = read_csv(resp, index_col=0, parse_dates=True,
447-
header=None, skiprows=1, names=["DATE", name],
448-
na_values='.')
449-
try:
450-
return data.truncate(start, end)
451-
except KeyError:
452-
if data.ix[3].name[7:12] == 'Error':
453-
raise IOError("Failed to get the data. Check that {0!r} is "
454-
"a valid FRED series.".format(name))
455-
raise
448+
if not is_list_like(name):
449+
names = [name]
450+
else:
451+
names = name
456452

453+
urls = [fred_URL + '%s' % n + '/downloaddata/%s' % n + '.csv' for
454+
n in names]
455+
456+
def fetch_data(url, name):
457+
with urlopen(url) as resp:
458+
data = read_csv(resp, index_col=0, parse_dates=True,
459+
header=None, skiprows=1, names=["DATE", name],
460+
na_values='.')
461+
try:
462+
return data.truncate(start, end)
463+
except KeyError:
464+
if data.ix[3].name[7:12] == 'Error':
465+
raise IOError("Failed to get the data. Check that {0!r} is "
466+
"a valid FRED series.".format(name))
467+
raise
468+
df = concat([fetch_data(url, n) for url, n in zip(urls, names)],
469+
axis=1, join='outer')
470+
return df
457471

458472
def get_data_famafrench(name):
459473
# path of zip files

pandas/io/tests/test_data.py

+22
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
import pandas.util.testing as tm
1717
from numpy.testing import assert_array_equal
1818

19+
if compat.PY3:
20+
from urllib.error import HTTPError
21+
else:
22+
from urllib2 import HTTPError
1923

2024
def _skip_if_no_lxml():
2125
try:
@@ -422,6 +426,24 @@ def test_invalid_series(self):
422426
name = "NOT A REAL SERIES"
423427
self.assertRaises(Exception, web.get_data_fred, name)
424428

429+
@network
430+
def test_fred_multi(self):
431+
names = ['CPIAUCSL', 'CPALTT01USQ661S', 'CPILFESL']
432+
start = datetime(2010, 1, 1)
433+
end = datetime(2013, 1, 27)
434+
435+
received = web.DataReader(names, "fred", start, end).head(1)
436+
expected = DataFrame([[217.478, 0.99701529, 220.544]], columns=names,
437+
index=[pd.tslib.Timestamp('2010-01-01 00:00:00')])
438+
expected.index.rename('DATE', inplace=True)
439+
assert_frame_equal(received, expected, check_less_precise=True)
440+
441+
@network
442+
def test_fred_multi_bad_series(self):
443+
444+
names = ['NOTAREALSERIES', 'CPIAUCSL', "ALSO FAKE"]
445+
with tm.assertRaises(HTTPError):
446+
DataReader(names, data_source="fred")
425447

426448
if __name__ == '__main__':
427449
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

0 commit comments

Comments
 (0)