Skip to content

ENH: Adding '.' as an na_value for FRED. #3469

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
4 commits merged into from
Apr 27, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ pandas 0.12.0
- Unordered time series selection was misbehaving when using label slicing (GH3448_)
- Duplicate indexes with getitem will return items in the correct order (GH3455_, GH3457_)
- Fix sorting in a frame with a list of columns which contains datetime64[ns] dtypes (GH3461_)
- DataFrames fetched via FRED now treat '.' as NaN. (GH3469_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH3251: https://github.com/pydata/pandas/issues/3251
Expand Down
14 changes: 11 additions & 3 deletions pandas/io/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,9 +367,17 @@ def get_data_fred(name=None, start=dt.datetime(2010, 1, 1),
url = fred_URL + '%s' % name + \
'/downloaddata/%s' % name + '.csv'
data = read_csv(urllib.urlopen(url), index_col=0, parse_dates=True,
header=None, skiprows=1, names=["DATE", name])
return data.truncate(start, end)

header=None, skiprows=1, names=["DATE", name],
na_values='.')
try:
return data.truncate(start, end)
except KeyError:
if data.ix[3].name[7:12] == 'Error':
raise Exception("Failed to get the data. "
"Check that {} is valid FRED "
"series.".format(name))
else:
raise

def get_data_famafrench(name, start=None, end=None):
start, end = _sanitize_dates(start, end)
Expand Down
85 changes: 85 additions & 0 deletions pandas/io/tests/test_fred.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import unittest
import nose
from datetime import datetime

from pandas.util.py3compat import StringIO, BytesIO

import pandas as pd
import pandas.io.data as web
from pandas.util.testing import (network, assert_frame_equal,
assert_series_equal,
assert_almost_equal)
from numpy.testing.decorators import slow

import urllib2


class TestFred(unittest.TestCase):
    """Tests for fetching FRED series through ``pandas.io.data``.

    Every test is decorated with ``@slow`` and ``@network``: each one
    requests a series by name from the FRED reader and checks the values
    that come back.
    """

    @slow
    @network
    def test_fred(self):
        """
        Throws an exception when DataReader can't get a 200 response from
        FRED.
        """
        start = datetime(2010, 1, 1)
        end = datetime(2013, 1, 27)

        try:
            self.assertEqual(
                web.DataReader("GDP", "fred", start, end)['GDP'].tail(1),
                16010.2)

            self.assertRaises(
                Exception,
                lambda: web.DataReader("NON EXISTENT SERIES", 'fred',
                                       start, end))
        except urllib2.URLError:
            # Tell "FRED is unreachable" apart from "there is no network at
            # all": if an unrelated site cannot be opened either, skip the
            # test; otherwise re-raise the original URLError.
            try:
                urllib2.urlopen('http://google.com')
            except urllib2.URLError:
                raise nose.SkipTest
            else:
                raise

    @slow
    @network
    def test_fred_nan(self):
        """The DFII5 entry for 2010-01-01 must be read back as null."""
        start = datetime(2010, 1, 1)
        end = datetime(2013, 1, 27)
        df = web.DataReader("DFII5", "fred", start, end)
        assert pd.isnull(df.ix['2010-01-01'])

    @slow
    @network
    def test_fred_parts(self):
        """Check values, dtype and length of series from get_data_fred."""
        import numpy as np

        start = datetime(2010, 1, 1)
        end = datetime(2013, 1, 27)
        df = web.get_data_fred("CPIAUCSL", start, end)
        assert df.ix['2010-05-01'] == 217.23

        # The column must hold floats, with 37 rows for the requested
        # start/end window.
        t = np.array(df.CPIAUCSL.tolist())
        assert np.issubdtype(t.dtype, np.floating)
        assert t.shape == (37,)

        # Test some older ones:
        expected = [[576.7],
                    [962.9],
                    [684.7],
                    [848.3],
                    [933.3]]
        result = web.get_data_fred("A09024USA144NNBR", start="1915").ix[:5]
        assert (result.values == expected).all()

    @slow
    @network
    def test_invalid_series(self):
        """Requesting a series name that does not exist must raise."""
        name = "NOT A REAL SERIES"
        self.assertRaises(Exception, web.get_data_fred, name)

# When this file is executed directly, run its tests through nose with the
# command-line flags listed below.
if __name__ == '__main__':
    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                   exit=False)