diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py index b9a4c83e..54de8e10 100644 --- a/pandas_datareader/data.py +++ b/pandas_datareader/data.py @@ -19,6 +19,7 @@ from pandas_datareader.famafrench import FamaFrenchReader from pandas_datareader.oecd import OECDReader from pandas_datareader.edgar import EdgarIndexReader +from pandas_datareader.truefx import TrueFXReader def get_data_fred(*args, **kwargs): @@ -146,6 +147,9 @@ def DataReader(name, data_source=None, start=None, end=None, return EdgarIndexReader(symbols=name, start=start, end=end, retry_count=retry_count, pause=pause, session=session).read() + elif data_source == "truefx": + return TrueFXReader(retry_count=retry_count, pause=pause, + session=session).read(symbols=name, start=start, end=end) else: msg = "data_source=%r is not implemented" % data_source raise NotImplementedError(msg) diff --git a/pandas_datareader/tests/test_truefx.py b/pandas_datareader/tests/test_truefx.py new file mode 100644 index 00000000..eafda1f5 --- /dev/null +++ b/pandas_datareader/tests/test_truefx.py @@ -0,0 +1,32 @@ +import os +import pandas as pd +import pandas.util.testing as tm +import pandas_datareader.data as web +from pandas_datareader.truefx import TrueFXReader + +class TestTrueFX(tm.TestCase): + def setUp(self): + if os.getenv('USE_REQUESTS_CACHE', '0') == '1': + import requests_cache + from datetime import timedelta + self.session = requests_cache.CachedSession(cache_name='cache', expire_after=timedelta(days=30)) + else: + self.session = None + self.dr = TrueFXReader(retry_count=3, pause=0.001, session=self.session) + + def test_url(self): + expected = 'http://www.truefx.com/dev/data/2014/JANUARY-2014/AUDUSD-2014-01.zip' + tm.assert_equal(self.dr.url('AUDUSD', 2014, 1), expected) + + def test_filename_csv(self): + expected = 'AUDUSD-2014-01.csv' + tm.assert_equal(self.dr._filename_csv('AUDUSD', 2014, 1), expected) + + def test_get_truefx_datareader(self): + df = web.DataReader('AUD/USD', 'truefx', 
class TrueFXReader(_BaseReader):
    """Download historical bid/ask quotes from TrueFX.

    The reader fetches one zip archive per currency pair and calendar
    month, parses the single CSV file inside it and returns the quotes as
    a DataFrame indexed by timestamp.
    """

    # English month names as they appear in the download URLs. A fixed
    # table is used instead of ``strftime('%B')`` because the latter is
    # locale-dependent and would build wrong URLs under a non-English locale.
    _MONTH_NAMES = ('JANUARY', 'FEBRUARY', 'MARCH', 'APRIL', 'MAY', 'JUNE',
                    'JULY', 'AUGUST', 'SEPTEMBER', 'OCTOBER', 'NOVEMBER',
                    'DECEMBER')

    def __init__(self, retry_count=3, pause=0.001, session=None):
        """Store the connection settings and prepare the HTTP session.

        Parameters
        ----------
        retry_count : int
            Must be an integer; negative values are rejected (0 is
            accepted by the check below).
        pause : float
            Stored on the instance as ``self.pause``.
        session : object or None
            Passed to ``self._init_session`` together with ``retry_count``.

        Raises
        ------
        ValueError
            If ``retry_count`` is not an int or is negative.
        """
        if not isinstance(retry_count, int) or retry_count < 0:
            raise ValueError("'retry_count' must be integer larger than 0")
        self.retry_count = retry_count
        self.pause = pause
        self.session = self._init_session(session, retry_count)

    def url(self, symbol, year, month):
        """Return the download URL of the archive for one symbol and month.

        ``symbol`` is inserted verbatim, so it should already be in the
        sanitized form (e.g. ``'AUDUSD'``).

        Raises
        ------
        ValueError
            If ``year``/``month`` do not form a valid calendar date.
        """
        # Building a date validates year and month exactly like the
        # previous datetime-based implementation did (ValueError on a bad
        # month), without relying on the process locale for the name.
        datetime.date(year, month, 1)
        month_name = self._MONTH_NAMES[month - 1]
        template = ('http://www.truefx.com/dev/data/{year:04d}/'
                    '{month_name}-{year:04d}/'
                    '{symbol}-{year:04d}-{month:02d}.zip')
        return template.format(year=year, month=month, symbol=symbol,
                               month_name=month_name)

    def _sanitize_symbol(self, symbol):
        """Normalize a pair name: drop ``/`` and upper-case it.

        ``'aud/usd'`` becomes ``'AUDUSD'``.
        """
        return symbol.replace("/", "").upper()

    def _filename_csv(self, symbol, year, month):
        """Return the name of the CSV member expected inside the archive."""
        return "{symbol}-{year:04d}-{month:02d}.csv".format(
            year=year, month=month, symbol=symbol)

    def read(self, symbols, start, end):
        """Read quotes for a single symbol between ``start`` and ``end``.

        Data is downloaded in whole calendar months; every month touched
        by the ``[start, end]`` interval is fetched and returned in full.

        Parameters
        ----------
        symbols : str
            A single currency pair, e.g. ``'AUD/USD'`` or ``'AUDUSD'``.
        start, end
            Date bounds, normalized through ``self._sanitize_dates``.

        Returns
        -------
        pandas.DataFrame
            Columns ``Symbol``, ``Bid``, ``Ask`` indexed by ``Date``.

        Raises
        ------
        NotImplementedError
            If ``symbols`` is not a single symbol.
        """
        start, end = self._sanitize_dates(start, end)
        # A single symbol, e.g. 'AUD/USD'
        if isinstance(symbols, (compat.string_types, int)):
            df = self._read_several_months(symbols, start, end)
        # Multiple symbols, e.g. ['AUD/USD', 'EUR/USD'], are not supported
        else:
            raise NotImplementedError("Can't download several symbols")
        return df

    def _read_one_month(self, symbol, year, month):
        """Download and parse the archive for one symbol and one month.

        Returns a DataFrame with columns ``Symbol``, ``Bid``, ``Ask``
        indexed by the parsed ``Date`` column.
        """
        # Sanitize *before* building the URL so that a direct call with
        # 'AUD/USD' does not put a slash into the request path.
        symbol = self._sanitize_symbol(symbol)
        url = self.url(symbol, year, month)

        logger.debug("querying '%s'", url)
        response = self.session.get(url)
        # Fail with a clear HTTP error instead of handing an error page to
        # ZipFile. NOTE(review): assumes a requests-style response object.
        response.raise_for_status()

        with ZipFile(compat.BytesIO(response.content), 'r') as zf:
            # Close the archive member deterministically once it is parsed.
            with zf.open(self._filename_csv(symbol, year, month)) as zfile:
                # ``names`` is given, so the file is read as header-less.
                df = pd.read_csv(zfile,
                                 names=['Symbol', 'Date', 'Bid', 'Ask'])

        df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d %H:%M:%S.%f')
        return df.set_index('Date')

    def _read_several_months(self, symbol, start, end):
        """Concatenate the monthly frames covering ``start`` .. ``end``."""
        symbol = self._sanitize_symbol(symbol)
        # Monthly *periods* include the months that contain ``start`` and
        # ``end``. A month-start date_range would skip the first month
        # whenever ``start`` is not the 1st, and would be empty (making
        # pd.concat fail) when both dates fall inside the same month.
        months = pd.period_range(start, end, freq='M')
        frames = [self._read_one_month(symbol, period.year, period.month)
                  for period in months]
        return pd.concat(frames)