From d5aa584f16a0ce90b254e3a52ea40af44da6162a Mon Sep 17 00:00:00 2001 From: femtotrader Date: Tue, 22 Dec 2015 18:33:01 +0100 Subject: [PATCH 1/5] First TrueFX tick datareader --- pandas_datareader/data.py | 4 ++ pandas_datareader/tests/test_truefx.py | 37 ++++++++++++ pandas_datareader/truefx.py | 80 ++++++++++++++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 pandas_datareader/tests/test_truefx.py create mode 100644 pandas_datareader/truefx.py diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py index 28344ca8..2a04bcaf 100644 --- a/pandas_datareader/data.py +++ b/pandas_datareader/data.py @@ -18,6 +18,7 @@ from pandas_datareader.famafrench import FamaFrenchReader from pandas_datareader.oecd import OECDReader from pandas_datareader.edgar import EdgarIndexReader +from pandas_datareader.truefx import TrueFXReader def get_data_fred(*args, **kwargs): @@ -135,6 +136,9 @@ def DataReader(name, data_source=None, start=None, end=None, return EdgarIndexReader(symbols=name, start=start, end=end, retry_count=retry_count, pause=pause, session=session).read() + elif data_source == "truefx": + return TrueFXReader(retry_count=retry_count, pause=pause, + session=session).read(symbols=name, start=start, end=end) else: raise NotImplementedError( "data_source=%r is not implemented" % data_source) diff --git a/pandas_datareader/tests/test_truefx.py b/pandas_datareader/tests/test_truefx.py new file mode 100644 index 00000000..88b20adf --- /dev/null +++ b/pandas_datareader/tests/test_truefx.py @@ -0,0 +1,37 @@ +import pandas as pd +import pandas.util.testing as tm +import pandas_datareader.data as web +from pandas_datareader.truefx import TrueFXReader + +class TestTrueFX(tm.TestCase): + def setUp(self): + #session = None + + # Uncomment this for local tests without network + import requests_cache + from datetime import timedelta + session = requests_cache.CachedSession(cache_name='cache', expire_after=timedelta(days=30)) + + self.dr = TrueFXReader(retry_count=3, pause=0.001, session=session) + + def test_url(self): + expected = 'http://www.truefx.com/dev/data/2014/JANUARY-2014/AUDUSD-2014-01.zip' + tm.assert_equal(self.dr.url('AUDUSD', 2014, 1), expected) + + def test_filename_csv(self): + expected = 'AUDUSD-2014-01.csv' + tm.assert_equal(self.dr._filename_csv('AUDUSD', 2014, 1), expected) + + def test_get_truefx_read_one_month(self): + symbol = 'AUDUSD' + df = self.dr._read_one_month(symbol, 2014, 1) + tm.assert_equal(df['Ask']['2014-01-01 21:55:34.404'], 0.88922) + + def test_get_truefx_datareader(self): + df = web.DataReader('AUD/USD', 'truefx', '2014-01-01', '2014-02-28') + tm.assert_equal(df['Ask']['2014-01-01 21:55:34.404'], 0.88922) + tm.assert_equal(df['Ask']['2014-02-03 00:03:38.169'], 0.87524) + +if __name__ == '__main__': + import nose + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas_datareader/truefx.py b/pandas_datareader/truefx.py new file mode 100644 index 00000000..0834c452 --- /dev/null +++ b/pandas_datareader/truefx.py @@ -0,0 +1,80 @@ +import logging +logger = logging.getLogger(__name__) + +import datetime + +import pandas as pd +import pandas.compat as compat +from pandas.io.common import ZipFile + +from pandas_datareader.base import _BaseReader + +class TrueFXReader(_BaseReader): + + """Get data from TrueFX""" + + def __init__(self, retry_count=3, pause=0.001, session=None): + if not isinstance(retry_count, int) or retry_count < 0: + raise ValueError("'retry_count' must be integer larger than 0") + self.retry_count = retry_count + self.pause = pause + self.session = self._init_session(session, retry_count) + + def url(self, symbol, year, month): + month_name = datetime.datetime(year=year, month=month, day=1).strftime('%B').upper() + return 'http://www.truefx.com/dev/data/{year:04d}/{month_name}-{year:04d}/{symbol}-{year:04d}-{month:02d}.zip'.format(year=year, month=month, symbol=symbol, month_name=month_name) + + def _sanitize_symbol(self, symbol): + return symbol.replace("/", "").upper() + + def _filename_csv(self, symbol, year, month): + return "{symbol}-{year:04d}-{month:02d}.csv".format(year=year, month=month, symbol=symbol) + + def read(self, symbols, start, end): + """ read data """ + start, end = self._sanitize_dates(start, end) + # If a single symbol, (e.g., 'GOOG') + if isinstance(symbols, (compat.string_types, int)): + df = self._read_several_months(symbols, start, end) + # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT']) + else: + raise NotImplementedError("Can't download several symbols") + return df + + def _read_one_month(self, symbol, year, month): + url = self.url(symbol, year, month) + symbol = symbol.replace("/", "").upper() + + logger.debug("querying '%s'" % url) + response = self.session.get(url) + zip_data = compat.BytesIO(response.content) + + with ZipFile(zip_data, 'r') as zf: + zfile = zf.open(self._filename_csv(symbol, year, month)) + data = zfile.readlines() + + df = pd.DataFrame(data) + + #df = df[:100] # uncomment this - just for test + + df[0] = df[0].str.decode('utf8') + df[0] = df[0].str.replace('\n', '') + df[0] = df[0].map(lambda s: s.split(',')) + df['Symbol'] = df[0].map(lambda t: t[0]) + df['Date'] = df[0].map(lambda t: pd.to_datetime(t[1])) + df['Bid'] = df[0].map(lambda t: t[2]).astype(float) + df['Ask'] = df[0].map(lambda t: t[3]).astype(float) + del df[0] + df = df.set_index('Date') + + return df + + def _read_several_months(self, symbol, start, end): + symbol = self._sanitize_symbol(symbol) + months = pd.date_range(start, end, freq='MS') + lst = [] + for dt in months: + year, month = dt.year, dt.month + df = self._read_one_month(symbol, year, month) + lst.append(df) + return pd.concat(lst) From 2af7f30630ae673972b2ee7b7de1e644ed0a8f11 Mon Sep 17 00:00:00 2001 From: femtotrader Date: Tue, 22 Dec 2015 18:58:23 +0100 Subject: [PATCH 2/5] Add requests_cache in tests only when an environment variable is set --- pandas_datareader/tests/test_truefx.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas_datareader/tests/test_truefx.py b/pandas_datareader/tests/test_truefx.py index 88b20adf..a1a500e5 100644 --- a/pandas_datareader/tests/test_truefx.py +++ b/pandas_datareader/tests/test_truefx.py @@ -1,3 +1,4 @@ +import os import pandas as pd import pandas.util.testing as tm import pandas_datareader.data as web @@ -5,13 +6,12 @@ class TestTrueFX(tm.TestCase): def setUp(self): - #session = None - - # Uncomment this for local tests without network - import requests_cache - from datetime import timedelta - session = requests_cache.CachedSession(cache_name='cache', expire_after=timedelta(days=30)) - + if os.getenv('USE_REQUESTS_CACHE', '0') == '1': + import requests_cache + from datetime import timedelta + session = requests_cache.CachedSession(cache_name='cache', expire_after=timedelta(days=30)) + else: + session = None self.dr = TrueFXReader(retry_count=3, pause=0.001, session=session) def test_url(self): From c6bfde8d5b1be161edfca35793342d7b585bec0c Mon Sep 17 00:00:00 2001 From: femtotrader Date: Sat, 26 Dec 2015 18:23:33 +0100 Subject: [PATCH 3/5] passing format to to_datetime to speedup --- pandas_datareader/truefx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas_datareader/truefx.py b/pandas_datareader/truefx.py index 0834c452..7f457458 100644 --- a/pandas_datareader/truefx.py +++ b/pandas_datareader/truefx.py @@ -61,7 +61,7 @@ def _read_one_month(self, symbol, year, month): df[0] = df[0].str.replace('\n', '') df[0] = df[0].map(lambda s: s.split(',')) df['Symbol'] = df[0].map(lambda t: t[0]) - df['Date'] = df[0].map(lambda t: pd.to_datetime(t[1])) + df['Date'] = df[0].map(lambda t: pd.to_datetime(t[1], format='%Y%m%d %H:%M:%S.%f')) df['Bid'] = df[0].map(lambda t: t[2]).astype(float) df['Ask'] = df[0].map(lambda t: t[3]).astype(float) del df[0] From fe2badc83a6a706a1d84a381821a8cc48d79463c Mon Sep 17 00:00:00 2001 From: femtotrader Date: Sat, 26 Dec 2015 18:45:57 +0100 Subject: [PATCH 4/5] Fix tests --- pandas_datareader/tests/test_truefx.py | 11 +++-------- pandas_datareader/truefx.py | 15 ++------------- 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/pandas_datareader/tests/test_truefx.py b/pandas_datareader/tests/test_truefx.py index a1a500e5..23715ebb 100644 --- a/pandas_datareader/tests/test_truefx.py +++ b/pandas_datareader/tests/test_truefx.py @@ -22,15 +22,10 @@ def test_filename_csv(self): expected = 'AUDUSD-2014-01.csv' tm.assert_equal(self.dr._filename_csv('AUDUSD', 2014, 1), expected) - def test_get_truefx_read_one_month(self): - symbol = 'AUDUSD' - df = self.dr._read_one_month(symbol, 2014, 1) - tm.assert_equal(df['Ask']['2014-01-01 21:55:34.404'], 0.88922) - def test_get_truefx_datareader(self): - df = web.DataReader('AUD/USD', 'truefx', '2014-01-01', '2014-02-28') - tm.assert_equal(df['Ask']['2014-01-01 21:55:34.404'], 0.88922) - tm.assert_equal(df['Ask']['2014-02-03 00:03:38.169'], 0.87524) + df = web.DataReader('AUD/USD', 'truefx', '2014-01-01', '2014-02-28', session=self.session) + tm.assert_almost_equal(df.loc['2014-01-01 21:55:34.404', 'Ask'], 0.88922) + tm.assert_almost_equal(df.loc['2014-02-03 00:03:38.169', 'Ask'], 0.87524) if __name__ == '__main__': import nose diff --git a/pandas_datareader/truefx.py b/pandas_datareader/truefx.py index 7f457458..0c5c5015 100644 --- a/pandas_datareader/truefx.py +++ b/pandas_datareader/truefx.py @@ -51,20 +51,9 @@ def _read_one_month(self, symbol, year, month): with ZipFile(zip_data, 'r') as zf: zfile = zf.open(self._filename_csv(symbol, year, month)) - data = zfile.readlines() + df = pd.read_csv(zfile, names=['Symbol', 'Date', 'Bid', 'Ask']) - df = pd.DataFrame(data) - - #df = df[:100] # uncomment this - just for test - - df[0] = df[0].str.decode('utf8') - df[0] = df[0].str.replace('\n', '') - df[0] = df[0].map(lambda s: s.split(',')) - df['Symbol'] = df[0].map(lambda t: t[0]) - df['Date'] = df[0].map(lambda t: pd.to_datetime(t[1], format='%Y%m%d %H:%M:%S.%f')) - df['Bid'] = df[0].map(lambda t: t[2]).astype(float) - df['Ask'] = df[0].map(lambda t: t[3]).astype(float) - del df[0] + df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d %H:%M:%S.%f') df = df.set_index('Date') return df From fd75bbc0f5ea470649ef16f68402cb93a1e30335 Mon Sep 17 00:00:00 2001 From: femtotrader Date: Sat, 26 Dec 2015 18:48:46 +0100 Subject: [PATCH 5/5] Fix session parameter in tests --- pandas_datareader/tests/test_truefx.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas_datareader/tests/test_truefx.py b/pandas_datareader/tests/test_truefx.py index 23715ebb..eafda1f5 100644 --- a/pandas_datareader/tests/test_truefx.py +++ b/pandas_datareader/tests/test_truefx.py @@ -9,10 +9,10 @@ def setUp(self): if os.getenv('USE_REQUESTS_CACHE', '0') == '1': import requests_cache from datetime import timedelta - session = requests_cache.CachedSession(cache_name='cache', expire_after=timedelta(days=30)) + self.session = requests_cache.CachedSession(cache_name='cache', expire_after=timedelta(days=30)) else: - session = None - self.dr = TrueFXReader(retry_count=3, pause=0.001, session=session) + self.session = None + self.dr = TrueFXReader(retry_count=3, pause=0.001, session=self.session) def test_url(self): expected = 'http://www.truefx.com/dev/data/2014/JANUARY-2014/AUDUSD-2014-01.zip'