Skip to content

ENH: TrueFX tick datareader #152

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pandas_datareader/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pandas_datareader.famafrench import FamaFrenchReader
from pandas_datareader.oecd import OECDReader
from pandas_datareader.edgar import EdgarIndexReader
from pandas_datareader.truefx import TrueFXReader


def get_data_fred(*args, **kwargs):
Expand Down Expand Up @@ -141,6 +142,9 @@ def DataReader(name, data_source=None, start=None, end=None,
return EdgarIndexReader(symbols=name, start=start, end=end,
retry_count=retry_count, pause=pause,
session=session).read()
elif data_source == "truefx":
return TrueFXReader(retry_count=retry_count, pause=pause,
session=session).read(symbols=name, start=start, end=end)
else:
msg = "data_source=%r is not implemented" % data_source
raise NotImplementedError(msg)
Expand Down
32 changes: 32 additions & 0 deletions pandas_datareader/tests/test_truefx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
import pandas as pd
import pandas.util.testing as tm
import pandas_datareader.data as web
from pandas_datareader.truefx import TrueFXReader

class TestTrueFX(tm.TestCase):
    """Checks for TrueFXReader helpers and the 'truefx' DataReader source."""

    def setUp(self):
        """Create the reader under test.

        When the USE_REQUESTS_CACHE environment variable equals '1', a
        requests_cache session with a 30-day expiry is used; otherwise no
        session is supplied to the reader.
        """
        use_cache = os.getenv('USE_REQUESTS_CACHE', '0') == '1'
        if use_cache:
            import requests_cache
            from datetime import timedelta
            expiry = timedelta(days=30)
            self.session = requests_cache.CachedSession(cache_name='cache', expire_after=expiry)
        else:
            self.session = None
        self.dr = TrueFXReader(retry_count=3, pause=0.001, session=self.session)

    def test_url(self):
        """The archive URL embeds year, upper-case month name and symbol."""
        actual = self.dr.url('AUDUSD', 2014, 1)
        tm.assert_equal(actual, 'http://www.truefx.com/dev/data/2014/JANUARY-2014/AUDUSD-2014-01.zip')

    def test_filename_csv(self):
        """The CSV member name is '<symbol>-<yyyy>-<mm>.csv'."""
        actual = self.dr._filename_csv('AUDUSD', 2014, 1)
        tm.assert_equal(actual, 'AUDUSD-2014-01.csv')

    def test_get_truefx_datareader(self):
        """Two months of AUD/USD ticks contain the expected 'Ask' quotes."""
        df = web.DataReader('AUD/USD', 'truefx', '2014-01-01', '2014-02-28', session=self.session)
        expected_asks = [
            ('2014-01-01 21:55:34.404', 0.88922),
            ('2014-02-03 00:03:38.169', 0.87524),
        ]
        for timestamp, ask in expected_asks:
            tm.assert_almost_equal(df.loc[timestamp, 'Ask'], ask)

# Allow running this test module directly through nose, dropping into pdb
# on the first error or failure.
if __name__ == '__main__':
    import nose
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)
69 changes: 69 additions & 0 deletions pandas_datareader/truefx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import logging
logger = logging.getLogger(__name__)

import datetime

import pandas as pd
import pandas.compat as compat
from pandas.io.common import ZipFile

from pandas_datareader.base import _BaseReader

class TrueFXReader(_BaseReader):

"""Get data from TrueFX"""

def __init__(self, retry_count=3, pause=0.001, session=None):
if not isinstance(retry_count, int) or retry_count < 0:
raise ValueError("'retry_count' must be integer larger than 0")
self.retry_count = retry_count
self.pause = pause
self.session = self._init_session(session, retry_count)

def url(self, symbol, year, month):
    """Build the URL of the monthly TrueFX tick archive.

    Parameters
    ----------
    symbol : str
        Symbol exactly as it should appear in the URL (e.g. ``'AUDUSD'``);
        it is inserted verbatim.
    year : int
        Calendar year, rendered zero-padded to four digits.
    month : int
        Calendar month, 1 through 12.

    Returns
    -------
    str
        ``http://www.truefx.com/dev/data/<yyyy>/<MONTHNAME>-<yyyy>/<symbol>-<yyyy>-<mm>.zip``

    Raises
    ------
    ValueError
        If ``month`` is outside 1..12.
    """
    # The English month names are spelled out instead of being taken from
    # ``strftime('%B')``: that directive follows the process locale, so a
    # non-English LC_TIME setting would produce a URL that does not exist.
    month_names = (
        'JANUARY', 'FEBRUARY', 'MARCH', 'APRIL', 'MAY', 'JUNE',
        'JULY', 'AUGUST', 'SEPTEMBER', 'OCTOBER', 'NOVEMBER', 'DECEMBER',
    )
    if not 1 <= month <= 12:
        raise ValueError("month must be in 1..12")
    template = (
        'http://www.truefx.com/dev/data/{year:04d}/{month_name}-{year:04d}/'
        '{symbol}-{year:04d}-{month:02d}.zip'
    )
    return template.format(year=year, month=month, symbol=symbol,
                           month_name=month_names[month - 1])

def _sanitize_symbol(self, symbol):
return symbol.replace("/", "").upper()

def _filename_csv(self, symbol, year, month):
return "{symbol}-{year:04d}-{month:02d}.csv".format(year=year, month=month, symbol=symbol)

def read(self, symbols, start, end):
    """Read tick data for a single symbol between ``start`` and ``end``.

    The dates are first normalised with ``self._sanitize_dates``. Only a
    single symbol (a string or an int, per the type check below) is
    accepted; it is handed to ``self._read_several_months``.

    Raises
    ------
    NotImplementedError
        If ``symbols`` is anything other than a single string/int, e.g. a
        list of symbols.
    """
    start, end = self._sanitize_dates(start, end)
    single_symbol = isinstance(symbols, (compat.string_types, int))
    if not single_symbol:
        # Multiple symbols (e.g. a list) are not supported.
        raise NotImplementedError("Can't download several symbols")
    return self._read_several_months(symbols, start, end)

def _read_one_month(self, symbol, year, month):
    """Download and parse the tick archive of one symbol for one month.

    The zip archive located at ``self.url(...)`` is fetched through
    ``self.session``; the CSV member named by ``self._filename_csv(...)``
    is parsed into columns 'Symbol', 'Date', 'Bid' and 'Ask', and 'Date'
    (parsed with format ``%Y%m%d %H:%M:%S.%f``) becomes the index.

    Parameters
    ----------
    symbol : str
        Symbol, with or without '/' and in any case.
    year, month : int
        Calendar year and month of the archive.

    Raises
    ------
    Whatever ``response.raise_for_status()`` raises when the server answers
    with an HTTP error status.
    """
    # Normalise before building the URL so that the URL and the CSV member
    # name both use the same slash-free, upper-case symbol.
    symbol = self._sanitize_symbol(symbol)
    url = self.url(symbol, year, month)

    logger.debug("querying '%s'", url)
    response = self.session.get(url)
    # Surface HTTP failures here rather than as an opaque zip error below.
    response.raise_for_status()
    zip_data = compat.BytesIO(response.content)

    with ZipFile(zip_data, 'r') as zf:
        # Close the archive member as soon as it has been parsed.
        with zf.open(self._filename_csv(symbol, year, month)) as zfile:
            df = pd.read_csv(zfile, names=['Symbol', 'Date', 'Bid', 'Ask'])

    df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d %H:%M:%S.%f')
    # NOTE(review): timestamps are reported to repeat in this data, so the
    # resulting index is not guaranteed to be unique.
    df = df.set_index('Date')
Copy link
Contributor Author

@femtotrader femtotrader Jun 17, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Caution: the Date column has non-unique values.

df = df.set_index('Date', verify_integrity=True)

should raise an error.

We might make Date unique (by adding a quantum of 1 microsecond or 1 nanosecond to duplicated timestamps).

See http://stackoverflow.com/questions/34575126/create-a-dataframe-with-datetimeindex-with-unique-values-by-adding-a-timedelta/34576154#34576154


return df

def _read_several_months(self, symbol, start, end):
symbol = self._sanitize_symbol(symbol)
months = pd.date_range(start, end, freq='MS')
lst = []
for dt in months:
year, month = dt.year, dt.month
df = self._read_one_month(symbol, year, month)
lst.append(df)
return pd.concat(lst)