From d07b9040598512feb5a5a144561302fce1cc0450 Mon Sep 17 00:00:00 2001
From: Trevor Prater
Date: Wed, 28 Sep 2016 14:53:03 -0400
Subject: [PATCH] ENH: Adds support for Enigma datasets

---
Review notes (free-form text placed here, between the "---" line and the
diffstat, is not part of the commit message):

* The patch text had been collapsed onto a handful of lines; it is restored
  to one diff line per source line. Leading whitespace of context lines was
  reconstructed and should be checked against the target tree.
* The "index" blob ids and the author line are carried over as found;
  regenerate the patch with `git format-patch` after applying.
* remote_data.rst: the link targets of the two added references were
  restored (the Enigma URL is a best guess - TODO confirm). Context lines
  are left exactly as found.
* data.py: the "enigma" branch of DataReader now forwards `session`.
* enigma.py:
  - `session` is forwarded to the base reader instead of being dropped.
  - `== None` comparisons replaced with `is None`.
  - error messages no longer embed source indentation.
  - polling goes through `self.session`, parses the export response once,
    retries only on request errors and re-raises with a bare `raise`.
  - `read()` wraps the decompressed bytes in BytesIO (StringIO rejects
    bytes on Python 3).
  - docstring typo `prd` -> `pdr`; parameters documented.
  - unused imports are left in place; drop them in a follow-up.
* test_enigma.py: the class is skipped when ENIGMA_API_KEY is unset (the
  old code failed with TypeError on `None + 'xxx'`); try/except/assert
  blocks replaced with assertRaises.

 docs/source/remote_data.rst            |  17 ++++
 pandas_datareader/__init__.py          |   4 +-
 pandas_datareader/data.py              |  11 ++-
 pandas_datareader/enigma.py            | 152 +++++++++++++++++++++++++
 pandas_datareader/tests/test_enigma.py |  49 ++++++++
 5 files changed, 230 insertions(+), 3 deletions(-)
 create mode 100644 pandas_datareader/enigma.py
 create mode 100644 pandas_datareader/tests/test_enigma.py

diff --git a/docs/source/remote_data.rst b/docs/source/remote_data.rst
index 5ee87679..2d4100f3 100644
--- a/docs/source/remote_data.rst
+++ b/docs/source/remote_data.rst
@@ -25,6 +25,7 @@ Currently the following sources are supported:
 
     - :ref:`Yahoo! Finance`
     - :ref:`Google Finance`
+    - :ref:`Enigma<remote_data.enigma>`
     - :ref:`St.Louis FED (FRED)`
     - :ref:`Kenneth French's data library`
     - :ref:`World Bank`
@@ -195,6 +196,22 @@ Available expiry dates can be accessed from the ``expiry_dates`` property.
     data = goog.get_options_data(expiry=goog.expiry_dates[0])
     data.iloc[0:5, 0:5]
 
+.. _remote_data.enigma:
+
+Enigma
+======
+
+Access datasets from `Enigma <https://enigma.io>`__,
+the world's largest repository of structured public data.
+
+.. ipython:: python
+
+    import os
+    import pandas_datareader as pdr
+
+    df = pdr.get_data_enigma('enigma.trade.ams.toxic.2015', os.getenv('ENIGMA_API_KEY'))
+    df.columns
+
 .. _remote_data.fred:
 
 FRED
diff --git a/pandas_datareader/__init__.py b/pandas_datareader/__init__.py
index 0d4a051a..951c3dc1 100644
--- a/pandas_datareader/__init__.py
+++ b/pandas_datareader/__init__.py
@@ -1,4 +1,4 @@
-__version__ = version = '0.2.1'
+__version__ = version = '0.2.2'
 
-from .data import (get_components_yahoo, get_data_famafrench, get_data_google, get_data_yahoo,  # noqa
+from .data import (get_components_yahoo, get_data_famafrench, get_data_google, get_data_yahoo, get_data_enigma,  # noqa
                    get_data_yahoo_actions, get_quote_google, get_quote_yahoo, DataReader, Options)  # noqa
diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py
index 31ff03e6..b8a3eeea 100644
--- a/pandas_datareader/data.py
+++ b/pandas_datareader/data.py
@@ -19,6 +19,7 @@
 from pandas_datareader.famafrench import FamaFrenchReader
 from pandas_datareader.oecd import OECDReader
 from pandas_datareader.edgar import EdgarIndexReader
+from pandas_datareader.enigma import EnigmaReader
 from pandas_datareader.oanda import get_oanda_currency_historical_rates
 
 
@@ -38,6 +39,10 @@ def get_data_yahoo(*args, **kwargs):
     return YahooDailyReader(*args, **kwargs).read()
 
 
+def get_data_enigma(*args, **kwargs):
+    return EnigmaReader(*args, **kwargs).read()
+
+
 def get_data_yahoo_actions(*args, **kwargs):
     return YahooActionReader(*args, **kwargs).read()
 
@@ -51,7 +56,7 @@ def get_quote_google(*args, **kwargs):
 
 
 def DataReader(name, data_source=None, start=None, end=None,
-               retry_count=3, pause=0.001, session=None):
+               retry_count=3, pause=0.001, session=None, access_key=None):
     """
     Imports data from a number of online sources.
 
@@ -125,6 +130,10 @@ def DataReader(name, data_source=None, start=None, end=None,
                                  retry_count=retry_count, pause=pause,
                                  session=session).read()
 
+    elif data_source == "enigma":
+        return EnigmaReader(datapath=name, api_key=access_key,
+                            session=session).read()
+
     elif data_source == "fred":
         return FredReader(symbols=name, start=start, end=end,
                           retry_count=retry_count, pause=pause,
diff --git a/pandas_datareader/enigma.py b/pandas_datareader/enigma.py
new file mode 100644
index 00000000..e2f60715
--- /dev/null
+++ b/pandas_datareader/enigma.py
@@ -0,0 +1,152 @@
+import zlib
+import json
+import os
+import sys
+import time
+from io import BytesIO
+from pandas.compat import StringIO
+
+from pandas import DataFrame
+import pandas.compat as compat
+import pandas as pd
+import requests
+
+from pandas_datareader.base import _BaseReader
+
+
+class EnigmaReader(_BaseReader):
+    """
+    Collects Enigma data located at the specified datapath and returns a
+    pandas DataFrame.
+
+    Parameters
+    ----------
+    datapath : str
+        Enigma datapath (ex: 'enigma.inspections.restaurants.fl').
+    api_key : str, optional
+        Enigma API key. When omitted, the ENIGMA_API_KEY environment
+        variable is used instead.
+    retry_count : int, default 5
+        Forwarded to the base reader.
+    pause : float, default 0.250
+        Forwarded to the base reader.
+    session : Session, default None
+        Session forwarded to the base reader; every HTTP request made by
+        this reader goes through ``self.session``.
+
+    Raises
+    ------
+    ValueError
+        If no API key is available or if datapath is not a string.
+
+    Usage (high-level):
+    ```
+    import pandas_datareader as pdr
+    df = pdr.get_data_enigma('enigma.inspections.restaurants.fl')
+
+    # in the event that ENIGMA_API_KEY does not exist in your env,
+    # it can be supplied as the second arg:
+    df = pdr.get_data_enigma('enigma.inspections.restaurants.fl',
+                             'ARIAMFHKJMISF38UT')
+    ```
+
+    Usage:
+    ```
+    df = EnigmaReader(datapath='enigma.inspections.restaurants.fl',
+                      api_key='ARIAMFHKJMISF38UT').read()
+    ```
+    """
+
+    def __init__(self,
+                 datapath=None,
+                 api_key=None,
+                 retry_count=5,
+                 pause=0.250,
+                 session=None):
+
+        super(EnigmaReader, self).__init__(symbols=[],
+                                           retry_count=retry_count,
+                                           pause=pause,
+                                           session=session)
+        if api_key is None:
+            api_key = os.getenv('ENIGMA_API_KEY')
+        if api_key is None:
+            raise ValueError(
+                "Please provide an Enigma API key or set the ENIGMA_API_KEY "
+                "environment variable\n"
+                "If you do not have an API key, you can get one here: "
+                "https://app.enigma.io/signup")
+        self._api_key = api_key
+
+        if not isinstance(datapath, compat.string_types):
+            raise ValueError(
+                "The Enigma datapath must be a string "
+                "(ex: 'enigma.inspections.restaurants.fl')")
+        self._datapath = datapath
+
+    @property
+    def url(self):
+        """URL that asks the Enigma API to export ``datapath``."""
+        return 'https://api.enigma.io/v2/export/{0}/{1}'.format(
+            self._api_key, self._datapath)
+
+    @property
+    def export_key(self):
+        """Key of the export response holding the download URL."""
+        return 'export_url'
+
+    @property
+    def _head_key(self):
+        """Key of the export response holding the URL to poll."""
+        return 'head_url'
+
+    def _request(self, url):
+        """GET ``url`` through the session, raising HTTPError on failure."""
+        self.session.headers.update({'User-Agent': 'pandas-datareader'})
+        resp = self.session.get(url)
+        resp.raise_for_status()
+        return resp
+
+    def _decompress_export(self, compressed_export_data):
+        """Return the decompressed bytes of a gzip-compressed export."""
+        # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
+        return zlib.decompress(compressed_export_data, 16 + zlib.MAX_WBITS)
+
+    def extract_export_url(self, delay=10, max_attempts=10):
+        """
+        Request an export and return its download URL once it is ready.
+
+        Performs an HTTP HEAD request on 'head_url' until it no longer
+        returns an error status. This allows the Enigma API time to export
+        the requested data.
+
+        Parameters
+        ----------
+        delay : int or float, default 10
+            Seconds to sleep between two HEAD requests.
+        max_attempts : int, default 10
+            Number of failed HEAD requests tolerated before the last error
+            is re-raised.
+        """
+        # Parse the response once, outside the retry loop, so that a
+        # malformed response fails immediately instead of being retried.
+        export_info = self._request(self.url).json()
+        head_url = export_info[self._head_key]
+        attempts = 0
+        while True:
+            try:
+                self.session.head(head_url).raise_for_status()
+            except requests.exceptions.RequestException:
+                attempts += 1
+                if attempts > max_attempts:
+                    raise
+                time.sleep(delay)
+            else:
+                return export_info[self.export_key]
+
+    def read(self):
+        """Download the export and return it as a pandas DataFrame."""
+        export_gzipped_req = self._request(self.extract_export_url())
+        decompressed_data = self._decompress_export(export_gzipped_req.content)
+        # zlib returns bytes, so wrap them in BytesIO rather than StringIO.
+        return pd.read_csv(BytesIO(decompressed_data))
diff --git a/pandas_datareader/tests/test_enigma.py b/pandas_datareader/tests/test_enigma.py
new file mode 100644
index 00000000..91146c4a
--- /dev/null
+++ b/pandas_datareader/tests/test_enigma.py
@@ -0,0 +1,49 @@
+import os
+
+import requests
+from requests.exceptions import HTTPError
+
+import nose
+import pandas.util.testing as tm
+from pandas.util.testing import (assert_series_equal, assert_frame_equal)
+from pandas_datareader.tests._utils import _skip_if_no_lxml
+
+import pandas_datareader.data as web
+import pandas_datareader as pdr
+
+TEST_API_KEY = os.getenv('ENIGMA_API_KEY')
+
+
+class TestEnigma(tm.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        super(TestEnigma, cls).setUpClass()
+        _skip_if_no_lxml()
+        # Every test below talks to the live Enigma API, so a key is needed.
+        if TEST_API_KEY is None:
+            raise nose.SkipTest("ENIGMA_API_KEY is not set")
+
+    def test_enigma(self):
+        self.assertTrue('serialid' in list(
+            web.DataReader('enigma.inspections.restaurants.fl',
+                           'enigma',
+                           access_key=TEST_API_KEY).columns))
+        self.assertTrue('serialid' in list(pdr.get_data_enigma(
+            'enigma.inspections.restaurants.fl', TEST_API_KEY)))
+
+    def test_bad_key(self):
+        self.assertRaises(HTTPError, web.DataReader,
+                          'enigma.inspections.restaurants.fl',
+                          'enigma',
+                          access_key=TEST_API_KEY + 'xxx')
+
+    def test_bad_url(self):
+        self.assertRaises(HTTPError, web.DataReader,
+                          'enigma.inspections.restaurants.fllzzy',
+                          'enigma',
+                          access_key=TEST_API_KEY)
+
+
+if __name__ == '__main__':
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
+                   exit=False)