Skip to content

Adds support for Enigma datasets #245

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions docs/source/remote_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Currently the following sources are supported:

- :ref:`Yahoo! Finance<remote_data.yahoo>`
- :ref:`Google Finance<remote_data.google>`
- :ref:`Enigma<remote_data.enigma>`
- :ref:`St.Louis FED (FRED)<remote_data.fred>`
- :ref:`Kenneth French's data library<remote_data.ff>`
- :ref:`World Bank<remote_data.wb>`
Expand Down Expand Up @@ -195,6 +196,22 @@ Available expiry dates can be accessed from the ``expiry_dates`` property.
data = goog.get_options_data(expiry=goog.expiry_dates[0])
data.iloc[0:5, 0:5]

.. _remote_data.enigma:

Enigma
======

Access datasets from `Enigma <https://app.enigma.io>`__,
the world's largest repository of structured public data.

.. ipython:: python

    import os
    import pandas_datareader as pdr

    df = pdr.get_data_enigma('enigma.trade.ams.toxic.2015', os.getenv('ENIGMA_API_KEY'))
    df.columns

.. _remote_data.fred:

FRED
Expand Down
4 changes: 2 additions & 2 deletions pandas_datareader/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = version = '0.2.1'
__version__ = version = '0.2.2'

from .data import (get_components_yahoo, get_data_famafrench, get_data_google, get_data_yahoo, # noqa
from .data import (get_components_yahoo, get_data_famafrench, get_data_google, get_data_yahoo, get_data_enigma, # noqa
get_data_yahoo_actions, get_quote_google, get_quote_yahoo, DataReader, Options) # noqa
10 changes: 9 additions & 1 deletion pandas_datareader/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pandas_datareader.famafrench import FamaFrenchReader
from pandas_datareader.oecd import OECDReader
from pandas_datareader.edgar import EdgarIndexReader
from pandas_datareader.enigma import EnigmaReader
from pandas_datareader.oanda import get_oanda_currency_historical_rates


Expand All @@ -38,6 +39,10 @@ def get_data_yahoo(*args, **kwargs):
return YahooDailyReader(*args, **kwargs).read()


def get_data_enigma(*args, **kwargs):
    """Build an ``EnigmaReader`` from the given arguments and return the
    result of its ``read()`` call.

    All positional and keyword arguments are forwarded unchanged to the
    ``EnigmaReader`` constructor.
    """
    reader = EnigmaReader(*args, **kwargs)
    return reader.read()


def get_data_yahoo_actions(*args, **kwargs):
    """Build a ``YahooActionReader`` from the given arguments and return the
    result of its ``read()`` call.

    All positional and keyword arguments are forwarded unchanged to the
    ``YahooActionReader`` constructor.
    """
    reader = YahooActionReader(*args, **kwargs)
    return reader.read()

Expand All @@ -51,7 +56,7 @@ def get_quote_google(*args, **kwargs):


def DataReader(name, data_source=None, start=None, end=None,
retry_count=3, pause=0.001, session=None):
retry_count=3, pause=0.001, session=None, access_key=None):
"""
Imports data from a number of online sources.

Expand Down Expand Up @@ -125,6 +130,9 @@ def DataReader(name, data_source=None, start=None, end=None,
retry_count=retry_count, pause=pause,
session=session).read()

elif data_source == "enigma":
return EnigmaReader(datapath=name, api_key=access_key).read()

elif data_source == "fred":
return FredReader(symbols=name, start=start, end=end,
retry_count=retry_count, pause=pause,
Expand Down
107 changes: 107 additions & 0 deletions pandas_datareader/enigma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import zlib
import json
import os
import sys
import time
from pandas.compat import StringIO

from pandas import DataFrame
import pandas.compat as compat
import pandas as pd
import requests

from pandas_datareader.base import _BaseReader


class EnigmaReader(_BaseReader):
    """
    Collects Enigma data located at the specified datapath and returns a
    pandas DataFrame.

    The reader requests an export of the dataset from the Enigma v2 export
    endpoint, polls the returned "head" URL until the export is available,
    downloads the gzip-compressed CSV and parses it with ``pandas.read_csv``.

    Parameters
    ----------
    datapath : str
        Enigma datapath, e.g. ``'enigma.inspections.restaurants.fl'``.
    api_key : str, optional
        Enigma API key. When omitted, the ``ENIGMA_API_KEY`` environment
        variable is used instead.
    retry_count : int, default 5
        Forwarded to ``_BaseReader``.
    pause : float, default 0.250
        Forwarded to ``_BaseReader``.
    session : optional
        Forwarded to ``_BaseReader``; presumably a ``requests`` session
        object that is then exposed as ``self.session``.

    Raises
    ------
    ValueError
        If no API key is supplied and ``ENIGMA_API_KEY`` is not set, or if
        ``datapath`` is not a string.

    Usage (high-level):
    ```
    import pandas_datareader as pdr
    df = pdr.get_data_enigma('enigma.inspections.restaurants.fl')

    # in the event that ENIGMA_API_KEY does not exist in your env,
    # it can be supplied as the second arg:
    df = pdr.get_data_enigma('enigma.inspections.restaurants.fl',
                             'ARIAMFHKJMISF38UT')
    ```

    Usage:
    ```
    df = EnigmaReader(datapath='enigma.inspections.restaurants.fl',
                      api_key='ARIAMFHKJMISF38UT').read()
    ```
    """

    def __init__(self,
                 datapath=None,
                 api_key=None,
                 retry_count=5,
                 pause=0.250,
                 session=None):

        # Forward ``session`` as well, so a caller-supplied session is not
        # silently discarded.
        super(EnigmaReader, self).__init__(symbols=[],
                                           retry_count=retry_count,
                                           pause=pause,
                                           session=session)

        # An explicit key wins; otherwise fall back to the environment.
        if api_key is None:
            api_key = os.getenv('ENIGMA_API_KEY')
            if api_key is None:
                raise ValueError(
                    "Please provide an Enigma API key or set the "
                    "ENIGMA_API_KEY environment variable\n\n"
                    "If you do not have an API key, you can get one here: "
                    "https://app.enigma.io/signup")
        self._api_key = api_key

        if not isinstance(datapath, compat.string_types):
            raise ValueError(
                "The Enigma datapath must be a string "
                "(ex: 'enigma.inspections.restaurants.fl')")
        self._datapath = datapath

    @property
    def url(self):
        """URL of the export endpoint for this API key and datapath."""
        return 'https://api.enigma.io/v2/export/{}/{}'.format(self._api_key,
                                                              self._datapath)

    @property
    def export_key(self):
        """Key of the export download URL in the export response JSON."""
        return 'export_url'

    @property
    def _head_key(self):
        """Key of the polling ("head") URL in the export response JSON."""
        return 'head_url'

    def _request(self, url):
        """
        GET ``url`` through ``self.session`` and return the response.

        Raises ``requests.exceptions.HTTPError`` (via ``raise_for_status``)
        when the response carries an error status code.
        """
        self.session.headers.update({'User-Agent': 'pandas-datareader'})
        resp = self.session.get(url)
        resp.raise_for_status()
        return resp

    def _decompress_export(self, compressed_export_data):
        """Return the decompressed bytes of a gzip-compressed export."""
        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
        return zlib.decompress(compressed_export_data, 16 + zlib.MAX_WBITS)

    def extract_export_url(self, delay=10, max_attempts=10):
        """
        Performs an HTTP HEAD request on 'head_url' until it returns a `200`.
        This allows the Enigma API time to export the requested data.

        Parameters
        ----------
        delay : number, default 10
            Seconds to sleep between two polling attempts.
        max_attempts : int, default 10
            Number of failed polls tolerated; the next failure after that
            is re-raised to the caller.

        Returns
        -------
        The value stored under 'export_url' in the export response.
        """
        # Parse the export response once; it does not change while polling.
        export_info = self._request(self.url).json()
        head_url = export_info[self._head_key]

        attempts = 0
        while True:
            try:
                requests.head(head_url).raise_for_status()
            except requests.exceptions.RequestException:
                # Only request-level failures are worth waiting on; other
                # errors (e.g. a malformed response) surface immediately.
                attempts += 1
                if attempts > max_attempts:
                    raise
                time.sleep(delay)
            else:
                return export_info[self.export_key]

    def read(self):
        """Download the export and return its contents as a DataFrame."""
        export_gzipped_req = self._request(self.extract_export_url())
        decompressed_data = self._decompress_export(
            export_gzipped_req.content)
        # zlib yields bytes; StringIO needs text on Python 3.
        return pd.read_csv(StringIO(decompressed_data.decode('utf-8')))
54 changes: 54 additions & 0 deletions pandas_datareader/tests/test_enigma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import os

import requests
from requests.exceptions import HTTPError

import nose
import pandas.util.testing as tm
from pandas.util.testing import (assert_series_equal, assert_frame_equal)
from pandas_datareader.tests._utils import _skip_if_no_lxml

import pandas_datareader.data as web
import pandas_datareader as pdr

TEST_API_KEY = os.getenv('ENIGMA_API_KEY')


class TestEnigma(tm.TestCase):
    """
    Tests for the Enigma data source.

    Every test passes the key read from the ``ENIGMA_API_KEY`` environment
    variable, so the whole class is skipped when that variable is not set.
    """

    @classmethod
    def setUpClass(cls):
        super(TestEnigma, cls).setUpClass()
        _skip_if_no_lxml()
        # Without a key, test_bad_key would die with a TypeError on
        # ``None + 'xxx'`` instead of exercising the reader.
        if TEST_API_KEY is None:
            raise nose.SkipTest("ENIGMA_API_KEY environment variable "
                                "is not set")

    def test_enigma(self):
        # Both entry points should return a frame with a 'serialid' column.
        via_datareader = web.DataReader('enigma.inspections.restaurants.fl',
                                        'enigma',
                                        access_key=TEST_API_KEY)
        self.assertIn('serialid', list(via_datareader.columns))

        via_helper = pdr.get_data_enigma(
            'enigma.inspections.restaurants.fl', TEST_API_KEY)
        self.assertIn('serialid', list(via_helper))

    def test_bad_key(self):
        # An invalid API key must surface as an HTTPError.
        with self.assertRaises(HTTPError):
            web.DataReader('enigma.inspections.restaurants.fl',
                           'enigma',
                           access_key=TEST_API_KEY + 'xxx')

    def test_bad_url(self):
        # A non-existent datapath must surface as an HTTPError.
        with self.assertRaises(HTTPError):
            web.DataReader('enigma.inspections.restaurants.fllzzy',
                           'enigma',
                           access_key=TEST_API_KEY)


if __name__ == '__main__':
    # When executed as a script, hand this module to nose with the flags
    # below; exit=False is passed so runmodule is asked not to exit.
    runner_args = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=runner_args, exit=False)