Skip to content

Commit c7c496c

Browse files
author
Trevor Prater
committed
ENH: Adds support for Enigma datasets
Init commit Cleans up code Improves failure messaging Adds usage, improves errors Adds docs Bumps version Adds retry logic, improves docs Fixes import Adds DataReader as an entry point for enigma Adds query caching Formatting Reads DataReader before returning Adds tests Removes unused import Cleans up code Fixes test Cleans up unused imports Updates User-Agent header Removes hardcoded API token
1 parent d46db9e commit c7c496c

File tree

5 files changed

+189
-3
lines changed

5 files changed

+189
-3
lines changed

docs/source/remote_data.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Currently the following sources are supported:
2525

2626
- :ref:`Yahoo! Finance<remote_data.yahoo>`
2727
- :ref:`Google Finance<remote_data.google>`
28+
- :ref:`Enigma<remote_data.enigma>`
2829
- :ref:`St.Louis FED (FRED)<remote_data.fred>`
2930
- :ref:`Kenneth French's data library<remote_data.ff>`
3031
- :ref:`World Bank<remote_data.wb>`
@@ -195,6 +196,22 @@ Available expiry dates can be accessed from the ``expiry_dates`` property.
195196
data = goog.get_options_data(expiry=goog.expiry_dates[0])
196197
data.iloc[0:5, 0:5]
197198
199+
.. _remote_data.enigma:
200+
201+
Enigma
202+
======
203+
204+
Access datasets from `Enigma <https://app.enigma.io>`__,
205+
the world's largest repository of structured public data.
206+
207+
.. ipython:: python
208+
209+
import os
210+
import pandas_datareader as pdr
211+
212+
df = pdr.get_data_enigma('enigma.trade.ams.toxic.2015', os.getenv('ENIGMA_API_KEY'))
213+
df.columns
214+
198215
.. _remote_data.fred:
199216

200217
FRED

pandas_datareader/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = version = '0.2.1'
1+
__version__ = version = '0.2.2'
22

3-
from .data import (get_components_yahoo, get_data_famafrench, get_data_google, get_data_yahoo, # noqa
3+
from .data import (get_components_yahoo, get_data_famafrench, get_data_google, get_data_yahoo, get_data_enigma, # noqa
44
get_data_yahoo_actions, get_quote_google, get_quote_yahoo, DataReader, Options) # noqa

pandas_datareader/data.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from pandas_datareader.famafrench import FamaFrenchReader
2020
from pandas_datareader.oecd import OECDReader
2121
from pandas_datareader.edgar import EdgarIndexReader
22+
from pandas_datareader.enigma import EnigmaReader
2223
from pandas_datareader.oanda import get_oanda_currency_historical_rates
2324

2425

@@ -38,6 +39,10 @@ def get_data_yahoo(*args, **kwargs):
3839
return YahooDailyReader(*args, **kwargs).read()
3940

4041

42+
def get_data_enigma(*args, **kwargs):
    """
    Build an ``EnigmaReader`` from the given arguments and return the result
    of its ``read()`` call.

    All positional and keyword arguments are forwarded unchanged to
    ``EnigmaReader``.
    """
    reader = EnigmaReader(*args, **kwargs)
    return reader.read()
44+
45+
4146
def get_data_yahoo_actions(*args, **kwargs):
4247
return YahooActionReader(*args, **kwargs).read()
4348

@@ -51,7 +56,7 @@ def get_quote_google(*args, **kwargs):
5156

5257

5358
def DataReader(name, data_source=None, start=None, end=None,
54-
retry_count=3, pause=0.001, session=None):
59+
retry_count=3, pause=0.001, session=None, access_key=None):
5560
"""
5661
Imports data from a number of online sources.
5762
@@ -125,6 +130,9 @@ def DataReader(name, data_source=None, start=None, end=None,
125130
retry_count=retry_count, pause=pause,
126131
session=session).read()
127132

133+
elif data_source == "enigma":
134+
return EnigmaReader(datapath=name, api_key=access_key).read()
135+
128136
elif data_source == "fred":
129137
return FredReader(symbols=name, start=start, end=end,
130138
retry_count=retry_count, pause=pause,

pandas_datareader/enigma.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
import zlib
2+
import json
3+
import os
4+
import sys
5+
import time
6+
from pandas.compat import StringIO
7+
8+
from pandas import DataFrame
9+
import pandas.compat as compat
10+
import pandas as pd
11+
import requests
12+
13+
from pandas_datareader.base import _BaseReader
14+
15+
16+
class EnigmaReader(_BaseReader):
    """
    Collects Enigma data located at the specified datapath and returns a
    pandas DataFrame.

    Parameters
    ----------
    datapath : str
        Enigma dataset path (ex: 'enigma.inspections.restaurants.fl').
    api_key : str, optional
        Enigma API key. When omitted, the ``ENIGMA_API_KEY`` environment
        variable is used instead.
    retry_count : int, default 5
        Forwarded to ``_BaseReader``.
    pause : float, default 0.250
        Forwarded to ``_BaseReader``.
    session : optional
        Forwarded to ``_BaseReader``.
        NOTE(review): assumes ``_BaseReader.__init__`` accepts ``session``
        like the other readers do -- confirm against base.py.

    Raises
    ------
    ValueError
        If no API key is supplied or found in the environment, or if
        ``datapath`` is not a string.

    Usage (high-level):
    ```
    import pandas_datareader as pdr
    df = pdr.get_data_enigma('enigma.inspections.restaurants.fl')

    # in the event that ENIGMA_API_KEY does not exist in your env,
    # it can be supplied as the second arg:
    df = pdr.get_data_enigma('enigma.inspections.restaurants.fl',
                             'ARIAMFHKJMISF38UT')
    ```

    Usage:
    ```
    df = EnigmaReader(datapath='enigma.inspections.restaurants.fl',
                      api_key='ARIAMFHKJMISF38UT').read()
    ```
    """

    def __init__(self,
                 datapath=None,
                 api_key=None,
                 retry_count=5,
                 pause=0.250,
                 session=None):

        # Forward the caller's session; it used to be silently dropped.
        super(EnigmaReader, self).__init__(symbols=[],
                                           retry_count=retry_count,
                                           pause=pause,
                                           session=session)

        # An explicit key wins; otherwise fall back to the environment.
        if api_key is None:
            api_key = os.getenv('ENIGMA_API_KEY')
        if api_key is None:
            raise ValueError(
                "Please provide an Enigma API key or set the ENIGMA_API_KEY "
                "environment variable\n"
                "If you do not have an API key, you can get one here: "
                "https://app.enigma.io/signup")
        self._api_key = api_key

        if not isinstance(datapath, compat.string_types):
            raise ValueError(
                "The Enigma datapath must be a string "
                "(ex: 'enigma.inspections.restaurants.fl')")
        self._datapath = datapath

    @property
    def url(self):
        """Export endpoint for this reader's API key and datapath."""
        return 'https://api.enigma.io/v2/export/{}/{}'.format(self._api_key,
                                                              self._datapath)

    @property
    def export_key(self):
        """Key of the export response that holds the download URL."""
        return 'export_url'

    @property
    def _head_key(self):
        """Key of the export response that holds the URL to poll."""
        return 'head_url'

    def _request(self, url):
        """GET ``url`` through the reader's session; raise on HTTP errors."""
        self.session.headers.update({'User-Agent': 'pandas-datareader'})
        resp = self.session.get(url)
        resp.raise_for_status()
        return resp

    def _decompress_export(self, compressed_export_data):
        """Decompress a gzip payload and return the raw bytes."""
        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
        return zlib.decompress(compressed_export_data, 16 + zlib.MAX_WBITS)

    def extract_export_url(self, delay=10, max_attempts=10):
        """
        Performs an HTTP HEAD request on 'head_url' until it succeeds.
        This allows the Enigma API time to export the requested data.

        Parameters
        ----------
        delay : number, default 10
            Seconds to sleep between failed HEAD requests.
        max_attempts : int, default 10
            Number of failed HEAD requests tolerated before the last
            error is re-raised.

        Returns
        -------
        The value stored under ``export_key`` in the export response.

        Raises
        ------
        requests.exceptions.RequestException
            If the export request fails, or if the HEAD request still
            fails after ``max_attempts`` retries.
        KeyError
            If the export response lacks the expected keys.
        """
        # Parse the response once; a malformed payload fails immediately
        # instead of being retried as if the export were merely not ready.
        export_info = self._request(self.url).json()
        head_url = export_info[self._head_key]
        export_url = export_info[self.export_key]

        attempts = 0
        while True:
            try:
                # Poll through the session so its headers/config apply.
                self.session.head(head_url).raise_for_status()
            except requests.exceptions.RequestException:
                attempts += 1
                if attempts > max_attempts:
                    raise
                time.sleep(delay)
            else:
                return export_url

    def read(self):
        """Download the gzipped CSV export and return it as a DataFrame."""
        export_gzipped_req = self._request(self.extract_export_url())
        raw_csv = self._decompress_export(export_gzipped_req.content)
        # zlib yields bytes; StringIO needs text on Python 3.
        return pd.read_csv(StringIO(raw_csv.decode('utf-8')))
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import os
2+
3+
import requests
4+
from requests.exceptions import HTTPError
5+
6+
import nose
7+
import pandas.util.testing as tm
8+
from pandas.util.testing import (assert_series_equal, assert_frame_equal)
9+
from pandas_datareader.tests._utils import _skip_if_no_lxml
10+
11+
import pandas_datareader.data as web
12+
import pandas_datareader as pdr
13+
14+
TEST_API_KEY = os.getenv('ENIGMA_API_KEY')
15+
16+
17+
class TestEnigma(tm.TestCase):
    """
    Tests for the Enigma data source.

    These tests call the Enigma API with the key read from the
    ENIGMA_API_KEY environment variable and are skipped when it is unset.
    """

    @classmethod
    def setUpClass(cls):
        super(TestEnigma, cls).setUpClass()
        _skip_if_no_lxml()
        # Without a key the reader raises ValueError (and the bad-key test
        # would fail on ``None + 'xxx'``), so skip instead of erroring.
        if TEST_API_KEY is None:
            raise nose.SkipTest('ENIGMA_API_KEY is not set')

    def test_enigma(self):
        # Both entry points should return the dataset's columns.
        df = web.DataReader('enigma.inspections.restaurants.fl',
                            'enigma',
                            access_key=TEST_API_KEY)
        self.assertTrue('serialid' in list(df.columns))

        df = pdr.get_data_enigma('enigma.inspections.restaurants.fl',
                                 TEST_API_KEY)
        self.assertTrue('serialid' in list(df))

    def test_bad_key(self):
        # An invalid API key must surface as an HTTPError.
        self.assertRaises(HTTPError,
                          web.DataReader,
                          'enigma.inspections.restaurants.fl',
                          'enigma',
                          access_key=TEST_API_KEY + 'xxx')

    def test_bad_url(self):
        # An unknown datapath must surface as an HTTPError.
        self.assertRaises(HTTPError,
                          web.DataReader,
                          'enigma.inspections.restaurants.fllzzy',
                          'enigma',
                          access_key=TEST_API_KEY)
50+
51+
52+
if __name__ == '__main__':
    # Run this module's tests through nose when executed as a script.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)

0 commit comments

Comments
 (0)