Skip to content

Commit a88c84e

Browse files
gliptakjreback
authored andcommitted
Added QuandlReader (plus tests and doc) (#361)
1 parent 6cce5f1 commit a88c84e

File tree

5 files changed

+256
-5
lines changed

5 files changed

+256
-5
lines changed

docs/source/remote_data.rst

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Currently the following sources are supported:
2626
- :ref:`Yahoo! Finance<remote_data.yahoo>`
2727
- :ref:`Google Finance<remote_data.google>`
2828
- :ref:`Enigma<remote_data.enigma>`
29+
- :ref:`Quandl<remote_data.quandl>`
2930
- :ref:`St.Louis FED (FRED)<remote_data.fred>`
3031
- :ref:`Kenneth French's data library<remote_data.ff>`
3132
- :ref:`World Bank<remote_data.wb>`
@@ -79,7 +80,7 @@ Historical dividends from Yahoo! Finance.
7980
.. _remote_data.yahoo_quotes:
8081

8182
Yahoo! Finance Quotes
82-
----------------------
83+
---------------------
8384
***Experimental***
8485

8586
The YahooQuotesReader class allows you to get quotes data from Yahoo! Finance.
@@ -165,7 +166,7 @@ Google Finance
165166
.. _remote_data.google_quotes:
166167

167168
Google Finance Quotes
168-
----------------------
169+
---------------------
169170
***Experimental***
170171

171172
The GoogleQuotesReader class allows you to get quotes data from Google Finance.
@@ -213,6 +214,33 @@ the world's largest repository of structured public data.
213214
df = pdr.get_data_enigma('enigma.trade.ams.toxic.2015', os.getenv('ENIGMA_API_KEY'))
214215
df.columns
215216
217+
.. _remote_data.quandl:
218+
219+
Quandl
220+
======
221+
222+
Daily financial data (prices of stocks, ETFs etc.) from
223+
`Quandl <https://www.quandl.com/>`__.
224+
The symbol names consist of two parts: DB name and symbol name.
225+
DB names can be all the
226+
`free ones listed on the Quandl website <https://blog.quandl.com/free-data-on-quandl>`__.
227+
Symbol names vary with DB name; for WIKI (US stocks), they are the common
228+
ticker symbols, in some other cases (such as FSE) they can be a bit strange.
229+
Some sources are also mapped to suitable ISO country codes in the dot suffix
230+
style shown in the example below, currently available for
231+
`BE, CN, DE, FR, IN, JP, NL, PT, UK, US <https://www.quandl.com/search?query=>`__.
232+
233+
As of June 2017, each DB has a different data schema,
234+
the coverage in terms of time range is sometimes surprisingly small, and
235+
the data quality is not always good.
236+
237+
.. ipython:: python
238+
239+
import pandas_datareader.data as web
240+
symbol = 'WIKI/AAPL' # or 'AAPL.US'
241+
df = web.DataReader(symbol, 'quandl', "2015-01-01", "2015-01-05")
242+
df.loc['2015-01-02']
243+
216244
.. _remote_data.fred:
217245

218246
FRED
@@ -548,7 +576,7 @@ Download currency historical rate from `Oanda <https://www.oanda.com/>`__.
548576
.. _remote_data.nasdaq_symbols:
549577
550578
Nasdaq Trader Symbol Definitions
551-
==============================
579+
================================
552580
553581
Download the latest symbols from `Nasdaq <ftp://ftp.nasdaqtrader.com/SymbolDirectory/nasdaqtraded.txt>`_.
554582
@@ -572,5 +600,3 @@ available. More information on the `field <http://www.nasdaqtrader.com/trader.as
572600
NASDAQ Symbol IBM
573601
NextShares False
574602
Name: IBM, dtype: object
575-
576-

docs/source/whatsnew/v0.5.0.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ Highlights include:
1313
:local:
1414
:backlinks: none
1515

16+
.. _whatsnew_050.enhancements:
17+
18+
Enhancements
19+
~~~~~~~~~~~~
20+
21+
- ``DataReader`` now supports Quandl, see :ref:`here<remote_data.quandl>` (:issue:`361`).
22+
1623
.. _whatsnew_050.bug_fixes:
1724

1825
Bug Fixes

pandas_datareader/data.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from pandas_datareader.enigma import EnigmaReader
2323
from pandas_datareader.oanda import get_oanda_currency_historical_rates
2424
from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
25+
from pandas_datareader.quandl import QuandlReader
2526

2627

2728
def get_data_fred(*args, **kwargs):
@@ -56,6 +57,10 @@ def get_quote_google(*args, **kwargs):
5657
return GoogleQuotesReader(*args, **kwargs).read()
5758

5859

60+
def get_data_quandl(*args, **kwargs):
    """
    Build a QuandlReader from the given arguments and return what its
    ``read()`` method produces.

    All positional and keyword arguments are forwarded unchanged to the
    QuandlReader constructor.
    """
    reader = QuandlReader(*args, **kwargs)
    return reader.read()
62+
63+
5964
def DataReader(name, data_source=None, start=None, end=None,
6065
retry_count=3, pause=0.001, session=None, access_key=None):
6166
"""
@@ -168,6 +173,10 @@ def DataReader(name, data_source=None, start=None, end=None,
168173
raise ValueError("Only the string 'symbols' is supported for "
169174
"Nasdaq, not %r" % (name,))
170175
return get_nasdaq_symbols(retry_count=retry_count, pause=pause)
176+
elif data_source == "quandl":
177+
return QuandlReader(symbols=name, start=start, end=end,
178+
retry_count=retry_count, pause=pause,
179+
session=session).read()
171180
else:
172181
msg = "data_source=%r is not implemented" % data_source
173182
raise NotImplementedError(msg)

pandas_datareader/quandl.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import re
2+
3+
from pandas_datareader.base import _DailyBaseReader
4+
5+
6+
class QuandlReader(_DailyBaseReader):

    """
    Returns DataFrame of historical stock prices from symbol, over date
    range, start to end.

    .. versionadded:: 0.5.0

    Parameters
    ----------
    symbols : string
        Possible formats:
        1. DB/SYM: The Quandl 'codes': DB is the database name,
        SYM is a ticker-symbol-like Quandl abbreviation
        for a particular security.
        2. SYM.CC: SYM is the same symbol and CC is an ISO country code,
        will try to map to the best single Quandl database for that country.
        Beware of ambiguous symbols (different securities per country)!
        3. SYM: a bare symbol is looked up in the 'WIKI' database.
        Note: Cannot use more than a single string because of the inflexible
        way the URL is composed of url and _get_params in the superclass
    start : string
        Starting date, timestamp. Parses many different kind of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : int, default 0
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    chunksize : int, default 25
        Number of symbols to download consecutively before initiating pause.
    session : Session, default None
        requests.sessions.Session instance to be used
    """

    _BASE_URL = "https://www.quandl.com/api/v3/datasets/"

    # Accepted symbol spellings: 'SYM', 'DB/SYM' or 'SYM.CC'.
    # Group 1: first part, group 3: separator, group 4: second part.
    _SYMBOL_REGEX = r"([A-Z0-9]+)(([/\.])([A-Z0-9_]+))?"

    # Characters stripped from the column names of the downloaded data so
    # that the resulting names can be used as attribute-style identifiers.
    _COLUMN_STRIP_RE = re.compile(r"[ ./%()'\-]")

    @property
    def url(self):
        """
        Full request URL (dataset path plus query string) for the symbol.

        Raises
        ------
        ValueError
            If the symbol does not match one of the supported formats, or
            if its country code suffix has no known Quandl dataset.
        """
        symbol = (self.symbols if isinstance(self.symbols, str)
                  else self.symbols[0])
        mm = self._fullmatch(self._SYMBOL_REGEX, symbol)
        # Explicit raise instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let invalid input through.
        if not mm:
            raise ValueError(
                "Symbol '%s' must conform to Quandl convention 'DB/SYM'" %
                symbol)
        separator = mm.group(3)
        if separator == "/":
            # --- normal Quandl DB/SYM convention:
            datasetname = mm.group(1)
            symbol = mm.group(4)
        elif separator == ".":
            # secondary convention SYM.CountryCode:
            symbol = mm.group(1)
            datasetname = self._db_from_countrycode(mm.group(4))
        else:
            # bare symbol: default database; symbol stays itself
            datasetname = 'WIKI'
        # A tuple of pairs keeps the query string order deterministic on
        # every Python version.
        params = (
            ('start_date', self.start.strftime('%Y-%m-%d')),
            ('end_date', self.end.strftime('%Y-%m-%d')),
            ('order', "asc"),
        )
        paramstring = '&'.join('%s=%s' % pair for pair in params)
        return '%s%s/%s.csv?%s' % (self._BASE_URL, datasetname,
                                   symbol, paramstring)

    def _fullmatch(self, regex, string, flags=0):
        """Emulate python-3.4 re.fullmatch()."""
        return re.match("(?:" + regex + r")\Z", string, flags=flags)

    _COUNTRYCODE_TO_DATASET = dict(
        # https://www.quandl.com/data/EURONEXT-Euronext-Stock-Exchange
        BE='EURONEXT',
        # https://www.quandl.com/data/HKEX-Hong-Kong-Exchange
        CN='HKEX',
        # https://www.quandl.com/data/SSE-Boerse-Stuttgart
        DE='SSE',
        FR='EURONEXT',
        # https://www.quandl.com/data/NSE-National-Stock-Exchange-of-India
        IN='NSE',
        # https://www.quandl.com/data/TSE-Tokyo-Stock-Exchange
        JP='TSE',
        NL='EURONEXT',
        PT='EURONEXT',
        # https://www.quandl.com/data/LSE-London-Stock-Exchange
        UK='LSE',
        # https://www.quandl.com/data/WIKI-Wiki-EOD-Stock-Prices
        US='WIKI',
    )

    def _db_from_countrycode(self, code):
        """
        Return the Quandl database name mapped to an ISO country code.

        Raises
        ------
        ValueError
            If no dataset is registered for ``code``.
        """
        if code not in self._COUNTRYCODE_TO_DATASET:
            raise ValueError(
                "No Quandl dataset known for country code '%s'" % code)
        return self._COUNTRYCODE_TO_DATASET[code]

    def _get_params(self, symbol):
        """
        Return no extra request parameters; the ``url`` property already
        embeds the complete query string.
        """
        return {}

    def read(self):
        """
        Read the data via the superclass and return it with cleaned-up
        column names (spaces and the characters ``. / % ( ) ' -`` removed).
        """
        df = super(QuandlReader, self).read()
        strip = self._COLUMN_STRIP_RE.sub
        df.rename(columns=lambda n: strip('', n), inplace=True)
        return df
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import pandas_datareader.data as web
2+
from pandas_datareader._utils import RemoteDataError
3+
from pandas_datareader._testing import skip_on_exception
4+
5+
6+
class TestQuandl(object):
    """
    Tests for the 'quandl' data source of ``web.DataReader``, one test per
    Quandl database. Every test is wrapped in
    ``skip_on_exception(RemoteDataError)``.
    """

    # we test data from 10 years back where possible, 2 years otherwise, or...
    start10 = "2007-01-01"  # over ten years back
    end10 = "2007-01-05"
    day10 = "2007-01-04"
    start2 = "2015-01-01"  # over two years back
    end2 = "2015-01-05"
    day2 = "2015-01-02"

    def check_headers(self, df, expected_cols):
        """Assert that the columns of df are exactly expected_cols,
        ignoring their order."""
        wanted = frozenset(expected_cols)
        act_cols = frozenset(df.columns.tolist())
        assert wanted == act_cols, "unexpected cols: " + str(act_cols)

    @skip_on_exception(RemoteDataError)
    def test_db_wiki_us(self):
        """Bare symbol 'F' is served from the WIKI database."""
        columns = ['Open', 'High', 'Low', 'Close', 'Volume',
                   'ExDividend', 'SplitRatio', 'AdjOpen',
                   'AdjHigh', 'AdjLow', 'AdjClose', 'AdjVolume']
        frame = web.DataReader('F', 'quandl', self.start10, self.end10)
        self.check_headers(frame, columns)
        assert frame.Close.at[self.day10] == 7.70

    @skip_on_exception(RemoteDataError)
    def test_db_fse_frankfurt(self):
        """FSE database, addressed as DB/SYM."""
        # ALV_X: Allianz SE
        columns = ['Open', 'High', 'Low', 'Close', 'Change',
                   'TradedVolume', 'Turnover',
                   'LastPriceoftheDay', 'DailyTradedUnits',
                   'DailyTurnover']
        frame = web.DataReader('FSE/ALV_X', 'quandl',
                               self.start10, self.end10)
        self.check_headers(frame, columns)
        assert frame.Close.at[self.day10] == 159.45

    @skip_on_exception(RemoteDataError)
    def test_db_sse_de_stuttgart(self):
        """SSE database, addressed as DB/SYM and via the '.DE' suffix."""
        # ALV: Allianz SE
        columns = ["High", "Low", "Last", "PreviousDayPrice", "Volume"]
        by_db = web.DataReader('SSE/ALV', 'quandl', self.start2, self.end2)
        self.check_headers(by_db, columns)
        # as of 2017-06-11: PreviousDayPrice can be outside Low/High range;
        # Volume can be NaN
        assert by_db.Last.at[self.day2] == 136.47
        by_country = web.DataReader('ALV.DE', 'quandl',
                                    self.start2, self.end2)
        assert (by_db.Last == by_country.Last).all()

    @skip_on_exception(RemoteDataError)
    def test_db_euronext_be_fr_nl_pt(self):
        """EURONEXT database, addressed as DB/SYM and via the '.FR' suffix."""
        # FP: Total SA
        # as of 2017-06-11, some datasets end a few months after their start,
        # e.g. ALVD, BASD
        columns = ["Open", "High", "Low", "Last", "Turnover", "Volume"]
        by_db = web.DataReader('EURONEXT/FP', 'quandl',
                               self.start2, self.end2)
        self.check_headers(by_db, columns)
        assert by_db.Last.at[self.day2] == 42.525
        by_country = web.DataReader('FP.FR', 'quandl',
                                    self.start2, self.end2)
        assert (by_db.Last == by_country.Last).all()

    @skip_on_exception(RemoteDataError)
    def test_db_lse_uk(self):
        """LSE database, addressed as DB/SYM."""
        # RBS: Royal Bank of Scotland
        columns = ["High", "Low", "LastClose", "Price",
                   "Volume", "Change", "Var"]
        frame = web.DataReader('LSE/RBS', 'quandl', self.start10, self.end10)
        self.check_headers(frame, columns)
        # as of 2017-06-11, Price == LastClose, all others are NaN
        assert frame.Price.at[self.day10] == 5950.983

    @skip_on_exception(RemoteDataError)
    def test_db_nse_in(self):
        """NSE database, addressed as DB/SYM."""
        # TCS: Tata Consultancy Services
        columns = ['Open', 'High', 'Low', 'Last', 'Close',
                   'TotalTradeQuantity', 'TurnoverLacs']
        frame = web.DataReader('NSE/TCS', 'quandl', self.start10, self.end10)
        self.check_headers(frame, columns)
        assert frame.Close.at[self.day10] == 1259.05

    @skip_on_exception(RemoteDataError)
    def test_db_tse_jp(self):
        """TSE database, addressed as DB/SYM."""
        # TSE/6758: Sony Corp.
        columns = ['Open', 'High', 'Low', 'Close', 'Volume']
        frame = web.DataReader('TSE/6758', 'quandl', self.start10, self.end10)
        self.check_headers(frame, columns)
        assert frame.Close.at[self.day10] == 5190.0

    @skip_on_exception(RemoteDataError)
    def test_db_hkex_cn(self):
        """HKEX database, addressed as DB/SYM."""
        # HKEX/00941: China Mobile
        columns = ['NominalPrice', 'NetChange', 'Change', 'Bid',
                   'Ask', 'PEx', 'High', 'Low', 'PreviousClose',
                   'ShareVolume000', 'Turnover000', 'LotSize']
        frame = web.DataReader('HKEX/00941', 'quandl',
                               self.start2, self.end2)
        self.check_headers(frame, columns)
        assert frame.High.at[self.day2] == 91.9

0 commit comments

Comments
 (0)