Commit d6e10a1

femtotrader authored and davidastephens committed
CLN: Split into subpackages
1 parent df405e2 commit d6e10a1

File tree

14 files changed: +1284 −1234 lines changed


pandas_datareader/data.py

Lines changed: 20 additions & 1232 deletions
Large diffs are not rendered by default.
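The data.py diff itself is not rendered, but the totals (20 additions against 1232 deletions) say the monolithic reader code moved out into the subpackages below, leaving thin dispatch wrappers behind. A minimal sketch of what such a delegating data.py could look like — the wrapper body and exact source names here are assumptions, not the unrendered diff:

import pandas_datareader.fred as fred
import pandas_datareader.famafrench as famafrench
import pandas_datareader.google.daily as google_daily
import pandas_datareader.yahoo.actions as yahoo_actions


def DataReader(name, data_source=None, start=None, end=None,
               retry_count=3, pause=0.001):
    # Hypothetical dispatch: route each data_source to its subpackage's
    # private _get_data (signatures taken from the files added below).
    if data_source == 'fred':
        return fred._get_data(name, start=start, end=end)
    elif data_source == 'famafrench':
        return famafrench._get_data(name)
    elif data_source == 'google':
        return google_daily._get_data(symbols=name, start=start, end=end,
                                      retry_count=retry_count, pause=pause)
    elif data_source == 'yahoo-actions':
        return yahoo_actions._get_data(name, start=start, end=end,
                                       retry_count=retry_count, pause=pause)
    raise NotImplementedError('data_source=%r is not implemented' % data_source)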

pandas_datareader/famafrench.py

Lines changed: 43 additions & 0 deletions
@@ -0,0 +1,43 @@
import tempfile
import numpy as np
from pandas.io.common import urlopen, ZipFile
from pandas.compat import lmap
from pandas import DataFrame

_URL = 'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp'


def _get_data(name):
    # path of zip files
    zip_file_path = '{0}/{1}_TXT.zip'.format(_URL, name)

    with urlopen(zip_file_path) as url:
        raw = url.read()

    with tempfile.TemporaryFile() as tmpf:
        tmpf.write(raw)

        with ZipFile(tmpf, 'r') as zf:
            data = zf.open(zf.namelist()[0]).readlines()

    line_lengths = np.array(lmap(len, data))
    file_edges = np.where(line_lengths == 2)[0]

    datasets = {}
    edges = zip(file_edges + 1, file_edges[1:])
    for i, (left_edge, right_edge) in enumerate(edges):
        dataset = [d.split() for d in data[left_edge:right_edge]]
        if len(dataset) > 10:
            ncol_raw = np.array(lmap(len, dataset))
            ncol = np.median(ncol_raw)
            header_index = np.where(ncol_raw == ncol - 1)[0][-1]
            header = dataset[header_index]
            ds_header = dataset[header_index + 1:]
            # to ensure the header is unique
            header = ['{0} {1}'.format(j, hj) for j, hj in enumerate(header,
                                                                     start=1)]
            index = np.array([d[0] for d in ds_header], dtype=int)
            dataset = np.array([d[1:] for d in ds_header], dtype=float)
            datasets[i] = DataFrame(dataset, index, columns=header)

    return datasets
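A hedged usage sketch for the new module: 'F-F_Research_Data_Factors' is one of the dataset names published in Ken French's data library (an assumption here, not something this diff exercises). _get_data returns a dict of DataFrames, one per table found in the zipped text file:

from pandas_datareader import famafrench

# Each Fama-French zip can hold several tables; they are split on the
# two-character (blank) lines and keyed by position in the file.
datasets = famafrench._get_data('F-F_Research_Data_Factors')
monthly = datasets[0]   # first table, indexed by yyyymm-style integers
# Column headers are made unique by the enumerate() above,
# e.g. '1 Mkt-RF', '2 SMB', '3 HML', '4 RF'.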

pandas_datareader/fred.py

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
import datetime as dt
from pandas.core.common import is_list_like
from pandas.io.common import urlopen
from pandas import concat, read_csv

from pandas_datareader.utils import _sanitize_dates

_URL = "http://research.stlouisfed.org/fred2/series/"


def _get_data(name, start=dt.datetime(2010, 1, 1),
              end=dt.datetime.today()):
    """
    Get data for the given name from the St. Louis FED (FRED).
    Date format is datetime

    Returns a DataFrame.

    If multiple names are passed for "series" then the index of the
    DataFrame is the outer join of the indices of each series.
    """
    start, end = _sanitize_dates(start, end)

    if not is_list_like(name):
        names = [name]
    else:
        names = name

    urls = [_URL + '%s' % n + '/downloaddata/%s' % n + '.csv' for
            n in names]

    def fetch_data(url, name):
        with urlopen(url) as resp:
            data = read_csv(resp, index_col=0, parse_dates=True,
                            header=None, skiprows=1, names=["DATE", name],
                            na_values='.')
        try:
            return data.truncate(start, end)
        except KeyError:
            if data.ix[3].name[7:12] == 'Error':
                raise IOError("Failed to get the data. Check that {0!r} is "
                              "a valid FRED series.".format(name))
            raise
    df = concat([fetch_data(url, n) for url, n in zip(urls, names)],
                axis=1, join='outer')
    return df
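For context, a usage sketch under the assumption that 'GDP' and 'CPIAUCSL' remain valid FRED series IDs (they are not referenced by this diff):

import datetime as dt
from pandas_datareader import fred

# A single name yields a one-column DataFrame trimmed to [start, end].
gdp = fred._get_data('GDP', start=dt.datetime(2010, 1, 1))

# A list of names yields one column per series, outer-joined on date.
both = fred._get_data(['GDP', 'CPIAUCSL'])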

pandas_datareader/google/__init__.py

Whitespace-only changes.

pandas_datareader/google/daily.py

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
from pandas.io.common import urlencode
from pandas_datareader.utils import _retry_read_url
from pandas_datareader.utils import _sanitize_dates
from pandas_datareader.utils import _get_data_from

_URL = 'http://www.google.com/finance/historical?'


def _get_data(symbols=None, start=None, end=None, retry_count=3,
              pause=0.001, chunksize=25):
    """
    Returns DataFrame/Panel of historical stock prices from symbols, over date
    range, start to end. To avoid being penalized by Google Finance servers,
    pauses between downloading 'chunks' of symbols can be specified.

    Parameters
    ----------
    symbols : string, array-like object (list, tuple, Series), or DataFrame
        Single stock symbol (ticker), array-like object of symbols or
        DataFrame with index containing stock symbols.
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kinds of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : float, default 0.001
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    chunksize : int, default 25
        Number of symbols to download consecutively before initiating pause.

    Returns
    -------
    hist_data : DataFrame (str) or Panel (array-like object, DataFrame)
    """
    return _get_data_from(symbols, start, end, None, retry_count, pause,
                          chunksize, _get_data_one)


def _get_data_one(sym, start, end, interval, retry_count, pause):
    """
    Get historical data for the given name from google.
    Date format is datetime

    Returns a DataFrame.
    """
    start, end = _sanitize_dates(start, end)

    # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv
    url = "%s%s" % (_URL,
                    urlencode({"q": sym,
                               "startdate": start.strftime('%b %d, %Y'),
                               "enddate": end.strftime('%b %d, %Y'),
                               "output": "csv"}))
    return _retry_read_url(url, retry_count, pause, 'Google')
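A hedged usage sketch (the ticker symbols are illustrative; nothing in this diff fixes them):

from pandas_datareader.google import daily

# One symbol -> DataFrame of historical prices.
goog = daily._get_data('GOOG', start='2014-01-01', end='2014-06-30')

# Several symbols -> Panel, downloaded in chunks via _get_data_from.
panel = daily._get_data(['GOOG', 'AAPL', 'MSFT'], pause=0.5)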

pandas_datareader/google/quotes.py

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
def _get_data(symbols):
    """
    Get current Google quote.

    (Should) return a DataFrame.

    TODO: Not implemented
    """
    msg = ("Google Finance doesn't have this functionality - "
           "can't get quote for %s" % symbols)
    raise NotImplementedError(msg)
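As the stub stands, any call raises; a quick sketch of that behavior:

from pandas_datareader.google import quotes

try:
    quotes._get_data('GOOG')
except NotImplementedError as e:
    print(e)  # Google Finance doesn't have this functionality - ...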

pandas_datareader/tests/test_data.py

Lines changed: 3 additions & 2 deletions
@@ -24,8 +24,9 @@
 from urllib2 import HTTPError

 import pandas_datareader.data as web
-from pandas_datareader.data import (
-    DataReader, SymbolWarning, RemoteDataError, _yahoo_codes)
+from pandas_datareader.data import DataReader
+from pandas_datareader.utils import SymbolWarning, RemoteDataError
+from pandas_datareader.yahoo.quotes import _yahoo_codes

 def _skip_if_no_lxml():
     try:

pandas_datareader/utils.py

Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
import time
import warnings
import numpy as np
import datetime as dt

from pandas import to_datetime
import pandas.compat as compat
from pandas.core.common import PandasError
from pandas import Panel, DataFrame
from pandas.io.common import urlopen
from pandas import read_csv
from pandas.compat import StringIO, bytes_to_str
from pandas.util.testing import _network_error_classes


class SymbolWarning(UserWarning):
    pass


class RemoteDataError(PandasError, IOError):
    pass


def _get_data_from(symbols, start, end, interval, retry_count, pause,
                   chunksize, src_fn):

    # If a single symbol, (e.g., 'GOOG')
    if isinstance(symbols, (compat.string_types, int)):
        hist_data = src_fn(symbols, start, end, interval, retry_count, pause)
    # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
    elif isinstance(symbols, DataFrame):
        hist_data = _dl_mult_symbols(symbols.index, start, end, interval,
                                     chunksize, retry_count, pause, src_fn)
    else:
        hist_data = _dl_mult_symbols(symbols, start, end, interval, chunksize,
                                     retry_count, pause, src_fn)
    return hist_data


def _dl_mult_symbols(symbols, start, end, interval, chunksize, retry_count,
                     pause, method):
    stocks = {}
    failed = []
    passed = []
    for sym_group in _in_chunks(symbols, chunksize):
        for sym in sym_group:
            try:
                stocks[sym] = method(sym, start, end, interval, retry_count,
                                     pause)
                passed.append(sym)
            except IOError:
                warnings.warn('Failed to read symbol: {0!r}, replacing with '
                              'NaN.'.format(sym), SymbolWarning)
                failed.append(sym)

    if len(passed) == 0:
        raise RemoteDataError("No data fetched using "
                              "{0!r}".format(method.__name__))
    try:
        if len(stocks) > 0 and len(failed) > 0 and len(passed) > 0:
            df_na = stocks[passed[0]].copy()
            df_na[:] = np.nan
            for sym in failed:
                stocks[sym] = df_na
        return Panel(stocks).swapaxes('items', 'minor')
    except AttributeError:
        # cannot construct a panel with just 1D nans indicating no data
        raise RemoteDataError("No data fetched using "
                              "{0!r}".format(method.__name__))


def _sanitize_dates(start, end):
    """
    Return (datetime_start, datetime_end) tuple
    if start is None - default is 2010/01/01
    if end is None - default is today
    """
    start = to_datetime(start)
    end = to_datetime(end)
    if start is None:
        start = dt.datetime(2010, 1, 1)
    if end is None:
        end = dt.datetime.today()
    return start, end


def _in_chunks(seq, size):
    """
    Return sequence in 'chunks' of size defined by size
    """
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


def _retry_read_url(url, retry_count, pause, name):
    """
    Open url (and retry)
    """
    for _ in range(retry_count):
        time.sleep(pause)

        # kludge to close the socket ASAP
        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                          parse_dates=True, na_values='-')[::-1]
            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]

            # Get rid of unicode characters in index name.
            try:
                rs.index.name = rs.index.name.decode(
                    'unicode_escape').encode('ascii', 'ignore')
            except AttributeError:
                # Python 3 string has no decode method.
                rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()

            return rs

    raise IOError("after %d tries, %s did not "
                  "return a 200 for url %r" % (retry_count, name, url))

pandas_datareader/yahoo/__init__.py

Whitespace-only changes.

pandas_datareader/yahoo/actions.py

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
import time
import csv
from pandas import to_datetime, DataFrame
from pandas.io.common import urlopen
from pandas.util.testing import _network_error_classes
from pandas.compat import StringIO, bytes_to_str

from pandas_datareader.utils import _sanitize_dates

_URL = 'http://ichart.finance.yahoo.com/x?'


def _get_data(symbol, start=None, end=None, retry_count=3, pause=0.001):
    """
    Returns DataFrame of historical corporate actions (dividends and stock
    splits) from symbols, over date range, start to end. All dates in the
    resulting DataFrame correspond with dividend and stock split ex-dates.

    Parameters
    ----------
    symbol : string
        Single stock symbol (ticker).
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kinds of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : float, default 0.001
        Time, in seconds, of the pause between retries.
    """

    start, end = _sanitize_dates(start, end)
    url = (_URL + 's=%s' % symbol +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=v')

    for _ in range(retry_count):
        time.sleep(pause)

        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            actions_index = []
            actions_entries = []

            for line in csv.reader(StringIO(bytes_to_str(lines))):
                # Ignore lines that aren't dividends or splits (Yahoo
                # adds a bunch of irrelevant fields.)
                if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'):
                    continue

                action, date, value = line
                if action == 'DIVIDEND':
                    actions_index.append(to_datetime(date))
                    actions_entries.append({
                        'action': action,
                        'value': float(value)
                    })
                elif action == 'SPLIT' and ':' in value:
                    # Convert the split ratio to a fraction. For example a
                    # 4:1 split expressed as a fraction is 1/4 = 0.25.
                    denominator, numerator = value.split(':', 1)
                    split_fraction = float(numerator) / float(denominator)

                    actions_index.append(to_datetime(date))
                    actions_entries.append({
                        'action': action,
                        'value': split_fraction
                    })

            return DataFrame(actions_entries, index=actions_index)

    raise IOError("after %d tries, Yahoo! did not "
                  "return a 200 for url %r" % (retry_count, url))
