New package structure #68

Closed · wants to merge 1 commit
116 changes: 116 additions & 0 deletions pandas_datareader/commons.py
@@ -0,0 +1,116 @@
import time
import warnings
import numpy as np
import datetime as dt

import pandas.compat as compat
from pandas.core.datetools import to_datetime
from pandas.core.common import PandasError
from pandas import Panel, DataFrame
from pandas.io.common import urlopen
from pandas import read_csv
from pandas.compat import StringIO, bytes_to_str
from pandas.util.testing import _network_error_classes


class SymbolWarning(UserWarning):
    pass


class RemoteDataError(PandasError, IOError):
    pass


def _sanitize_dates(start, end):
    """Return (start, end) as datetimes, defaulting to 2010-01-01 and today."""
    start = to_datetime(start)
    end = to_datetime(end)
    if start is None:
        start = dt.datetime(2010, 1, 1)
    if end is None:
        end = dt.datetime.today()
    return start, end


def _in_chunks(seq, size):
    """
    Return the sequence in chunks of length `size`.
    """
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


def _retry_read_url(url, retry_count, pause, name):
    for _ in range(retry_count):
        time.sleep(pause)

        # kludge to close the socket ASAP
        try:
            with urlopen(url) as resp:
                lines = resp.read()
        except _network_error_classes:
            pass
        else:
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                          parse_dates=True, na_values='-')[::-1]
            # Yahoo! Finance sometimes returns two rows for the most recent
            # business day; drop the duplicate.
            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]

            # Get rid of unicode characters in the index name.
            try:
                rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore')
            except AttributeError:
                # Python 3 strings have no decode method.
                rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()

            return rs

    raise IOError("after %d tries, %s did not "
                  "return a 200 for url %r" % (retry_count, name, url))


def _dl_mult_symbols(symbols, start, end, interval, chunksize, retry_count, pause,
                     method):
    stocks = {}
    failed = []
    passed = []
    for sym_group in _in_chunks(symbols, chunksize):
        for sym in sym_group:
            try:
                stocks[sym] = method(sym, start, end, interval, retry_count, pause)
                passed.append(sym)
            except IOError:
                warnings.warn('Failed to read symbol: {0!r}, replacing with '
                              'NaN.'.format(sym), SymbolWarning)
                failed.append(sym)

    if len(passed) == 0:
        raise RemoteDataError("No data fetched using "
                              "{0!r}".format(method.__name__))
    try:
        if len(stocks) > 0 and len(failed) > 0 and len(passed) > 0:
            df_na = stocks[passed[0]].copy()
            df_na[:] = np.nan
            for sym in failed:
                stocks[sym] = df_na
        return Panel(stocks).swapaxes('items', 'minor')
    except AttributeError:
        # cannot construct a panel with just 1D nans indicating no data
        raise RemoteDataError("No data fetched using "
                              "{0!r}".format(method.__name__))

def _get_data_from(symbols, start, end, interval, retry_count, pause,
                   chunksize, src_fn):

    # A single symbol, e.g. 'GOOG'
    if isinstance(symbols, (compat.string_types, int)):
        hist_data = src_fn(symbols, start, end, interval, retry_count, pause)
    # A DataFrame of symbols, taken from its index
    elif isinstance(symbols, DataFrame):
        hist_data = _dl_mult_symbols(symbols.index, start, end, interval, chunksize,
                                     retry_count, pause, src_fn)
    # Otherwise an iterable of symbols, e.g. ['GOOG', 'AAPL', 'MSFT']
    else:
        hist_data = _dl_mult_symbols(symbols, start, end, interval, chunksize,
                                     retry_count, pause, src_fn)

    return hist_data
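
For context, here is a minimal usage sketch of how these helpers could be wired together. It is not part of this PR: the endpoint, the `_get_hist_example` reader, and the module path `pandas_datareader.commons` are assumptions for illustration; the real per-source readers (Yahoo!, Google) live in their own modules and only need to expose the `(symbol, start, end, interval, retry_count, pause)` signature that `_get_data_from` and `_dl_mult_symbols` call.

    # Hypothetical example, assuming this module is importable as
    # pandas_datareader.commons.
    from pandas_datareader.commons import (_get_data_from, _retry_read_url,
                                           _sanitize_dates)

    # Hypothetical CSV endpoint; the real URL builders are source-specific.
    _BASE_URL = 'http://example.com/history?s=%s&from=%s&to=%s'


    def _get_hist_example(sym, start, end, interval, retry_count, pause):
        # Normalize the date range, build the request URL, and fetch with retries.
        start, end = _sanitize_dates(start, end)
        url = _BASE_URL % (sym, start.date(), end.date())
        return _retry_read_url(url, retry_count, pause, 'Example')


    # A single symbol returns a DataFrame; a list of symbols is downloaded in
    # chunks and returned as a Panel, with failed symbols filled with NaN.
    single = _get_data_from('GOOG', '2014-01-01', '2014-12-31', 'd',
                            retry_count=3, pause=0.001, chunksize=25,
                            src_fn=_get_hist_example)
    multi = _get_data_from(['GOOG', 'AAPL'], '2014-01-01', '2014-12-31', 'd',
                           retry_count=3, pause=0.001, chunksize=25,
                           src_fn=_get_hist_example)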