diff --git a/pandas_datareader/commons.py b/pandas_datareader/commons.py new file mode 100644 index 00000000..d3457e04 --- /dev/null +++ b/pandas_datareader/commons.py @@ -0,0 +1,119 @@ +import time +import warnings +import numpy as np +import datetime as dt + +from pandas import to_datetime +import pandas.compat as compat +from pandas.core.common import PandasError +from pandas import Panel, DataFrame +from pandas.io.common import urlopen +from pandas import read_csv +from pandas.compat import StringIO, bytes_to_str +from pandas.util.testing import _network_error_classes + + +class SymbolWarning(UserWarning): + pass + +class RemoteDataError(PandasError, IOError): + pass + +def _get_data_from(symbols, start, end, interval, retry_count, pause, + chunksize, src_fn): + + # If a single symbol, (e.g., 'GOOG') + if isinstance(symbols, (compat.string_types, int)): + hist_data = src_fn(symbols, start, end, interval, retry_count, pause) + # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT']) + elif isinstance(symbols, DataFrame): + hist_data = _dl_mult_symbols(symbols.index, start, end, interval, chunksize, + retry_count, pause, src_fn) + else: + hist_data = _dl_mult_symbols(symbols, start, end, interval, chunksize, + retry_count, pause, src_fn) + return hist_data + +def _dl_mult_symbols(symbols, start, end, interval, chunksize, retry_count, pause, + method): + stocks = {} + failed = [] + passed = [] + for sym_group in _in_chunks(symbols, chunksize): + for sym in sym_group: + try: + stocks[sym] = method(sym, start, end, interval, retry_count, pause) + passed.append(sym) + except IOError: + warnings.warn('Failed to read symbol: {0!r}, replacing with ' + 'NaN.'.format(sym), SymbolWarning) + failed.append(sym) + + if len(passed) == 0: + raise RemoteDataError("No data fetched using " + "{0!r}".format(method.__name__)) + try: + if len(stocks) > 0 and len(failed) > 0 and len(passed) > 0: + df_na = stocks[passed[0]].copy() + df_na[:] = np.nan + for sym in failed: + stocks[sym] = 
df_na + return Panel(stocks).swapaxes('items', 'minor') + except AttributeError: + # cannot construct a panel with just 1D nans indicating no data + raise RemoteDataError("No data fetched using " + "{0!r}".format(method.__name__)) + + +def _sanitize_dates(start, end): + """ + Return (datetime_start, datetime_end) tuple + if start is None - default is 2010/01/01 + if end is None - default is today + """ + start = to_datetime(start) + end = to_datetime(end) + if start is None: + start = dt.datetime(2010, 1, 1) + if end is None: + end = dt.datetime.today() + return start, end + +def _in_chunks(seq, size): + """ + Return sequence in 'chunks' of size defined by size + """ + return (seq[pos:pos + size] for pos in range(0, len(seq), size)) + +def _retry_read_url(url, retry_count, pause, name): + """ + Open url (and retry) + """ + for _ in range(retry_count): + time.sleep(pause) + + # kludge to close the socket ASAP + try: + with urlopen(url) as resp: + lines = resp.read() + except _network_error_classes: + pass + else: + rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0, + parse_dates=True, na_values='-')[::-1] + # Yahoo! Finance sometimes does this awesome thing where they + # return 2 rows for the most recent business day + if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover + rs = rs[:-1] + + #Get rid of unicode characters in index name. + try: + rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore') + except AttributeError: + #Python 3 string has no decode method. 
+ rs.index.name = rs.index.name.encode('ascii', 'ignore').decode() + + return rs + + raise IOError("after %d tries, %s did not " + "return a 200 for url %r" % (retry_count, name, url)) diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py index 10d20058..3c676dfc 100644 --- a/pandas_datareader/data.py +++ b/pandas_datareader/data.py @@ -3,34 +3,22 @@ """ -import warnings -import tempfile -import datetime as dt -import time -import csv - -from collections import defaultdict - -import numpy as np -from pandas.compat import( - StringIO, bytes_to_str, range, lmap, zip -) -import pandas.compat as compat -from pandas import Panel, DataFrame, Series, read_csv, concat, to_datetime, DatetimeIndex, DateOffset -from pandas.core.common import is_list_like, PandasError -from pandas.io.common import urlopen, ZipFile, urlencode -from pandas.tseries.offsets import MonthEnd -from pandas.util.testing import _network_error_classes -from pandas.io.html import read_html +import warnings -class SymbolWarning(UserWarning): - pass +from pandas_datareader.commons import _sanitize_dates +from pandas_datareader.google.daily import _get_data as get_data_google +from pandas_datareader.google.quotes import _get_data as get_quote_google -class RemoteDataError(PandasError, IOError): - pass +from pandas_datareader.yahoo.daily import _get_data as get_data_yahoo +from pandas_datareader.yahoo.quotes import _get_data as get_quote_yahoo +from pandas_datareader.yahoo.actions import _get_data as get_data_yahoo_actions +from pandas_datareader.yahoo.components import _get_data as get_components_yahoo +from pandas_datareader.yahoo.options import Options as YahooOptions +from pandas_datareader.fred import _get_data as get_data_fred +from pandas_datareader.famafrench import _get_data as get_data_famafrench def DataReader(name, data_source=None, start=None, end=None, retry_count=3, pause=0.001): @@ -84,1225 +72,21 @@ def DataReader(name, data_source=None, start=None, end=None, 
retry_count=retry_count, pause=pause) elif data_source == "google": return get_data_google(symbols=name, start=start, end=end, - adjust_price=False, chunksize=25, - retry_count=retry_count, pause=pause) + chunksize=25, retry_count=retry_count, pause=pause) elif data_source == "fred": return get_data_fred(name, start, end) elif data_source == "famafrench": return get_data_famafrench(name) - - -def _sanitize_dates(start, end): - from pandas.core.datetools import to_datetime - start = to_datetime(start) - end = to_datetime(end) - if start is None: - start = dt.datetime(2010, 1, 1) - if end is None: - end = dt.datetime.today() - return start, end - - -def _in_chunks(seq, size): - """ - Return sequence in 'chunks' of size defined by size - """ - return (seq[pos:pos + size] for pos in range(0, len(seq), size)) - - -_yahoo_codes = {'symbol': 's', 'last': 'l1', 'change_pct': 'p2', 'PE': 'r', - 'time': 't1', 'short_ratio': 's7'} - - -_YAHOO_QUOTE_URL = 'http://finance.yahoo.com/d/quotes.csv?' - - -def get_quote_yahoo(symbols): - """ - Get current yahoo quote - - Returns a DataFrame - """ - if isinstance(symbols, compat.string_types): - sym_list = symbols else: - sym_list = '+'.join(symbols) - - # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm - request = ''.join(compat.itervalues(_yahoo_codes)) # code request string - header = list(_yahoo_codes.keys()) - - data = defaultdict(list) - - url_str = _YAHOO_QUOTE_URL + 's=%s&f=%s' % (sym_list, request) - - with urlopen(url_str) as url: - lines = url.readlines() - - for line in lines: - fields = line.decode('utf-8').strip().split(',') - for i, field in enumerate(fields): - if field[-2:] == '%"': - v = float(field.strip('"%')) - elif field[0] == '"': - v = field.strip('"') - else: - try: - v = float(field) - except ValueError: - v = field - data[header[i]].append(v) - - idx = data.pop('symbol') - return DataFrame(data, index=idx) - - -def get_quote_google(symbols): - raise NotImplementedError("Google Finance doesn't have 
this functionality") - - -def _retry_read_url(url, retry_count, pause, name): - for _ in range(retry_count): - time.sleep(pause) - - # kludge to close the socket ASAP - try: - with urlopen(url) as resp: - lines = resp.read() - except _network_error_classes: - pass - else: - rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0, - parse_dates=True, na_values='-')[::-1] - # Yahoo! Finance sometimes does this awesome thing where they - # return 2 rows for the most recent business day - if len(rs) > 2 and rs.index[-1] == rs.index[-2]: # pragma: no cover - rs = rs[:-1] - - #Get rid of unicode characters in index name. - try: - rs.index.name = rs.index.name.decode('unicode_escape').encode('ascii', 'ignore') - except AttributeError: - #Python 3 string has no decode method. - rs.index.name = rs.index.name.encode('ascii', 'ignore').decode() - - return rs - - raise IOError("after %d tries, %s did not " - "return a 200 for url %r" % (retry_count, name, url)) - - -_HISTORICAL_YAHOO_URL = 'http://ichart.finance.yahoo.com/table.csv?' - - -def _get_hist_yahoo(sym, start, end, interval, retry_count, pause): - """ - Get historical data for the given name from yahoo. - Date format is datetime - - Returns a DataFrame. - """ - start, end = _sanitize_dates(start, end) - url = (_HISTORICAL_YAHOO_URL + 's=%s' % sym + - '&a=%s' % (start.month - 1) + - '&b=%s' % start.day + - '&c=%s' % start.year + - '&d=%s' % (end.month - 1) + - '&e=%s' % end.day + - '&f=%s' % end.year + - '&g=%s' % interval + - '&ignore=.csv') - return _retry_read_url(url, retry_count, pause, 'Yahoo!') - - -_HISTORICAL_GOOGLE_URL = 'http://www.google.com/finance/historical?' - - -def _get_hist_google(sym, start, end, interval, retry_count, pause): - """ - Get historical data for the given name from google. - Date format is datetime - - Returns a DataFrame. 
- """ - start, end = _sanitize_dates(start, end) - - # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv - url = "%s%s" % (_HISTORICAL_GOOGLE_URL, - urlencode({"q": sym, - "startdate": start.strftime('%b %d, ' '%Y'), - "enddate": end.strftime('%b %d, %Y'), - "output": "csv"})) - return _retry_read_url(url, retry_count, pause, 'Google') - - -def _adjust_prices(hist_data, price_list=None): - """ - Return modifed DataFrame or Panel with adjusted prices based on - 'Adj Close' price. Adds 'Adj_Ratio' column. - """ - if price_list is None: - price_list = 'Open', 'High', 'Low', 'Close' - adj_ratio = hist_data['Adj Close'] / hist_data['Close'] - - data = hist_data.copy() - for item in price_list: - data[item] = hist_data[item] * adj_ratio - data['Adj_Ratio'] = adj_ratio - del data['Adj Close'] - return data - - -def _calc_return_index(price_df): - """ - Return a returns index from a input price df or series. Initial value - (typically NaN) is set to 1. - """ - df = price_df.pct_change().add(1).cumprod() - mask = df.ix[1].notnull() & df.ix[0].isnull() - df.ix[0][mask] = 1 - - # Check for first stock listings after starting date of index in ret_index - # If True, find first_valid_index and set previous entry to 1. - if (~mask).any(): - for sym in mask.index[~mask]: - tstamp = df[sym].first_valid_index() - t_idx = df.index.get_loc(tstamp) - 1 - df[sym].ix[t_idx] = 1 - - return df - - -_YAHOO_COMPONENTS_URL = 'http://download.finance.yahoo.com/d/quotes.csv?' - - -def get_components_yahoo(idx_sym): - """ - Returns DataFrame containing list of component information for - index represented in idx_sym from yahoo. Includes component symbol - (ticker), exchange, and name. 
- - Parameters - ---------- - idx_sym : str - Stock index symbol - Examples: - '^DJI' (Dow Jones Industrial Average) - '^NYA' (NYSE Composite) - '^IXIC' (NASDAQ Composite) - - See: http://finance.yahoo.com/indices for other index symbols - - Returns - ------- - idx_df : DataFrame - """ - stats = 'snx' - # URL of form: - # http://download.finance.yahoo.com/d/quotes.csv?s=@%5EIXIC&f=snxl1d1t1c1ohgv - url = _YAHOO_COMPONENTS_URL + 's={0}&f={1}&e=.csv&h={2}' - - idx_mod = idx_sym.replace('^', '@%5E') - url_str = url.format(idx_mod, stats, 1) - - idx_df = DataFrame() - mask = [True] - comp_idx = 1 - - # LOOP across component index structure, - # break when no new components are found - while True in mask: - url_str = url.format(idx_mod, stats, comp_idx) - with urlopen(url_str) as resp: - raw = resp.read() - lines = raw.decode('utf-8').strip().strip('"').split('"\r\n"') - lines = [line.strip().split('","') for line in lines] - - temp_df = DataFrame(lines, columns=['ticker', 'name', 'exchange']) - temp_df = temp_df.drop_duplicates() - temp_df = temp_df.set_index('ticker') - mask = ~temp_df.index.isin(idx_df.index) - - comp_idx = comp_idx + 50 - idx_df = idx_df.append(temp_df[mask]) - - return idx_df - - -def _dl_mult_symbols(symbols, start, end, interval, chunksize, retry_count, pause, - method): - stocks = {} - failed = [] - passed = [] - for sym_group in _in_chunks(symbols, chunksize): - for sym in sym_group: - try: - stocks[sym] = method(sym, start, end, interval, retry_count, pause) - passed.append(sym) - except IOError: - warnings.warn('Failed to read symbol: {0!r}, replacing with ' - 'NaN.'.format(sym), SymbolWarning) - failed.append(sym) - - if len(passed) == 0: - raise RemoteDataError("No data fetched using " - "{0!r}".format(method.__name__)) - try: - if len(stocks) > 0 and len(failed) > 0 and len(passed) > 0: - df_na = stocks[passed[0]].copy() - df_na[:] = np.nan - for sym in failed: - stocks[sym] = df_na - return Panel(stocks).swapaxes('items', 'minor') - 
except AttributeError: - # cannot construct a panel with just 1D nans indicating no data - raise RemoteDataError("No data fetched using " - "{0!r}".format(method.__name__)) - -_source_functions = {'google': _get_hist_google, 'yahoo': _get_hist_yahoo} - - -def _get_data_from(symbols, start, end, interval, retry_count, pause, adjust_price, - ret_index, chunksize, source): - - src_fn = _source_functions[source] - - # If a single symbol, (e.g., 'GOOG') - if isinstance(symbols, (compat.string_types, int)): - hist_data = src_fn(symbols, start, end, interval, retry_count, pause) - # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT']) - elif isinstance(symbols, DataFrame): - hist_data = _dl_mult_symbols(symbols.index, start, end, interval, chunksize, - retry_count, pause, src_fn) - else: - hist_data = _dl_mult_symbols(symbols, start, end, interval, chunksize, - retry_count, pause, src_fn) - if source.lower() == 'yahoo': - if ret_index: - hist_data['Ret_Index'] = _calc_return_index(hist_data['Adj Close']) - if adjust_price: - hist_data = _adjust_prices(hist_data) - - return hist_data - - -def get_data_yahoo(symbols=None, start=None, end=None, retry_count=3, - pause=0.001, adjust_price=False, ret_index=False, - chunksize=25, interval='d'): - """ - Returns DataFrame/Panel of historical stock prices from symbols, over date - range, start to end. To avoid being penalized by Yahoo! Finance servers, - pauses between downloading 'chunks' of symbols can be specified. - - Parameters - ---------- - symbols : string, array-like object (list, tuple, Series), or DataFrame - Single stock symbol (ticker), array-like object of symbols or - DataFrame with index containing stock symbols. - start : string, (defaults to '1/1/2010') - Starting date, timestamp. Parses many different kind of date - representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980') - end : string, (defaults to today) - Ending date, timestamp. Same format as starting date. 
- retry_count : int, default 3 - Number of times to retry query request. - pause : int, default 0 - Time, in seconds, to pause between consecutive queries of chunks. If - single value given for symbol, represents the pause between retries. - adjust_price : bool, default False - If True, adjusts all prices in hist_data ('Open', 'High', 'Low', - 'Close') based on 'Adj Close' price. Adds 'Adj_Ratio' column and drops - 'Adj Close'. - ret_index : bool, default False - If True, includes a simple return index 'Ret_Index' in hist_data. - chunksize : int, default 25 - Number of symbols to download consecutively before intiating pause. - interval : string, default 'd' - Time interval code, valid values are 'd' for daily, 'w' for weekly, - 'm' for monthly and 'v' for dividend. - - Returns - ------- - hist_data : DataFrame (str) or Panel (array-like object, DataFrame) - """ - if interval not in ['d', 'w', 'm', 'v']: - raise ValueError("Invalid interval: valid values are 'd', 'w', 'm' and 'v'") - return _get_data_from(symbols, start, end, interval, retry_count, pause, - adjust_price, ret_index, chunksize, 'yahoo') - -_HISTORICAL_YAHOO_ACTIONS_URL = 'http://ichart.finance.yahoo.com/x?' - -def get_data_yahoo_actions(symbol, start=None, end=None, retry_count=3, - pause=0.001): - """ - Returns DataFrame of historical corporate actions (dividends and stock - splits) from symbols, over date range, start to end. All dates in the - resulting DataFrame correspond with dividend and stock split ex-dates. - - Parameters - ---------- - sym : string with a single Single stock symbol (ticker). - start : string, (defaults to '1/1/2010') - Starting date, timestamp. Parses many different kind of date - representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980') - end : string, (defaults to today) - Ending date, timestamp. Same format as starting date. - retry_count : int, default 3 - Number of times to retry query request. 
- pause : int, default 0 - Time, in seconds, of the pause between retries. - """ - - start, end = _sanitize_dates(start, end) - url = (_HISTORICAL_YAHOO_ACTIONS_URL + 's=%s' % symbol + - '&a=%s' % (start.month - 1) + - '&b=%s' % start.day + - '&c=%s' % start.year + - '&d=%s' % (end.month - 1) + - '&e=%s' % end.day + - '&f=%s' % end.year + - '&g=v') - - for _ in range(retry_count): - time.sleep(pause) - - try: - with urlopen(url) as resp: - lines = resp.read() - except _network_error_classes: - pass - else: - actions_index = [] - actions_entries = [] - - for line in csv.reader(StringIO(bytes_to_str(lines))): - # Ignore lines that aren't dividends or splits (Yahoo - # add a bunch of irrelevant fields.) - if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'): - continue - - action, date, value = line - if action == 'DIVIDEND': - actions_index.append(to_datetime(date)) - actions_entries.append({ - 'action': action, - 'value': float(value) - }) - elif action == 'SPLIT' and ':' in value: - # Convert the split ratio to a fraction. For example a - # 4:1 split expressed as a fraction is 1/4 = 0.25. - denominator, numerator = value.split(':', 1) - split_fraction = float(numerator) / float(denominator) - - actions_index.append(to_datetime(date)) - actions_entries.append({ - 'action': action, - 'value': split_fraction - }) - - return DataFrame(actions_entries, index=actions_index) - - raise IOError("after %d tries, Yahoo! did not " - "return a 200 for url %r" % (retry_count, url)) - - -def get_data_google(symbols=None, start=None, end=None, retry_count=3, - pause=0.001, adjust_price=False, ret_index=False, - chunksize=25): - """ - Returns DataFrame/Panel of historical stock prices from symbols, over date - range, start to end. To avoid being penalized by Google Finance servers, - pauses between downloading 'chunks' of symbols can be specified. 
- - Parameters - ---------- - symbols : string, array-like object (list, tuple, Series), or DataFrame - Single stock symbol (ticker), array-like object of symbols or - DataFrame with index containing stock symbols. - start : string, (defaults to '1/1/2010') - Starting date, timestamp. Parses many different kind of date - representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980') - end : string, (defaults to today) - Ending date, timestamp. Same format as starting date. - retry_count : int, default 3 - Number of times to retry query request. - pause : int, default 0 - Time, in seconds, to pause between consecutive queries of chunks. If - single value given for symbol, represents the pause between retries. - chunksize : int, default 25 - Number of symbols to download consecutively before intiating pause. - - Returns - ------- - hist_data : DataFrame (str) or Panel (array-like object, DataFrame) - """ - return _get_data_from(symbols, start, end, None, retry_count, pause, - adjust_price, ret_index, chunksize, 'google') - - -_FRED_URL = "http://research.stlouisfed.org/fred2/series/" - - -def get_data_fred(name, start=dt.datetime(2010, 1, 1), - end=dt.datetime.today()): - """ - Get data for the given name from the St. Louis FED (FRED). - Date format is datetime - - Returns a DataFrame. - - If multiple names are passed for "series" then the index of the - DataFrame is the outer join of the indicies of each series. 
- """ - start, end = _sanitize_dates(start, end) - - if not is_list_like(name): - names = [name] + raise NotImplementedError( + "data_source=%r is not implemented" % data_source) + +def Options(symbol, data_source=None): + if data_source is None: + warnings.warn("Options(symbol) is deprecated, use Options(symbol," + " data_source) instead", FutureWarning) + data_source = "yahoo" + if data_source == "yahoo": + return YahooOptions(symbol) else: - names = name - - urls = [_FRED_URL + '%s' % n + '/downloaddata/%s' % n + '.csv' for - n in names] - - def fetch_data(url, name): - with urlopen(url) as resp: - data = read_csv(resp, index_col=0, parse_dates=True, - header=None, skiprows=1, names=["DATE", name], - na_values='.') - try: - return data.truncate(start, end) - except KeyError: - if data.ix[3].name[7:12] == 'Error': - raise IOError("Failed to get the data. Check that {0!r} is " - "a valid FRED series.".format(name)) - raise - df = concat([fetch_data(url, n) for url, n in zip(urls, names)], - axis=1, join='outer') - return df - - -_FAMAFRENCH_URL = 'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp' - - -def get_data_famafrench(name): - # path of zip files - zip_file_path = '{0}/{1}_TXT.zip'.format(_FAMAFRENCH_URL, name) - - with urlopen(zip_file_path) as url: - raw = url.read() - - with tempfile.TemporaryFile() as tmpf: - tmpf.write(raw) - - with ZipFile(tmpf, 'r') as zf: - data = zf.open(zf.namelist()[0]).readlines() - - line_lengths = np.array(lmap(len, data)) - file_edges = np.where(line_lengths == 2)[0] - - datasets = {} - edges = zip(file_edges + 1, file_edges[1:]) - for i, (left_edge, right_edge) in enumerate(edges): - dataset = [d.split() for d in data[left_edge:right_edge]] - if len(dataset) > 10: - ncol_raw = np.array(lmap(len, dataset)) - ncol = np.median(ncol_raw) - header_index = np.where(ncol_raw == ncol - 1)[0][-1] - header = dataset[header_index] - ds_header = dataset[header_index + 1:] - # to ensure the header is unique - header = ['{0} 
{1}'.format(j, hj) for j, hj in enumerate(header, - start=1)] - index = np.array([d[0] for d in ds_header], dtype=int) - dataset = np.array([d[1:] for d in ds_header], dtype=float) - datasets[i] = DataFrame(dataset, index, columns=header) - - return datasets - - -# Items needed for options class -CUR_MONTH = dt.datetime.now().month -CUR_YEAR = dt.datetime.now().year -CUR_DAY = dt.datetime.now().day - - -def _two_char(s): - return '{0:0>2}'.format(s) - - -class Options(object): - """ - ***Experimental*** - This class fetches call/put data for a given stock/expiry month. - - It is instantiated with a string representing the ticker symbol. - - The class has the following methods: - get_options_data:(month, year, expiry) - get_call_data:(month, year, expiry) - get_put_data: (month, year, expiry) - get_near_stock_price(opt_frame, above_below) - get_all_data(call, put) - get_forward_data(months, call, put) (deprecated) - - Examples - -------- - # Instantiate object with ticker - >>> aapl = Options('aapl', 'yahoo') - - # Fetch next expiry call data - >>> calls = aapl.get_call_data() - - # Can now access aapl.calls instance variable - >>> aapl.calls - - # Fetch next expiry put data - >>> puts = aapl.get_put_data() - - # Can now access aapl.puts instance variable - >>> aapl.puts - - # cut down the call data to be 3 below and 3 above the stock price. 
- >>> cut_calls = aapl.get_near_stock_price(call=True, above_below=3) - - # Fetch call and put data with expiry from now to 8 months out - >>> forward_data = aapl.get_forward_data(8, call=True, put=True) - - # Fetch all call and put data - >>> all_data = aapl.get_all_data() - """ - - _TABLE_LOC = {'calls': 1, 'puts': 2} - _OPTIONS_BASE_URL = 'http://finance.yahoo.com/q/op?s={sym}' - _FINANCE_BASE_URL = 'http://finance.yahoo.com' - - def __init__(self, symbol, data_source=None): - """ Instantiates options_data with a ticker saved as symbol """ - self.symbol = symbol.upper() - if data_source is None: - warnings.warn("Options(symbol) is deprecated, use Options(symbol," - " data_source) instead", FutureWarning) - data_source = "yahoo" - if data_source != "yahoo": - raise NotImplementedError("currently only yahoo supported") - - def get_options_data(self, month=None, year=None, expiry=None): - """ - ***Experimental*** - Gets call/put data for the stock with the expiration data in the - given month and year - - Parameters - ---------- - month : number, int, optional(default=None) - The month the options expire. This should be either 1 or 2 - digits. - - year : number, int, optional(default=None) - The year the options expire. This should be a 4 digit int. - - expiry : date-like or convertible or list-like object, optional (default=None) - The date (or dates) when options expire (defaults to current month) - - Returns - ------- - pandas.DataFrame - A DataFrame with requested options data. 
- - Index: - Strike: Option strike, int - Expiry: Option expiry, Timestamp - Type: Call or Put, string - Symbol: Option symbol as reported on Yahoo, string - Columns: - Last: Last option price, float - Chg: Change from prior day, float - Bid: Bid price, float - Ask: Ask price, float - Vol: Volume traded, int64 - Open_Int: Open interest, int64 - IsNonstandard: True if the the deliverable is not 100 shares, otherwise false - Underlying: Ticker of the underlying security, string - Underlying_Price: Price of the underlying security, float64 - Quote_Time: Time of the quote, Timestamp - - Notes - ----- - Note: Format of returned data frame is dependent on Yahoo and may change. - - When called, this function will add instance variables named - calls and puts. See the following example: - - >>> aapl = Options('aapl', 'yahoo') # Create object - >>> aapl.calls # will give an AttributeError - >>> aapl.get_options() # Get data and set ivars - >>> aapl.calls # Doesn't throw AttributeError - - Also note that aapl.calls and appl.puts will always be the calls - and puts for the next expiry. If the user calls this method with - a different expiry, the ivar will be named callsYYMMDD or putsYYMMDD, - where YY, MM and DD are, respectively, two digit representations of - the year, month and day for the expiry of the options. 
- - """ - return concat([f(month, year, expiry) - for f in (self.get_put_data, - self.get_call_data)]).sortlevel() - - def _get_option_frames_from_yahoo(self, expiry): - url = self._yahoo_url_from_expiry(expiry) - option_frames = self._option_frames_from_url(url) - frame_name = '_frames' + self._expiry_to_string(expiry) - setattr(self, frame_name, option_frames) - return option_frames - - @staticmethod - def _expiry_to_string(expiry): - m1 = _two_char(expiry.month) - d1 = _two_char(expiry.day) - return str(expiry.year)[-2:] + m1 + d1 - - def _yahoo_url_from_expiry(self, expiry): - try: - expiry_links = self._expiry_links - - except AttributeError: - _, expiry_links = self._get_expiry_dates_and_links() - - return self._FINANCE_BASE_URL + expiry_links[expiry] - - def _option_frames_from_url(self, url): - frames = read_html(url) - nframes = len(frames) - frames_req = max(self._TABLE_LOC.values()) - if nframes < frames_req: - raise RemoteDataError("%s options tables found (%s expected)" % (nframes, frames_req)) - - if not hasattr(self, 'underlying_price'): - try: - self.underlying_price, self.quote_time = self._underlying_price_and_time_from_url(url) - except IndexError: - self.underlying_price, self.quote_time = np.nan, np.nan - - calls = frames[self._TABLE_LOC['calls']] - puts = frames[self._TABLE_LOC['puts']] - - calls = self._process_data(calls, 'call') - puts = self._process_data(puts, 'put') - - return {'calls': calls, 'puts': puts} - - def _underlying_price_and_time_from_url(self, url): - root = self._parse_url(url) - underlying_price = self._underlying_price_from_root(root) - quote_time = self._quote_time_from_root(root) - return underlying_price, quote_time - - @staticmethod - def _underlying_price_from_root(root): - underlying_price = root.xpath('.//*[@class="time_rtq_ticker Fz-30 Fw-b"]')[0]\ - .getchildren()[0].text - underlying_price = underlying_price.replace(',', '') #GH11 - - try: - underlying_price = float(underlying_price) - except ValueError: - 
underlying_price = np.nan - - return underlying_price - - @staticmethod - def _quote_time_from_root(root): - #Gets the time of the quote, note this is actually the time of the underlying price. - try: - quote_time_text = root.xpath('.//*[@class="time_rtq Fz-m"]')[0].getchildren()[1].getchildren()[0].text - ##TODO: Enable timezone matching when strptime can match EST with %Z - quote_time_text = quote_time_text.split(' ')[0] - quote_time = dt.datetime.strptime(quote_time_text, "%I:%M%p") - quote_time = quote_time.replace(year=CUR_YEAR, month=CUR_MONTH, day=CUR_DAY) - except ValueError: - quote_time = np.nan - - return quote_time - - def _get_option_data(self, expiry, name): - frame_name = '_frames' + self._expiry_to_string(expiry) - - try: - frames = getattr(self, frame_name) - except AttributeError: - frames = self._get_option_frames_from_yahoo(expiry) - - option_data = frames[name] - if expiry != self.expiry_dates[0]: - name += self._expiry_to_string(expiry) - - setattr(self, name, option_data) - return option_data - - def get_call_data(self, month=None, year=None, expiry=None): - """ - ***Experimental*** - Gets call/put data for the stock with the expiration data in the - given month and year - - Parameters - ---------- - month : number, int, optional(default=None) - The month the options expire. This should be either 1 or 2 - digits. - - year : number, int, optional(default=None) - The year the options expire. This should be a 4 digit int. - - expiry : date-like or convertible or list-like object, optional (default=None) - The date (or dates) when options expire (defaults to current month) - - Returns - ------- - call_data: pandas.DataFrame - A DataFrame with requested options data. 
- - Index: - Strike: Option strike, int - Expiry: Option expiry, Timestamp - Type: Call or Put, string - Symbol: Option symbol as reported on Yahoo, string - Columns: - Last: Last option price, float - Chg: Change from prior day, float - Bid: Bid price, float - Ask: Ask price, float - Vol: Volume traded, int64 - Open_Int: Open interest, int64 - IsNonstandard: True if the the deliverable is not 100 shares, otherwise false - Underlying: Ticker of the underlying security, string - Underlying_Price: Price of the underlying security, float64 - Quote_Time: Time of the quote, Timestamp - - Notes - ----- - Note: Format of returned data frame is dependent on Yahoo and may change. - - When called, this function will add instance variables named - calls and puts. See the following example: - - >>> aapl = Options('aapl', 'yahoo') # Create object - >>> aapl.calls # will give an AttributeError - >>> aapl.get_call_data() # Get data and set ivars - >>> aapl.calls # Doesn't throw AttributeError - - Also note that aapl.calls will always be the calls for the next - expiry. If the user calls this method with a different month - or year, the ivar will be named callsYYMMDD where YY, MM and DD are, - respectively, two digit representations of the year, month and day - for the expiry of the options. - """ - expiry = self._try_parse_dates(year, month, expiry) - return self._get_data_in_date_range(expiry, call=True, put=False) - - def get_put_data(self, month=None, year=None, expiry=None): - """ - ***Experimental*** - Gets put data for the stock with the expiration data in the - given month and year - - Parameters - ---------- - month : number, int, optional(default=None) - The month the options expire. This should be either 1 or 2 - digits. - - year : number, int, optional(default=None) - The year the options expire. This should be a 4 digit int. 
- - expiry : date-like or convertible or list-like object, optional (default=None) - The date (or dates) when options expire (defaults to current month) - - Returns - ------- - put_data: pandas.DataFrame - A DataFrame with requested options data. - - Index: - Strike: Option strike, int - Expiry: Option expiry, Timestamp - Type: Call or Put, string - Symbol: Option symbol as reported on Yahoo, string - Columns: - Last: Last option price, float - Chg: Change from prior day, float - Bid: Bid price, float - Ask: Ask price, float - Vol: Volume traded, int64 - Open_Int: Open interest, int64 - IsNonstandard: True if the the deliverable is not 100 shares, otherwise false - Underlying: Ticker of the underlying security, string - Underlying_Price: Price of the underlying security, float64 - Quote_Time: Time of the quote, Timestamp - - Notes - ----- - Note: Format of returned data frame is dependent on Yahoo and may change. - - When called, this function will add instance variables named - puts. See the following example: - - >>> aapl = Options('aapl') # Create object - >>> aapl.puts # will give an AttributeError - >>> aapl.get_put_data() # Get data and set ivars - >>> aapl.puts # Doesn't throw AttributeError - - return self.__setattr__(self, str(str(x) + str(y))) - - Also note that aapl.puts will always be the puts for the next - expiry. If the user calls this method with a different month - or year, the ivar will be named putsYYMMDD where YY, MM and DD are, - respectively, two digit representations of the year, month and day - for the expiry of the options. - """ - expiry = self._try_parse_dates(year, month, expiry) - return self._get_data_in_date_range(expiry, put=True, call=False) - - def get_near_stock_price(self, above_below=2, call=True, put=False, - month=None, year=None, expiry=None): - """ - ***Experimental*** - Returns a data frame of options that are near the current stock price. 
- - Parameters - ---------- - above_below : number, int, optional (default=2) - The number of strike prices above and below the stock price that - should be taken - - call : bool - Tells the function whether or not it should be using calls - - put : bool - Tells the function weather or not it should be using puts - - month : number, int, optional(default=None) - The month the options expire. This should be either 1 or 2 - digits. - - year : number, int, optional(default=None) - The year the options expire. This should be a 4 digit int. - - expiry : date-like or convertible or list-like object, optional (default=None) - The date (or dates) when options expire (defaults to current month) - - Returns - ------- - chopped: DataFrame - The resultant DataFrame chopped down to be 2 * above_below + 1 rows - desired. If there isn't data as far out as the user has asked for - then - - Note: Format of returned data frame is dependent on Yahoo and may change. - - """ - expiry = self._try_parse_dates(year, month, expiry) - data = self._get_data_in_date_range(expiry, call=call, put=put) - return self.chop_data(data, above_below, self.underlying_price) - - def chop_data(self, df, above_below=2, underlying_price=None): - """Returns a data frame only options that are near the current stock price.""" - - if not underlying_price: - try: - underlying_price = self.underlying_price - except AttributeError: - underlying_price = np.nan - - max_strike = max(df.index.get_level_values('Strike')) - min_strike = min(df.index.get_level_values('Strike')) - - if not np.isnan(underlying_price) and min_strike < underlying_price < max_strike: - start_index = np.where(df.index.get_level_values('Strike') - > underlying_price)[0][0] - - get_range = slice(start_index - above_below, - start_index + above_below + 1) - df = df[get_range].dropna(how='all') - - return df - - def _try_parse_dates(self, year, month, expiry): - """ - Validates dates provided by user. 
Ensures the user either provided both a month and a year or an expiry. - - Parameters - ---------- - year : int - Calendar year - - month : int - Calendar month - - expiry : date-like or convertible, (preferred) - Expiry date - - Returns - ------- - list of expiry dates (datetime.date) - """ - - #Checks if the user gave one of the month or the year but not both and did not provide an expiry: - if (month is not None and year is None) or (month is None and year is not None) and expiry is None: - msg = "You must specify either (`year` and `month`) or `expiry` " \ - "or none of these options for the next expiry." - raise ValueError(msg) - - if expiry is not None: - if hasattr(expiry, '__iter__'): - expiry = [self._validate_expiry(exp) for exp in expiry] - else: - expiry = [self._validate_expiry(expiry)] - - if len(expiry) == 0: - raise ValueError('No expiries available for given input.') - - elif year is None and month is None: - #No arguments passed, provide next expiry - year = CUR_YEAR - month = CUR_MONTH - expiry = dt.date(year, month, 1) - expiry = [self._validate_expiry(expiry)] - - else: - #Year and month passed, provide all expiries in that month - expiry = [expiry for expiry in self.expiry_dates if expiry.year == year and expiry.month == month] - if len(expiry) == 0: - raise ValueError('No expiries available in %s-%s' % (year, month)) - - return expiry - - def _validate_expiry(self, expiry): - """Ensures that an expiry date has data available on Yahoo - If the expiry date does not have options that expire on that day, return next expiry""" - - expiry_dates = self.expiry_dates - expiry = to_datetime(expiry) - if hasattr(expiry, 'date'): - expiry = expiry.date() - - if expiry in expiry_dates: - return expiry - else: - index = DatetimeIndex(expiry_dates).order() - return index[index.date >= expiry][0].date() - - def get_forward_data(self, months, call=True, put=False, near=False, - above_below=2): - """ - ***Experimental*** - Gets either call, put, or both data 
for months starting in the current - month and going out in the future a specified amount of time. - - Parameters - ---------- - months : number, int - How many months to go out in the collection of the data. This is - inclusive. - - call : bool, optional (default=True) - Whether or not to collect data for call options - - put : bool, optional (default=False) - Whether or not to collect data for put options. - - near : bool, optional (default=False) - Whether this function should get only the data near the - current stock price. Uses Options.get_near_stock_price - - above_below : number, int, optional (default=2) - The number of strike prices above and below the stock price that - should be taken if the near option is set to True - - Returns - ------- - pandas.DataFrame - A DataFrame with requested options data. - - Index: - Strike: Option strike, int - Expiry: Option expiry, Timestamp - Type: Call or Put, string - Symbol: Option symbol as reported on Yahoo, string - Columns: - Last: Last option price, float - Chg: Change from prior day, float - Bid: Bid price, float - Ask: Ask price, float - Vol: Volume traded, int64 - Open_Int: Open interest, int64 - IsNonstandard: True if the the deliverable is not 100 shares, otherwise false - Underlying: Ticker of the underlying security, string - Underlying_Price: Price of the underlying security, float64 - Quote_Time: Time of the quote, Timestamp - - Note: Format of returned data frame is dependent on Yahoo and may change. 
- - """ - warnings.warn("get_forward_data() is deprecated", FutureWarning) - end_date = dt.date.today() + MonthEnd(months) - dates = (date for date in self.expiry_dates if date <= end_date.date()) - data = self._get_data_in_date_range(dates, call=call, put=put) - if near: - data = self.chop_data(data, above_below=above_below) - return data - - def get_all_data(self, call=True, put=True): - """ - ***Experimental*** - Gets either call, put, or both data for all available months starting - in the current month. - - Parameters - ---------- - call : bool, optional (default=True) - Whether or not to collect data for call options - - put : bool, optional (default=True) - Whether or not to collect data for put options. - - Returns - ------- - pandas.DataFrame - A DataFrame with requested options data. - - Index: - Strike: Option strike, int - Expiry: Option expiry, Timestamp - Type: Call or Put, string - Symbol: Option symbol as reported on Yahoo, string - Columns: - Last: Last option price, float - Chg: Change from prior day, float - Bid: Bid price, float - Ask: Ask price, float - Vol: Volume traded, int64 - Open_Int: Open interest, int64 - IsNonstandard: True if the the deliverable is not 100 shares, otherwise false - Underlying: Ticker of the underlying security, string - Underlying_Price: Price of the underlying security, float64 - Quote_Time: Time of the quote, Timestamp - - Note: Format of returned data frame is dependent on Yahoo and may change. 
- - """ - - try: - expiry_dates = self.expiry_dates - except AttributeError: - expiry_dates, _ = self._get_expiry_dates_and_links() - - return self._get_data_in_date_range(dates=expiry_dates, call=call, put=put) - - def _get_data_in_date_range(self, dates, call=True, put=True): - - to_ret = Series({'calls': call, 'puts': put}) - to_ret = to_ret[to_ret].index - data = [] - - for name in to_ret: - for expiry_date in dates: - nam = name + self._expiry_to_string(expiry_date) - try: # Try to access on the instance - frame = getattr(self, nam) - except AttributeError: - frame = self._get_option_data(expiry=expiry_date, name=name) - data.append(frame) - - return concat(data).sortlevel() - - @property - def expiry_dates(self): - """ - Returns a list of available expiry dates - """ - try: - expiry_dates = self._expiry_dates - except AttributeError: - expiry_dates, _ = self._get_expiry_dates_and_links() - return expiry_dates - - def _get_expiry_dates_and_links(self): - """ - Gets available expiry dates. - - Returns - ------- - Tuple of: - List of datetime.date objects - Dict of datetime.date objects as keys and corresponding links - """ - - url = self._OPTIONS_BASE_URL.format(sym=self.symbol) - root = self._parse_url(url) - - try: - links = root.xpath('//*[@id="options_menu"]/form/select/option') - except IndexError: - raise RemoteDataError('Expiry dates not available') - - expiry_dates = [dt.datetime.strptime(element.text, "%B %d, %Y").date() for element in links] - links = [element.attrib['data-selectbox-link'] for element in links] - - if len(expiry_dates) == 0: - raise RemoteDataError('Data not available') - - expiry_links = dict(zip(expiry_dates, links)) - self._expiry_links = expiry_links - self._expiry_dates = expiry_dates - return expiry_dates, expiry_links - - def _parse_url(self, url): - """ - Downloads and parses a URL, returns xml root. 
- - """ - try: - from lxml.html import parse - except ImportError: - raise ImportError("Please install lxml if you want to use the " - "{0!r} class".format(self.__class__.__name__)) - try: - doc = parse(url) - except _network_error_classes: - raise RemoteDataError("Unable to parse URL " - "{0!r}".format(url)) - else: - root = doc.getroot() - if root is None: - raise RemoteDataError("Parsed URL {0!r} has no root" - "element".format(url)) - return root - - def _process_data(self, frame, type): - """ - Adds columns for Expiry, IsNonstandard (ie: deliverable is not 100 shares) - and Tag (the tag indicating what is actually deliverable, None if standard). - - """ - frame.columns = ['Strike', 'Symbol', 'Last', 'Bid', 'Ask', 'Chg', 'PctChg', 'Vol', 'Open_Int', 'IV'] - frame["Rootexp"] = frame.Symbol.str[0:-9] - frame["Root"] = frame.Rootexp.str[0:-6] - frame["Expiry"] = to_datetime(frame.Rootexp.str[-6:]) - #Removes dashes in equity ticker to map to option ticker. - #Ex: BRK-B to BRKB140517C00100000 - frame["IsNonstandard"] = frame['Root'] != self.symbol.replace('-', '') - del frame["Rootexp"] - frame["Underlying"] = self.symbol - try: - frame['Underlying_Price'] = self.underlying_price - frame["Quote_Time"] = self.quote_time - except AttributeError: - frame['Underlying_Price'] = np.nan - frame["Quote_Time"] = np.nan - frame.rename(columns={'Open Int': 'Open_Int'}, inplace=True) - frame['Type'] = type - frame.set_index(['Strike', 'Expiry', 'Type', 'Symbol'], inplace=True) - - return frame + raise NotImplementedError("currently only yahoo supported") diff --git a/pandas_datareader/famafrench.py b/pandas_datareader/famafrench.py new file mode 100644 index 00000000..5352448e --- /dev/null +++ b/pandas_datareader/famafrench.py @@ -0,0 +1,43 @@ +import tempfile +import numpy as np +from pandas.io.common import urlopen, ZipFile +from pandas.compat import lmap +from pandas import DataFrame + +_URL = 'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp' + + +def 
_get_data(name): + # path of zip files + zip_file_path = '{0}/{1}_TXT.zip'.format(_URL, name) + + with urlopen(zip_file_path) as url: + raw = url.read() + + with tempfile.TemporaryFile() as tmpf: + tmpf.write(raw) + + with ZipFile(tmpf, 'r') as zf: + data = zf.open(zf.namelist()[0]).readlines() + + line_lengths = np.array(lmap(len, data)) + file_edges = np.where(line_lengths == 2)[0] + + datasets = {} + edges = zip(file_edges + 1, file_edges[1:]) + for i, (left_edge, right_edge) in enumerate(edges): + dataset = [d.split() for d in data[left_edge:right_edge]] + if len(dataset) > 10: + ncol_raw = np.array(lmap(len, dataset)) + ncol = np.median(ncol_raw) + header_index = np.where(ncol_raw == ncol - 1)[0][-1] + header = dataset[header_index] + ds_header = dataset[header_index + 1:] + # to ensure the header is unique + header = ['{0} {1}'.format(j, hj) for j, hj in enumerate(header, + start=1)] + index = np.array([d[0] for d in ds_header], dtype=int) + dataset = np.array([d[1:] for d in ds_header], dtype=float) + datasets[i] = DataFrame(dataset, index, columns=header) + + return datasets diff --git a/pandas_datareader/fred.py b/pandas_datareader/fred.py new file mode 100644 index 00000000..b36a6bcd --- /dev/null +++ b/pandas_datareader/fred.py @@ -0,0 +1,46 @@ +import datetime as dt +from pandas.core.common import is_list_like +from pandas.io.common import urlopen +from pandas import concat, read_csv + +from pandas_datareader.commons import _sanitize_dates + +_URL = "http://research.stlouisfed.org/fred2/series/" + + +def _get_data(name, start=dt.datetime(2010, 1, 1), + end=dt.datetime.today()): + """ + Get data for the given name from the St. Louis FED (FRED). + Date format is datetime + + Returns a DataFrame. + + If multiple names are passed for "series" then the index of the + DataFrame is the outer join of the indicies of each series. 
+ """ + start, end = _sanitize_dates(start, end) + + if not is_list_like(name): + names = [name] + else: + names = name + + urls = [_URL + '%s' % n + '/downloaddata/%s' % n + '.csv' for + n in names] + + def fetch_data(url, name): + with urlopen(url) as resp: + data = read_csv(resp, index_col=0, parse_dates=True, + header=None, skiprows=1, names=["DATE", name], + na_values='.') + try: + return data.truncate(start, end) + except KeyError: + if data.ix[3].name[7:12] == 'Error': + raise IOError("Failed to get the data. Check that {0!r} is " + "a valid FRED series.".format(name)) + raise + df = concat([fetch_data(url, n) for url, n in zip(urls, names)], + axis=1, join='outer') + return df diff --git a/pandas_datareader/google/__init__.py b/pandas_datareader/google/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas_datareader/google/daily.py b/pandas_datareader/google/daily.py new file mode 100644 index 00000000..c3329b27 --- /dev/null +++ b/pandas_datareader/google/daily.py @@ -0,0 +1,57 @@ +from pandas.io.common import urlencode +from pandas_datareader.commons import _retry_read_url +from pandas_datareader.commons import _sanitize_dates +from pandas_datareader.commons import _get_data_from + +_URL = 'http://www.google.com/finance/historical?' + + +def _get_data(symbols=None, start=None, end=None, retry_count=3, + pause=0.001, chunksize=25): + """ + Returns DataFrame/Panel of historical stock prices from symbols, over date + range, start to end. To avoid being penalized by Google Finance servers, + pauses between downloading 'chunks' of symbols can be specified. + + Parameters + ---------- + symbols : string, array-like object (list, tuple, Series), or DataFrame + Single stock symbol (ticker), array-like object of symbols or + DataFrame with index containing stock symbols. + start : string, (defaults to '1/1/2010') + Starting date, timestamp. 
Parses many different kind of date + representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980') + end : string, (defaults to today) + Ending date, timestamp. Same format as starting date. + retry_count : int, default 3 + Number of times to retry query request. + pause : int, default 0 + Time, in seconds, to pause between consecutive queries of chunks. If + single value given for symbol, represents the pause between retries. + chunksize : int, default 25 + Number of symbols to download consecutively before intiating pause. + + Returns + ------- + hist_data : DataFrame (str) or Panel (array-like object, DataFrame) + """ + return _get_data_from(symbols, start, end, None, retry_count, pause, + chunksize, _get_data_one) + + +def _get_data_one(sym, start, end, interval, retry_count, pause): + """ + Get historical data for the given name from google. + Date format is datetime + + Returns a DataFrame. + """ + start, end = _sanitize_dates(start, end) + + # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv + url = "%s%s" % (_URL, + urlencode({"q": sym, + "startdate": start.strftime('%b %d, ' '%Y'), + "enddate": end.strftime('%b %d, %Y'), + "output": "csv"})) + return _retry_read_url(url, retry_count, pause, 'Google') diff --git a/pandas_datareader/google/quotes.py b/pandas_datareader/google/quotes.py new file mode 100644 index 00000000..c410cfe6 --- /dev/null +++ b/pandas_datareader/google/quotes.py @@ -0,0 +1,10 @@ +def _get_data(symbols): + """ + Get current yahoo quote + + (Should) Returns a DataFrame + + ToDo: Not implemented + """ + msg = "Google Finance doesn't have this functionality - can't get quote for %s" % symbols + raise NotImplementedError(msg) diff --git a/pandas_datareader/tests/test_data.py b/pandas_datareader/tests/test_data.py index cf205c78..5498228b 100644 --- a/pandas_datareader/tests/test_data.py +++ b/pandas_datareader/tests/test_data.py @@ -12,7 +12,7 @@ from pandas.util.testing import 
assert_frame_equal, assert_series_equal try: from pandas.util.testing import assert_produces_warning -except ImportError: +except ImportError: # pragma: no cover assert_produces_warning = None import pandas.util.testing as tm @@ -20,17 +20,18 @@ try: from urllib.error import HTTPError -except ImportError: +except ImportError: # pragma: no cover from urllib2 import HTTPError import pandas_datareader.data as web -from pandas_datareader.data import ( - DataReader, SymbolWarning, RemoteDataError, _yahoo_codes) +from pandas_datareader.data import DataReader +from pandas_datareader.commons import SymbolWarning, RemoteDataError +from pandas_datareader.yahoo.quotes import _yahoo_codes def _skip_if_no_lxml(): try: import lxml - except ImportError: + except ImportError: # pragma: no cover raise nose.SkipTest("no lxml") @@ -50,7 +51,7 @@ class TestGoogle(tm.TestCase): def setUpClass(cls): super(TestGoogle, cls).setUpClass() cls.locales = tm.get_locales(prefix='en_US') - if not cls.locales: + if not cls.locales: # pragma: no cover raise nose.SkipTest("US English locale not available for testing") @classmethod @@ -92,7 +93,7 @@ def test_get_multi1(self): if (hasattr(pan, 'Close') and hasattr(pan.Close, 'GOOG') and hasattr(pan.Close, 'AAPL')): self.assertEqual(ts[0].dayofyear, 3) - else: + else: # pragma: no cover self.assertRaises(AttributeError, lambda: pan.Close) def test_get_multi_invalid(self): @@ -173,14 +174,14 @@ def test_get_quote_stringlist(self): df = web.get_quote_yahoo(['GOOG', 'AAPL', 'GOOG']) assert_series_equal(df.ix[0], df.ix[2]) - def test_get_components_dow_jones(self): + def test_get_components_dow_jones(self): # pragma: no cover raise nose.SkipTest('unreliable test, receive partial components back for dow_jones') df = web.get_components_yahoo('^DJI') #Dow Jones assert isinstance(df, pd.DataFrame) self.assertEqual(len(df), 30) - def test_get_components_dax(self): + def test_get_components_dax(self): # pragma: no cover raise nose.SkipTest('unreliable test, 
receive partial components back for dax') df = web.get_components_yahoo('^GDAXI') #DAX @@ -189,7 +190,7 @@ def test_get_components_dax(self): self.assertEqual(df[df.name.str.contains('adidas', case=False)].index, 'ADS.DE') - def test_get_components_nasdaq_100(self): + def test_get_components_nasdaq_100(self): # pragma: no cover # as of 7/12/13 the conditional will test false because the link is invalid raise nose.SkipTest('unreliable test, receive partial components back for nasdaq_100') @@ -298,8 +299,7 @@ def setUpClass(cls): today = datetime.today() cls.year = today.year cls.month = today.month + 1 - if cls.month > 12: - cls.year = cls.year + 1 + if cls.month > 12: # pragma: no cover cls.month = 1 cls.expiry = datetime(cls.year, cls.month, 1) cls.dirpath = tm.get_data_path() @@ -320,7 +320,7 @@ def test_get_options_data(self): try: options = self.aapl.get_options_data(expiry=self.expiry) - except RemoteDataError as e: + except RemoteDataError as e: # pragma: no cover raise nose.SkipTest(e) self.assertTrue(len(options) > 1) @@ -328,49 +328,49 @@ def test_get_near_stock_price(self): try: options = self.aapl.get_near_stock_price(call=True, put=True, expiry=self.expiry) - except RemoteDataError as e: + except RemoteDataError as e: # pragma: no cover raise nose.SkipTest(e) self.assertTrue(len(options) > 1) def test_get_call_data(self): try: calls = self.aapl.get_call_data(expiry=self.expiry) - except RemoteDataError as e: + except RemoteDataError as e: # pragma: no cover raise nose.SkipTest(e) self.assertTrue(len(calls) > 1) def test_get_put_data(self): try: puts = self.aapl.get_put_data(expiry=self.expiry) - except RemoteDataError as e: + except RemoteDataError as e: # pragma: no cover raise nose.SkipTest(e) self.assertTrue(len(puts) > 1) def test_get_expiry_dates(self): try: dates, _ = self.aapl._get_expiry_dates_and_links() - except RemoteDataError as e: + except RemoteDataError as e: # pragma: no cover raise nose.SkipTest(e) self.assertTrue(len(dates) > 1) def 
test_get_all_data(self): try: data = self.aapl.get_all_data(put=True) - except RemoteDataError as e: + except RemoteDataError as e: # pragma: no cover raise nose.SkipTest(e) self.assertTrue(len(data) > 1) def test_get_data_with_list(self): try: data = self.aapl.get_call_data(expiry=self.aapl.expiry_dates) - except RemoteDataError as e: + except RemoteDataError as e: # pragma: no cover raise nose.SkipTest(e) self.assertTrue(len(data) > 1) def test_get_all_data_calls_only(self): try: data = self.aapl.get_all_data(call=True, put=False) - except RemoteDataError as e: + except RemoteDataError as e: # pragma: no cover raise nose.SkipTest(e) self.assertTrue(len(data) > 1) @@ -381,7 +381,7 @@ def test_get_underlying_price(self): url = options_object._yahoo_url_from_expiry(options_object.expiry_dates[0]) root = options_object._parse_url(url) quote_price = options_object._underlying_price_from_root(root) - except RemoteDataError as e: + except RemoteDataError as e: # pragma: no cover raise nose.SkipTest(e) self.assert_(isinstance(quote_price, float)) @@ -419,7 +419,7 @@ def test_sample_page_chg_float(self): def test_month_year(self): try: data = self.aapl.get_call_data(month=self.month, year=self.year) - except RemoteDataError as e: + except RemoteDataError as e: # pragma: no cover raise nose.SkipTest(e) self.assertTrue(len(data) > 1) @@ -440,7 +440,7 @@ def tearDownClass(cls): super(TestOptionsWarnings, cls).tearDownClass() def test_options_source_warning(self): - if not assert_produces_warning: + if not assert_produces_warning: # pragma: no cover raise nose.SkipTest("old version of pandas without " "compat.assert_produces_warning") with assert_produces_warning(): @@ -486,7 +486,9 @@ def test_fred(self): # < 7/30/14 16535 was returned #self.assertEqual(int(received), 16535) - self.assertEqual(int(received), 16502) + # < 8/20/15 16502 was returned + #self.assertEqual(int(received), 16502) + self.assertEqual(int(received), 16440) self.assertRaises(Exception, web.DataReader, 
"NON EXISTENT SERIES", 'fred', start, end) @@ -497,7 +499,7 @@ def test_fred_nan(self): df = web.DataReader("DFII5", "fred", start, end) assert pd.isnull(df.ix['2010-01-01'][0]) - def test_fred_parts(self): + def test_fred_parts(self): # pragma: no cover raise nose.SkipTest('buggy as of 2/18/14; maybe a data revision?') start = datetime(2010, 1, 1) @@ -522,9 +524,8 @@ def test_invalid_series(self): name = "NOT A REAL SERIES" self.assertRaises(Exception, web.get_data_fred, name) - def test_fred_multi(self): + def test_fred_multi(self): # pragma: no cover raise nose.SkipTest('buggy as of 2/18/14; maybe a data revision?') - names = ['CPIAUCSL', 'CPALTT01USQ661S', 'CPILFESL'] start = datetime(2010, 1, 1) end = datetime(2013, 1, 27) @@ -544,4 +545,4 @@ def test_fred_multi_bad_series(self): if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) + exit=False) # pragma: no cover \ No newline at end of file diff --git a/pandas_datareader/yahoo/__init__.py b/pandas_datareader/yahoo/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas_datareader/yahoo/actions.py b/pandas_datareader/yahoo/actions.py new file mode 100644 index 00000000..20024828 --- /dev/null +++ b/pandas_datareader/yahoo/actions.py @@ -0,0 +1,82 @@ +import time +import csv +from pandas import to_datetime, DataFrame +from pandas.io.common import urlopen +from pandas.util.testing import _network_error_classes +from pandas.compat import StringIO, bytes_to_str +from pandas_datareader.commons import _sanitize_dates + +_URL = 'http://ichart.finance.yahoo.com/x?' + + +def _get_data(symbol, start=None, end=None, retry_count=3, pause=0.001): + """ + Returns DataFrame of historical corporate actions (dividends and stock + splits) from symbols, over date range, start to end. All dates in the + resulting DataFrame correspond with dividend and stock split ex-dates. 
+ + Parameters + ---------- + sym : string with a single Single stock symbol (ticker). + start : string, (defaults to '1/1/2010') + Starting date, timestamp. Parses many different kind of date + representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980') + end : string, (defaults to today) + Ending date, timestamp. Same format as starting date. + retry_count : int, default 3 + Number of times to retry query request. + pause : int, default 0 + Time, in seconds, of the pause between retries. + """ + + start, end = _sanitize_dates(start, end) + url = (_URL + 's=%s' % symbol + \ + '&a=%s' % (start.month - 1) + \ + '&b=%s' % start.day + \ + '&c=%s' % start.year + \ + '&d=%s' % (end.month - 1) + \ + '&e=%s' % end.day + \ + '&f=%s' % end.year + \ + '&g=v') + + for _ in range(retry_count): + time.sleep(pause) + + try: + with urlopen(url) as resp: + lines = resp.read() + except _network_error_classes: + pass + else: + actions_index = [] + actions_entries = [] + + for line in csv.reader(StringIO(bytes_to_str(lines))): + # Ignore lines that aren't dividends or splits (Yahoo + # add a bunch of irrelevant fields.) + if len(line) != 3 or line[0] not in ('DIVIDEND', 'SPLIT'): + continue + + action, date, value = line + if action == 'DIVIDEND': + actions_index.append(to_datetime(date)) + actions_entries.append({ + 'action': action, + 'value': float(value) + }) + elif action == 'SPLIT' and ':' in value: + # Convert the split ratio to a fraction. For example a + # 4:1 split expressed as a fraction is 1/4 = 0.25. + denominator, numerator = value.split(':', 1) + split_fraction = float(numerator) / float(denominator) + + actions_index.append(to_datetime(date)) + actions_entries.append({ + 'action': action, + 'value': split_fraction + }) + + return DataFrame(actions_entries, index=actions_index) + + raise IOError("after %d tries, Yahoo! 
did not " \ + "return a 200 for url %r" % (retry_count, url)) diff --git a/pandas_datareader/yahoo/components.py b/pandas_datareader/yahoo/components.py new file mode 100644 index 00000000..c38ae4ad --- /dev/null +++ b/pandas_datareader/yahoo/components.py @@ -0,0 +1,57 @@ +from pandas import DataFrame +from pandas.io.common import urlopen + +_URL = 'http://download.finance.yahoo.com/d/quotes.csv?' + + +def _get_data(idx_sym): + """ + Returns DataFrame containing list of component information for + index represented in idx_sym from yahoo. Includes component symbol + (ticker), exchange, and name. + + Parameters + ---------- + idx_sym : str + Stock index symbol + Examples: + '^DJI' (Dow Jones Industrial Average) + '^NYA' (NYSE Composite) + '^IXIC' (NASDAQ Composite) + + See: http://finance.yahoo.com/indices for other index symbols + + Returns + ------- + idx_df : DataFrame + """ + stats = 'snx' + # URL of form: + # http://download.finance.yahoo.com/d/quotes.csv?s=@%5EIXIC&f=snxl1d1t1c1ohgv + url = _URL + 's={0}&f={1}&e=.csv&h={2}' + + idx_mod = idx_sym.replace('^', '@%5E') + url_str = url.format(idx_mod, stats, 1) + + idx_df = DataFrame() + mask = [True] + comp_idx = 1 + + # LOOP across component index structure, + # break when no new components are found + while True in mask: + url_str = url.format(idx_mod, stats, comp_idx) + with urlopen(url_str) as resp: + raw = resp.read() + lines = raw.decode('utf-8').strip().strip('"').split('"\r\n"') + lines = [line.strip().split('","') for line in lines] + + temp_df = DataFrame(lines, columns=['ticker', 'name', 'exchange']) + temp_df = temp_df.drop_duplicates() + temp_df = temp_df.set_index('ticker') + mask = ~temp_df.index.isin(idx_df.index) + + comp_idx = comp_idx + 50 + idx_df = idx_df.append(temp_df[mask]) + + return idx_df diff --git a/pandas_datareader/yahoo/daily.py b/pandas_datareader/yahoo/daily.py new file mode 100644 index 00000000..ff3951e0 --- /dev/null +++ b/pandas_datareader/yahoo/daily.py @@ -0,0 +1,108 @@ 
from pandas_datareader.commons import _retry_read_url
from pandas_datareader.commons import _sanitize_dates
from pandas_datareader.commons import _get_data_from

_URL = 'http://ichart.finance.yahoo.com/table.csv?'


def _get_data(symbols=None, start=None, end=None, retry_count=3,
              pause=0.001, adjust_price=False, ret_index=False,
              chunksize=25, interval='d'):
    """
    Returns DataFrame/Panel of historical stock prices from symbols, over date
    range, start to end. To avoid being penalized by Yahoo! Finance servers,
    pauses between downloading 'chunks' of symbols can be specified.

    Parameters
    ----------
    symbols : string, array-like object (list, tuple, Series), or DataFrame
        Single stock symbol (ticker), array-like object of symbols or
        DataFrame with index containing stock symbols.
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kind of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : float, default 0.001
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    adjust_price : bool, default False
        If True, adjusts all prices in hist_data ('Open', 'High', 'Low',
        'Close') based on 'Adj Close' price. Adds 'Adj_Ratio' column and drops
        'Adj Close'.
    ret_index : bool, default False
        If True, includes a simple return index 'Ret_Index' in hist_data.
    chunksize : int, default 25
        Number of symbols to download consecutively before initiating pause.
    interval : string, default 'd'
        Time interval code, valid values are 'd' for daily, 'w' for weekly,
        'm' for monthly and 'v' for dividend.

    Returns
    -------
    hist_data : DataFrame (str) or Panel (array-like object, DataFrame)

    Raises
    ------
    ValueError
        If `interval` is not one of 'd', 'w', 'm' or 'v'.
    """
    if interval not in ['d', 'w', 'm', 'v']:
        raise ValueError("Invalid interval: valid values are 'd', 'w', 'm' and 'v'")
    hist_data = _get_data_from(symbols, start, end, interval, retry_count,
                               pause, chunksize, _get_data_one)
    # The return index must be computed before price adjustment, because
    # _adjust_prices drops the 'Adj Close' item it is derived from.
    if ret_index:
        hist_data['Ret_Index'] = _calc_return_index(hist_data['Adj Close'])
    if adjust_price:
        hist_data = _adjust_prices(hist_data)
    return hist_data


def _adjust_prices(hist_data, price_list=None):
    """
    Return modified DataFrame or Panel with adjusted prices based on
    'Adj Close' price. Adds 'Adj_Ratio' column and removes 'Adj Close'.

    The input is not modified; a copy is returned. `price_list` names the
    items to rescale and defaults to ('Open', 'High', 'Low', 'Close').
    """
    if price_list is None:
        price_list = 'Open', 'High', 'Low', 'Close'
    adj_ratio = hist_data['Adj Close'] / hist_data['Close']

    data = hist_data.copy()
    for item in price_list:
        data[item] = hist_data[item] * adj_ratio
    data['Adj_Ratio'] = adj_ratio
    del data['Adj Close']
    return data


def _calc_return_index(price_df):
    """
    Return a returns index from a input price df or series. The entry just
    before the first valid return of each column (typically the NaN in the
    first row) is set to 1.

    Columns without any valid value (e.g. all-NaN placeholder data) are left
    untouched instead of raising.
    """
    # A Series is processed as a one-column frame and converted back at the
    # end, so the same positional logic serves both input kinds.
    is_series = getattr(price_df, 'ndim', 2) == 1
    prices = price_df.to_frame() if is_series else price_df

    df = prices.pct_change().add(1).cumprod()

    for pos in range(df.shape[1]):
        valid = df.iloc[:, pos].notnull().values
        if not valid.any():
            # Nothing to anchor the index on for this column.
            continue
        first_valid = valid.argmax()
        # Covers both a listing that starts with the frame (first_valid == 1)
        # and one that starts after the starting date of the index.
        if first_valid > 0:
            df.iloc[first_valid - 1, pos] = 1

    return df.iloc[:, 0] if is_series else df


def _get_data_one(sym, start, end, interval, retry_count, pause):
    """
    Get historical data for the given name from yahoo.
    Date format is datetime

    Returns a DataFrame.
    """
    start, end = _sanitize_dates(start, end)
    # Months are sent zero-based in the `a` and `d` query parameters.
    url = (_URL + 's=%s' % sym +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=%s' % interval +
           '&ignore=.csv')
    return _retry_read_url(url, retry_count, pause, 'Yahoo!')


# ===== pandas_datareader/yahoo/options.py =====
import warnings
import datetime as dt
import numpy as np
from pandas.io.html import read_html
from pandas import to_datetime
from pandas import concat, DatetimeIndex, Series
from pandas.tseries.offsets import MonthEnd
from pandas.util.testing import _network_error_classes
from pandas_datareader.commons import RemoteDataError

# Items needed for options class
# NOTE: these are evaluated once, when the module is imported.
CUR_MONTH = dt.datetime.now().month
CUR_YEAR = dt.datetime.now().year
CUR_DAY = dt.datetime.now().day


def _two_char(s):
    """Format `s` as a string left-padded with zeros to at least 2 chars."""
    return '{0:0>2}'.format(s)


class Options(object):
    """
    ***Experimental***
    This class fetches call/put data for a given stock/expiry month.

    It is instantiated with a string representing the ticker symbol.

    The class has the following methods:
        get_options_data:(month, year, expiry)
        get_call_data:(month, year, expiry)
        get_put_data: (month, year, expiry)
        get_near_stock_price(above_below, call, put, month, year, expiry)
        get_all_data(call, put)
        get_forward_data(months, call, put) (deprecated)

    Examples
    --------
    # Instantiate object with ticker
    >>> aapl = Options('aapl')

    # Fetch next expiry call data
    >>> calls = aapl.get_call_data()

    # Can now access aapl.calls instance variable
    >>> aapl.calls

    # Fetch next expiry put data
    >>> puts = aapl.get_put_data()

    # Can now access aapl.puts instance variable
    >>> aapl.puts

    # cut down the call data to be 3 below and 3 above the stock price.
    >>> cut_calls = aapl.get_near_stock_price(call=True, above_below=3)

    # Fetch call and put data with expiry from now to 8 months out
    >>> forward_data = aapl.get_forward_data(8, call=True, put=True)

    # Fetch all call and put data
    >>> all_data = aapl.get_all_data()
    """

    # Position of the calls/puts tables in the list returned by read_html.
    _TABLE_LOC = {'calls': 1, 'puts': 2}
    _OPTIONS_BASE_URL = 'http://finance.yahoo.com/q/op?s={sym}'
    _FINANCE_BASE_URL = 'http://finance.yahoo.com'

    def __init__(self, symbol):
        """ Instantiates options_data with a ticker saved as symbol """
        self.symbol = symbol.upper()

    def get_options_data(self, month=None, year=None, expiry=None):
        """
        ***Experimental***
        Gets call/put data for the stock with the expiration data in the
        given month and year

        Parameters
        ----------
        month : number, int, optional(default=None)
            The month the options expire. This should be either 1 or 2
            digits.

        year : number, int, optional(default=None)
            The year the options expire. This should be a 4 digit int.

        expiry : date-like or convertible or list-like object, optional (default=None)
            The date (or dates) when options expire (defaults to current month)

        Returns
        -------
        pandas.DataFrame
            A DataFrame with requested options data.

            Index:
                Strike: Option strike, int
                Expiry: Option expiry, Timestamp
                Type: Call or Put, string
                Symbol: Option symbol as reported on Yahoo, string
            Columns:
                Last: Last option price, float
                Chg: Change from prior day, float
                Bid: Bid price, float
                Ask: Ask price, float
                Vol: Volume traded, int64
                Open_Int: Open interest, int64
                IsNonstandard: True if the the deliverable is not 100 shares, otherwise false
                Underlying: Ticker of the underlying security, string
                Underlying_Price: Price of the underlying security, float64
                Quote_Time: Time of the quote, Timestamp

        Notes
        -----
        Note: Format of returned data frame is dependent on Yahoo and may change.

        When called, this function will add instance variables named
        calls and puts. See the following example:

            >>> aapl = Options('aapl')  # Create object
            >>> aapl.calls  # will give an AttributeError
            >>> aapl.get_options_data()  # Get data and set ivars
            >>> aapl.calls  # Doesn't throw AttributeError

        Also note that aapl.calls and appl.puts will always be the calls
        and puts for the next expiry. If the user calls this method with
        a different expiry, the ivar will be named callsYYMMDD or putsYYMMDD,
        where YY, MM and DD are, respectively, two digit representations of
        the year, month and day for the expiry of the options.

        """
        return concat([f(month, year, expiry)
                       for f in (self.get_put_data,
                                 self.get_call_data)]).sortlevel()

    def _get_option_frames_from_yahoo(self, expiry):
        """
        Download the calls/puts frames for `expiry` and cache them on the
        instance under the attribute '_frames' + YYMMDD.
        """
        url = self._yahoo_url_from_expiry(expiry)
        option_frames = self._option_frames_from_url(url)
        frame_name = '_frames' + self._expiry_to_string(expiry)
        setattr(self, frame_name, option_frames)
        return option_frames

    @staticmethod
    def _expiry_to_string(expiry):
        """Return `expiry` formatted as a YYMMDD string."""
        m1 = _two_char(expiry.month)
        d1 = _two_char(expiry.day)
        return str(expiry.year)[-2:] + m1 + d1

    def _yahoo_url_from_expiry(self, expiry):
        """Return the absolute Yahoo URL of the options page for `expiry`."""
        try:
            expiry_links = self._expiry_links

        except AttributeError:
            _, expiry_links = self._get_expiry_dates_and_links()

        return self._FINANCE_BASE_URL + expiry_links[expiry]

    def _option_frames_from_url(self, url):
        """
        Read the options tables at `url` and return them processed, as
        {'calls': DataFrame, 'puts': DataFrame}.

        Also sets `underlying_price` and `quote_time` on the instance the
        first time it is called.

        Raises
        ------
        RemoteDataError
            If the page holds too few tables to contain calls and puts.
        """
        frames = read_html(url)
        nframes = len(frames)
        # _TABLE_LOC holds zero-based positions, so the highest position
        # requires one more table than its value.
        frames_req = max(self._TABLE_LOC.values()) + 1
        if nframes < frames_req:
            raise RemoteDataError("%s options tables found (%s expected)" % (nframes, frames_req))

        if not hasattr(self, 'underlying_price'):
            try:
                self.underlying_price, self.quote_time = self._underlying_price_and_time_from_url(url)
            except IndexError:
                # Raised when the expected page elements are not found.
                self.underlying_price, self.quote_time = np.nan, np.nan

        calls = frames[self._TABLE_LOC['calls']]
        puts = frames[self._TABLE_LOC['puts']]

        calls = self._process_data(calls, 'call')
        puts = self._process_data(puts, 'put')

        return {'calls': calls, 'puts': puts}

    def _underlying_price_and_time_from_url(self, url):
        """Return (underlying_price, quote_time) parsed from the page at `url`."""
        root = self._parse_url(url)
        underlying_price = self._underlying_price_from_root(root)
        quote_time = self._quote_time_from_root(root)
        return underlying_price, quote_time

    @staticmethod
    def _underlying_price_from_root(root):
        """
        Return the underlying price found in the parsed page `root` as a
        float, or NaN when the text cannot be converted.
        """
        underlying_price = root.xpath('.//*[@class="time_rtq_ticker Fz-30 Fw-b"]')[0]\
            .getchildren()[0].text
        underlying_price = underlying_price.replace(',', '')  # GH11

        try:
            underlying_price = float(underlying_price)
        except ValueError:
            underlying_price = np.nan

        return underlying_price

    @staticmethod
    def _quote_time_from_root(root):
        """
        Return the quote time found in the parsed page `root` as a datetime
        dated with CUR_YEAR/CUR_MONTH/CUR_DAY, or NaN when it cannot be parsed.
        """
        # Gets the time of the quote, note this is actually the time of the underlying price.
        try:
            quote_time_text = root.xpath('.//*[@class="time_rtq Fz-m"]')[0].getchildren()[1].getchildren()[0].text
            # TODO: Enable timezone matching when strptime can match EST with %Z
            quote_time_text = quote_time_text.split(' ')[0]
            quote_time = dt.datetime.strptime(quote_time_text, "%I:%M%p")
            quote_time = quote_time.replace(year=CUR_YEAR, month=CUR_MONTH, day=CUR_DAY)
        except ValueError:
            quote_time = np.nan

        return quote_time

    def _get_option_data(self, expiry, name):
        """
        Return the `name` ('calls' or 'puts') frame for `expiry`, downloading
        it if it is not cached, and store it as an instance attribute.

        The attribute is called `name` for the first available expiry and
        `name` + YYMMDD for any other expiry.
        """
        frame_name = '_frames' + self._expiry_to_string(expiry)

        try:
            frames = getattr(self, frame_name)
        except AttributeError:
            frames = self._get_option_frames_from_yahoo(expiry)

        option_data = frames[name]
        if expiry != self.expiry_dates[0]:
            name += self._expiry_to_string(expiry)

        setattr(self, name, option_data)
        return option_data

    def get_call_data(self, month=None, year=None, expiry=None):
        """
        ***Experimental***
        Gets call data for the stock with the expiration data in the
        given month and year

        Parameters
        ----------
        month : number, int, optional(default=None)
            The month the options expire. This should be either 1 or 2
            digits.

        year : number, int, optional(default=None)
            The year the options expire. This should be a 4 digit int.

        expiry : date-like or convertible or list-like object, optional (default=None)
            The date (or dates) when options expire (defaults to current month)

        Returns
        -------
        call_data: pandas.DataFrame
            A DataFrame with requested options data.

            Index:
                Strike: Option strike, int
                Expiry: Option expiry, Timestamp
                Type: Call or Put, string
                Symbol: Option symbol as reported on Yahoo, string
            Columns:
                Last: Last option price, float
                Chg: Change from prior day, float
                Bid: Bid price, float
                Ask: Ask price, float
                Vol: Volume traded, int64
                Open_Int: Open interest, int64
                IsNonstandard: True if the the deliverable is not 100 shares, otherwise false
                Underlying: Ticker of the underlying security, string
                Underlying_Price: Price of the underlying security, float64
                Quote_Time: Time of the quote, Timestamp

        Notes
        -----
        Note: Format of returned data frame is dependent on Yahoo and may change.

        When called, this function will add instance variables named
        calls. See the following example:

            >>> aapl = Options('aapl')  # Create object
            >>> aapl.calls  # will give an AttributeError
            >>> aapl.get_call_data()  # Get data and set ivars
            >>> aapl.calls  # Doesn't throw AttributeError

        Also note that aapl.calls will always be the calls for the next
        expiry. If the user calls this method with a different month
        or year, the ivar will be named callsYYMMDD where YY, MM and DD are,
        respectively, two digit representations of the year, month and day
        for the expiry of the options.
        """
        expiry = self._try_parse_dates(year, month, expiry)
        return self._get_data_in_date_range(expiry, call=True, put=False)

    def get_put_data(self, month=None, year=None, expiry=None):
        """
        ***Experimental***
        Gets put data for the stock with the expiration data in the
        given month and year

        Parameters
        ----------
        month : number, int, optional(default=None)
            The month the options expire. This should be either 1 or 2
            digits.

        year : number, int, optional(default=None)
            The year the options expire. This should be a 4 digit int.

        expiry : date-like or convertible or list-like object, optional (default=None)
            The date (or dates) when options expire (defaults to current month)

        Returns
        -------
        put_data: pandas.DataFrame
            A DataFrame with requested options data.

            Index:
                Strike: Option strike, int
                Expiry: Option expiry, Timestamp
                Type: Call or Put, string
                Symbol: Option symbol as reported on Yahoo, string
            Columns:
                Last: Last option price, float
                Chg: Change from prior day, float
                Bid: Bid price, float
                Ask: Ask price, float
                Vol: Volume traded, int64
                Open_Int: Open interest, int64
                IsNonstandard: True if the the deliverable is not 100 shares, otherwise false
                Underlying: Ticker of the underlying security, string
                Underlying_Price: Price of the underlying security, float64
                Quote_Time: Time of the quote, Timestamp

        Notes
        -----
        Note: Format of returned data frame is dependent on Yahoo and may change.

        When called, this function will add instance variables named
        puts. See the following example:

            >>> aapl = Options('aapl')  # Create object
            >>> aapl.puts  # will give an AttributeError
            >>> aapl.get_put_data()  # Get data and set ivars
            >>> aapl.puts  # Doesn't throw AttributeError

        Also note that aapl.puts will always be the puts for the next
        expiry. If the user calls this method with a different month
        or year, the ivar will be named putsYYMMDD where YY, MM and DD are,
        respectively, two digit representations of the year, month and day
        for the expiry of the options.
        """
        expiry = self._try_parse_dates(year, month, expiry)
        return self._get_data_in_date_range(expiry, put=True, call=False)

    def get_near_stock_price(self, above_below=2, call=True, put=False,
                             month=None, year=None, expiry=None):
        """
        ***Experimental***
        Returns a data frame of options that are near the current stock price.

        Parameters
        ----------
        above_below : number, int, optional (default=2)
            The number of strike prices above and below the stock price that
            should be taken

        call : bool
            Tells the function whether or not it should be using calls

        put : bool
            Tells the function whether or not it should be using puts

        month : number, int, optional(default=None)
            The month the options expire. This should be either 1 or 2
            digits.

        year : number, int, optional(default=None)
            The year the options expire. This should be a 4 digit int.

        expiry : date-like or convertible or list-like object, optional (default=None)
            The date (or dates) when options expire (defaults to current month)

        Returns
        -------
        chopped: DataFrame
            The resultant DataFrame chopped down to be 2 * above_below + 1 rows
            desired. If there isn't data as far out as the user has asked for
            then only the rows that are available are returned.

        Note: Format of returned data frame is dependent on Yahoo and may change.

        """
        expiry = self._try_parse_dates(year, month, expiry)
        data = self._get_data_in_date_range(expiry, call=call, put=put)
        return self.chop_data(data, above_below, self.underlying_price)

    def chop_data(self, df, above_below=2, underlying_price=None):
        """
        Returns a data frame only options that are near the current stock price.

        Parameters
        ----------
        df : DataFrame
            Options data whose index has a 'Strike' level.
        above_below : int, optional (default=2)
            Number of rows to keep on each side of the first strike above
            the underlying price.
        underlying_price : float, optional
            Price to center on; falls back to `self.underlying_price` and
            then to NaN when not given.

        Returns
        -------
        DataFrame
            The chopped frame, or `df` unchanged when the price is NaN or
            not strictly between the lowest and highest strike.
        """

        if not underlying_price:
            try:
                underlying_price = self.underlying_price
            except AttributeError:
                underlying_price = np.nan

        strikes = df.index.get_level_values('Strike')
        max_strike = max(strikes)
        min_strike = min(strikes)

        if not np.isnan(underlying_price) and min_strike < underlying_price < max_strike:
            start_index = np.where(strikes > underlying_price)[0][0]

            # Clamp at zero: a negative start would be read as an offset from
            # the end of the frame and select the wrong rows (or none).
            first_row = max(start_index - above_below, 0)
            last_row = start_index + above_below + 1
            df = df.iloc[first_row:last_row].dropna(how='all')

        return df

    def _try_parse_dates(self, year, month, expiry):
        """
        Validates dates provided by user.  Ensures the user either provided both a month and a year or an expiry.

        Parameters
        ----------
        year : int
            Calendar year

        month : int
            Calendar month

        expiry : date-like or convertible, (preferred)
            Expiry date

        Returns
        -------
        list of expiry dates (datetime.date)

        Raises
        ------
        ValueError
            If only one of `year`/`month` is given without an `expiry`, or
            if no expiry matches the input.
        """

        # Checks if the user gave one of the month or the year but not both and did not provide an expiry:
        only_one_given = (month is None) != (year is None)
        if only_one_given and expiry is None:
            msg = "You must specify either (`year` and `month`) or `expiry` " \
                  "or none of these options for the next expiry."
            raise ValueError(msg)

        if expiry is not None:
            # A string is a single date, even though it is iterable on
            # Python 3.
            is_listlike = (hasattr(expiry, '__iter__') and
                           not isinstance(expiry, (str, bytes)))
            if is_listlike:
                expiry = [self._validate_expiry(exp) for exp in expiry]
            else:
                expiry = [self._validate_expiry(expiry)]

            if len(expiry) == 0:
                raise ValueError('No expiries available for given input.')

        elif year is None and month is None:
            # No arguments passed, provide next expiry
            year = CUR_YEAR
            month = CUR_MONTH
            expiry = dt.date(year, month, 1)
            expiry = [self._validate_expiry(expiry)]

        else:
            # Year and month passed, provide all expiries in that month
            expiry = [expiry for expiry in self.expiry_dates if expiry.year == year and expiry.month == month]
            if len(expiry) == 0:
                raise ValueError('No expiries available in %s-%s' % (year, month))

        return expiry

    def _validate_expiry(self, expiry):
        """Ensures that an expiry date has data available on Yahoo
        If the expiry date does not have options that expire on that day, return next expiry"""

        expiry_dates = self.expiry_dates
        expiry = to_datetime(expiry)
        if hasattr(expiry, 'date'):
            expiry = expiry.date()

        if expiry in expiry_dates:
            return expiry
        else:
            # Sort before building the index so the first match is the
            # earliest expiry on or after the requested date.
            index = DatetimeIndex(sorted(expiry_dates))
            return index[index.date >= expiry][0].date()

    def get_forward_data(self, months, call=True, put=False, near=False,
                         above_below=2):
        """
        ***Experimental***
        Gets either call, put, or both data for months starting in the current
        month and going out in the future a specified amount of time.

        Parameters
        ----------
        months : number, int
            How many months to go out in the collection of the data. This is
            inclusive.

        call : bool, optional (default=True)
            Whether or not to collect data for call options

        put : bool, optional (default=False)
            Whether or not to collect data for put options.

        near : bool, optional (default=False)
            Whether this function should get only the data near the
            current stock price. Uses Options.chop_data

        above_below : number, int, optional (default=2)
            The number of strike prices above and below the stock price that
            should be taken if the near option is set to True

        Returns
        -------
        pandas.DataFrame
            A DataFrame with requested options data.

            Index:
                Strike: Option strike, int
                Expiry: Option expiry, Timestamp
                Type: Call or Put, string
                Symbol: Option symbol as reported on Yahoo, string
            Columns:
                Last: Last option price, float
                Chg: Change from prior day, float
                Bid: Bid price, float
                Ask: Ask price, float
                Vol: Volume traded, int64
                Open_Int: Open interest, int64
                IsNonstandard: True if the the deliverable is not 100 shares, otherwise false
                Underlying: Ticker of the underlying security, string
                Underlying_Price: Price of the underlying security, float64
                Quote_Time: Time of the quote, Timestamp

                Note: Format of returned data frame is dependent on Yahoo and may change.

        """
        warnings.warn("get_forward_data() is deprecated", FutureWarning)
        end_date = dt.date.today() + MonthEnd(months)
        # Must be a list, not a generator: the dates are walked once per
        # requested option type.
        dates = [date for date in self.expiry_dates if date <= end_date.date()]
        data = self._get_data_in_date_range(dates, call=call, put=put)
        if near:
            data = self.chop_data(data, above_below=above_below)
        return data

    def get_all_data(self, call=True, put=True):
        """
        ***Experimental***
        Gets either call, put, or both data for all available months starting
        in the current month.

        Parameters
        ----------
        call : bool, optional (default=True)
            Whether or not to collect data for call options

        put : bool, optional (default=True)
            Whether or not to collect data for put options.

        Returns
        -------
        pandas.DataFrame
            A DataFrame with requested options data.

            Index:
                Strike: Option strike, int
                Expiry: Option expiry, Timestamp
                Type: Call or Put, string
                Symbol: Option symbol as reported on Yahoo, string
            Columns:
                Last: Last option price, float
                Chg: Change from prior day, float
                Bid: Bid price, float
                Ask: Ask price, float
                Vol: Volume traded, int64
                Open_Int: Open interest, int64
                IsNonstandard: True if the the deliverable is not 100 shares, otherwise false
                Underlying: Ticker of the underlying security, string
                Underlying_Price: Price of the underlying security, float64
                Quote_Time: Time of the quote, Timestamp

        Note: Format of returned data frame is dependent on Yahoo and may change.

        """

        try:
            expiry_dates = self.expiry_dates
        except AttributeError:
            expiry_dates, _ = self._get_expiry_dates_and_links()

        return self._get_data_in_date_range(dates=expiry_dates, call=call, put=put)

    def _get_data_in_date_range(self, dates, call=True, put=True):
        """
        Return the concatenated, index-sorted calls and/or puts frames for
        every expiry in `dates`, using frames cached on the instance where
        they exist.
        """
        # Materialize once so one-shot iterables survive the outer loop.
        dates = list(dates)

        to_ret = Series({'calls': call, 'puts': put})
        to_ret = to_ret[to_ret].index
        data = []

        for name in to_ret:
            for expiry_date in dates:
                nam = name + self._expiry_to_string(expiry_date)
                try:  # Try to access on the instance
                    frame = getattr(self, nam)
                except AttributeError:
                    frame = self._get_option_data(expiry=expiry_date, name=name)
                data.append(frame)

        return concat(data).sortlevel()

    @property
    def expiry_dates(self):
        """
        Returns a list of available expiry dates
        """
        try:
            expiry_dates = self._expiry_dates
        except AttributeError:
            expiry_dates, _ = self._get_expiry_dates_and_links()
        return expiry_dates

    def _get_expiry_dates_and_links(self):
        """
        Gets available expiry dates.

        Returns
        -------
        Tuple of:
            List of datetime.date objects
            Dict of datetime.date objects as keys and corresponding links

        Raises
        ------
        RemoteDataError
            If no expiry dates are found on the page.
        """

        url = self._OPTIONS_BASE_URL.format(sym=self.symbol)
        root = self._parse_url(url)

        try:
            links = root.xpath('//*[@id="options_menu"]/form/select/option')
        except IndexError:
            raise RemoteDataError('Expiry dates not available')

        expiry_dates = [dt.datetime.strptime(element.text, "%B %d, %Y").date() for element in links]
        links = [element.attrib['data-selectbox-link'] for element in links]

        if len(expiry_dates) == 0:
            raise RemoteDataError('Data not available')

        expiry_links = dict(zip(expiry_dates, links))
        # Cached for the expiry_dates property and _yahoo_url_from_expiry.
        self._expiry_links = expiry_links
        self._expiry_dates = expiry_dates
        return expiry_dates, expiry_links

    def _parse_url(self, url):
        """
        Downloads and parses a URL, returns xml root.

        Raises
        ------
        ImportError
            If lxml is not installed.
        RemoteDataError
            If the URL cannot be parsed or the parsed document has no root.
        """
        try:
            from lxml.html import parse
        except ImportError:
            raise ImportError("Please install lxml if you want to use the "
                              "{0!r} class".format(self.__class__.__name__))
        try:
            doc = parse(url)
        except _network_error_classes:
            raise RemoteDataError("Unable to parse URL "
                                  "{0!r}".format(url))
        else:
            root = doc.getroot()
            if root is None:
                raise RemoteDataError("Parsed URL {0!r} has no root "
                                      "element".format(url))
        return root

    def _process_data(self, frame, type):
        """
        Renames the columns of a raw options table and adds the columns
        Root, Expiry, IsNonstandard (ie: deliverable is not 100 shares),
        Underlying, Underlying_Price, Quote_Time and Type, then indexes the
        frame by Strike, Expiry, Type and Symbol.

        The frame is modified in place and returned.
        """
        frame.columns = ['Strike', 'Symbol', 'Last', 'Bid', 'Ask', 'Chg', 'PctChg', 'Vol', 'Open_Int', 'IV']
        # Dropping the last 9 characters leaves root + 6-digit expiry.
        frame["Rootexp"] = frame.Symbol.str[0:-9]
        frame["Root"] = frame.Rootexp.str[0:-6]
        frame["Expiry"] = to_datetime(frame.Rootexp.str[-6:])
        # Removes dashes in equity ticker to map to option ticker.
        # Ex: BRK-B to BRKB140517C00100000
        frame["IsNonstandard"] = frame['Root'] != self.symbol.replace('-', '')
        del frame["Rootexp"]
        frame["Underlying"] = self.symbol
        try:
            frame['Underlying_Price'] = self.underlying_price
            frame["Quote_Time"] = self.quote_time
        except AttributeError:
            frame['Underlying_Price'] = np.nan
            frame["Quote_Time"] = np.nan
        frame['Type'] = type
        frame.set_index(['Strike', 'Expiry', 'Type', 'Symbol'], inplace=True)

        return frame


# ===== pandas_datareader/yahoo/quotes.py =====
from collections import defaultdict
import pandas.compat as compat
from pandas.io.common import urlopen
from pandas import DataFrame

_yahoo_codes = {'symbol': 's', 'last': 'l1', 'change_pct': 'p2', 'PE': 'r',
                'time': 't1', 'short_ratio': 's7'}


_URL = 'http://finance.yahoo.com/d/quotes.csv?'


def _get_data(symbols):
    """
    Get current yahoo quote

    Parameters
    ----------
    symbols : string or iterable of strings
        A single symbol, or several symbols which are joined with '+'.

    Returns a DataFrame indexed by symbol, with one column for every other
    key of `_yahoo_codes`.
    """
    if isinstance(symbols, compat.string_types):
        sym_list = symbols
    else:
        sym_list = '+'.join(symbols)

    # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm
    request = ''.join(compat.itervalues(_yahoo_codes))  # code request string
    header = list(_yahoo_codes.keys())

    data = defaultdict(list)

    url_str = _URL + 's=%s&f=%s' % (sym_list, request)

    with urlopen(url_str) as url:
        lines = url.readlines()

    for line in lines:
        text = line.decode('utf-8').strip()
        if not text:
            # A blank line holds no fields and would leave ragged columns.
            continue
        fields = text.split(',')
        for i, field in enumerate(fields):
            if field[-2:] == '%"':
                # Quoted percentage, e.g. "+0.50%"
                v = float(field.strip('"%'))
            elif field[:1] == '"':
                # Slice instead of index so an empty field does not raise.
                v = field.strip('"')
            else:
                try:
                    v = float(field)
                except ValueError:
                    v = field
            data[header[i]].append(v)

    idx = data.pop('symbol')
    return DataFrame(data, index=idx)