Skip to content

Commit 9978db5

Browse files
committed
Added EDGAR Index retrieval, with docs and (some) tests.
1 parent c0d205e commit 9978db5

File tree

8 files changed

+91
-5
lines changed

8 files changed

+91
-5
lines changed

docs/source/remote_data.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,3 +396,16 @@ You can specify dataset ID "tran_sf_railac" to get corresponding data via ``Data
396396
397397
df = web.DataReader("tran_sf_railac", 'eurostat')
398398
df
399+
400+
.. _remote_data.edgar:
401+
402+
EDGAR Index
403+
===========
404+
405+
Company filing index from EDGAR (SEC).
406+
407+
.. ipython:: python
408+
409+
import pandas_datareader.data as web
410+
ed = web.DataReader('full', 'edgar-index')
411+
ed[:5]

docs/source/whatsnew/v0.2.2.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ New features
1919
~~~~~~~~~~~~
2020

2121
- ``DataReader`` now supports dividend only pulls from Yahoo! Finance, see :ref:`here<remote_data.yahoo>` (:issue:`138`).
22+
- ``DataReader`` now supports SEC EDGAR full (current quarter) index retrieval, see :ref:`here<remote_data.edgar>` (:issue:`143`).
2223

2324
.. _whatsnew_022.api_breaking:
2425

pandas_datareader/base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616

1717
from pandas_datareader._utils import RemoteDataError, SymbolWarning
1818

19+
import requests_ftp
20+
requests_ftp.monkeypatch_session()
21+
1922

2023
class _BaseReader(object):
2124

pandas_datareader/data.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from pandas_datareader.fred import FredReader
1818
from pandas_datareader.famafrench import FamaFrenchReader
1919
from pandas_datareader.oecd import OECDReader
20+
from pandas_datareader.edgar import EdgarIndexReader
2021

2122

2223
def get_data_fred(*args, **kwargs):
@@ -43,8 +44,8 @@ def DataReader(name, data_source=None, start=None, end=None,
4344
"""
4445
Imports data from a number of online sources.
4546
46-
Currently supports Yahoo! Finance, Google Finance, St. Louis FED (FRED)
47-
and Kenneth French's data library.
47+
Currently supports Yahoo! Finance, Google Finance, St. Louis FED (FRED),
48+
Kenneth French's data library, and the SEC's EDGAR Index.
4849
4950
Parameters
5051
----------
@@ -53,7 +54,7 @@ def DataReader(name, data_source=None, start=None, end=None,
5354
accept a list of names.
5455
data_source: {str, None}
5556
the data source ("yahoo", "yahoo-actions", "yahoo-dividends",
56-
"google", "fred", or "ff")
57+
"google", "fred", "ff", or "edgar-index")
5758
start : {datetime, None}
5859
left boundary for range (defaults to 1/1/2010)
5960
end : {datetime, None}
@@ -86,6 +87,9 @@ def DataReader(name, data_source=None, start=None, end=None,
8687
ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
8788
ff = DataReader("6_Portfolios_2x3", "famafrench")
8889
ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")
90+
91+
# Data from EDGAR index
92+
ed = DataReader("full", "edgar-index")
8993
"""
9094
if data_source == "yahoo":
9195
return YahooDailyReader(symbols=name, start=start, end=end,
@@ -127,6 +131,10 @@ def DataReader(name, data_source=None, start=None, end=None,
127131
return EurostatReader(symbols=name, start=start, end=end,
128132
retry_count=retry_count, pause=pause,
129133
session=session).read()
134+
elif data_source == "edgar-index":
135+
return EdgarIndexReader(symbols=name, start=start, end=end,
136+
retry_count=retry_count, pause=pause,
137+
session=session).read()
130138
else:
131139
raise NotImplementedError(
132140
"data_source=%r is not implemented" % data_source)

pandas_datareader/edgar.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from pandas import read_csv
2+
from pandas.io.common import ZipFile
3+
from pandas.compat import StringIO
4+
from pandas.compat import BytesIO
5+
6+
from pandas_datareader.base import _BaseReader
7+
8+
9+
_URL_FULL = 'ftp://ftp.sec.gov/edgar/full-index/master.zip'
10+
_COLUMNS = ['cik', 'company_name', 'form_type', 'date_filed', 'filename']
11+
12+
13+
class EdgarIndexReader(_BaseReader):
    """
    Reader for the SEC EDGAR master filing index.

    The index is downloaded as a zip archive from ``_URL_FULL``; the first
    member of the archive is decoded to text and parsed as a
    pipe-delimited table whose columns are named by ``_COLUMNS``.

    Returns
    -------
    edgar_index : pandas.DataFrame
        DataFrame of EDGAR master index.
    """

    @property
    def url(self):
        """Location of the zipped index; always the fixed ``_URL_FULL``."""
        return _URL_FULL

    def _read_zipfile(self, url):
        """Fetch the zip archive at ``url`` and return its first member as text."""
        payload = BytesIO(self._get_response(url).content)

        with ZipFile(payload, 'r') as archive:
            # Only the first member of the archive is read.
            first_member = archive.namelist()[0]
            raw_bytes = archive.open(first_member).read()
            text = raw_bytes.decode()

        return text

    def _read_one_data(self, url, params):
        """
        Download the index at ``url`` and parse it into a DataFrame.

        ``params`` is accepted but not used by this reader.
        """
        buffer = StringIO(self._read_zipfile(url))

        # The first 10 lines are skipped -- presumably the index file's
        # descriptive preamble and header rule (TODO confirm against the
        # current file layout).
        frame = read_csv(
            buffer,
            delimiter='|',
            header=None,
            index_col=False,
            skiprows=10,
            names=_COLUMNS,
            low_memory=False,
        )
        return frame

pandas_datareader/tests/test_data.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import pandas_datareader.data as web
2323
from pandas_datareader.data import (DataReader, GoogleDailyReader, YahooDailyReader,
2424
YahooQuotesReader, YahooActionReader,
25-
FredReader, OECDReader)
25+
FredReader, OECDReader, EdgarIndexReader)
2626
from pandas_datareader._utils import SymbolWarning, RemoteDataError
2727
from pandas_datareader.yahoo.quotes import _yahoo_codes
2828

@@ -524,6 +524,10 @@ def test_read_fred(self):
524524
vix = DataReader("VIXCLS", "fred")
525525
assert isinstance(vix, DataFrame)
526526

527+
def test_read_edgar_index(self):
    """DataReader returns a DataFrame for the "edgar-index" source."""
    index_frame = DataReader("full", "edgar-index")
    assert isinstance(index_frame, DataFrame)
530+
527531
def test_not_implemented(self):
528532
self.assertRaises(NotImplementedError, DataReader, "NA", "NA")
529533

pandas_datareader/tests/test_edgar.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import nose
2+
import pandas.util.testing as tm
3+
4+
import pandas_datareader.data as web
5+
6+
7+
class TestEdgarIndex(tm.TestCase):
    """Network smoke test for the EDGAR index reader."""

    def test_get_index(self):
        """The downloaded index should contain more than 1000 rows."""
        ed = web.DataReader('full', 'edgar-index')
        # The comparison must be applied to the row count. The previous
        # ``len(ed > 1000)`` took the length of an element-wise comparison
        # result, which is truthy for any non-empty frame.
        assert len(ed) > 1000
11+
12+
if __name__ == '__main__':
    # Run this module's tests under nose when executed as a script.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def readme():
2424
return f.read()
2525

2626
INSTALL_REQUIRES = (
27-
['pandas', 'requests', 'requests-file']
27+
['pandas', 'requests', 'requests-file', 'requests-ftp']
2828
)
2929

3030
setup(

0 commit comments

Comments
 (0)