From 39d98968055e5be7fc211cf89ad01b8c46a9c760 Mon Sep 17 00:00:00 2001 From: Tomasz Chodakowski Date: Mon, 31 Aug 2020 23:14:52 +0100 Subject: [PATCH 1/3] added csv response handling for tiingo apis --- pandas_datareader/base.py | 4 +- pandas_datareader/tests/test_tiingo.py | 21 ++++-- pandas_datareader/tiingo.py | 97 +++++++++++++++++++------- 3 files changed, 88 insertions(+), 34 deletions(-) diff --git a/pandas_datareader/base.py b/pandas_datareader/base.py index 4d4148df..251ba8cb 100644 --- a/pandas_datareader/base.py +++ b/pandas_datareader/base.py @@ -111,11 +111,11 @@ def _read_one_data(self, url, params): raise NotImplementedError(self._format) return self._read_lines(out) - def _read_url_as_StringIO(self, url, params=None): + def _read_url_as_StringIO(self, url, params=None, headers=None): """ Open url (and retry) """ - response = self._get_response(url, params=params) + response = self._get_response(url, params=params, headers=headers) text = self._sanitize_response(response) out = StringIO() if len(text) == 0: diff --git a/pandas_datareader/tests/test_tiingo.py b/pandas_datareader/tests/test_tiingo.py index a3f559ae..87307c18 100644 --- a/pandas_datareader/tests/test_tiingo.py +++ b/pandas_datareader/tests/test_tiingo.py @@ -26,9 +26,18 @@ def symbols(request): return request.param +formats = ["csv", "json"] +format_ids = list(map(str, formats)) + + +@pytest.fixture(params=formats, ids=format_ids) +def formats(request): + return request.param + + @pytest.mark.skipif(TEST_API_KEY is None, reason="TIINGO_API_KEY not set") -def test_tiingo_quote(symbols): - df = TiingoQuoteReader(symbols=symbols).read() +def test_tiingo_quote(symbols, formats): + df = TiingoQuoteReader(symbols=symbols, response_format=formats).read() assert isinstance(df, pd.DataFrame) if isinstance(symbols, str): symbols = [symbols] @@ -36,8 +45,8 @@ def test_tiingo_quote(symbols): @pytest.mark.skipif(TEST_API_KEY is None, reason="TIINGO_API_KEY not set") -def test_tiingo_historical(symbols): - df = TiingoDailyReader(symbols=symbols).read() +def test_tiingo_historical(symbols, formats): + df = TiingoDailyReader(symbols=symbols, response_format=formats).read() assert isinstance(df, pd.DataFrame) if isinstance(symbols, str): symbols = [symbols] @@ -45,8 +54,8 @@ def test_tiingo_historical(symbols): @pytest.mark.skipif(TEST_API_KEY is None, reason="TIINGO_API_KEY not set") -def test_tiingo_iex_historical(symbols): - df = TiingoIEXHistoricalReader(symbols=symbols).read() +def test_tiingo_iex_historical(symbols, formats): + df = TiingoIEXHistoricalReader(symbols=symbols, response_format=formats).read() df.head() assert isinstance(df, pd.DataFrame) if isinstance(symbols, str): diff --git a/pandas_datareader/tiingo.py b/pandas_datareader/tiingo.py index 1ab5af56..4fecf905 100644 --- a/pandas_datareader/tiingo.py +++ b/pandas_datareader/tiingo.py @@ -4,6 +4,7 @@ from pandas_datareader.base import _BaseReader +TIINGO_API_URL_BASE = "https://api.tiingo.com" def get_tiingo_symbols(): """ @@ -34,11 +35,11 @@ class TiingoIEXHistoricalReader(_BaseReader): ---------- symbols : {str, List[str]} String symbol or list of symbols - start : string, int, date, datetime, Timestamp + start : {string, int, date, datetime, Timestamp} Starting date. Parses many different kind of date - representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Defaults to - 20 years before current date. - end : string, int, date, datetime, Timestamp + representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). + Defaults to 20 years before current date. + end : {string, int, date, datetime, Timestamp} Ending date retry_count : int, default 3 Number of times to retry query request. @@ -47,9 +48,12 @@ class TiingoIEXHistoricalReader(_BaseReader): session : Session, default None requests.sessions.Session instance to be used freq : {str, None} - Re-sample frequency. Format is # + (min/hour); e.g. "15min" or "4hour". - If no value is provided, defaults to 5min. The minimum value is "1min". - Units in minutes (min) and hours (hour) are accepted. + Re-sample frequency. Format is # + (min/hour); e.g. "15min" or "4hour". + If no value is provided, defaults to 5min. The minimum value is "1min". + Units in minutes (min) and hours (hour) are accepted. + response_format : str, default 'json' + Format of response data returned by the underlying Tiingo REST API. + Acceptable values: 'json', 'csv'. api_key : str, optional Tiingo API key . If not provided the environmental variable TIINGO_API_KEY is read. The API key is *required*. @@ -65,6 +69,7 @@ def __init__( timeout=30, session=None, freq=None, + response_format='json', api_key=None, ): super().__init__( @@ -83,12 +88,13 @@ def __init__( "environmental variable TIINGO_API_KEY." ) self.api_key = api_key + self.response_format = response_format if response_format in ['json', 'csv'] else 'json' self._concat_axis = 0 @property def url(self): """API URL""" - _url = "https://api.tiingo.com/iex/{ticker}/prices" + _url = TIINGO_API_URL_BASE+"/iex/{ticker}/prices" return _url.format(ticker=self._symbol) @property @@ -98,7 +104,7 @@ def params(self): "startDate": self.start.strftime("%Y-%m-%d"), "endDate": self.end.strftime("%Y-%m-%d"), "resampleFreq": self.freq, - "format": "json", + "format": self.response_format, } def _get_crumb(self, *args): @@ -106,15 +112,20 @@ def _get_crumb(self, *args): def _read_one_data(self, url, params): """ read one data from specified URL """ + content_type = "application/json" if self.response_format == "json" else "text/csv" headers = { - "Content-Type": "application/json", + "Content-Type": content_type, "Authorization": "Token " + self.api_key, } - out = self._get_response(url, params=params, headers=headers).json() + out = None + if self.response_format == 'json': + out = self._get_response(url, params=params, headers=headers).json() + elif self.response_format == 'csv': + out = self._read_url_as_StringIO(url, params=params, headers=headers) return self._read_lines(out) def _read_lines(self, out): - df = pd.DataFrame(out) + df = pd.DataFrame(out) if self.response_format == 'json' else pd.read_csv(out) df["symbol"] = self._symbol df["date"] = pd.to_datetime(df["date"]) df = df.set_index(["symbol", "date"]) @@ -140,11 +151,11 @@ class TiingoDailyReader(_BaseReader): ---------- symbols : {str, List[str]} String symbol or list of symbols - start : string, int, date, datetime, Timestamp + start : {string, int, date, datetime, Timestamp} Starting date, timestamp. Parses many different kind of date representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980'). Default starting date is 5 years before current date. - end : string, int, date, datetime, Timestamp + end : {string, int, date, datetime, Timestamp} Ending date, timestamp. Same format as starting date. retry_count : int, default 3 Number of times to retry query request. @@ -154,6 +165,9 @@ class TiingoDailyReader(_BaseReader): requests.sessions.Session instance to be used freq : {str, None} Not used. + response_format : str, default 'json' + Format of response data returned by the underlying Tiingo REST API. + Acceptable values: 'json', 'csv'. api_key : str, optional Tiingo API key . If not provided the environmental variable TIINGO_API_KEY is read. The API key is *required*. @@ -169,6 +183,7 @@ def __init__( timeout=30, session=None, freq=None, + response_format='json', api_key=None, ): super(TiingoDailyReader, self).__init__( @@ -186,12 +201,13 @@ def __init__( "environmental variable TIINGO_API_KEY." ) self.api_key = api_key + self.response_format = response_format if response_format in ['json', 'csv'] else 'json' self._concat_axis = 0 @property def url(self): """API URL""" - _url = "https://api.tiingo.com/tiingo/daily/{ticker}/prices" + _url = TIINGO_API_URL_BASE+"/tiingo/daily/{ticker}/prices" return _url.format(ticker=self._symbol) @property @@ -200,7 +216,7 @@ def params(self): return { "startDate": self.start.strftime("%Y-%m-%d"), "endDate": self.end.strftime("%Y-%m-%d"), - "format": "json", + "format": self.response_format } def _get_crumb(self, *args): @@ -208,15 +224,20 @@ def _get_crumb(self, *args): def _read_one_data(self, url, params): """ read one data from specified URL """ + content_type = "application/json" if self.response_format == "json" else "text/csv" headers = { - "Content-Type": "application/json", + "Content-Type": content_type, "Authorization": "Token " + self.api_key, } - out = self._get_response(url, params=params, headers=headers).json() + out = None + if self.response_format == 'json': + out = self._get_response(url, params=params, headers=headers).json() + elif self.response_format == 'csv': + out = self._read_url_as_StringIO(url, params=params, headers=headers) return self._read_lines(out) def _read_lines(self, out): - df = pd.DataFrame(out) + df = pd.DataFrame(out) if self.response_format == 'json' else pd.read_csv(out) df["symbol"] = self._symbol df["date"] = pd.to_datetime(df["date"]) df = df.set_index(["symbol", "date"]) @@ -242,9 +263,9 @@ class TiingoMetaDataReader(TiingoDailyReader): ---------- symbols : {str, List[str]} String symbol or list of symbols - start : string, int, date, datetime, Timestamp + start : {string, int, date, datetime, Timestamp} Not used. - end : string, int, date, datetime, Timestamp + end : {string, int, date, datetime, Timestamp} Not used. retry_count : int, default 3 Number of times to retry query request. @@ -272,14 +293,15 @@ def __init__( api_key=None, ): super(TiingoMetaDataReader, self).__init__( - symbols, start, end, retry_count, pause, timeout, session, freq, api_key + symbols, start, end, retry_count, pause, timeout, session, freq, + api_key ) self._concat_axis = 1 @property def url(self): """API URL""" - _url = "https://api.tiingo.com/tiingo/daily/{ticker}" + _url = TIINGO_API_URL_BASE+"/tiingo/daily/{ticker}" return _url.format(ticker=self._symbol) @property @@ -300,9 +322,9 @@ class TiingoQuoteReader(TiingoDailyReader): ---------- symbols : {str, List[str]} String symbol or list of symbols - start : string, int, date, datetime, Timestamp + start : {string, int, date, datetime, Timestamp} Not used. - end : string, int, date, datetime, Timestamp + end : {string, int, date, datetime, Timestamp} Not used. retry_count : int, default 3 Number of times to retry query request. @@ -312,6 +334,9 @@ class TiingoQuoteReader(TiingoDailyReader): requests.sessions.Session instance to be used freq : {str, None} Not used. + response_format : str, default 'json' + Format of response data returned by the underlying Tiingo REST API. + Acceptable values: 'json', 'csv'. api_key : str, optional Tiingo API key . If not provided the environmental variable TIINGO_API_KEY is read. The API key is *required*. @@ -321,7 +346,27 @@ class TiingoQuoteReader(TiingoDailyReader): This is a special case of the daily reader which automatically selected the latest data available for each symbol. """ + def __init__( + self, + symbols, + start=None, + end=None, + retry_count=3, + pause=0.1, + timeout=30, + session=None, + freq=None, + response_format='json', + api_key=None + ): + super(TiingoQuoteReader, self).__init__( + symbols, start, end, retry_count, pause, timeout, session, freq, + response_format, api_key + ) @property def params(self): - return None + """Parameters to use in API calls""" + return { + "format": self.response_format + } From f7680fc92ec34b4b3378f15814a6d3db7675f754 Mon Sep 17 00:00:00 2001 From: Tomasz Chodakowski Date: Mon, 31 Aug 2020 23:46:06 +0100 Subject: [PATCH 2/3] formatting and whatsnew updates --- docs/source/whatsnew/v.0.9.1.txt | 6 ++- pandas_datareader/tests/test_tiingo.py | 4 +- pandas_datareader/tiingo.py | 74 ++++++++++++++++---------- 3 files changed, 52 insertions(+), 32 deletions(-) diff --git a/docs/source/whatsnew/v.0.9.1.txt b/docs/source/whatsnew/v.0.9.1.txt index 3ec1cb44..3e149ffd 100644 --- a/docs/source/whatsnew/v.0.9.1.txt +++ b/docs/source/whatsnew/v.0.9.1.txt @@ -19,8 +19,9 @@ Enhancements - Added read_all_boards() method for MOEX that returns data from every trading board (ver. 0.9.0 behaviour) - Docs for MOEX reedited +- Added CSV response serialisation for Tiingo APIs (:issue:`821`) -.. _whatsnew_080.bug_fixes: +.. _whatsnew_091.bug_fixes: Bug Fixes ~~~~~~~~~ @@ -29,4 +30,5 @@ Bug Fixes Contributors ~~~~~~~~~~~~ -- Dmitry Alekseev \ No newline at end of file +- Dmitry Alekseev +- Tomasz Chodakowski \ No newline at end of file diff --git a/pandas_datareader/tests/test_tiingo.py b/pandas_datareader/tests/test_tiingo.py index 87307c18..0fd27a63 100644 --- a/pandas_datareader/tests/test_tiingo.py +++ b/pandas_datareader/tests/test_tiingo.py @@ -83,7 +83,9 @@ def test_tiingo_no_api_key(symbols): @pytest.mark.skipif( - pd.__version__ == "0.19.2", reason="pandas 0.19.2 does not like this file format" + pd.__version__ == "0.19.2", + reason="pandas 0.19.2 does not like\ + this file format", ) def test_tiingo_stock_symbols(): sym = get_tiingo_symbols() diff --git a/pandas_datareader/tiingo.py b/pandas_datareader/tiingo.py index 4fecf905..cdfa38ac 100644 --- a/pandas_datareader/tiingo.py +++ b/pandas_datareader/tiingo.py @@ -6,6 +6,7 @@ TIINGO_API_URL_BASE = "https://api.tiingo.com" + def get_tiingo_symbols(): """ Get the set of stock symbols supported by Tiingo @@ -48,8 +49,9 @@ class TiingoIEXHistoricalReader(_BaseReader): session : Session, default None requests.sessions.Session instance to be used freq : {str, None} - Re-sample frequency. Format is # + (min/hour); e.g. "15min" or "4hour". - If no value is provided, defaults to 5min. The minimum value is "1min". + Re-sample frequency. Format is #min/hour; e.g. "15min" or "4hour". + If no value is provided, defaults to 5min. The minimum value is\ + "1min". Units in minutes (min) and hours (hour) are accepted. response_format : str, default 'json' Format of response data returned by the underlying Tiingo REST API. @@ -69,7 +71,7 @@ def __init__( timeout=30, session=None, freq=None, - response_format='json', + response_format="json", api_key=None, ): super().__init__( @@ -88,13 +90,15 @@ def __init__( "environmental variable TIINGO_API_KEY." ) self.api_key = api_key - self.response_format = response_format if response_format in ['json', 'csv'] else 'json' + self.response_format = ( + response_format if response_format in ["json", "csv"] else "json" + ) self._concat_axis = 0 @property def url(self): """API URL""" - _url = TIINGO_API_URL_BASE+"/iex/{ticker}/prices" + _url = TIINGO_API_URL_BASE + "/iex/{ticker}/prices" return _url.format(ticker=self._symbol) @property @@ -112,20 +116,22 @@ def _get_crumb(self, *args): def _read_one_data(self, url, params): """ read one data from specified URL """ - content_type = "application/json" if self.response_format == "json" else "text/csv" + content_type = ( + "application/json" if self.response_format == "json" else "text/csv" + ) headers = { "Content-Type": content_type, "Authorization": "Token " + self.api_key, } out = None - if self.response_format == 'json': - out = self._get_response(url, params=params, headers=headers).json() - elif self.response_format == 'csv': + if self.response_format == "json": + out = self._get_response(url, params=params, headers=headers).json() + elif self.response_format == "csv": out = self._read_url_as_StringIO(url, params=params, headers=headers) return self._read_lines(out) def _read_lines(self, out): - df = pd.DataFrame(out) if self.response_format == 'json' else pd.read_csv(out) + df = pd.DataFrame(out) if self.response_format == "json" else pd.read_csv(out) df["symbol"] = self._symbol df["date"] = pd.to_datetime(df["date"]) df = df.set_index(["symbol", "date"]) @@ -183,7 +189,7 @@ def __init__( timeout=30, session=None, freq=None, - response_format='json', + response_format="json", api_key=None, ): super(TiingoDailyReader, self).__init__( @@ -201,13 +207,15 @@ def __init__( "environmental variable TIINGO_API_KEY." ) self.api_key = api_key - self.response_format = response_format if response_format in ['json', 'csv'] else 'json' + self.response_format = ( + response_format if response_format in ["json", "csv"] else "json" + ) self._concat_axis = 0 @property def url(self): """API URL""" - _url = TIINGO_API_URL_BASE+"/tiingo/daily/{ticker}/prices" + _url = TIINGO_API_URL_BASE + "/tiingo/daily/{ticker}/prices" return _url.format(ticker=self._symbol) @property @@ -216,7 +224,7 @@ def params(self): return { "startDate": self.start.strftime("%Y-%m-%d"), "endDate": self.end.strftime("%Y-%m-%d"), - "format": self.response_format + "format": self.response_format, } def _get_crumb(self, *args): @@ -224,20 +232,22 @@ def _get_crumb(self, *args): def _read_one_data(self, url, params): """ read one data from specified URL """ - content_type = "application/json" if self.response_format == "json" else "text/csv" + content_type = ( + "application/json" if self.response_format == "json" else "text/csv" + ) headers = { "Content-Type": content_type, "Authorization": "Token " + self.api_key, } out = None - if self.response_format == 'json': - out = self._get_response(url, params=params, headers=headers).json() - elif self.response_format == 'csv': + if self.response_format == "json": + out = self._get_response(url, params=params, headers=headers).json() + elif self.response_format == "csv": out = self._read_url_as_StringIO(url, params=params, headers=headers) return self._read_lines(out) def _read_lines(self, out): - df = pd.DataFrame(out) if self.response_format == 'json' else pd.read_csv(out) + df = pd.DataFrame(out) if self.response_format == "json" else pd.read_csv(out) df["symbol"] = self._symbol df["date"] = pd.to_datetime(df["date"]) df = df.set_index(["symbol", "date"]) @@ -293,15 +303,14 @@ def __init__( api_key=None, ): super(TiingoMetaDataReader, self).__init__( - symbols, start, end, retry_count, pause, timeout, session, freq, - api_key + symbols, start, end, retry_count, pause, timeout, session, freq, api_key ) self._concat_axis = 1 @property def url(self): """API URL""" - _url = TIINGO_API_URL_BASE+"/tiingo/daily/{ticker}" + _url = TIINGO_API_URL_BASE + "/tiingo/daily/{ticker}" return _url.format(ticker=self._symbol) @property @@ -346,6 +355,7 @@ class TiingoQuoteReader(TiingoDailyReader): This is a special case of the daily reader which automatically selected the latest data available for each symbol. """ + def __init__( self, symbols, @@ -356,17 +366,23 @@ def __init__( timeout=30, session=None, freq=None, - response_format='json', - api_key=None + response_format="json", + api_key=None, ): super(TiingoQuoteReader, self).__init__( - symbols, start, end, retry_count, pause, timeout, session, freq, - response_format, api_key + symbols, + start, + end, + retry_count, + pause, + timeout, + session, + freq, + response_format, + api_key, ) @property def params(self): """Parameters to use in API calls""" - return { - "format": self.response_format - } + return {"format": self.response_format} From 1ffeeb11ccd0ff34b94ed426c7858560c546f7d9 Mon Sep 17 00:00:00 2001 From: Tomasz Chodakowski Date: Tue, 1 Sep 2020 12:37:51 +0100 Subject: [PATCH 3/3] Handle response_format value errors + cosmetics --- pandas_datareader/tiingo.py | 44 ++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/pandas_datareader/tiingo.py b/pandas_datareader/tiingo.py index cdfa38ac..6b375956 100644 --- a/pandas_datareader/tiingo.py +++ b/pandas_datareader/tiingo.py @@ -50,12 +50,13 @@ class TiingoIEXHistoricalReader(_BaseReader): requests.sessions.Session instance to be used freq : {str, None} Re-sample frequency. Format is #min/hour; e.g. "15min" or "4hour". - If no value is provided, defaults to 5min. The minimum value is\ - "1min". + If no value is provided, defaults to 5min. The minimum value is "1min". Units in minutes (min) and hours (hour) are accepted. response_format : str, default 'json' - Format of response data returned by the underlying Tiingo REST API. - Acceptable values: 'json', 'csv'. + Specifies format of response data returned by the underlying + Tiingo REST API. Acceptable values are 'json' and 'csv'. + Use of 'csv' results in smaller message payload, less bandwidth, + and may delay the time when client hits API's bandwidth limit. api_key : str, optional Tiingo API key . If not provided the environmental variable TIINGO_API_KEY is read. The API key is *required*. @@ -90,9 +91,9 @@ def __init__( "environmental variable TIINGO_API_KEY." ) self.api_key = api_key - self.response_format = ( - response_format if response_format in ["json", "csv"] else "json" - ) + if response_format not in ["json", "csv"]: + raise ValueError("Acceptable values are 'json' and 'csv'") + self.response_format = response_format self._concat_axis = 0 @property @@ -172,8 +173,10 @@ class TiingoDailyReader(_BaseReader): freq : {str, None} Not used. response_format : str, default 'json' - Format of response data returned by the underlying Tiingo REST API. - Acceptable values: 'json', 'csv'. + Specifies format of response data returned by the underlying + Tiingo REST API. Acceptable values are 'json' and 'csv'. + Use of 'csv' results in smaller message payload, less bandwidth, + and may delay the time when client hits API's bandwidth limit. api_key : str, optional Tiingo API key . If not provided the environmental variable TIINGO_API_KEY is read. The API key is *required*. @@ -207,9 +210,9 @@ def __init__( "environmental variable TIINGO_API_KEY." ) self.api_key = api_key - self.response_format = ( - response_format if response_format in ["json", "csv"] else "json" - ) + if response_format not in ["json", "csv"]: + raise ValueError("Acceptable values are 'json' and 'csv'") + self.response_format = response_format self._concat_axis = 0 @property @@ -303,7 +306,16 @@ def __init__( api_key=None, ): super(TiingoMetaDataReader, self).__init__( - symbols, start, end, retry_count, pause, timeout, session, freq, api_key + symbols, + start, + end, + retry_count, + pause, + timeout, + session, + freq, + response_format="json", + api_key=api_key, ) self._concat_axis = 1 @@ -344,8 +356,10 @@ class TiingoQuoteReader(TiingoDailyReader): freq : {str, None} Not used. response_format : str, default 'json' - Format of response data returned by the underlying Tiingo REST API. - Acceptable values: 'json', 'csv'. + Specifies format of response data returned by the underlying + Tiingo REST API. Acceptable values are 'json' and 'csv'. + Use of 'csv' results in smaller message payload, less bandwidth, + and may delay the time when client hits API's bandwidth limit. api_key : str, optional Tiingo API key . If not provided the environmental variable TIINGO_API_KEY is read. The API key is *required*.