1
+ import re
2
+ import time
3
+ import pandas as pd
1
4
from pandas_datareader .base import _DailyBaseReader
2
5
3
6
@@ -46,29 +49,39 @@ def __init__(self, symbols=None, start=None, end=None, retry_count=3,
46
49
retry_count = retry_count ,
47
50
pause = pause , session = session ,
48
51
chunksize = chunksize )
52
+
53
+ self .headers = {
54
+ 'Connection' : 'keep-alive' ,
55
+ 'Expires' : str (- 1 ),
56
+ 'Upgrade-Insecure-Requests' : str (1 ),
57
+ # Google Chrome:
58
+ 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36' # noqa
59
+ }
60
+
49
61
self .adjust_price = adjust_price
50
62
self .ret_index = ret_index
51
63
52
64
if interval not in ['d' , 'w' , 'm' , 'v' ]:
53
65
raise ValueError ("Invalid interval: valid values are "
54
66
"'d', 'w', 'm' and 'v'" )
55
- self .interval = interval
67
+ self .interval = '1' + interval
68
+ # self.crumb = '64ZkTeri7Xq'
69
+ self .crumb = self ._get_crumb (retry_count )
56
70
57
71
@property
def url(self):
    """Yahoo Finance v7 CSV download endpoint for this reader's symbol.

    Returns
    -------
    str
        The per-symbol download URL; query parameters (periods, interval,
        crumb) are supplied separately by ``_get_params``.
    """
    # NOTE(review): assumes self.symbols is a single ticker string — a
    # list/Series would render as its repr in the URL; confirm at call site.
    return 'https://query1.finance.yahoo.com/v7/finance/download/{}'.format(self.symbols)  # noqa
60
74
61
75
def _get_params (self , symbol ):
76
+ unix_start = int (time .mktime (self .start .timetuple ()))
77
+ unix_end = int (time .mktime (self .end .timetuple ()))
78
+
62
79
params = {
63
- 's' : symbol ,
64
- 'a' : self .start .month - 1 ,
65
- 'b' : self .start .day ,
66
- 'c' : self .start .year ,
67
- 'd' : self .end .month - 1 ,
68
- 'e' : self .end .day ,
69
- 'f' : self .end .year ,
70
- 'g' : self .interval ,
71
- 'ignore' : '.csv'
80
+ 'period1' : unix_start ,
81
+ 'period2' : unix_end ,
82
+ 'interval' : self .interval ,
83
+ 'events' : 'history' ,
84
+ 'crumb' : self .crumb
72
85
}
73
86
return params
74
87
@@ -79,8 +92,30 @@ def read(self):
79
92
df ['Ret_Index' ] = _calc_return_index (df ['Adj Close' ])
80
93
if self .adjust_price :
81
94
df = _adjust_prices (df )
95
+ temp = pd .date_range (self .start , self .end , None , self .interval )
82
96
return df
83
97
98
+ def _get_crumb (self , retries ):
99
+ # Scrape a history page for a valid crumb ID:
100
+ tu = "https://finance.yahoo.com/quote/{}/history" .format (self .symbols )
101
+ response = self ._get_response (tu ,
102
+ params = self .params , headers = self .headers )
103
+ out = str (self ._sanitize_response (response ))
104
+ # Matches: {"crumb":"AlphaNumeric"}
105
+ regex = re .search (r'{"crumb" ?: ?"([A-Za-z0-9.]{11,})"}' , out )
106
+
107
+ try :
108
+ crumbs = regex .groups ()
109
+ except :
110
+ # It is possible we hit a 401 with frequent requests. Cool-off:
111
+ if retries > 0 :
112
+ time .sleep (2 )
113
+ retries -= 1
114
+ crumbs = [self ._get_crumb (retries )]
115
+ raise OSError ("Unable to retrieve Yahoo breadcrumb, exiting." )
116
+
117
+ return crumbs [0 ]
118
+
84
119
85
120
def _adjust_prices (hist_data , price_list = None ):
86
121
"""
0 commit comments