Skip to content

Commit 843f135

Browse files
author
Sky NSS
committed
Updates to comments, etc + some change in username password logic
1 parent 0e10067 commit 843f135

File tree

6 files changed

+113
-45
lines changed

6 files changed

+113
-45
lines changed

doc/source/whatsnew/v0.21.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ Other Enhancements
4040
- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`)
4141
- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`)
4242
- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`)
43-
- :func:`read_csv` `read_html` `read_json` `read_html` now accept auth in url //<user>:<password>@<host>:<port>/<url-path>, or ``auth`` tuple (username, password) parameter
44-
- :func:`read_csv` `read_html` `read_json` `read_html` now accept ``verify_ssl`` False to disable https/ssl certificate verification (eg: self signed ssl certs in testing)
43+
- :func:`read_csv`, :func:`read_html` and :func:`read_json` now accept credentials embedded in the url (``//<user>:<password>@<host>:<port>/<url-path>``) or passed as an ``auth`` tuple ``(username, password)`` parameter
44+
- :func:`read_csv`, :func:`read_html` and :func:`read_json` now accept ``verify_ssl=False`` to disable https/ssl certificate verification (e.g. self-signed ssl certs in testing) (:issue:`16716`)
4545
.. _whatsnew_0210.api_breaking:
4646

4747
Backwards incompatible API changes

pandas/io/common.py

+64-22
Original file line numberDiff line numberDiff line change
@@ -190,12 +190,27 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
190190
----------
191191
filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
192192
or buffer
193-
supports 'https://username:[email protected]:port/aaa.csv'
193+
now supports 'https://<user>:<password>@<host>:<port>/<url-path>'
194+
195+
.. versionadded:: 0.21.0
196+
194197
encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
195-
compression:
196-
auth: (str,str), default None. (username, password) for HTTP(s) basic auth
197-
verify_ssl: Default True. If False, allow self signed and invalid SSL
198-
certificates for https
198+
199+
compression : string, default None
200+
201+
.. versionadded:: 0.18.1
202+
203+
auth : tuple, default None
204+
A tuple of strings (username, password) used for
205+
HTTP(s) basic auth: eg auth= ('roberto', 'panda$4life')
206+
207+
.. versionadded:: 0.21.0
208+
209+
verify_ssl : boolean, Default True
210+
If False, allow self signed and invalid SSL certificates for https
211+
212+
.. versionadded:: 0.21.0
213+
199214
200215
Returns
201216
-------
@@ -263,19 +278,34 @@ def split_auth_from_url(url_with_uname):
263278
264279
Parameters
265280
----------
266-
url_with_uname : a url that may or may not contain username and password
281+
url_with_uname : string
282+
a url that may or may not contain username and password
267283
see section 3.1 RFC 1738 https://www.ietf.org/rfc/rfc1738.txt
268284
//<user>:<password>@<host>:<port>/<url-path>
285+
286+
.. versionadded:: 0.21.0
269287
270288
Returns
271289
-------
272-
(username, password), url_no_usrpwd : username or "", password or "",
273-
url without username or password (if it contained it )
290+
(username, password), url_no_usrpwd : tuple, string Default ('', '') url
291+
A tuple with (username, pwd) pair and
292+
the url without username or password (if it contained them)
293+
294+
Raises
295+
------
296+
ValueError for empty url
274297
"""
298+
if not url_with_uname:
299+
msg = "Empty url: {_type}"
300+
raise ValueError(msg.format(_type=type(url_with_uname)))
275301
o = parse_url(url_with_uname)
276-
usrch = '{}:{}@{}'.format(o.username, o.password, o.hostname)
277-
url_no_usrpwd = url_with_uname.replace(usrch, o.hostname)
278-
return (o.username, o.password), url_no_usrpwd
302+
uname = o.username if o.username else ''
303+
pwd = o.password if o.password else ''
304+
url_no_usrpwd = url_with_uname
305+
if uname or pwd:
306+
usrch = '{}:{}@{}'.format(o.username, o.password, o.hostname)
307+
url_no_usrpwd = url_with_uname.replace(usrch, o.hostname)
308+
return (uname, pwd), url_no_usrpwd
279309

280310

281311
def get_urlopen_args(url_with_uname, auth=None, verify_ssl=True):
@@ -286,30 +316,42 @@ def get_urlopen_args(url_with_uname, auth=None, verify_ssl=True):
286316
287317
Parameters
288318
----------
289-
url_with_uname : a url that may or may not contain username and password
319+
url_with_uname : string
320+
a url that may or may not contain username and password
290321
see section 3.1 RFC 1738 https://www.ietf.org/rfc/rfc1738.txt
291322
//<user>:<password>@<host>:<port>/<url-path>
292-
auth : ( username/""/None, password/"", None) tuple
293-
verify_ssl: If False, SSL certificate verification is disabled.
323+
324+
.. versionadded:: 0.21.0
325+
326+
auth : tuple, default None
327+
A tuple of strings (username, password) used for
328+
HTTP(s) basic auth: eg auth= ('roberto', 'panda$4life')
329+
330+
.. versionadded:: 0.21.0
331+
332+
verify_ssl : boolean, Default True
333+
If False, allow self signed and invalid SSL certificates for https
334+
335+
.. versionadded:: 0.21.0
294336
295337
Returns
296338
-------
297339
Request, kwargs to pass to urlopen. kwargs may be {} or {'context': obj }
298340
"""
299341
uname = pwd = None
342+
url_no_usrpwd = url_with_uname
300343
if auth and len(auth) == 2:
301344
uname, pwd = auth
302345
if not uname and not pwd:
303346
(uname, pwd), url_no_usrpwd = split_auth_from_url(url_with_uname)
304-
else:
305-
url_no_usrpwd = url_with_uname
306-
upstr = '{}:{}'.format(uname, pwd)
307-
if compat.PY3:
308-
b64str = base64.b64encode(bytes(upstr, 'ascii')).decode('utf-8')
309-
else:
310-
b64str = base64.encodestring(upstr).replace('\n', '')
311347
req = Request(url_no_usrpwd)
312-
req.add_header("Authorization", "Basic {}".format(b64str))
348+
if uname or pwd:
349+
upstr = '{}:{}'.format(uname, pwd)
350+
if compat.PY3:
351+
b64str = base64.b64encode(bytes(upstr, 'ascii')).decode('utf-8')
352+
else:
353+
b64str = base64.encodestring(upstr).replace('\n', '')
354+
req.add_header("Authorization", "Basic {}".format(b64str))
313355
kwargs = {}
314356
if verify_ssl not in [None, True]:
315357
kwargs['context'] = ssl._create_unverified_context()

pandas/io/html.py

+19-5
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,16 @@ def _read(obj, auth=None, verify_ssl=None):
123123
Parameters
124124
----------
125125
obj : str, unicode, or file-like
126-
auth: None or (username, password) for http basic auth
127-
verify_ssl: Default True. Set to False to disable cert verification
126+
auth : tuple, default None
127+
A tuple of strings (username, password) used for
128+
HTTP(s) basic auth: eg auth= ('roberto', 'panda$4life')
129+
130+
.. versionadded:: 0.21.0
131+
132+
verify_ssl : boolean, Default True
133+
If False, allow self signed and invalid SSL certificates for https
134+
135+
.. versionadded:: 0.21.0
128136
Returns
129137
-------
130138
raw_text : str
@@ -866,10 +874,16 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
866874
867875
.. versionadded:: 0.19.0
868876
869-
auth: (str,str), default None. (username, password) for HTTP(s) basic auth
877+
auth : tuple, default None
878+
A tuple of strings (username, password) used for
879+
HTTP(s) basic auth: eg auth= ('roberto', 'panda$4life')
880+
881+
.. versionadded:: 0.21.0
882+
883+
verify_ssl : boolean, Default True
884+
If False, allow self signed and invalid SSL certificates for https
870885
871-
verify_ssl : bool, default True
872-
If False, ssl certificate is not verified (allow self signed SSL certs)
886+
.. versionadded:: 0.21.0
873887
874888
Returns
875889
-------

pandas/io/json/json.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,16 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
263263
264264
.. versionadded:: 0.19.0
265265
266-
auth: (str,str), default None. (username, password) for HTTP(s) basic auth
267-
verify_ssl: boolean, default None (True).
268-
If false, allow self siged SSL certificates
266+
auth : tuple, default None
267+
A tuple of strings (username, password) used for
268+
HTTP(s) basic auth: eg auth= ('roberto', 'panda$4life')
269+
270+
.. versionadded:: 0.21.0
271+
272+
verify_ssl : boolean, Default True
273+
If False, allow self signed and invalid SSL certificates for https
274+
275+
.. versionadded:: 0.21.0
269276
270277
Returns
271278
-------

pandas/io/parsers.py

-2
Original file line numberDiff line numberDiff line change
@@ -579,10 +579,8 @@ def parser_f(filepath_or_buffer,
579579
memory_map=False,
580580
float_precision=None,
581581

582-
# Basic auth (http/https) (username, password)
583582
auth=None,
584583

585-
# skip verify self signed SSL certificates
586584
verify_ssl=None):
587585

588586
# Alias sep -> delimiter.

pandas/tests/test_common.py

+18-11
Original file line numberDiff line numberDiff line change
@@ -189,17 +189,24 @@ def test_write_fspath_hdf5(self):
189189
expected = pd.read_hdf(string, key='bar')
190190

191191
tm.assert_frame_equal(result, expected)
192-
193-
def test_split_url_extract_uname_pwd(self):
194-
for url, uname, pwd, nurl in [('https://aaa:bbb@ccc.com:1010/aaa.txt',
195-
'aaa',
196-
'bbb',
197-
'https://ccc.com:1010/aaa.txt'
198-
)]:
199-
un, p, u = common.split_uname_from_url(url)
200-
assert u == nurl
201-
assert un == uname
202-
assert p == pwd
192+
193+
@pytest.mark.parametrize('url, uname, pwd, nurl', [
194+
('https://a1:[email protected]:101/f.csv',
195+
'aaa',
196+
'bbb',
197+
'https://cc.com:101/f.csv'
198+
),
199+
('https://ccc.com:1010/aaa.txt',
200+
'',
201+
'',
202+
'https://ccc.com:1010/aaa.txt'
203+
),
204+
])
205+
def test_split_url_extract_uname_pwd(self, url, uname, pwd, nurl):
206+
un, pw, ur = common.split_uname_from_url(url)
207+
assert ur == nurl
208+
assert un == uname
209+
assert pw == pwd
203210

204211

205212
class TestMMapWrapper(object):

0 commit comments

Comments
 (0)