pandas-dev
diff --git a/‎doc/source/whatsnew/v0.21.0.txt
+56 b/‎doc/source/whatsnew/v0.21.0.txt
+56
diff --git a/‎pandas/io/common.py
+117-3 b/‎pandas/io/common.py
+117-3
diff --git a/‎pandas/io/excel.py
+10-6 b/‎pandas/io/excel.py
+10-6
diff --git a/‎pandas/io/html.py
+28-9 b/‎pandas/io/html.py
+28-9
@@ -79,6 +79,62 @@ Other Enhancements
 - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`)
 - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
 
+.. _whatsnew_0210.enhancements.read_csv:
+
+``read_csv`` use `python-requests` (if installed) to support basic auth and much more
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If the `python-requests` library is installed, pandas uses it to fetch http(s) URLs; otherwise it keeps using urllib.
+:func:`read_csv`, :func:`read_html`, :func:`read_json` and :func:`read_excel` now accept an optional
+``url_params`` argument, which can be a ``requests.Session()`` object.
+
+
+.. code-block:: python
+
+  import pandas as pd
+  from requests import Session
+
+  # url_params is an optional parameter
+  df = pd.read_csv('https://uname:pwd@aa.com/bb.csv')  # the url can now contain username and pwd
+
+  # custom auth can be implemented
+  s = Session()
+  s.auth = MyAuthProvider('secret-key') # custom auth provider supported by requests
+  df = pd.read_csv(url, url_params=s)
+
+  # optional advanced scenarios: basic auth, timeout, disable ssl certificate verification, proxy, etc
+  s = Session()
+  s.auth = ('darth', 'l0rd')  # if user wants to perform basic auth; skip if the url itself contains username and pwd
+  s.timeout = (3.05, 27)                           # if user wants to modify timeout
+  s.verify = False                                      # if user wants to disable ssl cert verification
+  s.headers.update( {'User-Agent': 'Custom user agent'} )  # extensible to set any custom header needed
+  s.proxies = { 'http': 'http://a.com:100'}  # if user has proxies 
+  s.cert = '/path/client.cert'                     # if custom cert is needed
+  df = pd.read_csv('https://aa.com/bbb.csv', url_params=s)
+
+  # support verbs other than 'GET' such as 'POST' using requests.PreparedRequest
+  from requests import Request
+  r = Request('POST', 'http://joker:pwd@nlp_service.api/email_sentiment_extract?out=json')
+  prepped = r.prepare()
+  prepped.body = 'from: customer@example.com\nto: support@example.com\nsubject:Complaint letter\n\nbody: I am feeling :(' # multiple lines
+  df = pd.read_json( prepped) # minor update pandas code to detect type(Request) and submit it using requests session in lieu of URL.
+  """
+  [{
+    'from': 'customer@example.com',
+    'to': 'support@example.com',
+    'email_type': 'complaint',
+    'sentiment': 'unhappy',
+  }]
+  """
+  
+  # Event hooks callback (eg log http status codes or other callback related functions)
+  def print_http_status(r, *args, **kwargs):
+      print(r.status_code)
+      print(r.headers['Content-Length'])
+  s = Session()
+  s.hooks = dict(response=print_http_status)
+  df = pd.read_csv('https://aa.com/bbb.csv', url_params=s)
+
+
+
 .. _whatsnew_0210.api_breaking:
 
 Backwards incompatible API changes
 
@@ -47,6 +47,12 @@
 except:
     _PY_PATH_INSTALLED = False
 
+try:
+    import requests
+    _PY_REQUESTS_INSTALLED = True
+except ImportError:
+    _PY_REQUESTS_INSTALLED = False
+
 
 if compat.PY3:
     from urllib.request import urlopen, pathname2url
@@ -93,6 +99,11 @@ def __next__(self):
     BaseIterator.next = lambda self: self.__next__()
 
 
+def is_requests_pkg_avail():
+    """Report whether the optional ``requests`` import succeeded."""
+    available = _PY_REQUESTS_INSTALLED
+    return available
+
+
 def _is_url(url):
     """Check to see if a URL has a valid protocol.
 
@@ -176,8 +187,85 @@ def _stringify_path(filepath_or_buffer):
     return filepath_or_buffer
 
 
+def _is_handled_by_requests(o):
+    """Check whether `o` is a url whose scheme is http or https."""
+    url_check = _is_url(o)
+    if not url_check:
+        # hand back the falsy result of _is_url unchanged
+        return url_check
+    scheme = parse_url(o).scheme
+    return scheme in ['http', 'https']
+
+
+def gen_session(url_params):
+    """
+    Build the python-requests session used to fetch an http(s) url.
+
+    Parameters
+    ----------
+    url_params : dict or requests.Session, default None
+        - ``requests.Session`` (or a subclass): returned as-is, untouched.
+        - dict: a new session is created and the optional keys 'auth'
+          and 'verify' are applied to it.
+        - anything else (including None): a new default session is created.
+
+    Returns
+    -------
+    requests.Session
+    """
+    # isinstance (not an exact type match) so that Session subclasses are
+    # honoured instead of being silently replaced by a fresh,
+    # unauthenticated session.
+    if isinstance(url_params, requests.sessions.Session):
+        return url_params
+
+    s = requests.Session()
+    s.stream = True
+    # Setting accept-encoding to None for backwards compatibility with
+    # urlopen. ideally we want to allow gzip download
+    # urlopen doesnt decompress automatically, requests does.
+    s.headers.update({'Accept-Encoding': None})
+    if isinstance(url_params, dict):
+        auth = url_params.get('auth', None)
+        if auth:
+            s.auth = auth
+        # only an explicit ``False`` switches certificate verification off
+        if url_params.get('verify', True) is False:
+            s.verify = False
+    return s
+
+
+def fetch_url(url, url_params=None, skip_requests=False):
+    """
+    Download the content behind a url.
+
+    http(s) urls are fetched with python-requests when it is installed
+    (and ``skip_requests`` is False); everything else goes through urllib.
+    Note if requests library is used with a session created here, auto
+    gunzip is disabled for backwards compatibility of code with urlopen
+
+    Parameters
+    ----------
+    url : str
+        Could be:
+            'http://cnn.com'
+            'file:///home/sky/aaa.csv'
+
+    url_params : dict or requests.Session(), default None
+        A python dict containing:
+            'auth': tuple (str, str) eg (uname, pwd)
+            'auth': Any other auth object accepted by requests
+            'verify': boolean, Default True
+                 If False, allow self signed and invalid SSL cert for https
+        or
+        A python requests.Session object if http(s) path to enable basic auth
+        and many other scenarios that requests allows
+
+        .. versionadded:: 0.21.0
+
+    skip_requests : boolean, default False
+        for testing - disable `requests` library. Internal use only
+
+        .. versionadded:: 0.21.0
+
+    Returns
+    -------
+    (response, content_bytes) : tuple
+        The response object of the library that served the request and
+        the complete body as bytes. The body has already been read and the
+        underlying connection released, so only metadata such as
+        ``response.headers`` should be used from the response.
+
+    Raises
+    ------
+    ValueError if url_params specified without installed python-requests pkg
+        (or together with ``skip_requests=True``)
+    requests.HTTPError if requests is used and the server returns an
+        error status (via ``raise_for_status``)
+    """
+    use_requests = (is_requests_pkg_avail() and
+                    _is_handled_by_requests(url) and
+                    not skip_requests)
+    if use_requests:
+        s = gen_session(url_params)
+        try:
+            resp = s.get(url)
+            resp.raise_for_status()
+            content_bytes = resp.content
+        finally:
+            # Only close sessions created here; a caller-supplied session
+            # belongs to the caller and may be reused for further requests.
+            if s is not url_params:
+                s.close()
+        return resp, content_bytes
+
+    if url_params and (skip_requests or not is_requests_pkg_avail()):
+        msg = ('To utilize url_params, python-requests library is '
+               'required but not detected')
+        raise ValueError(msg)
+    resp = _urlopen(url)
+    try:
+        content_bytes = resp.read()
+    finally:
+        # The body is fully buffered, so release the connection/file handle.
+        resp.close()
+    return resp, content_bytes
+
+
 def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
-                           compression=None):
+                           compression=None, url_params=None,
+                           skip_requests=False):
     """
     If the filepath_or_buffer is a url, translate and return the buffer.
     Otherwise passthrough.
@@ -188,19 +276,45 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                          or buffer
     encoding : the encoding to use to decode py3 bytes, default is 'utf-8'
 
+    compression : str, default None
+        indicate the compression such as 'gzip'.
+
+    url_params : dict or requests.Session(), default None
+        A python dict containing:
+            'auth': tuple (str, str) eg (uname, pwd)
+            'auth': Any other auth object accepted by requests
+            'verify': boolean, Default True
+                 If False, allow self signed and invalid SSL cert for https
+        or
+        A python requests.Session object if http(s) path to enable basic auth
+        and many other scenarios that requests allows
+
+        .. versionadded:: 0.21.0
+
+    skip_requests : boolean, default False
+        for testing - disable `requests` library. Internal use only
+
+        .. versionadded:: 0.21.0
+
     Returns
     -------
     a filepath_or_buffer, the encoding, the compression
+
+    Raises
+    ------
+    ValueError if url_params specified without installed python-requests pkg
     """
     filepath_or_buffer = _stringify_path(filepath_or_buffer)
 
     if _is_url(filepath_or_buffer):
-        req = _urlopen(filepath_or_buffer)
+        req, content_bytes = fetch_url(filepath_or_buffer,
+                                       url_params,
+                                       skip_requests)
+        reader = BytesIO(content_bytes)
         content_encoding = req.headers.get('Content-Encoding', None)
         if content_encoding == 'gzip':
             # Override compression based on Content-Encoding header
             compression = 'gzip'
-        reader = BytesIO(req.read())
         return reader, encoding, compression
 
     if _is_s3_url(filepath_or_buffer):
 
@@ -15,12 +15,13 @@
     is_integer, is_float,
     is_bool, is_list_like)
 
+from pandas.compat import BytesIO
 from pandas.core.frame import DataFrame
 from pandas.io.parsers import TextParser
 from pandas.errors import EmptyDataError
-from pandas.io.common import (_is_url, _urlopen, _validate_header_arg,
-                              get_filepath_or_buffer, _NA_VALUES,
-                              _stringify_path)
+from pandas.io.common import (_is_url, fetch_url,
+                              _validate_header_arg, get_filepath_or_buffer,
+                              _NA_VALUES, _stringify_path)
 from pandas.core.indexes.period import Period
 import pandas._libs.json as json
 from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
@@ -211,7 +212,8 @@ def read_excel(io, sheet_name=0, header=0, skiprows=None, skip_footer=0,
                         "Use just `sheet_name`")
 
     if not isinstance(io, ExcelFile):
-        io = ExcelFile(io, engine=engine)
+        io = ExcelFile(io, engine=engine,
+                       url_params=kwds.get('url_params', None))
 
     return io._parse_excel(
         sheetname=sheet_name, header=header, skiprows=skiprows, names=names,
@@ -258,8 +260,10 @@ def __init__(self, io, **kwds):
 
         # If io is a url, want to keep the data as bytes so can't pass
         # to get_filepath_or_buffer()
-        if _is_url(self._io):
-            io = _urlopen(self._io)
+        if _is_url(self.io):
+            rs = kwds.get('url_params', None)
+            req, content = fetch_url(self.io, url_params=rs)
+            io = BytesIO(content)
         elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
             io, _, _ = get_filepath_or_buffer(self._io)
 
 
@@ -14,7 +14,7 @@
 
 from pandas.core.dtypes.common import is_list_like
 from pandas.errors import EmptyDataError
-from pandas.io.common import (_is_url, urlopen,
+from pandas.io.common import (_is_url, fetch_url,
                               parse_url, _validate_header_arg)
 from pandas.io.parsers import TextParser
 from pandas.compat import (lrange, lmap, u, string_types, iteritems,
@@ -116,20 +116,31 @@ def _get_skiprows(skiprows):
                     type(skiprows).__name__)
 
 
-def _read(obj):
+def _read(obj, url_params=None):
     """Try to read from a url, file or string.
 
     Parameters
     ----------
     obj : str, unicode, or file-like
 
+    url_params : dict or requests.Session(), default None
+        A python dict containing:
+            'auth': tuple (str, str) eg (uname, pwd)
+            'auth': Any other auth object accepted by requests
+            'verify': boolean, Default True
+                 If False, allow self signed and invalid SSL certs for https
+        or
+        A python requests.Session object if http(s) path to enable basic auth
+        and many other scenarios that requests allows
+
+        .. versionadded:: 0.21.0
+
     Returns
     -------
     raw_text : str
     """
     if _is_url(obj):
-        with urlopen(obj) as url:
-            text = url.read()
+        req, text = fetch_url(obj, url_params)
     elif hasattr(obj, 'read'):
         text = obj.read()
     elif isinstance(obj, char_types):
@@ -187,11 +198,12 @@ class _HtmlFrameParser(object):
     functionality.
     """
 
-    def __init__(self, io, match, attrs, encoding):
+    def __init__(self, io, match, attrs, encoding, url_params=None):
         self.io = io
         self.match = match
         self.attrs = attrs
         self.encoding = encoding
+        self.url_params = url_params
 
     def parse_tables(self):
         tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
@@ -444,7 +456,7 @@ def _parse_tables(self, doc, match, attrs):
         return result
 
     def _setup_build_doc(self):
-        raw_text = _read(self.io)
+        raw_text = _read(self.io, self.url_params)
         if not raw_text:
             raise ValueError('No text parsed from document: %s' % self.io)
         return raw_text
@@ -731,7 +743,8 @@ def _parse(flavor, io, match, attrs, encoding, **kwargs):
     retained = None
     for flav in flavor:
         parser = _parser_dispatch(flav)
-        p = parser(io, compiled_match, attrs, encoding)
+        p = parser(io, compiled_match, attrs, encoding,
+                   url_params=kwargs.get('url_params', None))
 
         try:
             tables = p.parse_tables()
@@ -755,7 +768,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
               skiprows=None, attrs=None, parse_dates=False,
               tupleize_cols=False, thousands=',', encoding=None,
               decimal='.', converters=None, na_values=None,
-              keep_default_na=True):
+              keep_default_na=True, url_params=None):
     r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
 
     Parameters
@@ -856,6 +869,12 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
 
         .. versionadded:: 0.19.0
 
+    url_params : dict or requests.Session(), default None
+        A python dict with optional 'auth' and 'verify' keys, or a python
+        requests.Session object, used if `io` is an http(s) path
+
+        .. versionadded:: 0.21.0
+
     Returns
     -------
     dfs : list of DataFrames
@@ -903,4 +922,4 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
                   parse_dates=parse_dates, tupleize_cols=tupleize_cols,
                   thousands=thousands, attrs=attrs, encoding=encoding,
                   decimal=decimal, converters=converters, na_values=na_values,
-                  keep_default_na=keep_default_na)
+                  keep_default_na=keep_default_na, url_params=url_params)