pandas-dev · May 3, 2013 · Apr 22, 2013
diff --git a/ci/install.sh b/ci/install.sh
@@ -75,6 +75,8 @@ if ( ! $VENV_FILE_AVAILABLE ); then
         pip install $PIP_ARGS xlrd>=0.9.0
         pip install $PIP_ARGS 'http://downloads.sourceforge.net/project/pytseries/scikits.timeseries/0.91.3/scikits.timeseries-0.91.3.tar.gz?r='
         pip install $PIP_ARGS patsy
+        pip install $PIP_ARGS lxml
+        pip install $PIP_ARGS beautifulsoup4
 
         # fool statsmodels into thinking pandas was already installed
         # so it won't refuse to install itself. We want it in the zipped venv

diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -50,6 +50,13 @@ File IO
    read_csv
    ExcelFile.parse
 
+.. currentmodule:: pandas.io.html
+
+.. autosummary::
+   :toctree: generated/
+
+   read_html
+
 HDFStore: PyTables (HDF5)
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 .. currentmodule:: pandas.io.pytables

diff --git a/doc/source/install.rst b/doc/source/install.rst
@@ -99,6 +99,12 @@ Optional Dependencies
   * `openpyxl <http://packages.python.org/openpyxl/>`__, `xlrd/xlwt <http://www.python-excel.org/>`__
      * openpyxl version 1.6.1 or higher
      * Needed for Excel I/O
+  * `lxml <http://lxml.de>`__, or `Beautiful Soup 4 <http://www.crummy.com/software/BeautifulSoup>`__: for reading HTML tables
+     * The main difference between lxml and Beautiful Soup 4 is speed (lxml
+       is faster); however, Beautiful Soup sometimes returns results closer
+       to what you might intuitively expect. Both backends are implemented,
+       so try both and see which you prefer. Results should be very similar.
+     * Note that lxml requires Cython to build successfully
 
 .. note::
 

diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -33,6 +33,7 @@
                                read_fwf, to_clipboard, ExcelFile,
                                ExcelWriter)
 from pandas.io.pytables import HDFStore, Term, get_store, read_hdf
+from pandas.io.html import read_html
 from pandas.util.testing import debug
 
 from pandas.tools.describe import value_range