DOC: html-doc example

cpcloud · cpcloud · commit 32167983a65f · 2013-05-21T12:58:31.000-04:00
note new date conversion behavior; jreback doc recommendations; cannot pass a string to to_html gah; working notes; modify tests; infer_types back to true as per disc. with @jreback; fix failing tests because of not correctly converted dates; weird recursion error when using format spec instead of format strings
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -66,6 +66,7 @@ pandas 0.11.1
   - ``melt`` now accepts the optional parameters ``var_name`` and ``value_name`` 
     to specify custom column names of the returned DataFrame (GH3649_),
     thanks @hoechenberger
+  - ``read_html`` no longer performs hard date conversion
 
 **API Changes**
 
diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt
@@ -68,6 +68,21 @@ Enhancements
   - ``pd.read_html()`` can now parse HTML strings, files or urls and return
     DataFrames, courtesy of @cpcloud. (GH3477_, GH3605_, GH3606_, GH3616_).
     It works with a *single* parser backend: BeautifulSoup4 + html5lib
+    - You can use ``pd.read_html()`` to read the output from ``DataFrame.to_html()`` like so
+
+    .. ipython :: python
+
+        df = DataFrame({'a': range(3), 'b': list('abc')})
+        print df
+        html = df.to_html()
+        alist = pd.read_html(html, infer_types=True, index_col=0)
+        print df == alist[0]
+
+    Note that ``alist`` here is a Python ``list`` so ``pd.read_html()`` and
+    ``DataFrame.to_html()`` are not inverses.
+
+    - ``pd.read_html()`` no longer performs hard conversion of date strings
+      (GH3656_).
 
   - ``HDFStore``
 
@@ -211,3 +226,4 @@ on GitHub for a complete list.
 .. _GH3616: https://github.com/pydata/pandas/issues/3616
 .. _GH3605: https://github.com/pydata/pandas/issues/3605
 .. _GH3606: https://github.com/pydata/pandas/issues/3606
+.. _GH3656: https://github.com/pydata/pandas/issues/3656
diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -721,7 +721,7 @@ def _parse(parser, io, match, flavor, header, index_col, skiprows, infer_types,
 
 
 def read_html(io, match='.+', flavor='html5lib', header=None, index_col=None,
-              skiprows=None, infer_types=False, attrs=None):
+              skiprows=None, infer_types=True, attrs=None):
     r"""Read an HTML table into a DataFrame.
 
     Parameters
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
@@ -2,7 +2,6 @@
 import re
 from cStringIO import StringIO
 from unittest import TestCase
-import collections
 import numbers
 from urllib2 import urlopen
 from contextlib import closing
@@ -408,7 +407,7 @@ def try_remove_ws(x):
                 return x
 
         df = self.run_read_html(self.banklist_data, 'Metcalf',
-                                attrs={'id': 'table'}, infer_types=True)[0]
+                                attrs={'id': 'table'})[0]
         ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'),
                                 converters={'Updated Date': Timestamp,
                                             'Closing Date': Timestamp})
@@ -431,7 +430,9 @@ def try_remove_ws(x):
                'Hamilton Bank, NA', 'The Citizens Savings Bank']
         dfnew = df.applymap(try_remove_ws).replace(old, new)
         gtnew = ground_truth.applymap(try_remove_ws)
-        assert_frame_equal(dfnew, gtnew)
+        converted = dfnew.convert_objects(convert_numeric=True)
+        assert_frame_equal(converted.convert_objects(convert_dates='coerce'),
+                           gtnew)
 
     @slow
     def test_gold_canyon(self):
@@ -487,6 +488,3 @@ def test_lxml_finds_tbody():
     url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&'
            'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam')
     assert get_lxml_elements(url, 'tbody')
-
-
-    
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -126,13 +126,13 @@ def assert_almost_equal(a, b, check_less_precise = False):
         return assert_dict_equal(a, b)
 
     if isinstance(a, basestring):
-        assert a == b, "{0} != {1}".format(a, b)
+        assert a == b, "%s != %s" % (a, b)
         return True
 
     if isiterable(a):
         np.testing.assert_(isiterable(b))
         na, nb = len(a), len(b)
-        assert na == nb, "{0} != {1}".format(na, nb)
+        assert na == nb, "%s != %s" % (na, nb)
 
         if np.array_equal(a, b):
             return True
@@ -154,8 +154,6 @@ def assert_almost_equal(a, b, check_less_precise = False):
         if check_less_precise:
             dtype_a = np.dtype(type(a))
             dtype_b = np.dtype(type(b))
-            if dtype_a.kind == 'i' and dtype_b == 'i':
-                pass
             if dtype_a.kind == 'f' and dtype_b == 'f':
                 if dtype_a.itemsize <= 4 and dtype_b.itemsize <= 4:
                     decimal = 3