Skip to content

Commit 3216798

Browse files
committed
DOC: html-doc example
note new date conversion behavior; jreback doc recommendations; cannot pass a string to to_html; gah working notes; modify tests; infer_types back to true as per disc. with @jreback; fix failing tests because of not correctly converted dates; weird recursion error when using format spec instead of format strings
1 parent 7d73077 commit 3216798

File tree

5 files changed

+24
-11
lines changed

5 files changed

+24
-11
lines changed

RELEASE.rst

+1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ pandas 0.11.1
6666
- ``melt`` now accepts the optional parameters ``var_name`` and ``value_name``
6767
to specify custom column names of the returned DataFrame (GH3649_),
6868
thanks @hoechenberger
69+
- ``read_html`` no longer performs hard date conversion
6970

7071
**API Changes**
7172

doc/source/v0.11.1.txt

+16
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,21 @@ Enhancements
6868
- ``pd.read_html()`` can now parse HTML strings, files or urls and return
6969
DataFrames, courtesy of @cpcloud. (GH3477_, GH3605_, GH3606_, GH3616_).
7070
It works with a *single* parser backend: BeautifulSoup4 + html5lib
71+
- You can use ``pd.read_html()`` to read the output from ``DataFrame.to_html()`` like so:
72+
73+
.. ipython :: python
74+
75+
df = DataFrame({'a': range(3), 'b': list('abc')})
76+
print df
77+
html = df.to_html()
78+
alist = pd.read_html(html, infer_types=True, index_col=0)
79+
print df == alist[0]
80+
81+
Note that ``alist`` here is a Python ``list`` so ``pd.read_html()`` and
82+
``DataFrame.to_html()`` are not inverses.
83+
84+
- ``pd.read_html()`` no longer performs hard conversion of date strings
85+
(GH3656_).
7186

7287
- ``HDFStore``
7388

@@ -211,3 +226,4 @@ on GitHub for a complete list.
211226
.. _GH3616: https://github.com/pydata/pandas/issues/3616
212227
.. _GH3605: https://github.com/pydata/pandas/issues/3605
213228
.. _GH3606: https://github.com/pydata/pandas/issues/3606
229+
.. _GH3656: https://github.com/pydata/pandas/issues/3656

pandas/io/html.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,7 @@ def _parse(parser, io, match, flavor, header, index_col, skiprows, infer_types,
721721

722722

723723
def read_html(io, match='.+', flavor='html5lib', header=None, index_col=None,
724-
skiprows=None, infer_types=False, attrs=None):
724+
skiprows=None, infer_types=True, attrs=None):
725725
r"""Read an HTML table into a DataFrame.
726726
727727
Parameters

pandas/io/tests/test_html.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import re
33
from cStringIO import StringIO
44
from unittest import TestCase
5-
import collections
65
import numbers
76
from urllib2 import urlopen
87
from contextlib import closing
@@ -408,7 +407,7 @@ def try_remove_ws(x):
408407
return x
409408

410409
df = self.run_read_html(self.banklist_data, 'Metcalf',
411-
attrs={'id': 'table'}, infer_types=True)[0]
410+
attrs={'id': 'table'})[0]
412411
ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'),
413412
converters={'Updated Date': Timestamp,
414413
'Closing Date': Timestamp})
@@ -431,7 +430,9 @@ def try_remove_ws(x):
431430
'Hamilton Bank, NA', 'The Citizens Savings Bank']
432431
dfnew = df.applymap(try_remove_ws).replace(old, new)
433432
gtnew = ground_truth.applymap(try_remove_ws)
434-
assert_frame_equal(dfnew, gtnew)
433+
converted = dfnew.convert_objects(convert_numeric=True)
434+
assert_frame_equal(converted.convert_objects(convert_dates='coerce'),
435+
gtnew)
435436

436437
@slow
437438
def test_gold_canyon(self):
@@ -487,6 +488,3 @@ def test_lxml_finds_tbody():
487488
url = ('http://ndb.nal.usda.gov/ndb/foods/show/1732?fg=&man=&'
488489
'lfacet=&format=&count=&max=25&offset=&sort=&qlookup=spam')
489490
assert get_lxml_elements(url, 'tbody')
490-
491-
492-

pandas/util/testing.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,13 @@ def assert_almost_equal(a, b, check_less_precise = False):
126126
return assert_dict_equal(a, b)
127127

128128
if isinstance(a, basestring):
129-
assert a == b, "{0} != {1}".format(a, b)
129+
assert a == b, "%s != %s" % (a, b)
130130
return True
131131

132132
if isiterable(a):
133133
np.testing.assert_(isiterable(b))
134134
na, nb = len(a), len(b)
135-
assert na == nb, "{0} != {1}".format(na, nb)
135+
assert na == nb, "%s != %s" % (na, nb)
136136

137137
if np.array_equal(a, b):
138138
return True
@@ -154,8 +154,6 @@ def assert_almost_equal(a, b, check_less_precise = False):
154154
if check_less_precise:
155155
dtype_a = np.dtype(type(a))
156156
dtype_b = np.dtype(type(b))
157-
if dtype_a.kind == 'i' and dtype_b == 'i':
158-
pass
159157
if dtype_a.kind == 'f' and dtype_b == 'f':
160158
if dtype_a.itemsize <= 4 and dtype_b.itemsize <= 4:
161159
decimal = 3

0 commit comments

Comments
 (0)