Skip to content

Commit 0fde3ba

Browse files
committed
DEPR: Remove infer_types keyword from pd.read_html as it is unused, pandas-dev#4770, pandas-dev#7032
1 parent 0068abf commit 0fde3ba

File tree

3 files changed

+25
-39
lines changed

3 files changed

+25
-39
lines changed

doc/source/whatsnew/v0.17.0.txt

+3-2
Original file line numberDiff line numberDiff line change
@@ -708,8 +708,9 @@ Removal of prior version deprecations/changes
708708

709709

710710

711-
- Remove the ``table`` keyword in ``HDFStore.put/append``, in favor of using ``format=`` (:issue:`4645`)
712-
- Remove unused keyword ``kind`` in ``read_excel/ExcelFile`` (:issue:`4712`)
711+
- Remove ``table`` keyword in ``HDFStore.put/append``, in favor of using ``format=`` (:issue:`4645`)
712+
- Remove ``kind`` in ``read_excel/ExcelFile`` as it is unused (:issue:`4712`)
713+
- Remove ``infer_types`` keyword from ``pd.read_html`` as it is unused (:issue:`4770`, :issue:`7032`)
713714

714715

715716
.. _whatsnew_0170.performance:

pandas/io/html.py

+11-11
Original file line numberDiff line numberDiff line change
@@ -604,7 +604,7 @@ def _expand_elements(body):
604604
body[ind] += empty * (lens_max - length)
605605

606606

607-
def _data_to_frame(data, header, index_col, skiprows, infer_types,
607+
def _data_to_frame(data, header, index_col, skiprows,
608608
parse_dates, tupleize_cols, thousands):
609609
head, body, foot = data
610610

@@ -707,7 +707,7 @@ def _validate_flavor(flavor):
707707
return flavor
708708

709709

710-
def _parse(flavor, io, match, header, index_col, skiprows, infer_types,
710+
def _parse(flavor, io, match, header, index_col, skiprows,
711711
parse_dates, tupleize_cols, thousands, attrs, encoding):
712712
flavor = _validate_flavor(flavor)
713713
compiled_match = re.compile(match) # you can pass a compiled regex here
@@ -730,15 +730,20 @@ def _parse(flavor, io, match, header, index_col, skiprows, infer_types,
730730
ret = []
731731
for table in tables:
732732
try:
733-
ret.append(_data_to_frame(table, header, index_col, skiprows,
734-
infer_types, parse_dates, tupleize_cols, thousands))
733+
ret.append(_data_to_frame(data=table,
734+
header=header,
735+
index_col=index_col,
736+
skiprows=skiprows,
737+
parse_dates=parse_dates,
738+
tupleize_cols=tupleize_cols,
739+
thousands=thousands))
735740
except StopIteration: # empty table
736741
continue
737742
return ret
738743

739744

740745
def read_html(io, match='.+', flavor=None, header=None, index_col=None,
741-
skiprows=None, infer_types=None, attrs=None, parse_dates=False,
746+
skiprows=None, attrs=None, parse_dates=False,
742747
tupleize_cols=False, thousands=',', encoding=None):
743748
r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
744749
@@ -776,9 +781,6 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
776781
that sequence. Note that a single element sequence means 'skip the nth
777782
row' whereas an integer means 'skip n rows'.
778783
779-
infer_types : None, optional
780-
This has no effect since 0.15.0. It is here for backwards compatibility.
781-
782784
attrs : dict or None, optional
783785
This is a dictionary of attributes that you can pass to use to identify
784786
the table in the HTML. These are not checked for validity before being
@@ -853,13 +855,11 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
853855
pandas.read_csv
854856
"""
855857
_importers()
856-
if infer_types is not None:
857-
warnings.warn("infer_types has no effect since 0.15", FutureWarning)
858858

859859
# Type check here. We don't want to parse only to fail because of an
860860
# invalid value of an integer skiprows.
861861
if isinstance(skiprows, numbers.Integral) and skiprows < 0:
862862
raise ValueError('cannot skip rows starting from the end of the '
863863
'data (you passed a negative value)')
864-
return _parse(flavor, io, match, header, index_col, skiprows, infer_types,
864+
return _parse(flavor, io, match, header, index_col, skiprows,
865865
parse_dates, tupleize_cols, thousands, attrs, encoding)

pandas/io/tests/test_html.py

+11-26
Original file line numberDiff line numberDiff line change
@@ -137,12 +137,10 @@ def test_banklist(self):
137137
assert_framelist_equal(df1, df2)
138138

139139
def test_spam_no_types(self):
140-
with tm.assert_produces_warning(FutureWarning):
141-
df1 = self.read_html(self.spam_data, '.*Water.*',
142-
infer_types=False)
143-
with tm.assert_produces_warning(FutureWarning):
144-
df2 = self.read_html(self.spam_data, 'Unit', infer_types=False)
145140

141+
# infer_types removed in #10892
142+
df1 = self.read_html(self.spam_data, '.*Water.*')
143+
df2 = self.read_html(self.spam_data, 'Unit')
146144
assert_framelist_equal(df1, df2)
147145

148146
self.assertEqual(df1[0].ix[0, 0], 'Proximates')
@@ -230,12 +228,9 @@ def test_index(self):
230228
assert_framelist_equal(df1, df2)
231229

232230
def test_header_and_index_no_types(self):
233-
with tm.assert_produces_warning(FutureWarning):
234-
df1 = self.read_html(self.spam_data, '.*Water.*', header=1,
235-
index_col=0, infer_types=False)
236-
with tm.assert_produces_warning(FutureWarning):
237-
df2 = self.read_html(self.spam_data, 'Unit', header=1, index_col=0,
238-
infer_types=False)
231+
df1 = self.read_html(self.spam_data, '.*Water.*', header=1,
232+
index_col=0)
233+
df2 = self.read_html(self.spam_data, 'Unit', header=1, index_col=0)
239234
assert_framelist_equal(df1, df2)
240235

241236
def test_header_and_index_with_types(self):
@@ -245,18 +240,10 @@ def test_header_and_index_with_types(self):
245240
assert_framelist_equal(df1, df2)
246241

247242
def test_infer_types(self):
248-
with tm.assert_produces_warning(FutureWarning):
249-
df1 = self.read_html(self.spam_data, '.*Water.*', index_col=0,
250-
infer_types=False)
251-
with tm.assert_produces_warning(FutureWarning):
252-
df2 = self.read_html(self.spam_data, 'Unit', index_col=0,
253-
infer_types=False)
254-
assert_framelist_equal(df1, df2)
255-
256-
with tm.assert_produces_warning(FutureWarning):
257-
df2 = self.read_html(self.spam_data, 'Unit', index_col=0,
258-
infer_types=True)
259243

244+
# 10892 infer_types removed
245+
df1 = self.read_html(self.spam_data, '.*Water.*', index_col=0)
246+
df2 = self.read_html(self.spam_data, 'Unit', index_col=0)
260247
assert_framelist_equal(df1, df2)
261248

262249
def test_string_io(self):
@@ -641,8 +628,7 @@ def test_computer_sales_page(self):
641628
with tm.assertRaisesRegexp(CParserError, r"Passed header=\[0,1\] are "
642629
"too many rows for this multi_index "
643630
"of columns"):
644-
with tm.assert_produces_warning(FutureWarning):
645-
self.read_html(data, infer_types=False, header=[0, 1])
631+
self.read_html(data, header=[0, 1])
646632

647633
def test_wikipedia_states_table(self):
648634
data = os.path.join(DATA_PATH, 'wikipedia_states.html')
@@ -751,8 +737,7 @@ def test_parse_dates_combine(self):
751737

752738
def test_computer_sales_page(self):
753739
data = os.path.join(DATA_PATH, 'computer_sales_page.html')
754-
with tm.assert_produces_warning(FutureWarning):
755-
self.read_html(data, infer_types=False, header=[0, 1])
740+
self.read_html(data, header=[0, 1])
756741

757742

758743
def test_invalid_flavor():

0 commit comments

Comments
 (0)