DEPR: Remove infer_types keyword from pd.read_html as it is unused, pandas-dev#4770, pandas-dev#7032

jreback · jreback · commit 0fde3ba5a7ef · 2015-08-24T14:23:14.000-04:00
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -708,8 +708,9 @@ Removal of prior version deprecations/changes
 
 
 
-- Remove the ``table`` keyword in ``HDFStore.put/append``, in favor of using ``format=`` (:issue:`4645`)
-- Remove unused keyword ``kind`` in ``read_excel/ExcelFile`` (:issue:`4712`)
+- Remove ``table`` keyword in ``HDFStore.put/append``, in favor of using ``format=`` (:issue:`4645`)
+- Remove ``kind`` in ``read_excel/ExcelFile`` as it is unused (:issue:`4712`)
+- Remove ``infer_types`` keyword from ``pd.read_html`` as it is unused (:issue:`4770`, :issue:`7032`)
 
 
 .. _whatsnew_0170.performance:
diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -604,7 +604,7 @@ def _expand_elements(body):
         body[ind] += empty * (lens_max - length)
 
 
-def _data_to_frame(data, header, index_col, skiprows, infer_types,
+def _data_to_frame(data, header, index_col, skiprows,
                    parse_dates, tupleize_cols, thousands):
     head, body, foot = data
 
@@ -707,7 +707,7 @@ def _validate_flavor(flavor):
     return flavor
 
 
-def _parse(flavor, io, match, header, index_col, skiprows, infer_types,
+def _parse(flavor, io, match, header, index_col, skiprows,
            parse_dates, tupleize_cols, thousands, attrs, encoding):
     flavor = _validate_flavor(flavor)
     compiled_match = re.compile(match)  # you can pass a compiled regex here
@@ -730,15 +730,20 @@ def _parse(flavor, io, match, header, index_col, skiprows, infer_types,
     ret = []
     for table in tables:
         try:
-            ret.append(_data_to_frame(table, header, index_col, skiprows,
-                        infer_types, parse_dates, tupleize_cols, thousands))
+            ret.append(_data_to_frame(data=table,
+                                      header=header,
+                                      index_col=index_col,
+                                      skiprows=skiprows,
+                                      parse_dates=parse_dates,
+                                      tupleize_cols=tupleize_cols,
+                                      thousands=thousands))
         except StopIteration: # empty table
             continue
     return ret
 
 
 def read_html(io, match='.+', flavor=None, header=None, index_col=None,
-              skiprows=None, infer_types=None, attrs=None, parse_dates=False,
+              skiprows=None, attrs=None, parse_dates=False,
               tupleize_cols=False, thousands=',', encoding=None):
     r"""Read HTML tables into a ``list`` of ``DataFrame`` objects.
 
@@ -776,9 +781,6 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
         that sequence.  Note that a single element sequence means 'skip the nth
         row' whereas an integer means 'skip n rows'.
 
-    infer_types : None, optional
-        This has no effect since 0.15.0. It is here for backwards compatibility.
-
     attrs : dict or None, optional
         This is a dictionary of attributes that you can pass to use to identify
         the table in the HTML. These are not checked for validity before being
@@ -853,13 +855,11 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
     pandas.read_csv
     """
     _importers()
-    if infer_types is not None:
-        warnings.warn("infer_types has no effect since 0.15", FutureWarning)
 
     # Type check here. We don't want to parse only to fail because of an
     # invalid value of an integer skiprows.
     if isinstance(skiprows, numbers.Integral) and skiprows < 0:
         raise ValueError('cannot skip rows starting from the end of the '
                          'data (you passed a negative value)')
-    return _parse(flavor, io, match, header, index_col, skiprows, infer_types,
+    return _parse(flavor, io, match, header, index_col, skiprows,
                   parse_dates, tupleize_cols, thousands, attrs, encoding)
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
@@ -137,12 +137,10 @@ def test_banklist(self):
         assert_framelist_equal(df1, df2)
 
     def test_spam_no_types(self):
-        with tm.assert_produces_warning(FutureWarning):
-            df1 = self.read_html(self.spam_data, '.*Water.*',
-                                 infer_types=False)
-        with tm.assert_produces_warning(FutureWarning):
-            df2 = self.read_html(self.spam_data, 'Unit', infer_types=False)
 
+        # infer_types removed in #10892
+        df1 = self.read_html(self.spam_data, '.*Water.*')
+        df2 = self.read_html(self.spam_data, 'Unit')
         assert_framelist_equal(df1, df2)
 
         self.assertEqual(df1[0].ix[0, 0], 'Proximates')
@@ -230,12 +228,9 @@ def test_index(self):
         assert_framelist_equal(df1, df2)
 
     def test_header_and_index_no_types(self):
-        with tm.assert_produces_warning(FutureWarning):
-            df1 = self.read_html(self.spam_data, '.*Water.*', header=1,
-                                 index_col=0, infer_types=False)
-        with tm.assert_produces_warning(FutureWarning):
-            df2 = self.read_html(self.spam_data, 'Unit', header=1, index_col=0,
-                                 infer_types=False)
+        df1 = self.read_html(self.spam_data, '.*Water.*', header=1,
+                             index_col=0)
+        df2 = self.read_html(self.spam_data, 'Unit', header=1, index_col=0)
         assert_framelist_equal(df1, df2)
 
     def test_header_and_index_with_types(self):
@@ -245,18 +240,10 @@ def test_header_and_index_with_types(self):
         assert_framelist_equal(df1, df2)
 
     def test_infer_types(self):
-        with tm.assert_produces_warning(FutureWarning):
-            df1 = self.read_html(self.spam_data, '.*Water.*', index_col=0,
-                                 infer_types=False)
-        with tm.assert_produces_warning(FutureWarning):
-            df2 = self.read_html(self.spam_data, 'Unit', index_col=0,
-                                 infer_types=False)
-        assert_framelist_equal(df1, df2)
-
-        with tm.assert_produces_warning(FutureWarning):
-            df2 = self.read_html(self.spam_data, 'Unit', index_col=0,
-                                 infer_types=True)
 
+        # 10892 infer_types removed
+        df1 = self.read_html(self.spam_data, '.*Water.*', index_col=0)
+        df2 = self.read_html(self.spam_data, 'Unit', index_col=0)
         assert_framelist_equal(df1, df2)
 
     def test_string_io(self):
@@ -641,8 +628,7 @@ def test_computer_sales_page(self):
         with tm.assertRaisesRegexp(CParserError, r"Passed header=\[0,1\] are "
                                    "too many rows for this multi_index "
                                    "of columns"):
-            with tm.assert_produces_warning(FutureWarning):
-                self.read_html(data, infer_types=False, header=[0, 1])
+            self.read_html(data, header=[0, 1])
 
     def test_wikipedia_states_table(self):
         data = os.path.join(DATA_PATH, 'wikipedia_states.html')
@@ -751,8 +737,7 @@ def test_parse_dates_combine(self):
 
     def test_computer_sales_page(self):
         data = os.path.join(DATA_PATH, 'computer_sales_page.html')
-        with tm.assert_produces_warning(FutureWarning):
-            self.read_html(data, infer_types=False, header=[0, 1])
+        self.read_html(data, header=[0, 1])
 
 
 def test_invalid_flavor():