From 3a25a866e0034ce694b942f7329dcafcc19fb421 Mon Sep 17 00:00:00 2001
From: John Owens <jowens@ece.ucdavis.edu>
Date: Wed, 26 Jul 2017 10:25:08 -0700
Subject: [PATCH 1/3] read_html handles rowspan and colspan in tables, infers
 headers if <thead> is not specified

---
 pandas/io/html.py | 360 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 256 insertions(+), 104 deletions(-)
diff --git a/pandas/io/html.py b/pandas/io/html.py
index 3c38dae91eb89..d5f5767e9b16e 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -16,11 +16,16 @@
 from pandas.io.common import (EmptyDataError, _is_url, urlopen,
                               parse_url, _validate_header_arg)
 from pandas.io.parsers import TextParser
-from pandas.compat import (lrange, lmap, u, string_types, iteritems,
+from pandas.compat import (lrange, lmap, lfilter, u, string_types, iteritems,
                            raise_with_traceback, binary_type)
 from pandas import Series
+<< << << < HEAD
 from pandas.core.common import AbstractMethodError
 from pandas.formats.printing import pprint_thing
+== == == =
+from pandas.core.common import (AbstractMethodError, flatten)
+from pandas.io.formats.printing import pprint_thing
+>>>>>> > b922af71b... read_html handles rowspan and colspan in tables, infers headers if < thead > is not specified
 
 _IMPORTS = False
 _HAS_BS4 = False
@@ -175,13 +180,15 @@ class _HtmlFrameParser(object):
     -----
     To subclass this class effectively you must override the following methods:
         * :func:`_build_doc`
-        * :func:`_text_getter`
-        * :func:`_parse_td`
         * :func:`_parse_tables`
-        * :func:`_parse_tr`
-        * :func:`_parse_thead`
-        * :func:`_parse_tbody`
-        * :func:`_parse_tfoot`
+        * :func:`_text_getter`
+        * :func:`_equals_tag`
+        * :func:`_has_tag`
+        * :func:`_extract_td`
+        * :func:`_extract_tr`
+        * :func:`_extract_thead`
+        * :func:`_extract_tbody`
+        * :func:`_extract_tfoot`
     See each method's respective documentation for details on their
     functionality.
     """
@@ -196,29 +203,32 @@ def parse_tables(self):
         tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
         return (self._build_table(table) for table in tables)
 
-    def _parse_raw_data(self, rows):
-        """Parse the raw data into a list of lists.
+    def _parse_tables(self, doc, match, attrs):
+        """Return all tables from the parsed DOM.
 
         Parameters
         ----------
-        rows : iterable of node-like
-            A list of row elements.
+        doc : tree-like
+            The DOM from which to parse the table element.
 
-        text_getter : callable
-            A callable that gets the text from an individual node. This must be
-            defined by subclasses.
+        match : str or regular expression
+            The text to search for in the DOM tree.
 
-        column_finder : callable
-            A callable that takes a row node as input and returns a list of the
-            column node in that row. This must be defined by subclasses.
+        attrs : dict
+            A dictionary of table attributes that can be used to disambiguate
+            mutliple tables on a page.
+
+        Raises
+        ------
+        ValueError
+            * If `match` does not match any text in the document.
 
         Returns
         -------
-        data : list of list of strings
+        tables : list of node-like
+            A list of <table> elements to be parsed into raw data.
         """
-        data = [[_remove_whitespace(self._text_getter(col)) for col in
-                 self._parse_td(row)] for row in rows]
-        return data
+        raise AbstractMethodError(self)
 
     def _text_getter(self, obj):
         """Return the text of an individual DOM node.
@@ -235,48 +245,58 @@ def _text_getter(self, obj):
         """
         raise AbstractMethodError(self)
 
-    def _parse_td(self, obj):
-        """Return the td elements from a row element.
+    def _equals_tag(self, obj, tag):
+        """Returns whether an individual DOM node matches a tag
 
         Parameters
         ----------
         obj : node-like
+            A DOM node.
+
+        tag : string
+            Tag to be checked for equality
 
         Returns
         -------
-        columns : list of node-like
-            These are the elements of each row, i.e., the columns.
+        boolean
+            Does the object match tag 'tag'?
         """
         raise AbstractMethodError(self)
 
-    def _parse_tables(self, doc, match, attrs):
-        """Return all tables from the parsed DOM.
+    def _contains_tag(self, obj, tag):
+        """Returns whether an individual DOM node has a particular tag
+        contained within it
 
         Parameters
         ----------
-        doc : tree-like
-            The DOM from which to parse the table element.
+        obj : node-like
+            A DOM node.
 
-        match : str or regular expression
-            The text to search for in the DOM tree.
+        tag : string
+            Tag to be found in this DOM
 
-        attrs : dict
-            A dictionary of table attributes that can be used to disambiguate
-            mutliple tables on a page.
+        Returns
+        -------
+        boolean
+            Does the object contain tag 'tag'?
+        """
+        raise AbstractMethodError(self)
 
-        Raises
-        ------
-        ValueError
-            * If `match` does not match any text in the document.
+    def _extract_td(self, obj):
+        """Return the td elements from a row element.
+
+        Parameters
+        ----------
+        obj : node-like
 
         Returns
         -------
-        tables : list of node-like
-            A list of <table> elements to be parsed into raw data.
+        columns : list of node-like
+            These are the elements of each row, i.e., the columns.
         """
         raise AbstractMethodError(self)
 
-    def _parse_tr(self, table):
+    def _extract_tr(self, table):
         """Return the list of row elements from the parsed table element.
 
         Parameters
@@ -291,7 +311,7 @@ def _parse_tr(self, table):
         """
         raise AbstractMethodError(self)
 
-    def _parse_thead(self, table):
+    def _extract_thead(self, table):
         """Return the header of a table.
 
         Parameters
@@ -306,7 +326,7 @@ def _parse_thead(self, table):
         """
         raise AbstractMethodError(self)
 
-    def _parse_tbody(self, table):
+    def _extract_tbody(self, table):
         """Return the body of the table.
 
         Parameters
@@ -321,7 +341,7 @@ def _parse_tbody(self, table):
         """
         raise AbstractMethodError(self)
 
-    def _parse_tfoot(self, table):
+    def _extract_tfoot(self, table):
         """Return the footer of the table if any.
 
         Parameters
@@ -345,37 +365,149 @@ def _build_doc(self):
         """
         raise AbstractMethodError(self)
 
-    def _build_table(self, table):
-        header = self._parse_raw_thead(table)
-        body = self._parse_raw_tbody(table)
-        footer = self._parse_raw_tfoot(table)
+    def _build_table(self, table_html):
+        header, body, footer = self._parse_raw_thead_tbody_tfoot(table_html)
+        # the above "footer" actually produces a footer. The below "footer"
+        # rarely does. The below "footer" is the legacy behavior and so I'm
+        # leaving it for the time being.
+        footer = self._parse_raw_tfoot(table_html)
         return header, body, footer
 
-    def _parse_raw_thead(self, table):
-        thead = self._parse_thead(table)
-        res = []
-        if thead:
-            res = lmap(self._text_getter, self._parse_th(thead[0]))
-        return np.atleast_1d(
-            np.array(res).squeeze()) if res and len(res) == 1 else res
+    def _parse_raw_thead_tbody_tfoot(self, table_html):
+        """Given a table, return parsed header, body, and foot.
+           Header and body are lists-of-lists. Top level list is a list of
+           rows. Each row is a list of parsed elements.
 
-    def _parse_raw_tfoot(self, table):
-        tfoot = self._parse_tfoot(table)
+           Logic: Use <thead>, <tbody>, <tfoot> elements to identify
+                  header, body, and footer, otherwise:
+                  - Put all rows into body
+                  - Move rows from top of body to header only if
+                    all elements inside row are <th>
+                  - Move rows from bottom of body to footer only if
+                    all elements inside row are <th>
+
+        Parameters
+        ----------
+        table_html : node-like
+            A single table element
+
+        Returns
+        -------
+        header, body, footer
+        header : list of list of node-like
+             List of rows, each of which is a list of parsed header elements
+        body : list of list of node-like
+             List of rows, each of which is a list of parsed body elements
+        footer : list of list of node-like
+             List of rows, each of which is a list of parsed footer elements
+        """
+        header_rows = []
+        body_rows = []
+        footer_rows = []
+        # first, are there thead and tbody elements in the table?
+        if (self._contains_tag(table_html, 'thead') and
+                self._contains_tag(table_html, 'tbody')):
+            header_rows = self._extract_tr(self._extract_thead(table_html)[0])
+            body_rows = self._extract_tr(self._extract_tbody(table_html)[0])
+            if self._contains_tag(table_html, 'tfoot'):
+                footer_rows = self._extract_tr(
+                    self._extract_tfoot(table_html)[0])
+        else:
+            # otherwise we need to split the body into header/body/foot
+            body_rows = self._extract_tr(table_html)
+            if body_rows == []:
+                # empty table, just return nothing
+                return [], [], []
+            # splitting criterion: if all tags within a row are th, it's part
+            # of the header/footer
+            while all(self._equals_tag(t, 'th') for t in
+                      self._extract_td(body_rows[0])):
+                # this row should be a header row, move it from body to header
+                header_rows.append(body_rows.pop(0))
+            while all(self._equals_tag(t, 'th') for t in
+                      self._extract_td(body_rows[-1])):
+                # this row should be a footer row, move it from body to footer
+                footer_rows.insert(0, body_rows.pop())
+        header = self._expand_colspan_rowspan(header_rows, fill_rowspan=False)
+        body = self._expand_colspan_rowspan(body_rows, fill_rowspan=True)
+        footer = self._expand_colspan_rowspan(footer_rows, fill_rowspan=False)
+        # the below line is lifted from _parse_raw_tfoot. Not sure what it
+        # does.
+        footer = np.atleast_1d(np.array(footer).squeeze(
+        )) if footer and len(footer) == 1 else footer
+        return header, body, footer
+
+    def _expand_colspan_rowspan(self, rows, fill_rowspan=True):
+        """Given a list of rows, return a list of rows that properly handle
+           colspan/rowspan
+
+        Discussion on behavior of fill_rowspan in #17073
+
+        Parameters
+        ----------
+        rows : list of list of node-like
+            List of rows, each of which is a list of nodes
+
+        fill_rowspan : boolean
+            Should a rowspan fill every item in the rowspan (True) or only the
+            bottommost element (False)?
+
+        Returns
+        -------
+        res : list of list of node-like
+            List of rows, each of which is a list of nodes, respecting
+            colspan/rowspan
+        """
+        res = []
+        saved_span = []
+        for row in rows:
+            extracted_row = self._extract_td(row)
+            cols_text = [_remove_whitespace(
+                self._text_getter(col)) for col in extracted_row]
+            col_colspans = [int(col.get('colspan', 1))
+                            for col in extracted_row]
+            col_rowspans = [int(col.get('rowspan', 1))
+                            for col in extracted_row]
+            # expand cols using col_colspans
+            # maybe this can be done with a list comprehension, dunno
+            cols = zip(
+                list(flatten(
+                    lmap(lambda (text, nc): [text] * nc,
+                         zip(cols_text, col_colspans)))),
+                list(flatten(
+                    lmap(lambda (nc, nr): [nr] * nc,
+                         zip(col_colspans, col_rowspans))))
+            )
+            # cols is now a list of (text, number of rows)
+            # now insert any previous rowspans
+            for (col, (text, nr)) in saved_span:
+                cols.insert(col, (text, nr))
+
+            # save next saved_span
+            def advance_item_to_next_row(item):
+                (col, (text, nr)) = item
+                if nr == 1:
+                    return None
+                else:
+                    # only keep the text around if fill_rowspan is set
+                    return (col, (text if fill_rowspan else '', nr - 1))
+            saved_span = lfilter(lambda i: i is not None,
+                                 lmap(advance_item_to_next_row,
+                                      list(enumerate(cols))))
+            cols = [text for (text, nr) in cols]
+            # generate cols with text only
+            if any([col != '' for col in cols]):
+                res.append(cols)
+        return res
+
+    def _parse_raw_tfoot(self, table_html):
+        tfoot = self._extract_tfoot(table_html)
         res = []
         if tfoot:
-            res = lmap(self._text_getter, self._parse_td(tfoot[0]))
+            res = lmap(self._text_getter, self._extract_td(tfoot[0]))
         return np.atleast_1d(
             np.array(res).squeeze()) if res and len(res) == 1 else res
 
-    def _parse_raw_tbody(self, table):
-        tbody = self._parse_tbody(table)
-
-        try:
-            res = self._parse_tr(tbody[0])
-        except IndexError:
-            res = self._parse_tr(table)
-        return self._parse_raw_data(res)
-
 
 class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser):
     """HTML to DataFrame parser that uses BeautifulSoup under the hood.
@@ -397,27 +529,6 @@ def __init__(self, *args, **kwargs):
         from bs4 import SoupStrainer
         self._strainer = SoupStrainer('table')
 
-    def _text_getter(self, obj):
-        return obj.text
-
-    def _parse_td(self, row):
-        return row.find_all(('td', 'th'))
-
-    def _parse_tr(self, element):
-        return element.find_all('tr')
-
-    def _parse_th(self, element):
-        return element.find_all('th')
-
-    def _parse_thead(self, table):
-        return table.find_all('thead')
-
-    def _parse_tbody(self, table):
-        return table.find_all('tbody')
-
-    def _parse_tfoot(self, table):
-        return table.find_all('tfoot')
-
     def _parse_tables(self, doc, match, attrs):
         element_name = self._strainer.name
         tables = doc.find_all(element_name, attrs=attrs)
@@ -439,6 +550,33 @@ def _parse_tables(self, doc, match, attrs):
                              match.pattern)
         return result
 
+    def _text_getter(self, obj):
+        return obj.text
+
+    def _equals_tag(self, obj, tag):
+        return obj.name == tag
+
+    def _contains_tag(self, obj, tag):
+        return obj.find(tag) is not None
+
+    def _extract_td(self, row):
+        return row.find_all(('td', 'th'))
+
+    def _extract_tr(self, element):
+        return element.find_all('tr')
+
+    def _extract_th(self, element):
+        return element.find_all('th')
+
+    def _extract_thead(self, table):
+        return table.find_all('thead')
+
+    def _extract_tbody(self, table):
+        return table.find_all('tbody')
+
+    def _extract_tfoot(self, table):
+        return table.find_all('tfoot')
+
     def _setup_build_doc(self):
         raw_text = _read(self.io)
         if not raw_text:
@@ -498,16 +636,6 @@ class _LxmlFrameParser(_HtmlFrameParser):
     def __init__(self, *args, **kwargs):
         super(_LxmlFrameParser, self).__init__(*args, **kwargs)
 
-    def _text_getter(self, obj):
-        return obj.text_content()
-
-    def _parse_td(self, row):
-        return row.xpath('.//td|.//th')
-
-    def _parse_tr(self, table):
-        expr = './/tr[normalize-space()]'
-        return table.xpath(expr)
-
     def _parse_tables(self, doc, match, kwargs):
         pattern = match.pattern
 
@@ -527,6 +655,22 @@ def _parse_tables(self, doc, match, kwargs):
             raise ValueError("No tables found matching regex %r" % pattern)
         return tables
 
+    def _equals_tag(self, obj, tag):
+        return obj.tag == tag
+
+    def _contains_tag(self, obj, tag):
+        return obj.find(tag) is not None
+
+    def _text_getter(self, obj):
+        return obj.text_content()
+
+    def _extract_td(self, row):
+        return row.xpath('.//td|.//th')
+
+    def _extract_tr(self, table):
+        expr = './/tr[normalize-space()]'
+        return table.xpath(expr)
+
     def _build_doc(self):
         """
         Raises
@@ -581,19 +725,27 @@ def _build_doc(self):
                 raise XMLSyntaxError("no text parsed from document", 0, 0, 0)
         return r
 
-    def _parse_tbody(self, table):
+    def _extract_tbody(self, table):
         return table.xpath('.//tbody')
 
-    def _parse_thead(self, table):
+    def _extract_thead(self, table):
         return table.xpath('.//thead')
 
-    def _parse_tfoot(self, table):
+    def _extract_tfoot(self, table):
         return table.xpath('.//tfoot')
 
     def _parse_raw_thead(self, table):
-        expr = './/thead//th'
-        return [_remove_whitespace(x.text_content()) for x in
-                table.xpath(expr)]
+        expr = './/thead'
+        thead = table.xpath(expr)
+        res = []
+        if thead:
+            trs = self._extract_tr(thead[0])
+            for tr in trs:
+                cols = [_remove_whitespace(x.text_content()) for x in
+                        self._extract_td(tr)]
+                if any([col != '' for col in cols]):
+                    res.append(cols)
+        return res
 
     def _parse_raw_tfoot(self, table):
         expr = './/tfoot//th|//tfoot//td'

From 282f039b2793f7da1a72659799e4bf5908c516ca Mon Sep 17 00:00:00 2001
From: John Owens <jowens@ece.ucdavis.edu>
Date: Wed, 26 Jul 2017 10:26:06 -0700
Subject: [PATCH 2/3] added rowspan/colspan/infer-header tests. removed
 test_computer_sales_page, which now appears to parse correctly.

---
 pandas/io/tests/test_html.py | 161 +++++++++++++++++++++++++++++++++--
 1 file changed, 154 insertions(+), 7 deletions(-)

diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
index f4eec864da572..9b8064d89dc64 100644
--- a/pandas/io/tests/test_html.py
+++ b/pandas/io/tests/test_html.py
@@ -633,6 +633,118 @@ def test_different_number_of_rows(self):
         res = self.read_html(out, index_col=0)[0]
         tm.assert_frame_equal(expected, res)
 
+    def test_colspan_rowspan_are_1(self):
+        expected = """<table>
+                        <thead>
+                            <tr>
+                            <th>X</th>
+                            <th>Y</th>
+                            <th>Z</th>
+                            <th>W</th>
+                            </tr>
+                        </thead>
+                        <tbody>
+                        </tbody>
+                    </table>"""
+        out = """<table>
+                   <thead>
+                       <tr>
+                       <th colspan="1">X</th>
+                       <th>Y</th>
+                       <th rowspan="1">Z</th>
+                       <th>W</th>
+                       </tr>
+                   </thead>
+                   <tbody>
+                   </tbody>
+               </table>"""
+        expected = self.read_html(expected)[0]
+        res = self.read_html(out)[0]
+        tm.assert_frame_equal(expected, res)
+
+    def test_colspan_rowspan_are_more_than_1(self):
+        expected = """<table>
+                        <thead>
+                            <tr>
+                            <th>X</th>
+                            <th>X</th>
+                            <th>Y</th>
+                            <th>Z</th>
+                            <th>W</th>
+                            </tr>
+                            <tr>
+                            <th>1</th>
+                            <th>2</th>
+                            <th>2</th>
+                            <th></th>
+                            <th>3</th>
+                            </tr>
+                        </thead>
+                        <tbody>
+                        </tbody>
+                    </table>"""
+        out = """<table>
+                   <thead>
+                       <tr>
+                       <th colspan="2">X</th>
+                       <th>Y</th>
+                       <th rowspan="2">Z</th>
+                       <th>W</th>
+                       </tr>
+                       <tr>
+                       <th>1</th>
+                       <th colspan="2">2</th>
+                       <th>3</th>
+                       </tr>
+                   </thead>
+                   <tbody>
+                   </tbody>
+               </table>"""
+        expected = self.read_html(expected)[0]
+        res = self.read_html(out)[0]
+        tm.assert_frame_equal(expected, res)
+
+    def test_header_should_be_inferred_from_th_elements(self):
+        expected = """<table>
+                        <thead>
+                            <tr>
+                            <th>X</th>
+                            <th>X</th>
+                            <th>Y</th>
+                            <th>Z</th>
+                            <th>W</th>
+                            </tr>
+                        </thead>
+                        <tbody>
+                            <tr>
+                            <td>1</td>
+                            <td>2</td>
+                            <td>3</td>
+                            <td>4</td>
+                            <td>5</td>
+                        </tbody>
+                    </table>"""
+        out = """<table>
+                            <tr>
+                            <th>X</th>
+                            <th>X</th>
+                            <th>Y</th>
+                            <th>Z</th>
+                            <th>W</th>
+                            </tr>
+                            <tr>
+                            <td>1</td>
+                            <td>2</td>
+                            <td>3</td>
+                            <td>4</td>
+                            <td>5</td>
+                    </table>"""
+        expected = self.read_html(expected)[0]  # header is explicit
+        res = self.read_html(out)[0]            # infer header
+        tm.assert_frame_equal(expected, res)
+        res2 = self.read_html(out, header=0)[0]  # manually set header
+        tm.assert_frame_equal(expected, res2)
+
     def test_parse_dates_list(self):
         df = DataFrame({'date': date_range('1/1/2001', periods=10)})
         expected = df.to_html()
@@ -650,13 +762,6 @@ def test_parse_dates_combine(self):
         newdf = DataFrame({'datetime': raw_dates})
         tm.assert_frame_equal(newdf, res[0])
 
-    def test_computer_sales_page(self):
-        data = os.path.join(DATA_PATH, 'computer_sales_page.html')
-        with tm.assertRaisesRegexp(ParserError, r"Passed header=\[0,1\] are "
-                                   "too many rows for this multi_index "
-                                   "of columns"):
-            self.read_html(data, header=[0, 1])
-
     def test_wikipedia_states_table(self):
         data = os.path.join(DATA_PATH, 'wikipedia_states.html')
         assert os.path.isfile(data), '%r is not a file' % data
@@ -872,8 +977,15 @@ def test_computer_sales_page(self):
 
 def test_invalid_flavor():
     url = 'google.com'
+<< << << < HEAD:
+    pandas / io / tests / test_html.py
     with tm.assertRaises(ValueError):
         read_html(url, 'google', flavor='not a* valid**++ flaver')
+== == == =
+    with pytest.raises(ValueError):
+        read_html(url, 'google', flavor='not a* valid**++ flavor')
+>>>>>> > 5818f804b... added rowspan / colspan / infer - header tests. removed test_computer_sales_page, which now appears to parse correctly.:
+    pandas / tests / io / test_html.py
 
 
 def get_elements_from_file(url, element='table'):
@@ -919,6 +1031,41 @@ def test_same_ordering():
     dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4'])
     assert_framelist_equal(dfs_lxml, dfs_bs4)
 
+<< << << < HEAD:
+    pandas / io / tests / test_html.py
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
+== == == =
+
+
+class ErrorThread(threading.Thread):
+
+    def run(self):
+        try:
+            super(ErrorThread, self).run()
+        except Exception as e:
+            self.err = e
+        else:
+            self.err = None
+
+
+@pytest.mark.slow
+def test_importcheck_thread_safety():
+    # see gh-16928
+
+    # force import check by reinitalising global vars in html.py
+    reload(pandas.io.html)
+
+    filename = os.path.join(DATA_PATH, 'valid_markup.html')
+    helper_thread1 = ErrorThread(target=read_html, args=(filename,))
+    helper_thread2 = ErrorThread(target=read_html, args=(filename,))
+
+    helper_thread1.start()
+    helper_thread2.start()
+
+    while helper_thread1.is_alive() or helper_thread2.is_alive():
+        pass
+    assert None is helper_thread1.err is helper_thread2.err
+>>>>>> > 5818f804b... added rowspan / colspan / infer - header tests. removed test_computer_sales_page, which now appears to parse correctly.:
+    pandas / tests / io / test_html.py

From c51f1b95a4070317f9125d17013117d220befcaf Mon Sep 17 00:00:00 2001
From: John Owens <jowens@ece.ucdavis.edu>
Date: Wed, 26 Jul 2017 16:15:26 -0700
Subject: [PATCH 3/3] fixing merge errors

---
 pandas/io/html.py            |  5 -----
 pandas/io/tests/test_html.py | 11 -----------
 2 files changed, 16 deletions(-)

diff --git a/pandas/io/html.py b/pandas/io/html.py
index d5f5767e9b16e..0ee523f699159 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -19,13 +19,8 @@
 from pandas.compat import (lrange, lmap, lfilter, u, string_types, iteritems,
                            raise_with_traceback, binary_type)
 from pandas import Series
-<< << << < HEAD
-from pandas.core.common import AbstractMethodError
-from pandas.formats.printing import pprint_thing
-== == == =
 from pandas.core.common import (AbstractMethodError, flatten)
 from pandas.io.formats.printing import pprint_thing
->>>>>> > b922af71b... read_html handles rowspan and colspan in tables, infers headers if < thead > is not specified
 
 _IMPORTS = False
 _HAS_BS4 = False
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
index 9b8064d89dc64..e91fbebf02108 100644
--- a/pandas/io/tests/test_html.py
+++ b/pandas/io/tests/test_html.py
@@ -977,14 +977,8 @@ def test_computer_sales_page(self):
 
 def test_invalid_flavor():
     url = 'google.com'
-<< << << < HEAD:
-    pandas / io / tests / test_html.py
-    with tm.assertRaises(ValueError):
-        read_html(url, 'google', flavor='not a* valid**++ flaver')
-== == == =
     with pytest.raises(ValueError):
         read_html(url, 'google', flavor='not a* valid**++ flavor')
->>>>>> > 5818f804b... added rowspan / colspan / infer - header tests. removed test_computer_sales_page, which now appears to parse correctly.:
     pandas / tests / io / test_html.py
 
 
@@ -1031,12 +1025,9 @@ def test_same_ordering():
     dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4'])
     assert_framelist_equal(dfs_lxml, dfs_bs4)
 
-<< << << < HEAD:
-    pandas / io / tests / test_html.py
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
-== == == =
 
 
 class ErrorThread(threading.Thread):
@@ -1067,5 +1058,3 @@ def test_importcheck_thread_safety():
     while helper_thread1.is_alive() or helper_thread2.is_alive():
         pass
     assert None is helper_thread1.err is helper_thread2.err
->>>>>> > 5818f804b... added rowspan / colspan / infer - header tests. removed test_computer_sales_page, which now appears to parse correctly.:
-    pandas / tests / io / test_html.py