reactive-python · rmorshea · Dec 1, 2022 · Nov 9, 2022 · Nov 9, 2022 · Nov 9, 2022
diff --git a/docs/source/_custom_js/package-lock.json b/docs/source/_custom_js/package-lock.json
diff --git a/docs/source/about/changelog.rst b/docs/source/about/changelog.rst
@@ -23,7 +23,16 @@ more info, see the :ref:`Contributor Guide <Creating a Changelog Entry>`.
 Unreleased
 ----------
 
-No changes.
+**Added**
+
+- :pull:`832` - ``del_html_body_transform`` to remove ``<html>``, ``<head>``, and ``<body>`` while preserving ``<head>`` and ``<body>`` children.
+
+**Fixed**
+
+- :pull:`832` - Fix ``html_to_vdom`` improperly handling ``<html>``, ``<head>``, and ``<body>``.
+
+**Removed**
+
+- :pull:`832` - Removed ``idom.html.body``, which is currently unusable due to technological limitations and is therefore not needed.
 
 
 v0.41.0

diff --git a/src/idom/html.py b/src/idom/html.py
@@ -14,7 +14,6 @@
 
 **Content sectioning**
 
-- :func:`body`
 - :func:`address`
 - :func:`article`
 - :func:`aside`
@@ -189,7 +188,6 @@ def _(*children: Any, key: Key | None = None) -> VdomDict:
 title = make_vdom_constructor("title")
 
 # Content sectioning
-body = make_vdom_constructor("body")
 address = make_vdom_constructor("address")
 article = make_vdom_constructor("article")
 aside = make_vdom_constructor("aside")

diff --git a/src/idom/utils.py b/src/idom/utils.py
@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 from itertools import chain
 from typing import Any, Callable, Generic, Iterable, List, TypeVar, Union
 
 from lxml import etree
-from lxml.html import fragments_fromstring
+from lxml.html import fromstring
 
 import idom
 from idom.core.types import VdomDict
@@ -63,7 +65,7 @@ def html_to_vdom(
     using a ``key=...`` attribute within your HTML tag.
 
     Parameters:
-        source:
+        html:
             The raw HTML as a string
         transforms:
             Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
@@ -77,15 +79,15 @@ def html_to_vdom(
         raise TypeError(f"Expected html to be a string, not {type(html).__name__}")
 
     # If the user provided a string, convert it to a list of lxml.etree nodes
-    parser = etree.HTMLParser(
-        remove_comments=True,
-        remove_pis=True,
-        remove_blank_text=True,
-        recover=not strict,
-    )
     try:
-        nodes: list[etree._Element] = fragments_fromstring(
-            html, no_leading_text=True, parser=parser
+        root_node: etree._Element = fromstring(
+            html.strip(),
+            parser=etree.HTMLParser(
+                remove_comments=True,
+                remove_pis=True,
+                remove_blank_text=True,
+                recover=not strict,
+            ),
         )
     except etree.XMLSyntaxError as e:
         if not strict:
@@ -97,34 +99,17 @@ def html_to_vdom(
             "you can disable the strict parameter on html_to_vdom().\n"
             "Otherwise, repair your broken HTML and try again."
         ) from e
-    has_root_node = len(nodes) == 1
-
-    # Find or create a root node
-    if has_root_node:
-        root_node = nodes[0]
-    else:
-        # etree.Element requires a non-empty tag - we correct this below
-        root_node = etree.Element("TEMP", None, None)
-        for child in nodes:
-            root_node.append(child)
 
-    # Convert the lxml node to a VDOM dict
-    vdom = _etree_to_vdom(root_node, transforms)
-
-    # Change the artificially created root node to a React Fragment, instead of a div
-    if not has_root_node:
-        vdom["tagName"] = ""
-
-    return vdom
+    return _etree_to_vdom(root_node, transforms)
 
 
 def _etree_to_vdom(
     node: etree._Element, transforms: Iterable[_ModelTransform]
 ) -> VdomDict:
-    """Recusively transform an lxml etree node into a DOM model
+    """Transform an lxml etree node into a DOM model
 
     Parameters:
-        source:
+        node:
             The ``lxml.etree._Element`` node
         transforms:
             Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
@@ -136,7 +121,7 @@ def _etree_to_vdom(
             f"Expected node to be a etree._Element, not {type(node).__name__}"
         )
 
-    # This will recursively call _etree_to_vdom() on all children
+    # Recursively call _etree_to_vdom() on all children
     children = _generate_vdom_children(node, transforms)
 
     # Convert the lxml node to a VDOM dict
@@ -223,3 +208,40 @@ def _hypen_to_camel_case(string: str) -> str:
 
 class HTMLParseError(etree.LxmlSyntaxError):  # type: ignore[misc]
     """Raised when an HTML document cannot be parsed using strict parsing."""
+
+
+def del_html_body_transform(vdom: dict[str, Any]) -> dict[str, Any]:
+    """Transform intended for use with ``html_to_vdom``.
+
+    Removes ``<html>``, ``<head>``, and ``<body>`` while preserving the children of
+    ``<head>`` and ``<body>``. Any node whose ``tagName`` is not ``"html"`` is
+    returned untouched.
+
+    Parameters:
+        vdom:
+            The VDOM dictionary to transform. An ``<html>`` node is modified in place.
+
+    Returns:
+        The input unchanged if it is not an ``<html>`` node. Otherwise a fragment
+        (``tagName == ""``) holding the flattened children, or the sole remaining
+        child itself when exactly one child remains and it is a VDOM dictionary.
+    """
+    if vdom.get("tagName") != "html":
+        return vdom
+
+    # Remove all fields from `<html>` except for `children` and `tagName`
+    for key in list(vdom):
+        if key not in ("children", "tagName"):
+            del vdom[key]
+
+    # An empty tag name marks the node as a fragment
+    vdom["tagName"] = ""
+
+    if "children" not in vdom:
+        return vdom
+
+    # Replace `<head>` and `<body>` with their own children. Text nodes and any
+    # other elements are kept as they are, so nothing is dropped and a document
+    # without a `<body>` still has its `<head>` unwrapped.
+    flattened: list[Any] = []
+    for child in vdom["children"]:
+        if isinstance(child, dict) and child.get("tagName") in ("head", "body"):
+            flattened.extend(child.get("children", []))
+        else:
+            flattened.append(child)
+    vdom["children"] = flattened
+
+    # A fragment around a single element is redundant. A lone text node stays
+    # wrapped so that the return value is always a VDOM dictionary.
+    if len(flattened) == 1 and isinstance(flattened[0], dict):
+        return flattened[0]
+
+    return vdom
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -1,7 +1,7 @@
 import pytest
 
 import idom
-from idom.utils import HTMLParseError, html_to_vdom
+from idom.utils import HTMLParseError, del_html_body_transform, html_to_vdom
 
 
 def test_basic_ref_behavior():
@@ -141,11 +141,51 @@ def test_html_to_vdom_with_no_parent_node():
     source = "<p>Hello</p><div>World</div>"
 
     expected = {
-        "tagName": "",
+        "tagName": "div",
         "children": [
             {"tagName": "p", "children": ["Hello"]},
             {"tagName": "div", "children": ["World"]},
         ],
     }
 
     assert html_to_vdom(source) == expected
+
+
+def test_del_html_body_transform():
+    """A full document becomes a fragment of the ``<head>`` and ``<body>`` children."""
+    document = """
+    <!DOCTYPE html>
+    <html lang="en">
+
+    <head>
+    <title>My Title</title>
+    </head>
+
+    <body><h1>Hello World</h1></body>
+
+    </html>
+    """
+
+    title_node = {"tagName": "title", "children": ["My Title"]}
+    heading_node = {"tagName": "h1", "children": ["Hello World"]}
+    wanted = {"tagName": "", "children": [title_node, heading_node]}
+
+    result = html_to_vdom(document, del_html_body_transform)
+
+    assert result == wanted
+
+
+def test_del_html_body_transform_no_head():
+    """Without a ``<head>``, the lone ``<body>`` child is returned directly."""
+    document = """
+    <!DOCTYPE html>
+    <html lang="en">
+
+    <body><h1>Hello World</h1></body>
+
+    </html>
+    """
+
+    wanted = {"tagName": "h1", "children": ["Hello World"]}
+    result = html_to_vdom(document, del_html_body_transform)
+
+    assert result == wanted