From f347e8e7781ea794de9e23d963972b80fb715d91 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Thu, 22 Jun 2023 19:47:39 -0400
Subject: [PATCH 01/17] Updating documentation and adding deprecation logic for
 read_xml.

---
 doc/source/whatsnew/v2.1.0.rst         |  2 +
 pandas/io/xml.py                       | 20 ++++++++--
 pandas/tests/io/xml/test_xml.py        | 51 +++++++++++++++++---------
 pandas/tests/io/xml/test_xml_dtypes.py | 30 ++++++++-------
 4 files changed, 69 insertions(+), 34 deletions(-)
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 2436d91690ed3..6fdedf97d3e09 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -298,6 +298,7 @@ Deprecations
 - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
 - Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`)
 - Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`)
+- Deprecated literal string input to :func:`read_xml`. Wrap literal string/bytes input in ``io.StringIO`` instead. (:issue:`53767`)
 - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`)
 - Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`)
 - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
@@ -305,6 +306,7 @@ Deprecations
 - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`)
 - Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`)
 - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`)
+-
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.performance:
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 2aec361d46b99..bb46088296f47 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -12,6 +12,7 @@
     Callable,
     Sequence,
 )
+import warnings
 
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
@@ -20,6 +21,7 @@
     ParserError,
 )
 from pandas.util._decorators import doc
+from pandas.util._exceptions import find_stack_level
 from pandas.util._validators import check_dtype_backend
 
 from pandas.core.dtypes.common import is_list_like
@@ -894,6 +896,9 @@ def read_xml(
         string or a path. The string can further be a URL. Valid URL schemes
         include http, ftp, s3, and file.
 
+        .. deprecated:: 2.1.0
+            Passing html literal strings is deprecated.
+
     xpath : str, optional, default './\*'
         The XPath to parse required set of nodes for migration to DataFrame.
         XPath should return a collection of elements and not a single
@@ -1068,7 +1073,7 @@ def read_xml(
     ...  </row>
     ... </data>'''
 
-    >>> df = pd.read_xml(xml)
+    >>> df = pd.read_xml(SringIO(xml))
     >>> df
           shape  degrees  sides
     0    square      360    4.0
@@ -1082,7 +1087,7 @@ def read_xml(
     ...   <row shape="triangle" degrees="180" sides="3.0"/>
     ... </data>'''
 
-    >>> df = pd.read_xml(xml, xpath=".//row")
+    >>> df = pd.read_xml(StringIO(xml), xpath=".//row")
     >>> df
           shape  degrees  sides
     0    square      360    4.0
@@ -1108,7 +1113,7 @@ def read_xml(
     ...   </doc:row>
     ... </doc:data>'''
 
-    >>> df = pd.read_xml(xml,
+    >>> df = pd.read_xml(StringIO(xml),
     ...                  xpath="//doc:row",
     ...                  namespaces={{"doc": "https://example.com"}})
     >>> df
@@ -1119,6 +1124,15 @@ def read_xml(
     """
     check_dtype_backend(dtype_backend)
 
+    if isinstance(path_or_buffer, str) and "\n" in path_or_buffer:
+        warnings.warn(
+            "Passing literal xml to 'read_xml' is deprecated and "
+            "will be removed in a future version. To read from a "
+            "literal string, wrap it in a 'StringIO' object.",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+
     return _parse(
         path_or_buffer=path_or_buffer,
         xpath=xpath,
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index b0e806caecc80..f83079acd611c 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -391,6 +391,11 @@ def test_file_buffered_reader_string(xml_books, parser, mode):
     with open(xml_books, mode, encoding="utf-8" if mode == "r" else None) as f:
         xml_obj = f.read()
 
+    if mode == "rb":
+        xml_obj = StringIO(xml_obj.decode())
+    elif mode == "r":
+        xml_obj = StringIO(xml_obj)
+
     df_str = read_xml(xml_obj, parser=parser)
 
     df_expected = DataFrame(
@@ -411,6 +416,11 @@ def test_file_buffered_reader_no_xml_declaration(xml_books, parser, mode):
         next(f)
         xml_obj = f.read()
 
+    if mode == "rb":
+        xml_obj = StringIO(xml_obj.decode())
+    elif mode == "r":
+        xml_obj = StringIO(xml_obj)
+
     df_str = read_xml(xml_obj, parser=parser)
 
     df_expected = DataFrame(
@@ -580,7 +590,7 @@ def test_bad_xpath_lxml(xml_books):
 
 def test_default_namespace(parser):
     df_nmsp = read_xml(
-        xml_default_nmsp,
+        StringIO(xml_default_nmsp),
         xpath=".//ns:row",
         namespaces={"ns": "http://example.com"},
         parser=parser,
@@ -606,7 +616,7 @@ def test_default_namespace(parser):
 
 def test_prefix_namespace(parser):
     df_nmsp = read_xml(
-        xml_prefix_nmsp,
+        StringIO(xml_prefix_nmsp),
         xpath=".//doc:row",
         namespaces={"doc": "http://example.com"},
         parser=parser,
@@ -630,14 +640,14 @@ def test_prefix_namespace(parser):
 @td.skip_if_no("lxml")
 def test_consistency_default_namespace():
     df_lxml = read_xml(
-        xml_default_nmsp,
+        StringIO(xml_default_nmsp),
         xpath=".//ns:row",
         namespaces={"ns": "http://example.com"},
         parser="lxml",
     )
 
     df_etree = read_xml(
-        xml_default_nmsp,
+        StringIO(xml_default_nmsp),
         xpath=".//doc:row",
         namespaces={"doc": "http://example.com"},
         parser="etree",
@@ -649,14 +659,14 @@ def test_consistency_default_namespace():
 @td.skip_if_no("lxml")
 def test_consistency_prefix_namespace():
     df_lxml = read_xml(
-        xml_prefix_nmsp,
+        StringIO(xml_prefix_nmsp),
         xpath=".//doc:row",
         namespaces={"doc": "http://example.com"},
         parser="lxml",
     )
 
     df_etree = read_xml(
-        xml_prefix_nmsp,
+        StringIO(xml_prefix_nmsp),
         xpath=".//doc:row",
         namespaces={"doc": "http://example.com"},
         parser="etree",
@@ -693,7 +703,7 @@ def test_none_namespace_prefix(key):
         TypeError, match=("empty namespace prefix is not supported in XPath")
     ):
         read_xml(
-            xml_default_nmsp,
+            StringIO(xml_default_nmsp),
             xpath=".//kml:Placemark",
             namespaces={key: "http://www.opengis.net/kml/2.2"},
             parser="lxml",
@@ -782,7 +792,7 @@ def test_empty_attrs_only(parser):
         ValueError,
         match=("xpath does not return any nodes or attributes"),
     ):
-        read_xml(xml, xpath="./row", attrs_only=True, parser=parser)
+        read_xml(StringIO(xml), xpath="./row", attrs_only=True, parser=parser)
 
 
 def test_empty_elems_only(parser):
@@ -797,7 +807,7 @@ def test_empty_elems_only(parser):
         ValueError,
         match=("xpath does not return any nodes or attributes"),
     ):
-        read_xml(xml, xpath="./row", elems_only=True, parser=parser)
+        read_xml(StringIO(xml), xpath="./row", elems_only=True, parser=parser)
 
 
 @td.skip_if_no("lxml")
@@ -822,8 +832,8 @@ def test_attribute_centric_xml():
       </Stations>
 </TrainSchedule>"""
 
-    df_lxml = read_xml(xml, xpath=".//station")
-    df_etree = read_xml(xml, xpath=".//station", parser="etree")
+    df_lxml = read_xml(StringIO(xml), xpath=".//station")
+    df_etree = read_xml(StringIO(xml), xpath=".//station", parser="etree")
 
     df_iter_lx = read_xml_iterparse(xml, iterparse={"station": ["Name", "coords"]})
     df_iter_et = read_xml_iterparse(
@@ -875,7 +885,10 @@ def test_repeat_names(parser):
   </shape>
 </shapes>"""
     df_xpath = read_xml(
-        xml, xpath=".//shape", parser=parser, names=["type_dim", "shape", "type_edge"]
+        StringIO(xml),
+        xpath=".//shape",
+        parser=parser,
+        names=["type_dim", "shape", "type_edge"],
     )
 
     df_iter = read_xml_iterparse(
@@ -917,7 +930,9 @@ def test_repeat_values_new_names(parser):
     <family>ellipse</family>
   </shape>
 </shapes>"""
-    df_xpath = read_xml(xml, xpath=".//shape", parser=parser, names=["name", "group"])
+    df_xpath = read_xml(
+        StringIO(xml), xpath=".//shape", parser=parser, names=["name", "group"]
+    )
 
     df_iter = read_xml_iterparse(
         xml,
@@ -960,7 +975,7 @@ def test_repeat_elements(parser):
   </shape>
 </shapes>"""
     df_xpath = read_xml(
-        xml,
+        StringIO(xml),
         xpath=".//shape",
         parser=parser,
         names=["name", "family", "degrees", "sides"],
@@ -1532,7 +1547,7 @@ def test_comment(parser):
 </shapes>
 <!-- comment after root -->"""
 
-    df_xpath = read_xml(xml, xpath=".//shape", parser=parser)
+    df_xpath = read_xml(StringIO(xml), xpath=".//shape", parser=parser)
 
     df_iter = read_xml_iterparse(
         xml, parser=parser, iterparse={"shape": ["name", "type"]}
@@ -1568,7 +1583,7 @@ def test_dtd(parser):
   </shape>
 </shapes>"""
 
-    df_xpath = read_xml(xml, xpath=".//shape", parser=parser)
+    df_xpath = read_xml(StringIO(xml), xpath=".//shape", parser=parser)
 
     df_iter = read_xml_iterparse(
         xml, parser=parser, iterparse={"shape": ["name", "type"]}
@@ -1604,7 +1619,7 @@ def test_processing_instruction(parser):
   </shape>
 </shapes>"""
 
-    df_xpath = read_xml(xml, xpath=".//shape", parser=parser)
+    df_xpath = read_xml(StringIO(xml), xpath=".//shape", parser=parser)
 
     df_iter = read_xml_iterparse(
         xml, parser=parser, iterparse={"shape": ["name", "type"]}
@@ -1808,7 +1823,7 @@ def test_read_xml_nullable_dtypes(parser, string_storage, dtype_backend):
         string_array_na = ArrowStringArray(pa.array(["x", None]))
 
     with pd.option_context("mode.string_storage", string_storage):
-        result = read_xml(data, parser=parser, dtype_backend=dtype_backend)
+        result = read_xml(StringIO(data), parser=parser, dtype_backend=dtype_backend)
 
     expected = DataFrame(
         {
diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
index 911b540dbc380..fb24902efc0f5 100644
--- a/pandas/tests/io/xml/test_xml_dtypes.py
+++ b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from io import StringIO
+
 import pytest
 
 from pandas.errors import ParserWarning
@@ -81,7 +83,7 @@ def read_xml_iterparse(data, **kwargs):
 
 
 def test_dtype_single_str(parser):
-    df_result = read_xml(xml_types, dtype={"degrees": "str"}, parser=parser)
+    df_result = read_xml(StringIO(xml_types), dtype={"degrees": "str"}, parser=parser)
     df_iter = read_xml_iterparse(
         xml_types,
         parser=parser,
@@ -102,7 +104,7 @@ def test_dtype_single_str(parser):
 
 
 def test_dtypes_all_str(parser):
-    df_result = read_xml(xml_dates, dtype="string", parser=parser)
+    df_result = read_xml(StringIO(xml_dates), dtype="string", parser=parser)
     df_iter = read_xml_iterparse(
         xml_dates,
         parser=parser,
@@ -126,7 +128,7 @@ def test_dtypes_all_str(parser):
 
 def test_dtypes_with_names(parser):
     df_result = read_xml(
-        xml_dates,
+        StringIO(xml_dates),
         names=["Col1", "Col2", "Col3", "Col4"],
         dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64[ns]"},
         parser=parser,
@@ -153,7 +155,7 @@ def test_dtypes_with_names(parser):
 
 
 def test_dtype_nullable_int(parser):
-    df_result = read_xml(xml_types, dtype={"sides": "Int64"}, parser=parser)
+    df_result = read_xml(StringIO(xml_types), dtype={"sides": "Int64"}, parser=parser)
     df_iter = read_xml_iterparse(
         xml_types,
         parser=parser,
@@ -174,7 +176,7 @@ def test_dtype_nullable_int(parser):
 
 
 def test_dtype_float(parser):
-    df_result = read_xml(xml_types, dtype={"degrees": "float"}, parser=parser)
+    df_result = read_xml(StringIO(xml_types), dtype={"degrees": "float"}, parser=parser)
     df_iter = read_xml_iterparse(
         xml_types,
         parser=parser,
@@ -214,7 +216,7 @@ def test_both_dtype_converters(parser):
 
     with tm.assert_produces_warning(ParserWarning, match="Both a converter and dtype"):
         df_result = read_xml(
-            xml_types,
+            StringIO(xml_types),
             dtype={"degrees": "str"},
             converters={"degrees": str},
             parser=parser,
@@ -235,7 +237,9 @@ def test_both_dtype_converters(parser):
 
 
 def test_converters_str(parser):
-    df_result = read_xml(xml_types, converters={"degrees": str}, parser=parser)
+    df_result = read_xml(
+        StringIO(xml_types), converters={"degrees": str}, parser=parser
+    )
     df_iter = read_xml_iterparse(
         xml_types,
         parser=parser,
@@ -258,7 +262,7 @@ def test_converters_str(parser):
 def test_converters_date(parser):
     convert_to_datetime = lambda x: to_datetime(x)
     df_result = read_xml(
-        xml_dates, converters={"date": convert_to_datetime}, parser=parser
+        StringIO(xml_dates), converters={"date": convert_to_datetime}, parser=parser
     )
     df_iter = read_xml_iterparse(
         xml_dates,
@@ -305,7 +309,7 @@ def test_callable_str_converters(xml_books, parser, iterparse):
 
 
 def test_parse_dates_column_name(parser):
-    df_result = read_xml(xml_dates, parse_dates=["date"], parser=parser)
+    df_result = read_xml(StringIO(xml_dates), parse_dates=["date"], parser=parser)
     df_iter = read_xml_iterparse(
         xml_dates,
         parser=parser,
@@ -327,7 +331,7 @@ def test_parse_dates_column_name(parser):
 
 
 def test_parse_dates_column_index(parser):
-    df_result = read_xml(xml_dates, parse_dates=[3], parser=parser)
+    df_result = read_xml(StringIO(xml_dates), parse_dates=[3], parser=parser)
     df_iter = read_xml_iterparse(
         xml_dates,
         parser=parser,
@@ -349,7 +353,7 @@ def test_parse_dates_column_index(parser):
 
 
 def test_parse_dates_true(parser):
-    df_result = read_xml(xml_dates, parse_dates=True, parser=parser)
+    df_result = read_xml(StringIO(xml_dates), parse_dates=True, parser=parser)
 
     df_iter = read_xml_iterparse(
         xml_dates,
@@ -401,7 +405,7 @@ def test_parse_dates_dictionary(parser):
 </data>"""
 
     df_result = read_xml(
-        xml, parse_dates={"date_end": ["year", "month", "day"]}, parser=parser
+        StringIO(xml), parse_dates={"date_end": ["year", "month", "day"]}, parser=parser
     )
     df_iter = read_xml_iterparse(
         xml,
@@ -459,7 +463,7 @@ def test_day_first_parse_dates(parser):
     with tm.assert_produces_warning(
         UserWarning, match="Parsing dates in %d/%m/%Y format"
     ):
-        df_result = read_xml(xml, parse_dates=["date"], parser=parser)
+        df_result = read_xml(StringIO(xml), parse_dates=["date"], parser=parser)
         df_iter = read_xml_iterparse(
             xml,
             parse_dates=["date"],

From 296b45afa14280b1ff68cbb2943f680a6141c9f7 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Thu, 22 Jun 2023 20:47:38 -0400
Subject: [PATCH 02/17] Fixing documentation issue and adding unit test

---
 doc/source/whatsnew/v1.5.0.rst  |  3 ++-
 pandas/tests/io/xml/test_xml.py | 40 ++++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 9653226b96196..44728e7e552ab 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -221,6 +221,7 @@ apply converter methods, and parse dates (:issue:`43567`).
 
 .. ipython:: python
 
+    from io import StringIO
     xml_dates = """<?xml version='1.0' encoding='utf-8'?>
     <data>
       <row>
@@ -244,7 +245,7 @@ apply converter methods, and parse dates (:issue:`43567`).
     </data>"""
 
     df = pd.read_xml(
-        xml_dates,
+        StringIO(xml_dates),
         dtype={'sides': 'Int64'},
         converters={'degrees': str},
         parse_dates=['date']
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index f83079acd611c..dfaf609360467 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -247,6 +247,44 @@
 )
 
 
+def test_literal_xml_deprecation():
+    # GH 53785
+    msg = (
+        "Passing literal xml to 'read_xml' is deprecated and "
+        "will be removed in a future version. To read from a "
+        "literal string, wrap it in a 'StringIO' object."
+    )
+
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        read_xml(
+            """<?xml version='1.0' encoding='utf-8'?>
+        <data xmlns="http://example.com">
+        <row>
+          <a>x</a>
+          <b>1</b>
+          <c>4.0</c>
+          <d>x</d>
+          <e>2</e>
+          <f>4.0</f>
+          <g></g>
+          <h>True</h>
+          <i>False</i>
+        </row>
+        <row>
+          <a>y</a>
+          <b>2</b>
+          <c>5.0</c>
+          <d></d>
+          <e></e>
+          <f></f>
+          <g></g>
+          <h>False</h>
+          <i></i>
+        </row>
+        </data>"""
+        )
+
+
 @pytest.fixture(params=["rb", "r"])
 def mode(request):
     return request.param
@@ -1361,7 +1399,7 @@ def test_string_error(parser):
         ParserError, match=("iterparse is designed for large XML files")
     ):
         read_xml(
-            xml_default_nmsp,
+            StringIO(xml_default_nmsp),
             parser=parser,
             iterparse={"row": ["shape", "degrees", "sides", "date"]},
         )

From 69cdc1afee4ecdc04d96ba5fbe318c50e78876a9 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Fri, 23 Jun 2023 17:51:57 -0400
Subject: [PATCH 03/17] Updating unit tests and documentation.

---
 doc/source/user_guide/io.rst    | 13 +++++++------
 pandas/tests/io/xml/test_xml.py | 15 +--------------
 2 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 84a78ace8d7c7..59f8ddd9853bd 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -2920,6 +2920,7 @@ Read an XML string:
 
 .. ipython:: python
 
+   from io import StringIO
    xml = """<?xml version="1.0" encoding="UTF-8"?>
    <bookstore>
      <book category="cooking">
@@ -2942,7 +2943,7 @@ Read an XML string:
      </book>
    </bookstore>"""
 
-   df = pd.read_xml(xml)
+   df = pd.read_xml(StringIO(xml))
    df
 
 Read a URL with no options:
@@ -2962,7 +2963,7 @@ as a string:
        f.write(xml)
 
    with open(file_path, "r") as f:
-       df = pd.read_xml(f.read())
+       df = pd.read_xml(StringIO(f.read()))
    df
 
 Read in the content of the "books.xml" as instance of ``StringIO`` or
@@ -3053,7 +3054,7 @@ For example, below XML contains a namespace with prefix, ``doc``, and URI at
      </doc:row>
    </doc:data>"""
 
-   df = pd.read_xml(xml,
+   df = pd.read_xml(StringIO(xml),
                     xpath="//doc:row",
                     namespaces={"doc": "https://example.com"})
    df
@@ -3083,7 +3084,7 @@ But assigning *any* temporary name to correct URI allows parsing by nodes.
     </row>
    </data>"""
 
-   df = pd.read_xml(xml,
+   df = pd.read_xml(StringIO(xml),
                     xpath="//pandas:row",
                     namespaces={"pandas": "https://example.com"})
    df
@@ -3118,7 +3119,7 @@ However, if XPath does not reference node names such as default, ``/*``, then
         </row>
       </data>"""
 
-      df = pd.read_xml(xml, xpath="./row")
+      df = pd.read_xml(StringIO(xml), xpath="./row")
       df
 
    shows the attribute ``sides`` on ``shape`` element was not parsed as
@@ -3219,7 +3220,7 @@ output (as shown below for demonstration) for easier parse into ``DataFrame``:
       </row>
     </response>"""
 
-   df = pd.read_xml(xml, stylesheet=xsl)
+   df = pd.read_xml(StringIO(xml), stylesheet=xsl)
    df
 
 For very large XML files that can range in hundreds of megabytes to gigabytes, :func:`pandas.read_xml`
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index dfaf609360467..fc76715ea97e7 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -248,7 +248,7 @@
 
 
 def test_literal_xml_deprecation():
-    # GH 53785
+    # GH 53809
     msg = (
         "Passing literal xml to 'read_xml' is deprecated and "
         "will be removed in a future version. To read from a "
@@ -1392,19 +1392,6 @@ def test_empty_stylesheet(val):
 
 
 # ITERPARSE
-
-
-def test_string_error(parser):
-    with pytest.raises(
-        ParserError, match=("iterparse is designed for large XML files")
-    ):
-        read_xml(
-            StringIO(xml_default_nmsp),
-            parser=parser,
-            iterparse={"row": ["shape", "degrees", "sides", "date"]},
-        )
-
-
 def test_file_like_iterparse(xml_books, parser, mode):
     with open(xml_books, mode, encoding="utf-8" if mode == "r" else None) as f:
         if mode == "r" and parser == "lxml":

From 0f0f38b3cdef4cdcbfe25e4d41efb45f5d051050 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Fri, 23 Jun 2023 21:57:31 -0400
Subject: [PATCH 04/17] Fixing unit tests and documentation issues

---
 pandas/io/xml.py                |  2 +-
 pandas/tests/io/xml/test_xml.py | 30 +++---------------------------
 2 files changed, 4 insertions(+), 28 deletions(-)

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index bb46088296f47..b35f409d9032b 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -1073,7 +1073,7 @@ def read_xml(
     ...  </row>
     ... </data>'''
 
-    >>> df = pd.read_xml(SringIO(xml))
+    >>> df = pd.read_xml(StringIO(xml))
     >>> df
           shape  degrees  sides
     0    square      360    4.0
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index fc76715ea97e7..637af6e4a5f95 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -246,6 +246,8 @@
     }
 )
 
+td.skip_if_no("lxml")
+
 
 def test_literal_xml_deprecation():
     # GH 53809
@@ -256,33 +258,7 @@ def test_literal_xml_deprecation():
     )
 
     with tm.assert_produces_warning(FutureWarning, match=msg):
-        read_xml(
-            """<?xml version='1.0' encoding='utf-8'?>
-        <data xmlns="http://example.com">
-        <row>
-          <a>x</a>
-          <b>1</b>
-          <c>4.0</c>
-          <d>x</d>
-          <e>2</e>
-          <f>4.0</f>
-          <g></g>
-          <h>True</h>
-          <i>False</i>
-        </row>
-        <row>
-          <a>y</a>
-          <b>2</b>
-          <c>5.0</c>
-          <d></d>
-          <e></e>
-          <f></f>
-          <g></g>
-          <h>False</h>
-          <i></i>
-        </row>
-        </data>"""
-        )
+        read_xml(xml_default_nmsp)
 
 
 @pytest.fixture(params=["rb", "r"])

From 2c848acc59b4ba69082f7580bae6ee9b810629bd Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Fri, 23 Jun 2023 22:34:05 -0400
Subject: [PATCH 05/17] Fixing unit tests and documentation issues

---
 pandas/tests/io/xml/test_xml.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 637af6e4a5f95..893e5ec229bdd 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -246,9 +246,8 @@
     }
 )
 
-td.skip_if_no("lxml")
-
 
+@td.skip_if_no("lxml")
 def test_literal_xml_deprecation():
     # GH 53809
     msg = (
@@ -258,7 +257,7 @@ def test_literal_xml_deprecation():
     )
 
     with tm.assert_produces_warning(FutureWarning, match=msg):
-        read_xml(xml_default_nmsp)
+        read_xml(xml_default_nmsp, parser="etree")
 
 
 @pytest.fixture(params=["rb", "r"])

From b8a582c97a0e75459eb069d61bed85918ee12a7c Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Fri, 23 Jun 2023 23:27:59 -0400
Subject: [PATCH 06/17] Fixing unit tests and documentation issues

---
 pandas/io/xml.py                | 6 +++---
 pandas/tests/io/xml/test_xml.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index b35f409d9032b..a60586368a0bf 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -1073,7 +1073,7 @@ def read_xml(
     ...  </row>
     ... </data>'''
 
-    >>> df = pd.read_xml(StringIO(xml))
+    >>> df = pd.read_xml(io.StringIO(xml))
     >>> df
           shape  degrees  sides
     0    square      360    4.0
@@ -1087,7 +1087,7 @@ def read_xml(
     ...   <row shape="triangle" degrees="180" sides="3.0"/>
     ... </data>'''
 
-    >>> df = pd.read_xml(StringIO(xml), xpath=".//row")
+    >>> df = pd.read_xml(io.StringIO(xml), xpath=".//row")
     >>> df
           shape  degrees  sides
     0    square      360    4.0
@@ -1113,7 +1113,7 @@ def read_xml(
     ...   </doc:row>
     ... </doc:data>'''
 
-    >>> df = pd.read_xml(StringIO(xml),
+    >>> df = pd.read_xml(io.StringIO(xml),
     ...                  xpath="//doc:row",
     ...                  namespaces={{"doc": "https://example.com"}})
     >>> df
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 893e5ec229bdd..fb8802d5be312 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -257,7 +257,7 @@ def test_literal_xml_deprecation():
     )
 
     with tm.assert_produces_warning(FutureWarning, match=msg):
-        read_xml(xml_default_nmsp, parser="etree")
+        read_xml(xml_default_nmsp)
 
 
 @pytest.fixture(params=["rb", "r"])

From 92bc6fa7bf5a4173a5c5027147ec381a1c635951 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Sat, 24 Jun 2023 00:06:58 -0400
Subject: [PATCH 07/17] Fixing import error in documentation

---
 pandas/io/xml.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index a60586368a0bf..a85e82448538a 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -1054,6 +1054,7 @@ def read_xml(
 
     Examples
     --------
+    >>> import io
     >>> xml = '''<?xml version='1.0' encoding='utf-8'?>
     ... <data xmlns="http://example.com">
     ...  <row>

From 8bbd7c425c8232bfa188035452dcb3d2147c2540 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Mon, 26 Jun 2023 18:32:48 -0400
Subject: [PATCH 08/17] Updated deprecation logic per reviewer recommendations.

---
 pandas/io/xml.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index a85e82448538a..ac46b5f8cc552 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -32,6 +32,7 @@
     file_exists,
     get_handle,
     infer_compression,
+    is_file_like,
     is_fsspec_url,
     is_url,
     stringify_path,
@@ -898,6 +899,7 @@ def read_xml(
 
         .. deprecated:: 2.1.0
             Passing html literal strings is deprecated.
+            Wrap literal xml input in ``io.StringIO`` or ``io.BytesIO`` instead.
 
     xpath : str, optional, default './\*'
         The XPath to parse required set of nodes for migration to DataFrame.
@@ -1125,7 +1127,13 @@ def read_xml(
     """
     check_dtype_backend(dtype_backend)
 
-    if isinstance(path_or_buffer, str) and "\n" in path_or_buffer:
+    if (
+        isinstance(path_or_buffer, str)
+        and not is_file_like(path_or_buffer)
+        and "\n" in path_or_buffer
+    ):
+        with open("/home/richard/Desktop/file.txt", "a+") as fil:
+            fil.write(f"{path_or_buffer}\n\n\n")
         warnings.warn(
             "Passing literal xml to 'read_xml' is deprecated and "
             "will be removed in a future version. To read from a "

From 5aece784cf4d4d5419c1460a2979f6ae1d53de79 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Mon, 26 Jun 2023 18:45:52 -0400
Subject: [PATCH 09/17] Updating deprecation logic and documentation per
 reviewer recommendations.

---
 doc/source/whatsnew/v2.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index c7a5bd59861ae..c224fd822c887 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -298,7 +298,7 @@ Deprecations
 - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
 - Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`)
 - Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`)
-- Deprecated literal string input to :func:`read_xml`. Wrap literal string/bytes input in ``io.StringIO`` instead. (:issue:`53767`)
+- Deprecated literal string input to :func:`read_xml`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. (:issue:`53767`)
 - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`)
 - Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`)
 - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)

From 6f15924698badb860367df4e059dd306ce2a6cce Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Mon, 26 Jun 2023 19:19:40 -0400
Subject: [PATCH 10/17] Fixing logic error

---
 pandas/io/xml.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index ac46b5f8cc552..7a8de97e9e288 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -1132,8 +1132,6 @@ def read_xml(
         and not is_file_like(path_or_buffer)
         and "\n" in path_or_buffer
     ):
-        with open("/home/richard/Desktop/file.txt", "a+") as fil:
-            fil.write(f"{path_or_buffer}\n\n\n")
         warnings.warn(
             "Passing literal xml to 'read_xml' is deprecated and "
             "will be removed in a future version. To read from a "

From 00f7b158aa8a4b908469529be9c0ddce31957e01 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Mon, 26 Jun 2023 21:52:54 -0400
Subject: [PATCH 11/17] Fixing implementation per reviewer recommendations.

---
 pandas/io/xml.py                |  7 ++++---
 pandas/tests/io/xml/test_xml.py | 36 +++++++++++++++++++++++----------
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 7a8de97e9e288..d545821f218b5 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -1128,9 +1128,10 @@ def read_xml(
     check_dtype_backend(dtype_backend)
 
     if (
-        isinstance(path_or_buffer, str)
-        and not is_file_like(path_or_buffer)
-        and "\n" in path_or_buffer
+        not is_file_like(path_or_buffer)
+        and not file_exists(path_or_buffer)
+        and not is_url(path_or_buffer)
+        and not is_fsspec_url(path_or_buffer)
     ):
         warnings.warn(
             "Passing literal xml to 'read_xml' is deprecated and "
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index fb8802d5be312..df00ff2b5d825 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -452,7 +452,7 @@ def test_file_buffered_reader_no_xml_declaration(xml_books, parser, mode):
 def test_string_charset(parser):
     txt = "<中文標籤><row><c1>1</c1><c2>2</c2></row></中文標籤>"
 
-    df_str = read_xml(txt, parser=parser)
+    df_str = read_xml(StringIO(txt), parser=parser)
 
     df_expected = DataFrame({"c1": 1, "c2": 2}, index=[0])
 
@@ -510,34 +510,48 @@ def test_empty_string_lxml(val):
         ]
     )
     with pytest.raises(XMLSyntaxError, match=msg):
-        read_xml(val, parser="lxml")
+        if isinstance(val, str):
+            read_xml(StringIO(val), parser="lxml")
+        else:
+            read_xml(BytesIO(val), parser="lxml")
 
 
 @pytest.mark.parametrize("val", ["", b""])
 def test_empty_string_etree(val):
     with pytest.raises(ParseError, match="no element found"):
-        read_xml(val, parser="etree")
+        if isinstance(val, str):
+            read_xml(StringIO(val), parser="etree")
+        else:
+            read_xml(BytesIO(val), parser="etree")
 
 
 @td.skip_if_no("lxml")
 def test_wrong_file_path_lxml():
-    from lxml.etree import XMLSyntaxError
-
+    msg = (
+        "Passing literal xml to 'read_xml' is deprecated and "
+        "will be removed in a future version. To read from a "
+        "literal string, wrap it in a 'StringIO' object."
+    )
     filename = os.path.join("data", "html", "books.xml")
 
     with pytest.raises(
-        XMLSyntaxError,
-        match=("Start tag expected, '<' not found"),
+        FutureWarning,
+        match=msg,
     ):
         read_xml(filename, parser="lxml")
 
 
 def test_wrong_file_path_etree():
+    msg = (
+        "Passing literal xml to 'read_xml' is deprecated and "
+        "will be removed in a future version. To read from a "
+        "literal string, wrap it in a 'StringIO' object."
+    )
     filename = os.path.join("data", "html", "books.xml")
 
     with pytest.raises(
-        ParseError,
-        match=("not well-formed"),
+        FutureWarning,
+        match=msg,
     ):
         read_xml(filename, parser="etree")
 
@@ -1223,8 +1237,8 @@ def test_style_charset():
 
 </xsl:stylesheet>"""
 
-    df_orig = read_xml(xml)
-    df_style = read_xml(xml, stylesheet=xsl)
+    df_orig = read_xml(StringIO(xml))
+    df_style = read_xml(StringIO(xml), stylesheet=xsl)
 
     tm.assert_frame_equal(df_orig, df_style)
 

From 20e7ef2fb83256a426ab09caa5707ae21fb75f46 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Mon, 26 Jun 2023 23:11:02 -0400
Subject: [PATCH 12/17] Updating implementation per reviewer recommendations.

---
 pandas/tests/io/xml/test_xml.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index df00ff2b5d825..b6024b2bc11e6 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -1370,13 +1370,14 @@ def test_stylesheet_with_etree(kml_cta_rail_lines, xsl_flatten_doc):
 @td.skip_if_no("lxml")
 @pytest.mark.parametrize("val", ["", b""])
 def test_empty_stylesheet(val):
-    from lxml.etree import XMLSyntaxError
-
+    msg = (
+        "Passing literal xml to 'read_xml' is deprecated and "
+        "will be removed in a future version. To read from a "
+        "literal string, wrap it in a 'StringIO' object."
+    )
     kml = os.path.join("data", "xml", "cta_rail_lines.kml")
 
-    with pytest.raises(
-        XMLSyntaxError, match=("Document is empty|Start tag expected, '<' not found")
-    ):
+    with pytest.raises(FutureWarning, match=msg):
         read_xml(kml, stylesheet=val)
 
 

From 526c2240ce79ac8145b604d109d7089a85614be7 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Mon, 26 Jun 2023 23:31:44 -0400
Subject: [PATCH 13/17] Cleaning up the deprecation logic a bit.

---
 pandas/io/xml.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index d545821f218b5..44c77f8e215ee 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -1127,11 +1127,13 @@ def read_xml(
     """
     check_dtype_backend(dtype_backend)
 
-    if (
-        not is_file_like(path_or_buffer)
-        and not file_exists(path_or_buffer)
-        and not is_url(path_or_buffer)
-        and not is_fsspec_url(path_or_buffer)
+    if not any(
+        [
+            is_file_like(path_or_buffer),
+            file_exists(path_or_buffer),
+            is_url(path_or_buffer),
+            is_fsspec_url(path_or_buffer),
+        ]
     ):
         warnings.warn(
             "Passing literal xml to 'read_xml' is deprecated and "

From 65f88b978e91c27d50d918ca8183b0d5761ad41a Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Tue, 27 Jun 2023 18:13:27 -0400
Subject: [PATCH 14/17] Updating implementation per reviewer recommendations.

---
 pandas/io/xml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 44c77f8e215ee..0adcedce24049 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -1127,7 +1127,7 @@ def read_xml(
     """
     check_dtype_backend(dtype_backend)
 
-    if not any(
+    if isinstance(path_or_buffer, str) and not any(
         [
             is_file_like(path_or_buffer),
             file_exists(path_or_buffer),

From b7e1fb6418ad15822dd6f5f135a1e7bc82e5db11 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Sat, 8 Jul 2023 23:18:17 -0400
Subject: [PATCH 15/17] Updating unit tests

---
 pandas/tests/io/xml/test_xml.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 7879da01ff016..1a64d9910d8bf 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -313,7 +313,7 @@ def test_parser_consistency_file(xml_books):
 def test_parser_consistency_url(parser, httpserver):
     httpserver.serve_content(content=xml_default_nmsp)
 
-    df_xpath = read_xml(xml_default_nmsp, parser=parser)
+    df_xpath = read_xml(StringIO(xml_default_nmsp), parser=parser)
     df_iter = read_xml(
         BytesIO(xml_default_nmsp.encode()),
         parser=parser,
@@ -1872,7 +1872,7 @@ def test_online_stylesheet():
 """
 
     df_xsl = read_xml(
-        xml,
+        StringIO(xml),
         xpath=".//tr[td and position() <= 6]",
         names=["title", "artist"],
         stylesheet=xsl,

From 14d2cb159a2b12549c3c65ebf9dc61c719abb394 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Sun, 9 Jul 2023 12:09:00 -0400
Subject: [PATCH 16/17] Fixing discrepancy in doc string.

---
 pandas/io/xml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index a7bb041a3028e..75e3ccdb66c0e 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -898,7 +898,7 @@ def read_xml(
         include http, ftp, s3, and file.
 
         .. deprecated:: 2.1.0
-            Passing html literal strings is deprecated.
+            Passing xml literal strings is deprecated.
             Wrap literal xml input in ``io.StringIO`` or ``io.BytesIO`` instead.
 
     xpath : str, optional, default './\*'

From c215a94098d4dbc314cbb64e75246fa032a59a01 Mon Sep 17 00:00:00 2001
From: Richard Howe <rhowe425@gmail.com>
Date: Tue, 11 Jul 2023 14:37:16 -0400
Subject: [PATCH 17/17] Updating implementation based on reviewer
 recommendations.

---
 pandas/io/xml.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 75e3ccdb66c0e..a58437fdeb8dc 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -805,6 +805,22 @@ def _parse(
 
     p: _EtreeFrameParser | _LxmlFrameParser
 
+    if isinstance(path_or_buffer, str) and not any(
+        [
+            is_file_like(path_or_buffer),
+            file_exists(path_or_buffer),
+            is_url(path_or_buffer),
+            is_fsspec_url(path_or_buffer),
+        ]
+    ):
+        warnings.warn(
+            "Passing literal xml to 'read_xml' is deprecated and "
+            "will be removed in a future version. To read from a "
+            "literal string, wrap it in a 'StringIO' object.",
+            FutureWarning,
+            stacklevel=find_stack_level(),
+        )
+
     if parser == "lxml":
         lxml = import_optional_dependency("lxml.etree", errors="ignore")
 
@@ -1127,22 +1143,6 @@ def read_xml(
     """
     check_dtype_backend(dtype_backend)
 
-    if isinstance(path_or_buffer, str) and not any(
-        [
-            is_file_like(path_or_buffer),
-            file_exists(path_or_buffer),
-            is_url(path_or_buffer),
-            is_fsspec_url(path_or_buffer),
-        ]
-    ):
-        warnings.warn(
-            "Passing literal xml to 'read_xml' is deprecated and "
-            "will be removed in a future version. To read from a "
-            "literal string, wrap it in a 'StringIO' object.",
-            FutureWarning,
-            stacklevel=find_stack_level(),
-        )
-
     return _parse(
         path_or_buffer=path_or_buffer,
         xpath=xpath,