From e778d07dc8af665662d1e2ed9d8f2db7d0c03095 Mon Sep 17 00:00:00 2001
From: Parfait Gasana <parfait.gasana@gmail.com>
Date: Sun, 16 Jan 2022 22:59:36 -0600
Subject: [PATCH 1/5] ENH: Add dtypes/converters arguments for pandas.read_xml

---
 doc/source/whatsnew/v1.4.0.rst  |  41 +++++
 pandas/io/xml.py                |  93 +++++++++-
 pandas/tests/io/xml/test_xml.py | 317 +++++++++++++++++++++++++++++++-
 3 files changed, 448 insertions(+), 3 deletions(-)
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 32ca3f6945d7f..d57ddad0b48c2 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -184,6 +184,47 @@ representation of :class:`DataFrame` objects (:issue:`4889`).
     df
     df.to_dict(orient='tight')
 
+.. _whatsnew_140.read_xml_dtypes:
+
+read_xml now supports ``dtype``, ``converters``, and ``parse_dates``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Similar to other IO methods, :func:`pandas.read_xml` now supports assigning specific dtypes to columns,
+apply converter methods, and parse dates.
+
+.. ipython:: python
+
+    xml_dates = """<?xml version='1.0' encoding='utf-8'?>
+    <data>
+      <row>
+        <shape>square</shape>
+        <degrees>00360</degrees>
+        <sides>4.0</sides>
+        <date>2020-01-01</date>
+       </row>
+      <row>
+        <shape>circle</shape>
+        <degrees>00360</degrees>
+        <sides/>
+        <date>2021-01-01</date>
+      </row>
+      <row>
+        <shape>triangle</shape>
+        <degrees>00180</degrees>
+        <sides>3.0</sides>
+        <date>2022-01-01</date>
+      </row>
+    </data>"""
+
+    df = pd.read_xml(
+        xml_dates,
+        dtype={'sides': 'Int64'},
+        converters={'degrees': str},
+        parse_dates=['date']
+    )
+    df
+    df.dtypes
+
 .. _whatsnew_140.enhancements.other:
 
 Other enhancements
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index ad87b18bd1683..7add71df0e872 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -5,10 +5,14 @@
 from __future__ import annotations
 
 import io
-from typing import Sequence
+from typing import (
+    Callable,
+    Sequence,
+)
 
 from pandas._typing import (
     CompressionOptions,
+    DtypeArg,
     FilePath,
     ReadBuffer,
     StorageOptions,
@@ -67,6 +71,23 @@ class _XMLFrameParser:
     names : list
         Column names for Data Frame of parsed XML data.
 
+    dtype : dict
+        Data type for data or columns. E.g. {{'a': np.float64,
+        'b': np.int32, 'c': 'Int64'}}
+
+        .. versionadded:: 1.4.0
+
+    converters : dict, optional
+        Dict of functions for converting values in certain columns. Keys can
+        either be integers or column labels.
+
+        .. versionadded:: 1.4.0
+
+    parse_dates : bool or list of int or names or list of lists or dict
+        Converts either index or select columns to datetimes
+
+        .. versionadded:: 1.4.0
+
     encoding : str
         Encoding of xml object or document.
 
@@ -109,6 +130,13 @@ def __init__(
         elems_only: bool,
         attrs_only: bool,
         names: Sequence[str] | None,
+        dtype: DtypeArg | None,
+        converters: dict[str, Callable] | None,
+        parse_dates: bool
+        | list[int | str]
+        | list[list[int | str]]
+        | dict[str, list[int | str]]
+        | None,
         encoding: str | None,
         stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None,
         compression: CompressionOptions,
@@ -120,6 +148,9 @@ def __init__(
         self.elems_only = elems_only
         self.attrs_only = attrs_only
         self.names = names
+        self.dtype = dtype
+        self.converters = converters
+        self.parse_dates = parse_dates
         self.encoding = encoding
         self.stylesheet = stylesheet
         self.is_style = None
@@ -671,6 +702,13 @@ def _parse(
     elems_only: bool,
     attrs_only: bool,
     names: Sequence[str] | None,
+    dtype: DtypeArg | None,
+    converters: dict[str, Callable] | None,
+    parse_dates: bool
+    | list[int | str]
+    | list[list[int | str]]
+    | dict[str, list[int | str]]
+    | None,
     encoding: str | None,
     parser: XMLParsers,
     stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None,
@@ -706,6 +744,9 @@ def _parse(
                 elems_only,
                 attrs_only,
                 names,
+                dtype,
+                converters,
+                parse_dates,
                 encoding,
                 stylesheet,
                 compression,
@@ -722,6 +763,9 @@ def _parse(
             elems_only,
             attrs_only,
             names,
+            dtype,
+            converters,
+            parse_dates,
             encoding,
             stylesheet,
             compression,
@@ -732,7 +776,13 @@ def _parse(
 
     data_dicts = p.parse_data()
 
-    return _data_to_frame(data=data_dicts, **kwargs)
+    return _data_to_frame(
+        data=data_dicts,
+        dtype=dtype,
+        converters=converters,
+        parse_dates=parse_dates,
+        **kwargs,
+    )
 
 
 @deprecate_nonkeyword_arguments(
@@ -749,6 +799,13 @@ def read_xml(
     elems_only: bool = False,
     attrs_only: bool = False,
     names: Sequence[str] | None = None,
+    dtype: DtypeArg | None = None,
+    converters: dict[str, Callable] | None = None,
+    parse_dates: bool
+    | list[int | str]
+    | list[list[int | str]]
+    | dict[str, list[int | str]]
+    | None = None,
     # encoding can not be None for lxml and StringIO input
     encoding: str | None = "utf-8",
     parser: XMLParsers = "lxml",
@@ -799,6 +856,35 @@ def read_xml(
         Column names for DataFrame of parsed XML data. Use this parameter to
         rename original element names and distinguish same named elements.
 
+    dtype : Type name or dict of column -> type, optional
+        Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32,
+        'c': 'Int64'}}
+        Use `str` or `object` together with suitable `na_values` settings
+        to preserve and not interpret dtype.
+        If converters are specified, they will be applied INSTEAD
+        of dtype conversion.
+
+        .. versionadded:: 1.4.0
+
+    converters : dict, optional
+        Dict of functions for converting values in certain columns. Keys can either
+        be integers or column labels.
+
+        .. versionadded:: 1.4.0
+
+    parse_dates : bool or list of int or names or list of lists or dict, default False
+        The behavior is as follows:
+
+        * boolean. If True -> try parsing the index.
+        * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
+          each as a separate date column.
+        * list of lists. e.g.  If [[1, 3]] -> combine columns 1 and 3 and parse as
+          a single date column.
+        * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call
+          result 'foo'
+
+        .. versionadded:: 1.4.0
+
     encoding : str, optional, default 'utf-8'
         Encoding of XML document.
 
@@ -942,6 +1028,9 @@ def read_xml(
         elems_only=elems_only,
         attrs_only=attrs_only,
         names=names,
+        dtype=dtype,
+        converters=converters,
+        parse_dates=parse_dates,
         encoding=encoding,
         parser=parser,
         stylesheet=stylesheet,
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 8809c423a29ba..aa1a3418666f0 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -13,9 +13,14 @@
 import pytest
 
 from pandas.compat._optional import import_optional_dependency
+from pandas.errors import ParserWarning
 import pandas.util._test_decorators as td
 
-from pandas import DataFrame
+from pandas import (
+    DataFrame,
+    Series,
+    to_datetime,
+)
 import pandas._testing as tm
 
 from pandas.io.xml import read_xml
@@ -231,6 +236,48 @@
     }
 )
 
+xml_types = """\
+<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <shape>square</shape>
+    <degrees>00360</degrees>
+    <sides>4.0</sides>
+   </row>
+  <row>
+    <shape>circle</shape>
+    <degrees>00360</degrees>
+    <sides/>
+  </row>
+  <row>
+    <shape>triangle</shape>
+    <degrees>00180</degrees>
+    <sides>3.0</sides>
+  </row>
+</data>"""
+
+xml_dates = """<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <shape>square</shape>
+    <degrees>00360</degrees>
+    <sides>4.0</sides>
+    <date>2020-01-01</date>
+   </row>
+  <row>
+    <shape>circle</shape>
+    <degrees>00360</degrees>
+    <sides/>
+    <date>2021-01-01</date>
+  </row>
+  <row>
+    <shape>triangle</shape>
+    <degrees>00180</degrees>
+    <sides>3.0</sides>
+    <date>2022-01-01</date>
+  </row>
+</data>"""
+
 
 @pytest.fixture(params=["rb", "r"])
 def mode(request):
@@ -687,6 +734,274 @@ def test_names_option_wrong_type(datapath, parser):
         read_xml(filename, names="Col1, Col2, Col3", parser=parser)
 
 
+# DTYPE
+
+
+def test_dtype_single_str(parser):
+    df_result = read_xml(xml_types, dtype={"degrees": "str"}, parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": ["00360", "00360", "00180"],
+            "sides": [4.0, float("nan"), 3.0],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_dtypes_all_str(parser):
+    df_result = read_xml(xml_dates, dtype="string")
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": ["00360", "00360", "00180"],
+            "sides": ["4.0", None, "3.0"],
+            "date": ["2020-01-01", "2021-01-01", "2022-01-01"],
+        },
+        dtype="string",
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_dtype_nullable_int(parser):
+    df_result = read_xml(xml_types, dtype={"sides": "Int64"}, parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": Series([4.0, float("nan"), 3.0]).astype("Int64"),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_dtype_float(parser):
+    df_result = read_xml(xml_types, dtype={"degrees": "float"}, parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": Series([360, 360, 180]).astype("float"),
+            "sides": [4.0, float("nan"), 3.0],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_wrong_dtype(parser):
+    with pytest.raises(
+        ValueError, match=('Unable to parse string "square" at position 0')
+    ):
+        read_xml(xml_types, dtype={"shape": "Int64"}, parser=parser)
+
+
+def test_both_dtype_converters(parser):
+    df_result = read_xml(
+        xml_types, dtype={"degrees": "str"}, converters={"degrees": str}, parser=parser
+    )
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": ["00360", "00360", "00180"],
+            "sides": [4.0, float("nan"), 3.0],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+    with tm.assert_produces_warning(ParserWarning, match="Both a converter and dtype"):
+        read_xml(
+            xml_types,
+            dtype={"degrees": "str"},
+            converters={"degrees": str},
+            parser=parser,
+        )
+
+
+# CONVERTERS
+
+
+def test_converters_str(parser):
+    df_result = read_xml(xml_types, converters={"degrees": str}, parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": ["00360", "00360", "00180"],
+            "sides": [4.0, float("nan"), 3.0],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_converters_date(parser):
+    convert_to_datetime = lambda x: to_datetime(x)
+    df_result = read_xml(xml_dates, converters={"date": convert_to_datetime})
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+            "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_wrong_converters_type(parser):
+    with pytest.raises(TypeError, match=("Type converters must be a dict or subclass")):
+        read_xml(xml_types, converters={"degrees", str}, parser=parser)
+
+
+def test_callable_func_converters(parser):
+    with pytest.raises(TypeError, match=("'float' object is not callable")):
+        read_xml(xml_types, converters={"degrees": float()}, parser=parser)
+
+
+def test_callable_str_converters(parser):
+    with pytest.raises(TypeError, match=("'str' object is not callable")):
+        read_xml(xml_types, converters={"degrees": "float"}, parser=parser)
+
+
+# PARSE DATES
+
+
+def test_parse_dates_column_name(parser):
+    df_result = read_xml(xml_dates, parse_dates=["date"], parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+            "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_parse_dates_column_index(parser):
+    df_result = read_xml(xml_dates, parse_dates=[3], parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+            "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_parse_dates_dictionary(parser):
+    xml = """<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <shape>square</shape>
+    <degrees>360</degrees>
+    <sides>4.0</sides>
+    <year>2020</year>
+    <month>12</month>
+    <day>31</day>
+   </row>
+  <row>
+    <shape>circle</shape>
+    <degrees>360</degrees>
+    <sides/>
+    <year>2021</year>
+    <month>12</month>
+    <day>31</day>
+  </row>
+  <row>
+    <shape>triangle</shape>
+    <degrees>180</degrees>
+    <sides>3.0</sides>
+    <year>2022</year>
+    <month>12</month>
+    <day>31</day>
+  </row>
+</data>"""
+
+    df_result = read_xml(
+        xml, parse_dates={"date_end": ["year", "month", "day"]}, parser=parser
+    )
+
+    df_expected = DataFrame(
+        {
+            "date_end": to_datetime(["2020-12-31", "2021-12-31", "2022-12-31"]),
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_day_first_parse_dates(parser):
+    xml = """\
+<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <shape>square</shape>
+    <degrees>00360</degrees>
+    <sides>4.0</sides>
+    <date>31/12/2020</date>
+   </row>
+  <row>
+    <shape>circle</shape>
+    <degrees>00360</degrees>
+    <sides/>
+    <date>31/12/2021</date>
+  </row>
+  <row>
+    <shape>triangle</shape>
+    <degrees>00180</degrees>
+    <sides>3.0</sides>
+    <date>31/12/2022</date>
+  </row>
+</data>"""
+
+    df_result = read_xml(xml, parse_dates=["date"], parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+            "date": to_datetime(["2020-12-31", "2021-12-31", "2022-12-31"]),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+    with tm.assert_produces_warning(
+        UserWarning, match="Parsing '31/12/2020' in DD/MM/YYYY format"
+    ):
+        read_xml(xml, parse_dates=["date"], parser=parser)
+
+
+def test_wrong_parse_dates_type(parser):
+    with pytest.raises(
+        TypeError, match=("Only booleans, lists, and dictionaries are accepted")
+    ):
+        read_xml(xml_dates, parse_dates={"date"}, parser=parser)
+
+
 # ENCODING
 
 

From ef88558de74c779613bd205677086ba02882e4c9 Mon Sep 17 00:00:00 2001
From: Parfait Gasana <parfait.gasana@gmail.com>
Date: Mon, 17 Jan 2022 00:01:30 -0600
Subject: [PATCH 2/5] Fix missing fixture param on tests

---
 pandas/tests/io/xml/test_xml.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index aa1a3418666f0..f4024fb51208a 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -752,7 +752,7 @@ def test_dtype_single_str(parser):
 
 
 def test_dtypes_all_str(parser):
-    df_result = read_xml(xml_dates, dtype="string")
+    df_result = read_xml(xml_dates, dtype="string", parser=parser)
 
     df_expected = DataFrame(
         {
@@ -845,7 +845,9 @@ def test_converters_str(parser):
 
 def test_converters_date(parser):
     convert_to_datetime = lambda x: to_datetime(x)
-    df_result = read_xml(xml_dates, converters={"date": convert_to_datetime})
+    df_result = read_xml(
+        xml_dates, converters={"date": convert_to_datetime}, parser=parser
+    )
 
     df_expected = DataFrame(
         {

From 6787a5912dd69a694af3ac908d660e267afb0508 Mon Sep 17 00:00:00 2001
From: Parfait Gasana <parfait.gasana@gmail.com>
Date: Mon, 17 Jan 2022 11:26:06 -0600
Subject: [PATCH 3/5] Update whats_new version, move dtypes tests to new file
 and add tests

---
 doc/source/whatsnew/v1.4.0.rst         |  41 ---
 doc/source/whatsnew/v1.5.0.rst         |  42 +++
 pandas/io/xml.py                       |  14 +-
 pandas/tests/io/xml/test_xml.py        | 319 +--------------------
 pandas/tests/io/xml/test_xml_dtypes.py | 368 +++++++++++++++++++++++++
 5 files changed, 418 insertions(+), 366 deletions(-)
 create mode 100644 pandas/tests/io/xml/test_xml_dtypes.py

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index d57ddad0b48c2..32ca3f6945d7f 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -184,47 +184,6 @@ representation of :class:`DataFrame` objects (:issue:`4889`).
     df
     df.to_dict(orient='tight')
 
-.. _whatsnew_140.read_xml_dtypes:
-
-read_xml now supports ``dtype``, ``converters``, and ``parse_dates``
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Similar to other IO methods, :func:`pandas.read_xml` now supports assigning specific dtypes to columns,
-apply converter methods, and parse dates.
-
-.. ipython:: python
-
-    xml_dates = """<?xml version='1.0' encoding='utf-8'?>
-    <data>
-      <row>
-        <shape>square</shape>
-        <degrees>00360</degrees>
-        <sides>4.0</sides>
-        <date>2020-01-01</date>
-       </row>
-      <row>
-        <shape>circle</shape>
-        <degrees>00360</degrees>
-        <sides/>
-        <date>2021-01-01</date>
-      </row>
-      <row>
-        <shape>triangle</shape>
-        <degrees>00180</degrees>
-        <sides>3.0</sides>
-        <date>2022-01-01</date>
-      </row>
-    </data>"""
-
-    df = pd.read_xml(
-        xml_dates,
-        dtype={'sides': 'Int64'},
-        converters={'degrees': str},
-        parse_dates=['date']
-    )
-    df
-    df.dtypes
-
 .. _whatsnew_140.enhancements.other:
 
 Other enhancements
diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 290f2e0ae08b6..7a7ab197df08d 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -82,6 +82,48 @@ Optional libraries below the lowest tested version may still work, but are not c
 
 See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
 
+
+.. _whatsnew_150.read_xml_dtypes:
+
+read_xml now supports ``dtype``, ``converters``, and ``parse_dates``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Similar to other IO methods, :func:`pandas.read_xml` now supports assigning specific dtypes to columns,
+applying converter methods, and parsing dates.
+
+.. ipython:: python
+
+    xml_dates = """<?xml version='1.0' encoding='utf-8'?>
+    <data>
+      <row>
+        <shape>square</shape>
+        <degrees>00360</degrees>
+        <sides>4.0</sides>
+        <date>2020-01-01</date>
+       </row>
+      <row>
+        <shape>circle</shape>
+        <degrees>00360</degrees>
+        <sides/>
+        <date>2021-01-01</date>
+      </row>
+      <row>
+        <shape>triangle</shape>
+        <degrees>00180</degrees>
+        <sides>3.0</sides>
+        <date>2022-01-01</date>
+      </row>
+    </data>"""
+
+    df = pd.read_xml(
+        xml_dates,
+        dtype={'sides': 'Int64'},
+        converters={'degrees': str},
+        parse_dates=['date']
+    )
+    df
+    df.dtypes
+
 .. _whatsnew_150.api_breaking.other:
 
 Other API changes
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 7add71df0e872..1af76c75b7dc8 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -75,18 +75,18 @@ class _XMLFrameParser:
         Data type for data or columns. E.g. {{'a': np.float64,
         'b': np.int32, 'c': 'Int64'}}
 
-        .. versionadded:: 1.4.0
+        .. versionadded:: 1.5.0
 
     converters : dict, optional
         Dict of functions for converting values in certain columns. Keys can
         either be integers or column labels.
 
-        .. versionadded:: 1.4.0
+        .. versionadded:: 1.5.0
 
     parse_dates : bool or list of int or names or list of lists or dict
         Converts either index or select columns to datetimes
 
-        .. versionadded:: 1.4.0
+        .. versionadded:: 1.5.0
 
     encoding : str
         Encoding of xml object or document.
@@ -864,16 +864,16 @@ def read_xml(
         If converters are specified, they will be applied INSTEAD
         of dtype conversion.
 
-        .. versionadded:: 1.4.0
+        .. versionadded:: 1.5.0
 
     converters : dict, optional
         Dict of functions for converting values in certain columns. Keys can either
         be integers or column labels.
 
-        .. versionadded:: 1.4.0
+        .. versionadded:: 1.5.0
 
     parse_dates : bool or list of int or names or list of lists or dict, default False
-        The behavior is as follows:
+        Identifiers to parse index or columns to datetime. The behavior is as follows:
 
         * boolean. If True -> try parsing the index.
         * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
@@ -883,7 +883,7 @@ def read_xml(
         * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call
           result 'foo'
 
-        .. versionadded:: 1.4.0
+        .. versionadded:: 1.5.0
 
     encoding : str, optional, default 'utf-8'
         Encoding of XML document.
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index f4024fb51208a..8809c423a29ba 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -13,14 +13,9 @@
 import pytest
 
 from pandas.compat._optional import import_optional_dependency
-from pandas.errors import ParserWarning
 import pandas.util._test_decorators as td
 
-from pandas import (
-    DataFrame,
-    Series,
-    to_datetime,
-)
+from pandas import DataFrame
 import pandas._testing as tm
 
 from pandas.io.xml import read_xml
@@ -236,48 +231,6 @@
     }
 )
 
-xml_types = """\
-<?xml version='1.0' encoding='utf-8'?>
-<data>
-  <row>
-    <shape>square</shape>
-    <degrees>00360</degrees>
-    <sides>4.0</sides>
-   </row>
-  <row>
-    <shape>circle</shape>
-    <degrees>00360</degrees>
-    <sides/>
-  </row>
-  <row>
-    <shape>triangle</shape>
-    <degrees>00180</degrees>
-    <sides>3.0</sides>
-  </row>
-</data>"""
-
-xml_dates = """<?xml version='1.0' encoding='utf-8'?>
-<data>
-  <row>
-    <shape>square</shape>
-    <degrees>00360</degrees>
-    <sides>4.0</sides>
-    <date>2020-01-01</date>
-   </row>
-  <row>
-    <shape>circle</shape>
-    <degrees>00360</degrees>
-    <sides/>
-    <date>2021-01-01</date>
-  </row>
-  <row>
-    <shape>triangle</shape>
-    <degrees>00180</degrees>
-    <sides>3.0</sides>
-    <date>2022-01-01</date>
-  </row>
-</data>"""
-
 
 @pytest.fixture(params=["rb", "r"])
 def mode(request):
@@ -734,276 +687,6 @@ def test_names_option_wrong_type(datapath, parser):
         read_xml(filename, names="Col1, Col2, Col3", parser=parser)
 
 
-# DTYPE
-
-
-def test_dtype_single_str(parser):
-    df_result = read_xml(xml_types, dtype={"degrees": "str"}, parser=parser)
-
-    df_expected = DataFrame(
-        {
-            "shape": ["square", "circle", "triangle"],
-            "degrees": ["00360", "00360", "00180"],
-            "sides": [4.0, float("nan"), 3.0],
-        }
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-
-def test_dtypes_all_str(parser):
-    df_result = read_xml(xml_dates, dtype="string", parser=parser)
-
-    df_expected = DataFrame(
-        {
-            "shape": ["square", "circle", "triangle"],
-            "degrees": ["00360", "00360", "00180"],
-            "sides": ["4.0", None, "3.0"],
-            "date": ["2020-01-01", "2021-01-01", "2022-01-01"],
-        },
-        dtype="string",
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-
-def test_dtype_nullable_int(parser):
-    df_result = read_xml(xml_types, dtype={"sides": "Int64"}, parser=parser)
-
-    df_expected = DataFrame(
-        {
-            "shape": ["square", "circle", "triangle"],
-            "degrees": [360, 360, 180],
-            "sides": Series([4.0, float("nan"), 3.0]).astype("Int64"),
-        }
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-
-def test_dtype_float(parser):
-    df_result = read_xml(xml_types, dtype={"degrees": "float"}, parser=parser)
-
-    df_expected = DataFrame(
-        {
-            "shape": ["square", "circle", "triangle"],
-            "degrees": Series([360, 360, 180]).astype("float"),
-            "sides": [4.0, float("nan"), 3.0],
-        }
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-
-def test_wrong_dtype(parser):
-    with pytest.raises(
-        ValueError, match=('Unable to parse string "square" at position 0')
-    ):
-        read_xml(xml_types, dtype={"shape": "Int64"}, parser=parser)
-
-
-def test_both_dtype_converters(parser):
-    df_result = read_xml(
-        xml_types, dtype={"degrees": "str"}, converters={"degrees": str}, parser=parser
-    )
-
-    df_expected = DataFrame(
-        {
-            "shape": ["square", "circle", "triangle"],
-            "degrees": ["00360", "00360", "00180"],
-            "sides": [4.0, float("nan"), 3.0],
-        }
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-    with tm.assert_produces_warning(ParserWarning, match="Both a converter and dtype"):
-        read_xml(
-            xml_types,
-            dtype={"degrees": "str"},
-            converters={"degrees": str},
-            parser=parser,
-        )
-
-
-# CONVERTERS
-
-
-def test_converters_str(parser):
-    df_result = read_xml(xml_types, converters={"degrees": str}, parser=parser)
-
-    df_expected = DataFrame(
-        {
-            "shape": ["square", "circle", "triangle"],
-            "degrees": ["00360", "00360", "00180"],
-            "sides": [4.0, float("nan"), 3.0],
-        }
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-
-def test_converters_date(parser):
-    convert_to_datetime = lambda x: to_datetime(x)
-    df_result = read_xml(
-        xml_dates, converters={"date": convert_to_datetime}, parser=parser
-    )
-
-    df_expected = DataFrame(
-        {
-            "shape": ["square", "circle", "triangle"],
-            "degrees": [360, 360, 180],
-            "sides": [4.0, float("nan"), 3.0],
-            "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]),
-        }
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-
-def test_wrong_converters_type(parser):
-    with pytest.raises(TypeError, match=("Type converters must be a dict or subclass")):
-        read_xml(xml_types, converters={"degrees", str}, parser=parser)
-
-
-def test_callable_func_converters(parser):
-    with pytest.raises(TypeError, match=("'float' object is not callable")):
-        read_xml(xml_types, converters={"degrees": float()}, parser=parser)
-
-
-def test_callable_str_converters(parser):
-    with pytest.raises(TypeError, match=("'str' object is not callable")):
-        read_xml(xml_types, converters={"degrees": "float"}, parser=parser)
-
-
-# PARSE DATES
-
-
-def test_parse_dates_column_name(parser):
-    df_result = read_xml(xml_dates, parse_dates=["date"], parser=parser)
-
-    df_expected = DataFrame(
-        {
-            "shape": ["square", "circle", "triangle"],
-            "degrees": [360, 360, 180],
-            "sides": [4.0, float("nan"), 3.0],
-            "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]),
-        }
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-
-def test_parse_dates_column_index(parser):
-    df_result = read_xml(xml_dates, parse_dates=[3], parser=parser)
-
-    df_expected = DataFrame(
-        {
-            "shape": ["square", "circle", "triangle"],
-            "degrees": [360, 360, 180],
-            "sides": [4.0, float("nan"), 3.0],
-            "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]),
-        }
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-
-def test_parse_dates_dictionary(parser):
-    xml = """<?xml version='1.0' encoding='utf-8'?>
-<data>
-  <row>
-    <shape>square</shape>
-    <degrees>360</degrees>
-    <sides>4.0</sides>
-    <year>2020</year>
-    <month>12</month>
-    <day>31</day>
-   </row>
-  <row>
-    <shape>circle</shape>
-    <degrees>360</degrees>
-    <sides/>
-    <year>2021</year>
-    <month>12</month>
-    <day>31</day>
-  </row>
-  <row>
-    <shape>triangle</shape>
-    <degrees>180</degrees>
-    <sides>3.0</sides>
-    <year>2022</year>
-    <month>12</month>
-    <day>31</day>
-  </row>
-</data>"""
-
-    df_result = read_xml(
-        xml, parse_dates={"date_end": ["year", "month", "day"]}, parser=parser
-    )
-
-    df_expected = DataFrame(
-        {
-            "date_end": to_datetime(["2020-12-31", "2021-12-31", "2022-12-31"]),
-            "shape": ["square", "circle", "triangle"],
-            "degrees": [360, 360, 180],
-            "sides": [4.0, float("nan"), 3.0],
-        }
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-
-def test_day_first_parse_dates(parser):
-    xml = """\
-<?xml version='1.0' encoding='utf-8'?>
-<data>
-  <row>
-    <shape>square</shape>
-    <degrees>00360</degrees>
-    <sides>4.0</sides>
-    <date>31/12/2020</date>
-   </row>
-  <row>
-    <shape>circle</shape>
-    <degrees>00360</degrees>
-    <sides/>
-    <date>31/12/2021</date>
-  </row>
-  <row>
-    <shape>triangle</shape>
-    <degrees>00180</degrees>
-    <sides>3.0</sides>
-    <date>31/12/2022</date>
-  </row>
-</data>"""
-
-    df_result = read_xml(xml, parse_dates=["date"], parser=parser)
-
-    df_expected = DataFrame(
-        {
-            "shape": ["square", "circle", "triangle"],
-            "degrees": [360, 360, 180],
-            "sides": [4.0, float("nan"), 3.0],
-            "date": to_datetime(["2020-12-31", "2021-12-31", "2022-12-31"]),
-        }
-    )
-
-    tm.assert_frame_equal(df_result, df_expected)
-
-    with tm.assert_produces_warning(
-        UserWarning, match="Parsing '31/12/2020' in DD/MM/YYYY format"
-    ):
-        read_xml(xml, parse_dates=["date"], parser=parser)
-
-
-def test_wrong_parse_dates_type(parser):
-    with pytest.raises(
-        TypeError, match=("Only booleans, lists, and dictionaries are accepted")
-    ):
-        read_xml(xml_dates, parse_dates={"date"}, parser=parser)
-
-
 # ENCODING
 
 
diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
new file mode 100644
index 0000000000000..8fbdff898a9d1
--- /dev/null
+++ b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -0,0 +1,368 @@
+from __future__ import annotations
+
+import pytest
+
+from pandas.errors import ParserWarning
+import pandas.util._test_decorators as td
+
+from pandas import (
+    DataFrame,
+    Series,
+    to_datetime,
+)
+import pandas._testing as tm
+
+from pandas.io.xml import read_xml
+
+
+@pytest.fixture(params=[pytest.param("lxml", marks=td.skip_if_no("lxml")), "etree"])
+def parser(request):
+    return request.param
+
+
+xml_types = """\
+<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <shape>square</shape>
+    <degrees>00360</degrees>
+    <sides>4.0</sides>
+   </row>
+  <row>
+    <shape>circle</shape>
+    <degrees>00360</degrees>
+    <sides/>
+  </row>
+  <row>
+    <shape>triangle</shape>
+    <degrees>00180</degrees>
+    <sides>3.0</sides>
+  </row>
+</data>"""
+
+xml_dates = """<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <shape>square</shape>
+    <degrees>00360</degrees>
+    <sides>4.0</sides>
+    <date>2020-01-01</date>
+   </row>
+  <row>
+    <shape>circle</shape>
+    <degrees>00360</degrees>
+    <sides/>
+    <date>2021-01-01</date>
+  </row>
+  <row>
+    <shape>triangle</shape>
+    <degrees>00180</degrees>
+    <sides>3.0</sides>
+    <date>2022-01-01</date>
+  </row>
+</data>"""
+
+
+# DTYPE
+
+
+def test_dtype_single_str(parser):
+    df_result = read_xml(xml_types, dtype={"degrees": "str"}, parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": ["00360", "00360", "00180"],
+            "sides": [4.0, float("nan"), 3.0],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_dtypes_all_str(parser):
+    df_result = read_xml(xml_dates, dtype="string", parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": ["00360", "00360", "00180"],
+            "sides": ["4.0", None, "3.0"],
+            "date": ["2020-01-01", "2021-01-01", "2022-01-01"],
+        },
+        dtype="string",
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_dtypes_with_names(parser):
+    df_result = read_xml(
+        xml_dates,
+        names=["Col1", "Col2", "Col3", "Col4"],
+        dtype={"Col2": "string", "Col3": "Int64", "Col4": "datetime64"},
+        parser=parser,
+    )
+
+    df_expected = DataFrame(
+        {
+            "Col1": ["square", "circle", "triangle"],
+            "Col2": Series(["00360", "00360", "00180"]).astype("string"),
+            "Col3": Series([4.0, float("nan"), 3.0]).astype("Int64"),
+            "Col4": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_dtype_nullable_int(parser):
+    df_result = read_xml(xml_types, dtype={"sides": "Int64"}, parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": Series([4.0, float("nan"), 3.0]).astype("Int64"),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_dtype_float(parser):
+    df_result = read_xml(xml_types, dtype={"degrees": "float"}, parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": Series([360, 360, 180]).astype("float"),
+            "sides": [4.0, float("nan"), 3.0],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_wrong_dtype(parser):
+    with pytest.raises(
+        ValueError, match=('Unable to parse string "square" at position 0')
+    ):
+        read_xml(xml_types, dtype={"shape": "Int64"}, parser=parser)
+
+
+def test_both_dtype_converters(parser):
+    df_result = read_xml(
+        xml_types, dtype={"degrees": "str"}, converters={"degrees": str}, parser=parser
+    )
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": ["00360", "00360", "00180"],
+            "sides": [4.0, float("nan"), 3.0],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+    with tm.assert_produces_warning(ParserWarning, match="Both a converter and dtype"):
+        read_xml(
+            xml_types,
+            dtype={"degrees": "str"},
+            converters={"degrees": str},
+            parser=parser,
+        )
+
+
+# CONVERTERS
+
+
+def test_converters_str(parser):
+    df_result = read_xml(xml_types, converters={"degrees": str}, parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": ["00360", "00360", "00180"],
+            "sides": [4.0, float("nan"), 3.0],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_converters_date(parser):
+    convert_to_datetime = lambda x: to_datetime(x)
+    df_result = read_xml(
+        xml_dates, converters={"date": convert_to_datetime}, parser=parser
+    )
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+            "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_wrong_converters_type(parser):
+    with pytest.raises(TypeError, match=("Type converters must be a dict or subclass")):
+        read_xml(xml_types, converters={"degrees", str}, parser=parser)
+
+
+def test_callable_func_converters(parser):
+    with pytest.raises(TypeError, match=("'float' object is not callable")):
+        read_xml(xml_types, converters={"degrees": float()}, parser=parser)
+
+
+def test_callable_str_converters(parser):
+    with pytest.raises(TypeError, match=("'str' object is not callable")):
+        read_xml(xml_types, converters={"degrees": "float"}, parser=parser)
+
+
+# PARSE DATES
+
+
+def test_parse_dates_column_name(parser):
+    df_result = read_xml(xml_dates, parse_dates=["date"], parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+            "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_parse_dates_column_index(parser):
+    df_result = read_xml(xml_dates, parse_dates=[3], parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+            "date": to_datetime(["2020-01-01", "2021-01-01", "2022-01-01"]),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_parse_dates_true(parser):
+    df_result = read_xml(xml_dates, parse_dates=True, parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+            "date": ["2020-01-01", "2021-01-01", "2022-01-01"],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_parse_dates_dictionary(parser):
+    xml = """<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <shape>square</shape>
+    <degrees>360</degrees>
+    <sides>4.0</sides>
+    <year>2020</year>
+    <month>12</month>
+    <day>31</day>
+   </row>
+  <row>
+    <shape>circle</shape>
+    <degrees>360</degrees>
+    <sides/>
+    <year>2021</year>
+    <month>12</month>
+    <day>31</day>
+  </row>
+  <row>
+    <shape>triangle</shape>
+    <degrees>180</degrees>
+    <sides>3.0</sides>
+    <year>2022</year>
+    <month>12</month>
+    <day>31</day>
+  </row>
+</data>"""
+
+    df_result = read_xml(
+        xml, parse_dates={"date_end": ["year", "month", "day"]}, parser=parser
+    )
+
+    df_expected = DataFrame(
+        {
+            "date_end": to_datetime(["2020-12-31", "2021-12-31", "2022-12-31"]),
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+
+def test_day_first_parse_dates(parser):
+    xml = """\
+<?xml version='1.0' encoding='utf-8'?>
+<data>
+  <row>
+    <shape>square</shape>
+    <degrees>00360</degrees>
+    <sides>4.0</sides>
+    <date>31/12/2020</date>
+   </row>
+  <row>
+    <shape>circle</shape>
+    <degrees>00360</degrees>
+    <sides/>
+    <date>31/12/2021</date>
+  </row>
+  <row>
+    <shape>triangle</shape>
+    <degrees>00180</degrees>
+    <sides>3.0</sides>
+    <date>31/12/2022</date>
+  </row>
+</data>"""
+
+    df_result = read_xml(xml, parse_dates=["date"], parser=parser)
+
+    df_expected = DataFrame(
+        {
+            "shape": ["square", "circle", "triangle"],
+            "degrees": [360, 360, 180],
+            "sides": [4.0, float("nan"), 3.0],
+            "date": to_datetime(["2020-12-31", "2021-12-31", "2022-12-31"]),
+        }
+    )
+
+    tm.assert_frame_equal(df_result, df_expected)
+
+    with tm.assert_produces_warning(
+        UserWarning, match="Parsing '31/12/2020' in DD/MM/YYYY format"
+    ):
+        read_xml(xml, parse_dates=["date"], parser=parser)
+
+
+def test_wrong_parse_dates_type(parser):
+    with pytest.raises(
+        TypeError, match=("Only booleans, lists, and dictionaries are accepted")
+    ):
+        read_xml(xml_dates, parse_dates={"date"}, parser=parser)

From d3ffe37b316e38ca1744f13e54a9b2886057ee47 Mon Sep 17 00:00:00 2001
From: Parfait Gasana <parfait.gasana@gmail.com>
Date: Mon, 17 Jan 2022 19:51:08 -0600
Subject: [PATCH 4/5] Fix tests with warnings assertions

---
 pandas/tests/io/xml/test_xml_dtypes.py | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
index 8fbdff898a9d1..801461ed4288a 100644
--- a/pandas/tests/io/xml/test_xml_dtypes.py
+++ b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -152,10 +152,6 @@ def test_wrong_dtype(parser):
 
 
 def test_both_dtype_converters(parser):
-    df_result = read_xml(
-        xml_types, dtype={"degrees": "str"}, converters={"degrees": str}, parser=parser
-    )
-
     df_expected = DataFrame(
         {
             "shape": ["square", "circle", "triangle"],
@@ -164,16 +160,16 @@ def test_both_dtype_converters(parser):
         }
     )
 
-    tm.assert_frame_equal(df_result, df_expected)
-
     with tm.assert_produces_warning(ParserWarning, match="Both a converter and dtype"):
-        read_xml(
+        df_result = read_xml(
             xml_types,
             dtype={"degrees": "str"},
             converters={"degrees": str},
             parser=parser,
         )
 
+        tm.assert_frame_equal(df_result, df_expected)
+
 
 # CONVERTERS
 
@@ -342,8 +338,6 @@ def test_day_first_parse_dates(parser):
   </row>
 </data>"""
 
-    df_result = read_xml(xml, parse_dates=["date"], parser=parser)
-
     df_expected = DataFrame(
         {
             "shape": ["square", "circle", "triangle"],
@@ -353,12 +347,11 @@ def test_day_first_parse_dates(parser):
         }
     )
 
-    tm.assert_frame_equal(df_result, df_expected)
-
     with tm.assert_produces_warning(
         UserWarning, match="Parsing '31/12/2020' in DD/MM/YYYY format"
     ):
-        read_xml(xml, parse_dates=["date"], parser=parser)
+        df_result = read_xml(xml, parse_dates=["date"], parser=parser)
+        tm.assert_frame_equal(df_result, df_expected)
 
 
 def test_wrong_parse_dates_type(parser):

From eabaa4de7c4c2fa8f504b3d323d22c567e72f855 Mon Sep 17 00:00:00 2001
From: Parfait Gasana <parfait.gasana@gmail.com>
Date: Sat, 22 Jan 2022 21:52:36 -0600
Subject: [PATCH 5/5] Add new converters and parse_dates typing aliases

---
 doc/source/whatsnew/v1.5.0.rst |  4 ++--
 pandas/_typing.py              | 13 +++++++++++--
 pandas/io/xml.py               | 31 +++++++++----------------------
 3 files changed, 22 insertions(+), 26 deletions(-)

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 8ac265f5c27ac..495e90c94b10d 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -86,13 +86,13 @@ Optional libraries below the lowest tested version may still work, but are not c
 See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.
 
 
-.. _whatsnew_140.read_xml_dtypes:
+.. _whatsnew_150.read_xml_dtypes:
 
 read_xml now supports ``dtype``, ``converters``, and ``parse_dates``
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Similar to other IO methods, :func:`pandas.read_xml` now supports assigning specific dtypes to columns,
-apply converter methods, and parse dates.
+applying converter methods, and parsing dates (:issue:`43567`).
 
 .. ipython:: python
 
diff --git a/pandas/_typing.py b/pandas/_typing.py
index fd099b3897bab..c0383fe50a7e7 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -129,6 +129,14 @@
 DtypeArg = Union[Dtype, Dict[Hashable, Dtype]]
 DtypeObj = Union[np.dtype, "ExtensionDtype"]
 
+# converters
+ConvertersArg = Dict[Hashable, Callable[[Dtype], Dtype]]
+
+# parse_dates
+ParseDatesArg = Union[
+    bool, List[Hashable], List[List[Hashable]], Dict[Hashable, List[Hashable]]
+]
+
 # For functions like rename that convert one label to another
 Renamer = Union[Mapping[Hashable, Any], Callable[[Hashable], Hashable]]
 
@@ -246,8 +254,6 @@ def closed(self) -> bool:
 CompressionOptions = Optional[
     Union[Literal["infer", "gzip", "bz2", "zip", "xz", "zstd"], CompressionDict]
 ]
-XMLParsers = Literal["lxml", "etree"]
-
 
 # types in DataFrameFormatter
 FormattersType = Union[
@@ -295,3 +301,6 @@ def closed(self) -> bool:
 
 # read_csv engines
 CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]
+
+# read_xml parsers
+XMLParsers = Literal["lxml", "etree"]
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 1af76c75b7dc8..8e463c94340c8 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -5,15 +5,14 @@
 from __future__ import annotations
 
 import io
-from typing import (
-    Callable,
-    Sequence,
-)
+from typing import Sequence
 
 from pandas._typing import (
     CompressionOptions,
+    ConvertersArg,
     DtypeArg,
     FilePath,
+    ParseDatesArg,
     ReadBuffer,
     StorageOptions,
     XMLParsers,
@@ -131,12 +130,8 @@ def __init__(
         attrs_only: bool,
         names: Sequence[str] | None,
         dtype: DtypeArg | None,
-        converters: dict[str, Callable] | None,
-        parse_dates: bool
-        | list[int | str]
-        | list[list[int | str]]
-        | dict[str, list[int | str]]
-        | None,
+        converters: ConvertersArg | None,
+        parse_dates: ParseDatesArg | None,
         encoding: str | None,
         stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None,
         compression: CompressionOptions,
@@ -703,12 +698,8 @@ def _parse(
     attrs_only: bool,
     names: Sequence[str] | None,
     dtype: DtypeArg | None,
-    converters: dict[str, Callable] | None,
-    parse_dates: bool
-    | list[int | str]
-    | list[list[int | str]]
-    | dict[str, list[int | str]]
-    | None,
+    converters: ConvertersArg | None,
+    parse_dates: ParseDatesArg | None,
     encoding: str | None,
     parser: XMLParsers,
     stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None,
@@ -800,12 +791,8 @@ def read_xml(
     attrs_only: bool = False,
     names: Sequence[str] | None = None,
     dtype: DtypeArg | None = None,
-    converters: dict[str, Callable] | None = None,
-    parse_dates: bool
-    | list[int | str]
-    | list[list[int | str]]
-    | dict[str, list[int | str]]
-    | None = None,
+    converters: ConvertersArg | None = None,
+    parse_dates: ParseDatesArg | None = None,
     # encoding can not be None for lxml and StringIO input
     encoding: str | None = "utf-8",
     parser: XMLParsers = "lxml",