diff --git a/pandas/conftest.py b/pandas/conftest.py
index 1dcfc88eb1bfd..b2f1377a9fb32 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -30,6 +30,7 @@
from decimal import Decimal
import operator
import os
+from pathlib import Path
from typing import (
Callable,
Hashable,
@@ -1167,6 +1168,16 @@ def strict_data_files(pytestconfig):
return pytestconfig.getoption("--strict-data-files")
+@pytest.fixture
+def tests_path() -> Path:
+ return Path(__file__).parent / "tests"
+
+
+@pytest.fixture
+def tests_io_data_path(tests_path) -> Path:
+ return tests_path / "io" / "data"
+
+
@pytest.fixture
def datapath(strict_data_files: str) -> Callable[..., str]:
"""
diff --git a/pandas/io/xml.py b/pandas/io/xml.py
index 65cc369416352..2aec361d46b99 100644
--- a/pandas/io/xml.py
+++ b/pandas/io/xml.py
@@ -5,6 +5,7 @@
from __future__ import annotations
import io
+from os import PathLike
from typing import (
TYPE_CHECKING,
Any,
@@ -326,10 +327,13 @@ def _iterparse_nodes(self, iterparse: Callable) -> list[dict[str, str | None]]:
)
if (not hasattr(self.path_or_buffer, "read")) and (
- not isinstance(self.path_or_buffer, str)
+ not isinstance(self.path_or_buffer, (str, PathLike))
or is_url(self.path_or_buffer)
or is_fsspec_url(self.path_or_buffer)
- or self.path_or_buffer.startswith(("
"""
- kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
-
with pytest.raises(
XMLSyntaxError, match=("Extra content at the end of the document")
):
- read_xml(kml, stylesheet=xsl)
+ read_xml(kml_cta_rail_lines, stylesheet=xsl)
@td.skip_if_no("lxml")
-def test_incorrect_xsl_eval(datapath):
+def test_incorrect_xsl_eval(kml_cta_rail_lines):
from lxml.etree import XSLTParseError
xsl = """\
@@ -1313,14 +1263,12 @@ def test_incorrect_xsl_eval(datapath):
"""
- kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
-
with pytest.raises(XSLTParseError, match=("failed to compile")):
- read_xml(kml, stylesheet=xsl)
+ read_xml(kml_cta_rail_lines, stylesheet=xsl)
@td.skip_if_no("lxml")
-def test_incorrect_xsl_apply(datapath):
+def test_incorrect_xsl_apply(kml_cta_rail_lines):
from lxml.etree import XSLTApplyError
xsl = """\
@@ -1335,55 +1283,46 @@ def test_incorrect_xsl_apply(datapath):
"""
- kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
-
with pytest.raises(XSLTApplyError, match=("Cannot resolve URI")):
- read_xml(kml, stylesheet=xsl)
+ read_xml(kml_cta_rail_lines, stylesheet=xsl)
@td.skip_if_no("lxml")
-def test_wrong_stylesheet():
+def test_wrong_stylesheet(kml_cta_rail_lines, xml_data_path):
from lxml.etree import XMLSyntaxError
- kml = os.path.join("data", "xml", "cta_rail_lines.kml")
- xsl = os.path.join("data", "xml", "flatten.xsl")
+ xsl = xml_data_path / "flatten.xsl"
with pytest.raises(
XMLSyntaxError,
match=("Start tag expected, '<' not found"),
):
- read_xml(kml, stylesheet=xsl)
+ read_xml(kml_cta_rail_lines, stylesheet=xsl)
@td.skip_if_no("lxml")
-def test_stylesheet_file_close(datapath, mode):
- kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
- xsl = datapath("io", "data", "xml", "flatten_doc.xsl")
-
+def test_stylesheet_file_close(kml_cta_rail_lines, xsl_flatten_doc, mode):
# note: By default the bodies of untyped functions are not checked,
# consider using --check-untyped-defs
xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked]
- with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
+ with open(xsl_flatten_doc, mode, encoding="utf-8" if mode == "r" else None) as f:
if mode == "rb":
xsl_obj = BytesIO(f.read())
else:
xsl_obj = StringIO(f.read())
- read_xml(kml, stylesheet=xsl_obj)
+ read_xml(kml_cta_rail_lines, stylesheet=xsl_obj)
assert not f.closed
@td.skip_if_no("lxml")
-def test_stylesheet_with_etree():
- kml = os.path.join("data", "xml", "cta_rail_lines.kml")
- xsl = os.path.join("data", "xml", "flatten_doc.xsl")
-
+def test_stylesheet_with_etree(kml_cta_rail_lines, xsl_flatten_doc):
with pytest.raises(
ValueError, match=("To use stylesheet, you need lxml installed")
):
- read_xml(kml, parser="etree", stylesheet=xsl)
+ read_xml(kml_cta_rail_lines, parser="etree", stylesheet=xsl_flatten_doc)
@td.skip_if_no("lxml")
@@ -1413,10 +1352,8 @@ def test_string_error(parser):
)
-def test_file_like_iterparse(datapath, parser, mode):
- filename = datapath("io", "data", "xml", "books.xml")
-
- with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
+def test_file_like_iterparse(xml_books, parser, mode):
+ with open(xml_books, mode, encoding="utf-8" if mode == "r" else None) as f:
if mode == "r" and parser == "lxml":
with pytest.raises(
TypeError, match=("reading file objects must return bytes objects")
@@ -1449,12 +1386,10 @@ def test_file_like_iterparse(datapath, parser, mode):
tm.assert_frame_equal(df_filelike, df_expected)
-def test_file_io_iterparse(datapath, parser, mode):
- filename = datapath("io", "data", "xml", "books.xml")
-
+def test_file_io_iterparse(xml_books, parser, mode):
funcIO = StringIO if mode == "r" else BytesIO
with open(
- filename,
+ xml_books,
mode,
encoding="utf-8" if mode == "r" else None,
) as f:
@@ -1522,22 +1457,20 @@ def test_compression_error(parser, compression_only):
)
-def test_wrong_dict_type(datapath, parser):
- filename = datapath("io", "data", "xml", "books.xml")
+def test_wrong_dict_type(xml_books, parser):
with pytest.raises(TypeError, match="list is not a valid type for iterparse"):
read_xml(
- filename,
+ xml_books,
parser=parser,
iterparse=["category", "title", "year", "author", "price"],
)
-def test_wrong_dict_value(datapath, parser):
- filename = datapath("io", "data", "xml", "books.xml")
+def test_wrong_dict_value(xml_books, parser):
with pytest.raises(
TypeError, match=" is not a valid type for value in iterparse"
):
- read_xml(filename, parser=parser, iterparse={"book": "category"})
+ read_xml(xml_books, parser=parser, iterparse={"book": "category"})
def test_bad_xml(parser):
@@ -1688,23 +1621,21 @@ def test_processing_instruction(parser):
tm.assert_frame_equal(df_iter, df_expected)
-def test_no_result(datapath, parser):
- filename = datapath("io", "data", "xml", "books.xml")
+def test_no_result(xml_books, parser):
with pytest.raises(
ParserError, match="No result from selected items in iterparse."
):
read_xml(
- filename,
+ xml_books,
parser=parser,
iterparse={"node": ["attr1", "elem1", "elem2", "elem3"]},
)
-def test_empty_data(datapath, parser):
- filename = datapath("io", "data", "xml", "books.xml")
+def test_empty_data(xml_books, parser):
with pytest.raises(EmptyDataError, match="No columns to parse from file"):
read_xml(
- filename,
+ xml_books,
parser=parser,
iterparse={"book": ["attr1", "elem1", "elem2", "elem3"]},
)