Skip to content

TYP: type read_xml and deprecate passing positional arguments #45133

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 4, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ def closed(self) -> bool:
CompressionOptions = Optional[
Union[Literal["infer", "gzip", "bz2", "zip", "xz", "zstd"], CompressionDict]
]
XMLParsers = Literal["lxml", "etree"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice addition for this type. Maybe inspired by new IO XML module.



# types in DataFrameFormatter
Expand Down
59 changes: 29 additions & 30 deletions pandas/io/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
from __future__ import annotations

import io
from typing import Iterable

from pandas._typing import (
CompressionOptions,
FilePath,
ReadBuffer,
StorageOptions,
XMLParsers,
)
from pandas.compat._optional import import_optional_dependency
from pandas.errors import (
Expand Down Expand Up @@ -98,17 +100,17 @@ class _XMLFrameParser:

def __init__(
self,
path_or_buffer,
xpath,
namespaces,
elems_only,
attrs_only,
names,
encoding,
stylesheet,
path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str],
xpath: str,
namespaces: dict | list[dict] | None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I cannot see how list of dicts is used for this argument. Each XML doc will have a unique set of namespaces (i.e., prefix to URI pairs). Does mypy raise? Consider dict[str, str] though.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could not find them on my newest commit, did you maybe review an older one? I have moved the types from the public api first and checked them for validity afterwards. I realised that list[dict] did not work and removed them again then

elems_only: bool,
attrs_only: bool,
names: Iterable[str] | None,
encoding: str | None,
stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None,
compression: CompressionOptions,
storage_options: StorageOptions,
) -> None:
):
self.path_or_buffer = path_or_buffer
self.xpath = xpath
self.namespaces = namespaces
Expand Down Expand Up @@ -371,9 +373,6 @@ class _LxmlFrameParser(_XMLFrameParser):
XPath 1.0 and XSLT 1.0.
"""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)

def parse_data(self) -> list[dict[str, str | None]]:
"""
Parse xml data.
Expand Down Expand Up @@ -570,7 +569,7 @@ def _transform_doc(self) -> bytes:

def get_data_from_filepath(
filepath_or_buffer: FilePath | bytes | ReadBuffer[bytes] | ReadBuffer[str],
encoding,
encoding: str | None,
compression: CompressionOptions,
storage_options: StorageOptions,
) -> str | bytes | ReadBuffer[bytes] | ReadBuffer[str]:
Expand Down Expand Up @@ -658,15 +657,15 @@ class that build Data Frame and infers specific dtypes.


def _parse(
path_or_buffer,
xpath,
namespaces,
elems_only,
attrs_only,
names,
encoding,
parser,
stylesheet,
path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str],
xpath: str,
namespaces: dict | list[dict] | None,
elems_only: bool,
attrs_only: bool,
names: Iterable[str] | None,
encoding: str | None,
parser: XMLParsers,
stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None,
compression: CompressionOptions,
storage_options: StorageOptions,
**kwargs,
Expand All @@ -686,11 +685,11 @@ def _parse(
* If parser is not lxml or etree.
"""

lxml = import_optional_dependency("lxml.etree", errors="ignore")

p: _EtreeFrameParser | _LxmlFrameParser

if parser == "lxml":
lxml = import_optional_dependency("lxml.etree", errors="ignore")

if lxml is not None:
p = _LxmlFrameParser(
path_or_buffer,
Expand Down Expand Up @@ -734,13 +733,13 @@ def _parse(
)
def read_xml(
path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str],
xpath: str | None = "./*",
xpath: str = "./*",
namespaces: dict | list[dict] | None = None,
elems_only: bool | None = False,
attrs_only: bool | None = False,
names: list[str] | None = None,
elems_only: bool = False,
attrs_only: bool = False,
names: Iterable[str] | None = None,
encoding: str | None = "utf-8",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can type this correctly after enforcing the positional deprecation

parser: str | None = "lxml",
parser: XMLParsers = "lxml",
stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None = None,
compression: CompressionOptions = "infer",
storage_options: StorageOptions = None,
Expand All @@ -765,7 +764,7 @@ def read_xml(
expressions. For more complex XPath, use ``lxml`` which requires
installation.

namespaces : dict, optional
namespaces : dict, list of dicts, optional
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again namespaces will not be a list of dicts.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep removed, forgot this

The namespaces defined in XML document as dicts with key being
namespace prefix and value the URI. There is no need to include all
namespaces in XML, only the ones used in ``xpath`` expression.
Expand Down