-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
TYP: type read_xml and deprecate passing positional arguments #45133
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
baa3ab2
ab8f1c6
9ff4878
6998251
0b85c7d
3accd12
1da9569
a2348a2
ba355ba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,12 +5,14 @@ | |
from __future__ import annotations | ||
|
||
import io | ||
from typing import Iterable | ||
|
||
from pandas._typing import ( | ||
CompressionOptions, | ||
FilePath, | ||
ReadBuffer, | ||
StorageOptions, | ||
XMLParsers, | ||
) | ||
from pandas.compat._optional import import_optional_dependency | ||
from pandas.errors import ( | ||
|
@@ -98,17 +100,17 @@ class _XMLFrameParser: | |
|
||
def __init__( | ||
self, | ||
path_or_buffer, | ||
xpath, | ||
namespaces, | ||
elems_only, | ||
attrs_only, | ||
names, | ||
encoding, | ||
stylesheet, | ||
path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str], | ||
xpath: str, | ||
twoertwein marked this conversation as resolved.
Show resolved
Hide resolved
|
||
namespaces: dict | list[dict] | None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I cannot see how list of dicts is used for this argument. Each XML doc will have a unique set of namespaces (i.e., prefix to URI pairs). Does mypy raise? Consider There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could not find them on my newest commit, did you maybe review an older one? I have moved the types from the public api first and checked them for validity afterwards. I realised that list[dict] did not work and removed them again then |
||
elems_only: bool, | ||
attrs_only: bool, | ||
names: Iterable[str] | None, | ||
encoding: str | None, | ||
stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None, | ||
compression: CompressionOptions, | ||
storage_options: StorageOptions, | ||
) -> None: | ||
): | ||
self.path_or_buffer = path_or_buffer | ||
self.xpath = xpath | ||
self.namespaces = namespaces | ||
|
@@ -371,9 +373,6 @@ class _LxmlFrameParser(_XMLFrameParser): | |
XPath 1.0 and XSLT 1.0. | ||
""" | ||
|
||
def __init__(self, *args, **kwargs) -> None: | ||
super().__init__(*args, **kwargs) | ||
|
||
def parse_data(self) -> list[dict[str, str | None]]: | ||
""" | ||
Parse xml data. | ||
|
@@ -570,7 +569,7 @@ def _transform_doc(self) -> bytes: | |
|
||
def get_data_from_filepath( | ||
filepath_or_buffer: FilePath | bytes | ReadBuffer[bytes] | ReadBuffer[str], | ||
encoding, | ||
encoding: str | None, | ||
compression: CompressionOptions, | ||
storage_options: StorageOptions, | ||
) -> str | bytes | ReadBuffer[bytes] | ReadBuffer[str]: | ||
|
@@ -658,15 +657,15 @@ class that build Data Frame and infers specific dtypes. | |
|
||
|
||
def _parse( | ||
path_or_buffer, | ||
xpath, | ||
namespaces, | ||
elems_only, | ||
attrs_only, | ||
names, | ||
encoding, | ||
parser, | ||
stylesheet, | ||
path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str], | ||
xpath: str, | ||
namespaces: dict | list[dict] | None, | ||
elems_only: bool, | ||
attrs_only: bool, | ||
names: Iterable[str] | None, | ||
encoding: str | None, | ||
parser: XMLParsers, | ||
stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None, | ||
compression: CompressionOptions, | ||
storage_options: StorageOptions, | ||
**kwargs, | ||
|
@@ -686,11 +685,11 @@ def _parse( | |
* If parser is not lxml or etree. | ||
""" | ||
|
||
lxml = import_optional_dependency("lxml.etree", errors="ignore") | ||
|
||
p: _EtreeFrameParser | _LxmlFrameParser | ||
|
||
if parser == "lxml": | ||
lxml = import_optional_dependency("lxml.etree", errors="ignore") | ||
|
||
if lxml is not None: | ||
p = _LxmlFrameParser( | ||
path_or_buffer, | ||
|
@@ -734,13 +733,13 @@ def _parse( | |
) | ||
def read_xml( | ||
path_or_buffer: FilePath | ReadBuffer[bytes] | ReadBuffer[str], | ||
xpath: str | None = "./*", | ||
xpath: str = "./*", | ||
namespaces: dict | list[dict] | None = None, | ||
elems_only: bool | None = False, | ||
attrs_only: bool | None = False, | ||
names: list[str] | None = None, | ||
elems_only: bool = False, | ||
attrs_only: bool = False, | ||
names: Iterable[str] | None = None, | ||
encoding: str | None = "utf-8", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can type this correctly after enforcing the positional deprecation |
||
parser: str | None = "lxml", | ||
parser: XMLParsers = "lxml", | ||
stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None = None, | ||
compression: CompressionOptions = "infer", | ||
storage_options: StorageOptions = None, | ||
|
@@ -765,7 +764,7 @@ def read_xml( | |
expressions. For more complex XPath, use ``lxml`` which requires | ||
installation. | ||
|
||
namespaces : dict, optional | ||
namespaces : dict, list of dicts, optional | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again namespaces will not be a list of dicts. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep removed, forgot this |
||
The namespaces defined in XML document as dicts with key being | ||
namespace prefix and value the URI. There is no need to include all | ||
namespaces in XML, only the ones used in ``xpath`` expression. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice addition for this type. Maybe inspired by new IO XML module.