5
5
from __future__ import annotations
6
6
7
7
import io
8
+ from typing import Iterable
8
9
9
10
from pandas ._typing import (
10
11
CompressionOptions ,
11
12
FilePath ,
12
13
ReadBuffer ,
13
14
StorageOptions ,
15
+ XMLParsers ,
14
16
)
15
17
from pandas .compat ._optional import import_optional_dependency
16
18
from pandas .errors import (
@@ -98,17 +100,17 @@ class _XMLFrameParser:
98
100
99
101
def __init__ (
100
102
self ,
101
- path_or_buffer ,
102
- xpath ,
103
- namespaces ,
104
- elems_only ,
105
- attrs_only ,
106
- names ,
107
- encoding ,
108
- stylesheet ,
103
+ path_or_buffer : FilePath | ReadBuffer [ bytes ] | ReadBuffer [ str ] ,
104
+ xpath : str ,
105
+ namespaces : dict | list [ dict ] | None ,
106
+ elems_only : bool ,
107
+ attrs_only : bool ,
108
+ names : Iterable [ str ] | None ,
109
+ encoding : str | None ,
110
+ stylesheet : FilePath | ReadBuffer [ bytes ] | ReadBuffer [ str ] | None ,
109
111
compression : CompressionOptions ,
110
112
storage_options : StorageOptions ,
111
- ) -> None :
113
+ ):
112
114
self .path_or_buffer = path_or_buffer
113
115
self .xpath = xpath
114
116
self .namespaces = namespaces
@@ -371,9 +373,6 @@ class _LxmlFrameParser(_XMLFrameParser):
371
373
XPath 1.0 and XSLT 1.0.
372
374
"""
373
375
374
- def __init__ (self , * args , ** kwargs ) -> None :
375
- super ().__init__ (* args , ** kwargs )
376
-
377
376
def parse_data (self ) -> list [dict [str , str | None ]]:
378
377
"""
379
378
Parse xml data.
@@ -570,7 +569,7 @@ def _transform_doc(self) -> bytes:
570
569
571
570
def get_data_from_filepath (
572
571
filepath_or_buffer : FilePath | bytes | ReadBuffer [bytes ] | ReadBuffer [str ],
573
- encoding ,
572
+ encoding : str | None ,
574
573
compression : CompressionOptions ,
575
574
storage_options : StorageOptions ,
576
575
) -> str | bytes | ReadBuffer [bytes ] | ReadBuffer [str ]:
@@ -658,15 +657,15 @@ class that build Data Frame and infers specific dtypes.
658
657
659
658
660
659
def _parse (
661
- path_or_buffer ,
662
- xpath ,
663
- namespaces ,
664
- elems_only ,
665
- attrs_only ,
666
- names ,
667
- encoding ,
668
- parser ,
669
- stylesheet ,
660
+ path_or_buffer : FilePath | ReadBuffer [ bytes ] | ReadBuffer [ str ] ,
661
+ xpath : str ,
662
+ namespaces : dict | list [ dict ] | None ,
663
+ elems_only : bool ,
664
+ attrs_only : bool ,
665
+ names : Iterable [ str ] | None ,
666
+ encoding : str | None ,
667
+ parser : XMLParsers ,
668
+ stylesheet : FilePath | ReadBuffer [ bytes ] | ReadBuffer [ str ] | None ,
670
669
compression : CompressionOptions ,
671
670
storage_options : StorageOptions ,
672
671
** kwargs ,
@@ -686,11 +685,11 @@ def _parse(
686
685
* If parser is not lxml or etree.
687
686
"""
688
687
689
- lxml = import_optional_dependency ("lxml.etree" , errors = "ignore" )
690
-
691
688
p : _EtreeFrameParser | _LxmlFrameParser
692
689
693
690
if parser == "lxml" :
691
+ lxml = import_optional_dependency ("lxml.etree" , errors = "ignore" )
692
+
694
693
if lxml is not None :
695
694
p = _LxmlFrameParser (
696
695
path_or_buffer ,
@@ -734,13 +733,13 @@ def _parse(
734
733
)
735
734
def read_xml (
736
735
path_or_buffer : FilePath | ReadBuffer [bytes ] | ReadBuffer [str ],
737
- xpath : str | None = "./*" ,
736
+ xpath : str = "./*" ,
738
737
namespaces : dict | list [dict ] | None = None ,
739
- elems_only : bool | None = False ,
740
- attrs_only : bool | None = False ,
741
- names : list [str ] | None = None ,
738
+ elems_only : bool = False ,
739
+ attrs_only : bool = False ,
740
+ names : Iterable [str ] | None = None ,
742
741
encoding : str | None = "utf-8" ,
743
- parser : str | None = "lxml" ,
742
+ parser : XMLParsers = "lxml" ,
744
743
stylesheet : FilePath | ReadBuffer [bytes ] | ReadBuffer [str ] | None = None ,
745
744
compression : CompressionOptions = "infer" ,
746
745
storage_options : StorageOptions = None ,
@@ -765,7 +764,7 @@ def read_xml(
765
764
expressions. For more complex XPath, use ``lxml`` which requires
766
765
installation.
767
766
768
- namespaces : dict, optional
767
+ namespaces : dict, list of dicts, optional
769
768
The namespaces defined in XML document as dicts with key being
770
769
namespace prefix and value the URI. There is no need to include all
771
770
namespaces in XML, only the ones used in ``xpath`` expression.
0 commit comments