9
9
10
10
from pandas ._typing import (
11
11
CompressionOptions ,
12
+ ConvertersArg ,
13
+ DtypeArg ,
12
14
FilePath ,
15
+ ParseDatesArg ,
13
16
ReadBuffer ,
14
17
StorageOptions ,
15
18
XMLParsers ,
@@ -67,6 +70,23 @@ class _XMLFrameParser:
67
70
names : list
68
71
Column names for DataFrame of parsed XML data.
69
72
73
+ dtype : dict
74
+ Data type for data or columns. E.g. {{'a': np.float64,
75
+ 'b': np.int32, 'c': 'Int64'}}
76
+
77
+ .. versionadded:: 1.5.0
78
+
79
+ converters : dict, optional
80
+ Dict of functions for converting values in certain columns. Keys can
81
+ either be integers or column labels.
82
+
83
+ .. versionadded:: 1.5.0
84
+
85
+ parse_dates : bool or list of int or names or list of lists or dict
86
+ Converts either index or select columns to datetimes.
87
+
88
+ .. versionadded:: 1.5.0
89
+
70
90
encoding : str
71
91
Encoding of xml object or document.
72
92
@@ -109,6 +129,9 @@ def __init__(
109
129
elems_only : bool ,
110
130
attrs_only : bool ,
111
131
names : Sequence [str ] | None ,
132
+ dtype : DtypeArg | None ,
133
+ converters : ConvertersArg | None ,
134
+ parse_dates : ParseDatesArg | None ,
112
135
encoding : str | None ,
113
136
stylesheet : FilePath | ReadBuffer [bytes ] | ReadBuffer [str ] | None ,
114
137
compression : CompressionOptions ,
@@ -120,6 +143,9 @@ def __init__(
120
143
self .elems_only = elems_only
121
144
self .attrs_only = attrs_only
122
145
self .names = names
146
+ self .dtype = dtype
147
+ self .converters = converters
148
+ self .parse_dates = parse_dates
123
149
self .encoding = encoding
124
150
self .stylesheet = stylesheet
125
151
self .is_style = None
@@ -671,6 +697,9 @@ def _parse(
671
697
elems_only : bool ,
672
698
attrs_only : bool ,
673
699
names : Sequence [str ] | None ,
700
+ dtype : DtypeArg | None ,
701
+ converters : ConvertersArg | None ,
702
+ parse_dates : ParseDatesArg | None ,
674
703
encoding : str | None ,
675
704
parser : XMLParsers ,
676
705
stylesheet : FilePath | ReadBuffer [bytes ] | ReadBuffer [str ] | None ,
@@ -706,6 +735,9 @@ def _parse(
706
735
elems_only ,
707
736
attrs_only ,
708
737
names ,
738
+ dtype ,
739
+ converters ,
740
+ parse_dates ,
709
741
encoding ,
710
742
stylesheet ,
711
743
compression ,
@@ -722,6 +754,9 @@ def _parse(
722
754
elems_only ,
723
755
attrs_only ,
724
756
names ,
757
+ dtype ,
758
+ converters ,
759
+ parse_dates ,
725
760
encoding ,
726
761
stylesheet ,
727
762
compression ,
@@ -732,7 +767,13 @@ def _parse(
732
767
733
768
data_dicts = p .parse_data ()
734
769
735
- return _data_to_frame (data = data_dicts , ** kwargs )
770
+ return _data_to_frame (
771
+ data = data_dicts ,
772
+ dtype = dtype ,
773
+ converters = converters ,
774
+ parse_dates = parse_dates ,
775
+ ** kwargs ,
776
+ )
736
777
737
778
738
779
@deprecate_nonkeyword_arguments (
@@ -749,6 +790,9 @@ def read_xml(
749
790
elems_only : bool = False ,
750
791
attrs_only : bool = False ,
751
792
names : Sequence [str ] | None = None ,
793
+ dtype : DtypeArg | None = None ,
794
+ converters : ConvertersArg | None = None ,
795
+ parse_dates : ParseDatesArg | None = None ,
752
796
# encoding cannot be None for lxml and StringIO input
753
797
encoding : str | None = "utf-8" ,
754
798
parser : XMLParsers = "lxml" ,
@@ -799,6 +843,35 @@ def read_xml(
799
843
Column names for DataFrame of parsed XML data. Use this parameter to
800
844
rename original element names and distinguish same named elements.
801
845
846
+ dtype : Type name or dict of column -> type, optional
847
+ Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32,
848
+ 'c': 'Int64'}}
849
+ Use `str` or `object` together with suitable `na_values` settings
850
+ to preserve and not interpret dtype.
851
+ If converters are specified, they will be applied INSTEAD
852
+ of dtype conversion.
853
+
854
+ .. versionadded:: 1.5.0
855
+
856
+ converters : dict, optional
857
+ Dict of functions for converting values in certain columns. Keys can either
858
+ be integers or column labels.
859
+
860
+ .. versionadded:: 1.5.0
861
+
862
+ parse_dates : bool or list of int or names or list of lists or dict, default False
863
+ Identifiers to parse index or columns to datetime. The behavior is as follows:
864
+
865
+ * boolean. If True -> try parsing the index.
866
+ * list of int or names, e.g. [1, 2, 3] -> try parsing columns 1, 2, 3
867
+ each as a separate date column.
868
+ * list of lists, e.g. [[1, 3]] -> combine columns 1 and 3 and parse as
869
+ a single date column.
870
+ * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call
871
+ result 'foo'
872
+
873
+ .. versionadded:: 1.5.0
874
+
802
875
encoding : str, optional, default 'utf-8'
803
876
Encoding of XML document.
804
877
@@ -942,6 +1015,9 @@ def read_xml(
942
1015
elems_only = elems_only ,
943
1016
attrs_only = attrs_only ,
944
1017
names = names ,
1018
+ dtype = dtype ,
1019
+ converters = converters ,
1020
+ parse_dates = parse_dates ,
945
1021
encoding = encoding ,
946
1022
parser = parser ,
947
1023
stylesheet = stylesheet ,
0 commit comments