Skip to content

Commit beb4dc4

Browse files
authored
CLN: Adjust namespace handling in to_xml (pandas-dev#54201)
1 parent 273c8c6 commit beb4dc4

File tree

2 files changed

+49
-52
lines changed

2 files changed

+49
-52
lines changed

pandas/io/formats/xml.py

+15-43
Original file line number | Diff line number | Diff line change
@@ -251,18 +251,11 @@ def other_namespaces(self) -> dict:
251251
"""
252252

253253
nmsp_dict: dict[str, str] = {}
254-
if self.namespaces and self.prefix is None:
255-
nmsp_dict = {
256-
"xmlns": n # noqa: RUF011
257-
for p, n in self.namespaces.items()
258-
if p != ""
259-
}
260-
261-
if self.namespaces and self.prefix:
254+
if self.namespaces:
262255
nmsp_dict = {
263-
"xmlns": n # noqa: RUF011
256+
f"xmlns{p if p=='' else f':{p}'}": n
264257
for p, n in self.namespaces.items()
265-
if p == ""
258+
if n != self.prefix_uri[1:-1]
266259
}
267260

268261
return nmsp_dict
@@ -365,16 +358,16 @@ def build_tree(self) -> bytes:
365358
elem_row = self.build_attribs(d, elem_row)
366359
self.build_elems(d, elem_row)
367360

368-
self.out_xml = tostring(self.root, method="xml", encoding=self.encoding)
361+
self.out_xml = tostring(
362+
self.root,
363+
method="xml",
364+
encoding=self.encoding,
365+
xml_declaration=self.xml_declaration,
366+
)
369367

370368
if self.pretty_print:
371369
self.out_xml = self.prettify_tree()
372370

373-
if self.xml_declaration:
374-
self.out_xml = self.add_declaration()
375-
else:
376-
self.out_xml = self.remove_declaration()
377-
378371
if self.stylesheet is not None:
379372
raise ValueError(
380373
"To use stylesheet, you need lxml installed and selected as parser."
@@ -395,8 +388,10 @@ def get_prefix_uri(self) -> str:
395388
uri = f"{{{self.namespaces[self.prefix]}}}"
396389
except KeyError:
397390
raise KeyError(f"{self.prefix} is not included in namespaces")
398-
else:
391+
elif "" in self.namespaces:
399392
uri = f'{{{self.namespaces[""]}}}'
393+
else:
394+
uri = ""
400395

401396
return uri
402397

@@ -418,31 +413,6 @@ def prettify_tree(self) -> bytes:
418413

419414
return dom.toprettyxml(indent=" ", encoding=self.encoding)
420415

421-
def add_declaration(self) -> bytes:
422-
"""
423-
Add xml declaration.
424-
425-
This method will add xml declaration of working tree. Currently,
426-
xml_declaration is supported in etree starting in Python 3.8.
427-
"""
428-
decl = f'<?xml version="1.0" encoding="{self.encoding}"?>\n'
429-
430-
return (
431-
self.out_xml
432-
if self.out_xml.startswith(b"<?xml")
433-
else decl.encode(self.encoding) + self.out_xml
434-
)
435-
436-
def remove_declaration(self) -> bytes:
437-
"""
438-
Remove xml declaration.
439-
440-
This method will remove xml declaration of working tree. Currently,
441-
pretty_print is not supported in etree.
442-
"""
443-
444-
return self.out_xml.split(b"?>")[-1].strip()
445-
446416

447417
class LxmlXMLFormatter(BaseXMLFormatter):
448418
"""
@@ -513,8 +483,10 @@ def get_prefix_uri(self) -> str:
513483
uri = f"{{{self.namespaces[self.prefix]}}}"
514484
except KeyError:
515485
raise KeyError(f"{self.prefix} is not included in namespaces")
516-
else:
486+
elif "" in self.namespaces:
517487
uri = f'{{{self.namespaces[""]}}}'
488+
else:
489+
uri = ""
518490

519491
return uri
520492

pandas/tests/io/xml/test_to_xml.py

+34-9
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,6 @@ def equalize_decl(doc):
154154
'<?xml version="1.0" encoding="utf-8"?',
155155
"<?xml version='1.0' encoding='utf-8'?",
156156
)
157-
158157
return doc
159158

160159

@@ -705,6 +704,39 @@ def test_default_namespace(parser, geom_df):
705704
assert output == expected
706705

707706

707+
def test_unused_namespaces(parser, geom_df):
708+
expected = """\
709+
<?xml version='1.0' encoding='utf-8'?>
710+
<data xmlns:oth="http://other.org" xmlns:ex="http://example.com">
711+
<row>
712+
<index>0</index>
713+
<shape>square</shape>
714+
<degrees>360</degrees>
715+
<sides>4.0</sides>
716+
</row>
717+
<row>
718+
<index>1</index>
719+
<shape>circle</shape>
720+
<degrees>360</degrees>
721+
<sides/>
722+
</row>
723+
<row>
724+
<index>2</index>
725+
<shape>triangle</shape>
726+
<degrees>180</degrees>
727+
<sides>3.0</sides>
728+
</row>
729+
</data>"""
730+
731+
output = geom_df.to_xml(
732+
namespaces={"oth": "http://other.org", "ex": "http://example.com"},
733+
parser=parser,
734+
)
735+
output = equalize_decl(output)
736+
737+
assert output == expected
738+
739+
708740
# PREFIX
709741

710742

@@ -750,7 +782,7 @@ def test_missing_prefix_in_nmsp(parser, geom_df):
750782
def test_namespace_prefix_and_default(parser, geom_df):
751783
expected = """\
752784
<?xml version='1.0' encoding='utf-8'?>
753-
<doc:data xmlns="http://example.com" xmlns:doc="http://other.org">
785+
<doc:data xmlns:doc="http://other.org" xmlns="http://example.com">
754786
<doc:row>
755787
<doc:index>0</doc:index>
756788
<doc:shape>square</doc:shape>
@@ -778,13 +810,6 @@ def test_namespace_prefix_and_default(parser, geom_df):
778810
)
779811
output = equalize_decl(output)
780812

781-
if output is not None:
782-
# etree and lxml differs on order of namespace prefixes
783-
output = output.replace(
784-
'xmlns:doc="http://other.org" xmlns="http://example.com"',
785-
'xmlns="http://example.com" xmlns:doc="http://other.org"',
786-
)
787-
788813
assert output == expected
789814

790815

0 commit comments

Comments
 (0)