@@ -387,7 +387,7 @@ def _iterparse_nodes(self, iterparse: Callable) -> list[dict[str, str | None]]:
387
387
388
388
return dicts
389
389
390
- def _validate_path (self ) -> None :
390
+ def _validate_path (self ) -> list [ Any ] :
391
391
"""
392
392
Validate xpath.
393
393
@@ -446,8 +446,7 @@ def parse_data(self) -> list[dict[str, str | None]]:
446
446
447
447
if self .iterparse is None :
448
448
self .xml_doc = self ._parse_doc (self .path_or_buffer )
449
- self ._validate_path ()
450
- elems = self .xml_doc .findall (self .xpath , namespaces = self .namespaces )
449
+ elems = self ._validate_path ()
451
450
452
451
self ._validate_names ()
453
452
@@ -459,7 +458,7 @@ def parse_data(self) -> list[dict[str, str | None]]:
459
458
460
459
return xml_dicts
461
460
462
- def _validate_path (self ) -> None :
461
+ def _validate_path (self ) -> list [ Any ] :
463
462
"""
464
463
Notes
465
464
-----
@@ -468,18 +467,28 @@ def _validate_path(self) -> None:
468
467
"""
469
468
470
469
msg = (
471
- "xpath does not return any nodes. "
470
+ "xpath does not return any nodes or attributes. "
471
+ "Be sure to specify in `xpath` the parent nodes of "
472
+ "children and attributes to parse. "
472
473
"If document uses namespaces denoted with "
473
474
"xmlns, be sure to define namespaces and "
474
475
"use them in xpath."
475
476
)
476
477
try :
477
- elems = self .xml_doc .find (self .xpath , namespaces = self .namespaces )
478
+ elems = self .xml_doc .findall (self .xpath , namespaces = self .namespaces )
479
+ children = [ch for el in elems for ch in el .findall ("*" )]
480
+ attrs = {k : v for el in elems for k , v in el .attrib .items ()}
481
+
478
482
if elems is None :
479
483
raise ValueError (msg )
480
484
481
- if elems is not None and elems .find ("*" ) is None and elems .attrib is None :
482
- raise ValueError (msg )
485
+ if elems is not None :
486
+ if self .elems_only and children == []:
487
+ raise ValueError (msg )
488
+ elif self .attrs_only and attrs == {}:
489
+ raise ValueError (msg )
490
+ elif children == [] and attrs == {}:
491
+ raise ValueError (msg )
483
492
484
493
except (KeyError , SyntaxError ):
485
494
raise SyntaxError (
@@ -488,6 +497,8 @@ def _validate_path(self) -> None:
488
497
"undeclared namespace prefix."
489
498
)
490
499
500
+ return elems
501
+
491
502
def _validate_names (self ) -> None :
492
503
children : list [Any ]
493
504
@@ -554,8 +565,7 @@ def parse_data(self) -> list[dict[str, str | None]]:
554
565
self .xsl_doc = self ._parse_doc (self .stylesheet )
555
566
self .xml_doc = self ._transform_doc ()
556
567
557
- self ._validate_path ()
558
- elems = self .xml_doc .xpath (self .xpath , namespaces = self .namespaces )
568
+ elems = self ._validate_path ()
559
569
560
570
self ._validate_names ()
561
571
@@ -567,25 +577,33 @@ def parse_data(self) -> list[dict[str, str | None]]:
567
577
568
578
return xml_dicts
569
579
570
- def _validate_path (self ) -> None :
580
+ def _validate_path (self ) -> list [ Any ] :
571
581
572
582
msg = (
573
- "xpath does not return any nodes. "
574
- "Be sure row level nodes are in xpath. "
583
+ "xpath does not return any nodes or attributes. "
584
+ "Be sure to specify in `xpath` the parent nodes of "
585
+ "children and attributes to parse. "
575
586
"If document uses namespaces denoted with "
576
587
"xmlns, be sure to define namespaces and "
577
588
"use them in xpath."
578
589
)
579
590
580
591
elems = self .xml_doc .xpath (self .xpath , namespaces = self .namespaces )
581
- children = self . xml_doc . xpath (self . xpath + "/*" , namespaces = self . namespaces )
582
- attrs = self . xml_doc . xpath ( self . xpath + "/@*" , namespaces = self . namespaces )
592
+ children = [ ch for el in elems for ch in el . xpath ("*" )]
593
+ attrs = { k : v for el in elems for k , v in el . attrib . items ()}
583
594
584
595
if elems == []:
585
596
raise ValueError (msg )
586
597
587
- if elems != [] and attrs == [] and children == []:
588
- raise ValueError (msg )
598
+ if elems != []:
599
+ if self .elems_only and children == []:
600
+ raise ValueError (msg )
601
+ elif self .attrs_only and attrs == {}:
602
+ raise ValueError (msg )
603
+ elif children == [] and attrs == {}:
604
+ raise ValueError (msg )
605
+
606
+ return elems
589
607
590
608
def _validate_names (self ) -> None :
591
609
children : list [Any ]
0 commit comments