Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 31 additions & 9 deletions pandas/io/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,14 @@ def _parse_doc(
"""
raise AbstractMethodError(self)

def _get_default_namespace(root):
    """
    Return the namespace URI embedded in the tag of ``root``, if any.

    The tag is expected in the ``{uri}localname`` form; the text between
    the leading ``{`` and the first ``}`` is returned.

    Parameters
    ----------
    root : element-like or None
        Object exposing a ``tag`` attribute (typically the document root).

    Returns
    -------
    str or None
        The namespace URI, or ``None`` when ``root`` is ``None``, when its
        tag is not a string, or when the tag does not start with ``{``.

    Notes
    -----
    Only the tag of ``root`` itself is inspected, so the result is whatever
    namespace that element is bound to.
    """
    if root is None:
        return None

    tag = root.tag
    # Non-string tags are skipped (presumably comment / processing-instruction
    # nodes, whose tag is a callable -- confirm against the parser in use).
    if not isinstance(tag, str) or not tag.startswith("{"):
        return None

    # Everything before the first "}" is "{uri"; drop the opening brace(s).
    braced_uri, _, _ = tag.partition("}")
    return braced_uri.lstrip("{")


class _EtreeFrameParser(_XMLFrameParser):
"""
Expand Down Expand Up @@ -587,23 +595,36 @@ def _validate_path(self) -> list[Any]:
"use them in xpath."
)

elems = self.xml_doc.xpath(self.xpath, namespaces=self.namespaces)
namespaces = self.namespaces

if namespaces is None:
default_ns = _get_default_namespace(self.xml_doc)
if default_ns is not None:
namespaces = {"_default": default_ns}
xpath = self.xpath.replace("//", "//_default:")
else:
xpath = self.xpath
else:
xpath = self.xpath

elems = self.xml_doc.xpath(xpath, namespaces=namespaces)

children = [ch for el in elems for ch in el.xpath("*")]
attrs = {k: v for el in elems for k, v in el.attrib.items()}

if elems == []:
raise ValueError(msg)

if elems != []:
if self.elems_only and children == []:
raise ValueError(msg)
if self.attrs_only and attrs == {}:
raise ValueError(msg)
if children == [] and attrs == {}:
raise ValueError(msg)
if self.elems_only and children == []:
raise ValueError(msg)
if self.attrs_only and attrs == {}:
raise ValueError(msg)
if children == [] and attrs == {}:
raise ValueError(msg)

return elems


def _validate_names(self) -> None:
children: list[Any]

Expand Down Expand Up @@ -695,11 +716,12 @@ def get_data_from_filepath(
compression=compression,
storage_options=storage_options,
) as handle_obj:
return (
data = (
preprocess_data(handle_obj.handle.read())
if hasattr(handle_obj.handle, "read")
else handle_obj.handle
)
return data


def preprocess_data(
Expand Down
Loading