Skip to content

Commit 70cf79c

Browse files
authored
Merge pull request #200 from khaeru/issue/199
Fix XML namespace handling in read of structure-specific SDMX-ML
2 parents e88f8d3 + 293b0c6 commit 70cf79c

File tree

10 files changed

+89
-58
lines changed

10 files changed

+89
-58
lines changed

doc/whatsnew.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Next release
77
============
88

99
- Bug fix for writing :class:`.VersionableArtefact` to SDMX-ML 2.1: :class:`KeyError` was raised if :attr:`.VersionableArtefact.version` was an instance of :class:`.Version` (:pull:`198`).
10+
- Bug fix for reading data from structure-specific SDMX-ML: :class:`.XMLParseError` / :class:`NotImplementedError` was raised when reading 2 messages in sequence that defined different XML namespaces (:pull:`200`, thanks :gh-user:`mephinet` for :issue:`199`).
1011

1112
v2.18.0 (2024-10-15)
1213
====================

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,11 @@ select = ["C9", "E", "F", "I", "W"]
9595
ignore = ["E501", "W191"]
9696
# Exceptions:
9797
# - .client._handle_get_kwargs: 12
98-
# - .reader.xml.v21.read_message: 15
98+
# - .reader.xml.v21._component_end: 12
99+
# - .testing.generate_endpoint_tests: 11
99100
# - .writer.pandas._maybe_convert_datetime: 23
100101
# - .writer.pandas.write_dataset: 12
101-
mccabe.max-complexity = 11
102+
mccabe.max-complexity = 10
102103

103104
[tool.setuptools.packages]
104105
find = {}

sdmx/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ def _request_from_url(self, kwargs):
244244

245245
return requests.Request("get", url, params=parameters, headers=headers)
246246

247-
def _handle_get_kwargs(self, kwargs): # noqa: C901 TODO reduce complexity 12 → ≤11
247+
def _handle_get_kwargs(self, kwargs): # noqa: C901 TODO reduce complexity 12 → ≤10
248248
if kwargs.pop("validate", None) is not None:
249249
warn("validate= keyword argument to Client.get()", DeprecationWarning)
250250

sdmx/format/xml/common.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
# To be formatted
8787
"com": "{}/common",
8888
"md": "{}/metadata/generic",
89+
"md_ss": "{}/metadata/structurespecific",
8990
"data": "{}/data/structurespecific",
9091
"str": "{}/structure",
9192
"mes": "{}/message",
@@ -273,6 +274,8 @@ def install_schemas(
273274

274275

275276
class XMLFormat:
277+
"""Information about an SDMX-ML format."""
278+
276279
NS: Mapping[str, Optional[str]]
277280
_class_tag: list
278281

@@ -306,29 +309,32 @@ def ns_prefix(self, url) -> str:
306309
return prefix
307310
raise ValueError(url)
308311

312+
_NS_PATTERN = re.compile(r"(\{(?P<ns>.*)\}|(?P<ns_prefix>.*):)?(?P<localname>.*)")
313+
309314
@lru_cache()
310-
def qname(self, ns_or_name, name=None) -> QName:
315+
def qname(self, ns_or_name: str, name: Optional[str] = None) -> QName:
311316
"""Return a fully-qualified tag `name` in namespace `ns`."""
312317
if isinstance(ns_or_name, QName):
313318
# Already a QName; do nothing
314319
return ns_or_name
315-
else:
316-
if name is None:
317-
match = re.fullmatch(
318-
r"(\{(?P<ns_full>.*)\}|(?P<ns_key>.*):)?(?P<name>.*)", ns_or_name
319-
)
320-
assert match
321-
name = match.group("name")
322-
if ns_key := match.group("ns_key"):
323-
ns = self.NS[ns_key]
324-
elif ns := match.group("ns_full"):
325-
pass
326-
else:
327-
ns = None
320+
321+
if name is None:
322+
# `ns_or_name` contains the local name ("tag") and possibly a namespace
323+
# prefix ("ns:tag") or full namespace name ("{foo}tag")
324+
match = self._NS_PATTERN.fullmatch(ns_or_name)
325+
assert match
326+
name = match.group("localname")
327+
if prefix := match.group("ns_prefix"):
328+
ns = self.NS[prefix]
329+
elif ns := match.group("ns"):
330+
pass
328331
else:
329-
ns = self.NS[ns_or_name]
332+
ns = None # Tag without namespace
333+
else:
334+
# `ns_or_name` is the namespace prefix; `name` is the local name
335+
ns = self.NS[ns_or_name]
330336

331-
return QName(ns, name)
337+
return QName(ns, name)
332338

333339
@lru_cache()
334340
def class_for_tag(self, tag) -> Optional[type]:

sdmx/reader/json.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class Reader(BaseReader):
3737
def detect(cls, content):
3838
return content.startswith(b"{")
3939

40-
def read_message(self, source, structure=None, **kwargs): # noqa: C901 TODO reduce complexity 15 → ≤11
40+
def read_message(self, source, structure=None, **kwargs): # noqa: C901 TODO reduce complexity 15 → ≤10
4141
# Initialize message instance
4242
msg = DataMessage()
4343

sdmx/reader/xml/common.py

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,9 @@ class XMLEventReader(BaseReader):
129129
#: :class:`.BaseReference` subclass used by this reader.
130130
Reference: ClassVar[type[BaseReference]]
131131

132-
# Mapping from (QName, ["start", "end"]) to a function that parses the element/event
133-
# or else None
134-
parser: ClassVar[Mapping[tuple[QName, str], Callable]]
132+
#: Mapping from (QName, ["start", "end"]) to a function that parses the
133+
#: element/event or else None (no parsing).
134+
parser: ClassVar[dict[tuple[QName, str], Callable]]
135135

136136
# One-way counter for use in stacks
137137
_count: Iterator[int]
@@ -151,7 +151,7 @@ def __init__(self):
151151

152152
# BaseReader methods
153153

154-
def read_message( # noqa: C901 TODO reduce complexity 12 → ≤11
154+
def read_message(
155155
self,
156156
source,
157157
structure=None,
@@ -196,20 +196,14 @@ def read_message( # noqa: C901 TODO reduce complexity 12 → ≤11
196196
# Don't know what to do for this (element, event)
197197
raise NotImplementedError(element.tag, event) from None
198198

199-
try:
200-
# Parse the element
201-
result = func(self, element)
202-
except TypeError:
203-
if func is None: # Explicitly no parser for this (element, event)
204-
continue # Skip
205-
else: # pragma: no cover
206-
raise
207-
else:
208-
# Store the result
209-
self.push(result)
199+
if func is None:
200+
continue # Explicitly no parser for this (element, event) → skip
201+
202+
result = func(self, element) # Parse the element
203+
self.push(result) # Store the result
210204

211-
if event == "end":
212-
element.clear() # Free memory
205+
if event == "end":
206+
element.clear() # Free memory
213207

214208
except Exception as exc:
215209
# Parsing failed; display some diagnostic information
@@ -368,10 +362,6 @@ def unstash(self):
368362
self.stack[s].update(values)
369363

370364
# Delegate to version-specific module
371-
@classmethod
372-
def NS(cls):
373-
return cls.format.NS
374-
375365
@classmethod
376366
def class_for_tag(cls, tag: str) -> type:
377367
return cls.format.class_for_tag(tag)

sdmx/reader/xml/v21.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import logging
1010
import re
1111
from copy import copy
12-
from itertools import chain
12+
from itertools import chain, filterfalse
1313
from sys import maxsize
1414
from typing import Any, MutableMapping, Optional, cast
1515

@@ -34,7 +34,6 @@
3434
)
3535

3636
log = logging.getLogger(__name__)
37-
log.setLevel(logging.DEBUG)
3837

3938

4039
class _NoText:
@@ -151,14 +150,11 @@ def _message(reader: Reader, elem):
151150
reader.push("DataSetClass", model.get_class(f"{QName(elem).localname}Set"))
152151

153152
# Handle namespaces mapped on `elem` but not part of the standard set
154-
for key, value in filter(
155-
lambda kv: kv[1] not in set(reader.NS().values()), elem.nsmap.items()
156-
):
157-
# Register the namespace
158-
reader.NS().update({key: value})
159-
# Use _ds_start() and _ds_end() to handle <{key}:DataSet> elements
160-
reader.start(f"{key}:DataSet", only=False)(_ds_start)
161-
reader.end(f"{key}:DataSet", only=False)(_ds_end)
153+
existing_ns = set(reader.format.NS.values())
154+
for namespace in filterfalse(existing_ns.__contains__, elem.nsmap.values()):
155+
# Use _ds_start() and _ds_end() to handle <{namespace}DataSet> elements
156+
reader.parser[QName(namespace, "DataSet"), "start"] = _ds_start
157+
reader.parser[QName(namespace, "DataSet"), "end"] = _ds_end
162158

163159
# Instantiate the message object
164160
return reader.class_for_tag(elem.tag)()
@@ -602,7 +598,7 @@ def _maybe_unbounded(value: str) -> Optional[int]:
602598
return None if value == "unbounded" else int(value)
603599

604600

605-
# TODO Reduce complexity from 12 → 11, by adding separate parsers for certain COMPONENTs
601+
# TODO Reduce complexity from 12 → ≤10, by adding separate parsers for some COMPONENTs
606602
@end(COMPONENT, only=False)
607603
@possible_reference(unstash=True)
608604
def _component_end(reader: Reader, elem): # noqa: C901
@@ -1160,7 +1156,7 @@ def _obs_ss(reader, elem):
11601156
except KeyError:
11611157
pass
11621158
else:
1163-
elem.attrib[dim_at_obs.id] = reader.qname(tmp).localname
1159+
_, elem.attrib[dim_at_obs.id] = tmp.split(":", maxsplit=2)
11641160

11651161
if ss_without_structure and dim_at_obs is not model.AllDimensions:
11661162
# Create the observation key
@@ -1241,8 +1237,10 @@ def _mds_start(reader, elem):
12411237
mds = reader.class_for_tag(elem.tag)()
12421238

12431239
# Retrieve the (message-local) ID referencing a data structure definition
1244-
id = elem.attrib.get("structureRef", None) or elem.attrib.get(
1245-
reader.qname("metadata:structureRef"), None
1240+
id = (
1241+
elem.attrib.get("structureRef", None)
1242+
or elem.attrib.get(reader.qname("md:structureRef"), None)
1243+
or elem.attrib.get(reader.qname("md_ss:structureRef"), None)
12461244
)
12471245

12481246
# Get a reference to the MSD that structures the data set

sdmx/testing/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def parametrize_specimens(metafunc):
134134
}
135135

136136

137-
def generate_endpoint_tests(metafunc):
137+
def generate_endpoint_tests(metafunc): # noqa: C901 TODO reduce complexity 11 → ≤10
138138
"""pytest hook for parametrizing tests that need an "endpoint" fixture.
139139
140140
This function relies on the :class:`.DataSourceTest` base class defined in

sdmx/tests/reader/test_reader_xml_v21.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,41 @@ def test_gh_164(specimen):
201201
assert isinstance(da.related_to, v21.NoSpecifiedRelationship)
202202

203203

204+
def test_gh_199():
205+
"""Test of https://github.com/khaeru/sdmx/issues/199."""
206+
import sdmx.format.xml.v21
207+
208+
# Template for DSD URN
209+
URN = "urn:sdmx:org.sdmx.infomodel.datastructure.DataStructure=FOO:BAR({})"
210+
211+
# Template for SDMX-ML data message
212+
CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
213+
<mes:StructureSpecificData
214+
xmlns:mes="http://www.sdmx.org/resources/sdmxml/schemas/v2_1/message"
215+
xmlns:u="{}:ObsLevelDim:TIME_PERIOD">
216+
<u:DataSet>
217+
...
218+
</u:DataSet>
219+
</mes:StructureSpecificData>"""
220+
221+
# Construct a URN and message; store as BytesIO
222+
urn1 = URN.format("1")
223+
dsd1 = v21.DataStructureDefinition(urn=urn1)
224+
f1 = BytesIO(CONTENT.format(urn1).encode())
225+
226+
# Construct a *different* URN and message with this other URN mapped to the "u:" XML
227+
# namespace prefix
228+
urn2 = URN.format("2")
229+
dsd2 = v21.DataStructureDefinition(urn=urn2)
230+
f2 = BytesIO(CONTENT.format(urn2).encode())
231+
232+
# First message can be parsed
233+
sdmx.read_sdmx(f1, structure=dsd1)
234+
235+
# #199: raises XMLParseError/NotImplementedError
236+
sdmx.read_sdmx(f2, structure=dsd2)
237+
238+
204239
# Each entry is a tuple with 2 elements:
205240
# 1. an instance of lxml.etree.Element to be parsed.
206241
# 2. Either:

sdmx/writer/pandas.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ def _rp(obj: model.RangePeriod, **kwargs):
209209

210210

211211
@writer
212-
def write_dataset( # noqa: C901 TODO reduce complexity 12 → ≤11
212+
def write_dataset( # noqa: C901 TODO reduce complexity 12 → ≤10
213213
obj: model.DataSet,
214214
attributes="",
215215
dtype=np.float64,
@@ -377,7 +377,7 @@ def _dataset_compat(df, datetime, kwargs):
377377
return df, datetime, kwargs
378378

379379

380-
def _maybe_convert_datetime(df, arg, obj, dsd=None): # noqa: C901 TODO reduce complexity 23 → ≤11
380+
def _maybe_convert_datetime(df, arg, obj, dsd=None): # noqa: C901 TODO reduce complexity 23 → ≤10
381381
"""Helper for :meth:`.write_dataset` to handle datetime indices.
382382
383383
Parameters

0 commit comments

Comments
 (0)