Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whatsnew.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ Next release
- Python 3.14 (`released 2025-10-07 <https://www.python.org/downloads/release/python-3140/>`_) is fully supported (:pull:`249`).
- Python 3.9 support is dropped, as `it has reached end-of-life <https://peps.python.org/pep-0569/#lifespan>`__ (:pull:`249`).
:mod:`sdmx` requires Python 3.10 or later.
- :class:`.URN` parses letters in the version part of a URN (:issue:`230`, :pull:`252`).
This fixes a bug in v2.16.0–v2.23.1 where creating :class:`.VersionableArtefact`
with both :py:`version=...` and :py:`urn=...` would raise :class:`ValueError`
even if the two were in agreement.
- Fix two regressions in :func:`.to_pandas` introduced in v2.23.0 (:issue:`251`, :pull:`252`).

v2.23.1 (2025-10-01)
====================
Expand Down
25 changes: 15 additions & 10 deletions sdmx/convert/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,15 @@ def to_pandas(obj, **kwargs):

`kwargs` can include any of the attributes of :class:`.PandasConverter`.

.. versionchanged:: 1.0

:func:`.to_pandas` handles all types of objects,
replacing the earlier, separate ``data2pandas`` and ``structure2pd`` writers.

.. versionchanged:: 2.23.0

:func:`.to_pandas` is a thin wrapper for :class:`.PandasConverter`.

Other parameters
----------------
format_options :
Expand All @@ -513,15 +522,6 @@ def to_pandas(obj, **kwargs):
time_format :
if given, the :attr:`.CSVFormatOptions.time_format` attribute of the
`format_options` keyword argument is replaced.

.. versionchanged:: 1.0

:func:`.to_pandas` handles all types of objects,
replacing the earlier, separate ``data2pandas`` and ``structure2pd`` writers.

.. versionchanged:: 2.23.0

:func:`.to_pandas` is a thin wrapper for :class:`.PandasConverter`.
"""
csv.common.kwargs_to_format_options(kwargs, csv.common.CSVFormatOptions)
return PandasConverter(**kwargs).convert(obj)
Expand Down Expand Up @@ -685,6 +685,7 @@ def convert_dataset(c: "PandasConverter", obj: common.BaseDataSet):
Otherwise.
"""
c._context[common.BaseDataSet] = obj
c._context.setdefault(common.BaseDataStructureDefinition, obj.structured_by)
c._columns = ColumnSpec(pc=c, ds=obj)

# - Apply convert_obs() to every obs → iterable of list.
Expand All @@ -697,7 +698,11 @@ def convert_dataset(c: "PandasConverter", obj: common.BaseDataSet):
# - (Possibly) convert certain columns to datetime.
# - (Possibly) reshape.
result = (
pd.DataFrame(map(c._columns.convert_obs, obj.obs))
pd.DataFrame(
map(c._columns.convert_obs, obj.obs)
if obj.obs
else [[None] * len(c._columns.obs)]
)
.dropna(how="all")
.set_axis(c._columns.obs, axis=1) # NB This must come after DataFrame(map(…))
.assign(**c._columns.assign)
Expand Down
10 changes: 5 additions & 5 deletions sdmx/model/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def value(self) -> str | None:
return None


@dataclass
@dataclass(slots=True)
class AnnotableArtefact(Comparable):
#: :class:`Annotations <.Annotation>` of the object.
#:
Expand Down Expand Up @@ -244,7 +244,7 @@ def eval_annotation(self, id: str, globals=None):
return value


@dataclass
@dataclass(slots=True)
class IdentifiableArtefact(AnnotableArtefact):
#: Unique identifier of the object.
id: str = MissingID
Expand Down Expand Up @@ -349,8 +349,8 @@ def __post_init__(self):
super().__post_init__()

if not self.version:
self.version = self._urn.version
elif isinstance(self.version, str) and self.version == "None":
self.version = self._urn.version or None
elif isinstance(self.version, str) and self.version in ("", "None"):
self.version = None
elif self.urn and self.version != self._urn.version:
raise ValueError(
Expand Down Expand Up @@ -1756,7 +1756,7 @@ def __add__(self, other):
if not isinstance(other, Key) and other is not None:
raise NotImplementedError
else:
result.values.update_fast(other.values)
result.values.update_fast(getattr(other, "values", []))
return result

def __radd__(self, other):
Expand Down
2 changes: 2 additions & 0 deletions sdmx/model/v21.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,8 @@ class Observation(common.BaseObservation):
class DataSet(common.BaseDataSet):
"""SDMX 2.1 DataSet."""

structured_by: DataStructureDefinition | None = None

#: Named ``attachedAttribute`` in the IM.
attrib: DictLikeDescriptor[str, common.AttributeValue] = DictLikeDescriptor()

Expand Down
2 changes: 2 additions & 0 deletions sdmx/model/v30.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,8 @@ class Observation(common.BaseObservation):
class DataSet(common.BaseDataSet):
"""SDMX 3.0 Data Set."""

structured_by: DataStructureDefinition | None = None


class StructureSpecificDataSet(DataSet):
"""SDMX 3.0 StructureSpecificDataSet.
Expand Down
43 changes: 28 additions & 15 deletions sdmx/reader/xml/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,28 @@
from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
from importlib import import_module
from itertools import chain, count
from typing import TYPE_CHECKING, Any, ClassVar, cast
from typing import TYPE_CHECKING, Any, ClassVar, TypeVar, cast

from lxml import etree
from lxml.etree import QName

import sdmx.urn
from sdmx import message
from sdmx.exceptions import XMLParseError # noqa: F401
from sdmx.format import Version, list_media_types
from sdmx.format import Version as FormatVersion
from sdmx.format import list_media_types
from sdmx.model import common
from sdmx.model.version import Version
from sdmx.reader.base import BaseReader

if TYPE_CHECKING:
import types

AA = TypeVar("AA", bound=common.AnnotableArtefact)
IA = TypeVar("IA", bound=common.IdentifiableArtefact)
NA = TypeVar("NA", bound=common.NameableArtefact)
MA = TypeVar("MA", bound=common.MaintainableArtefact)

# Sentinel value for a missing Agency
_NO_AGENCY = common.Agency()

Expand Down Expand Up @@ -50,7 +57,9 @@ class BaseReference:
"version",
)

def __init__(self, reader, elem, cls_hint=None):
def __init__(
self, reader: "XMLEventReader", elem, cls_hint: type | None = None
) -> None:
parent_tag = elem.tag

info = self.info_from_element(elem)
Expand Down Expand Up @@ -93,7 +102,7 @@ def __init__(self, reader, elem, cls_hint=None):
@abstractmethod
def info_from_element(cls, elem) -> dict[str, Any]: ...

def __str__(self):
def __str__(self) -> str:
# NB for debugging only
return ( # pragma: no cover
f"{self.cls.__name__}={self.agency.id}:{self.id}({self.version}) → "
Expand All @@ -108,7 +117,7 @@ class XMLEventReader(BaseReader):
suffixes = [".xml"]

#: SDMX-ML version handled by this reader.
xml_version: ClassVar[Version]
xml_version: ClassVar[FormatVersion]

#: Reference to the module defining the format read.
format: ClassVar["types.ModuleType"]
Expand All @@ -129,7 +138,9 @@ def __init_subclass__(cls: type["XMLEventReader"]):
# Empty dictionary
cls.parser = {}

name = {Version["2.1"]: "v21", Version["3.0.0"]: "v30"}[cls.xml_version]
name = {FormatVersion["2.1"]: "v21", FormatVersion["3.0.0"]: "v30"}[
cls.xml_version
]
cls.format = import_module(f"sdmx.format.xml.{name}")
cls.model = import_module(f"sdmx.model.{name}")
cls.media_types = list_media_types(base="xml", version=cls.xml_version)
Expand Down Expand Up @@ -306,7 +317,7 @@ def _dump(self): # pragma: no cover
)
print("\nIgnore:\n", self.ignore)

def push(self, stack_or_obj, obj=None):
def push(self, stack_or_obj, obj=None) -> None:
"""Push an object onto a stack."""
if stack_or_obj is None:
return
Expand Down Expand Up @@ -335,11 +346,11 @@ def push(self, stack_or_obj, obj=None):

self.stack[s][id] = obj

def stash(self, *stacks, name: str = "_stash"):
def stash(self, *stacks, name: str = "_stash") -> None:
"""Temporarily hide all objects in the given `stacks`."""
self.push(name, {s: self.stack.pop(s, dict()) for s in stacks})

def unstash(self, name: str = "_stash"):
def unstash(self, name: str = "_stash") -> None:
"""Restore the objects hidden by the last :meth:`stash` call to their stacks.

Calls to :meth:`.stash` and :meth:`.unstash` should be matched 1-to-1; if the
Expand All @@ -361,7 +372,7 @@ def get_single(
self,
cls_or_name: type | str,
id: str | None = None,
version: str | None = None,
version: str | Version | None = None,
subclass: bool = False,
) -> Any | None:
"""Return a reference to an object while leaving it in its stack.
Expand Down Expand Up @@ -475,7 +486,9 @@ def resolve(self, ref):
return parent.get_hierarchical(ref.target_id)
raise # pragma: no cover

def annotable(self, cls, elem, **kwargs):
AA = TypeVar("AA", bound=common.AnnotableArtefact)

def annotable(self, cls: type["AA"], elem, **kwargs) -> "AA":
"""Create an AnnotableArtefact of `cls` from `elem` and `kwargs`.

Collects all parsed <com:Annotation>.
Expand All @@ -485,12 +498,12 @@ def annotable(self, cls, elem, **kwargs):
kwargs["annotations"].extend(self.pop_all(self.model.Annotation))
return cls(**kwargs)

def identifiable(self, cls, elem, **kwargs):
def identifiable(self, cls: type["IA"], elem, **kwargs) -> "IA":
"""Create an IdentifiableArtefact of `cls` from `elem` and `kwargs`."""
setdefault_attrib(kwargs, elem, "id", "urn", "uri")
return self.annotable(cls, elem, **kwargs)

def nameable(self, cls, elem, **kwargs):
def nameable(self, cls: type["NA"], elem, **kwargs) -> "NA":
"""Create a NameableArtefact of `cls` from `elem` and `kwargs`.

Collects all parsed :class:`.InternationalString` localizations of <com:Name>
Expand All @@ -502,7 +515,7 @@ def nameable(self, cls, elem, **kwargs):
add_localizations(obj.description, self.pop_all("Description"))
return obj

def maintainable(self, cls, elem, **kwargs):
def maintainable(self, cls: type["MA"], elem, **kwargs) -> "MA":
"""Create or retrieve a MaintainableArtefact of `cls` from `elem` and `kwargs`.

Following the SDMX-IM class hierarchy, :meth:`maintainable` calls
Expand Down Expand Up @@ -578,7 +591,7 @@ def maintainable(self, cls, elem, **kwargs):
return obj


def add_localizations(target: common.InternationalString, values: list) -> None:
def add_localizations(target: common.InternationalString, values: Sequence) -> None:
"""Add localized strings from *values* to *target*."""
target.localizations.update({locale: label for locale, label in values})

Expand Down
17 changes: 6 additions & 11 deletions sdmx/reader/xml/v21.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def _item_start(reader, elem):
@possible_reference(unstash=True)
def _item_end(reader: Reader, elem):
cls = reader.class_for_tag(elem.tag)
item = reader.nameable(cls, elem)
item: "common.Item" = reader.nameable(cls, elem)

# Hierarchy is stored in two ways

Expand Down Expand Up @@ -718,7 +718,7 @@ def _cl(reader: Reader, elem):
assert dsd is not None

# Determine the class
cls = reader.class_for_tag(elem.tag)
cls: type[common.ComponentList] = reader.class_for_tag(elem.tag)

args = dict(
# Retrieve the components
Expand Down Expand Up @@ -746,11 +746,8 @@ def _cl(reader: Reader, elem):

cl = reader.identifiable(cls, elem, **args)

try:
# DimensionDescriptor only
if isinstance(cl, common.DimensionDescriptor):
cl.assign_order()
except AttributeError:
pass

# Assign to the DSD eagerly (instead of in _dsd_end()) for reference by next
# ComponentList e.g. so that AttributeRelationship can reference the
Expand Down Expand Up @@ -1040,7 +1037,7 @@ def _ar(reader, elem):
def _structure_start(reader: Reader, elem):
# Get any external reference created earlier, or instantiate a new object
cls = reader.class_for_tag(elem.tag)
obj = reader.maintainable(cls, elem)
obj: "common.Structure" = reader.maintainable(cls, elem)

if obj not in reader.stack[cls]:
# A new object was created
Expand Down Expand Up @@ -1541,9 +1538,7 @@ def _hc_end(reader: Reader, elem):
level = common.Level(id=level_ref.id)

# Create the HierarchicalCode
obj = reader.identifiable(
reader.class_for_tag(elem.tag), elem, code=code, level=level
)
obj = reader.identifiable(common.HierarchicalCode, elem, code=code, level=level)

# Count children represented as XML sub-elements of the parent
n_child = sum(e.tag == elem.tag for e in elem)
Expand Down Expand Up @@ -1573,7 +1568,7 @@ def _h_start(reader: Reader, elem):

@end("str:Hierarchy", only=False)
def _h_end(reader: Reader, elem):
result = reader.nameable(
result: "v21.Hierarchy" = reader.nameable(
reader.class_for_tag(elem.tag),
elem,
has_formal_levels=eval(elem.attrib.get("leveled", "false").title()),
Expand Down
12 changes: 12 additions & 0 deletions sdmx/tests/convert/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,18 @@ def expected(df, axis=0, cls=pd.DatetimeIndex):
sdmx.to_pandas(ds, datetime=43)


def test_dataset_empty() -> None:
    """A data set that holds no observations converts without raising.

    Regression test for https://github.com/khaeru/sdmx/issues/251.
    """
    # Minimal structure: a DSD with exactly one dimension, "DIM_0".
    structure = v21.DataStructureDefinition()
    structure.dimensions.getdefault(id="DIM_0")

    # The data set refers to the structure but carries zero observations;
    # the conversion only needs to complete without an exception.
    sdmx.to_pandas(v21.DataSet(structured_by=structure))


def test_list_of_obs(specimen) -> None:
"""Bare list of observations can be written."""
with specimen("ng-ts.xml") as f:
Expand Down
Loading