Skip to content

get_root_link/get_child_links/get_item_links: Ensure correct media types #1497

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

- Top-level `item_assets` dict on `Collection`s ([#1476](https://github.com/stac-utils/pystac/pull/1476))
- Render Extension ([#1465](https://github.com/stac-utils/pystac/pull/1465))
- Filter links by a list of media types

### Changed

Expand All @@ -16,6 +17,7 @@
- Update Projection Extension to version 2 - proj:epsg -> proj:code ([#1287](https://github.com/stac-utils/pystac/pull/1287))
- Update migrate code to handle license changes in STAC spec 1.1.0 ([#1491](https://github.com/stac-utils/pystac/pull/1491))
- Allow links to have `file://` prefix - but don't write them that way by default ([#1489](https://github.com/stac-utils/pystac/pull/1489))
- For `get_root_link`, `get_child_links`, `get_item_links`: Ensure json media types ([#1497](https://github.com/stac-utils/pystac/pull/1497))
- Raise `STACError` with message when a link is expected to resolve to a STAC object but doesn't ([#1500](https://github.com/stac-utils/pystac/pull/1500))
- Raise an error on APILayoutStrategy when root_href is non-url ([#1498](https://github.com/stac-utils/pystac/pull/1498))

Expand Down
10 changes: 8 additions & 2 deletions pystac/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)

import pystac
import pystac.media_type
from pystac.cache import ResolvedObjectCache
from pystac.errors import STACError, STACTypeError
from pystac.layout import (
Expand Down Expand Up @@ -466,7 +467,10 @@ def get_child_links(self) -> list[Link]:
Return:
List[Link]: List of links of this catalog with ``rel == 'child'``
"""
return self.get_links(pystac.RelType.CHILD)
return self.get_links(
rel=pystac.RelType.CHILD,
media_type=pystac.media_type.STAC_JSON,
)

def clear_children(self) -> None:
"""Removes all children from this catalog.
Expand Down Expand Up @@ -626,7 +630,9 @@ def get_item_links(self) -> list[Link]:
Return:
List[Link]: List of links of this catalog with ``rel == 'item'``
"""
return self.get_links(pystac.RelType.ITEM)
return self.get_links(
rel=pystac.RelType.ITEM, media_type=pystac.media_type.STAC_JSON
)

def to_dict(
self, include_self_link: bool = True, transform_hrefs: bool = True
Expand Down
4 changes: 4 additions & 0 deletions pystac/media_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@ class MediaType(StringEnum):
PDF = "application/pdf"
ZARR = "application/vnd+zarr" # https://github.com/openMetadataInitiative/openMINDS_core/blob/v4/instances/data/contentTypes/zarr.jsonld
NETCDF = "application/netcdf" # https://github.com/Unidata/netcdf/issues/42#issuecomment-1007618822


#: Media types that can be resolved as STAC Objects.
#: ``None`` is listed so that a link whose ``media_type`` is ``None`` also
#: passes a ``link.media_type in STAC_JSON`` membership check.
STAC_JSON = [None, MediaType.GEOJSON, MediaType.JSON]
43 changes: 24 additions & 19 deletions pystac/stac_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,7 @@
from abc import ABC, abstractmethod
from collections.abc import Callable, Iterable
from html import escape
from typing import (
TYPE_CHECKING,
Any,
TypeVar,
cast,
)
from typing import TYPE_CHECKING, Any, TypeAlias, TypeVar, cast

import pystac
from pystac import STACError
Expand All @@ -27,6 +22,8 @@

S = TypeVar("S", bound="STACObject")

OptionalMediaType: TypeAlias = str | pystac.MediaType | None


class STACObjectType(StringEnum):
CATALOG = "Catalog"
Expand Down Expand Up @@ -177,7 +174,7 @@ def traverse(obj: str | STACObject, visited: set[str | STACObject]) -> bool:
def get_single_link(
self,
rel: str | pystac.RelType | None = None,
media_type: str | pystac.MediaType | None = None,
media_type: OptionalMediaType | Iterable[OptionalMediaType] = None,
) -> Link | None:
"""Get a single :class:`~pystac.Link` instance associated with this
object.
Expand All @@ -186,7 +183,8 @@ def get_single_link(
rel : If set, filter links such that only those
matching this relationship are returned.
media_type: If set, filter the links such that only
those matching media_type are returned
those matching media_type are returned. media_type can
be a single value or a list of values.

Returns:
:class:`~pystac.Link` | None: First link that matches ``rel``
Expand All @@ -195,28 +193,31 @@ def get_single_link(
"""
if rel is None and media_type is None:
return next(iter(self.links), None)
if media_type and isinstance(media_type, (str, pystac.MediaType)):
media_type = [media_type]
return next(
(
link
for link in self.links
if (rel is None or link.rel == rel)
and (media_type is None or link.media_type == media_type)
and (media_type is None or link.media_type in media_type)
),
None,
)

def get_links(
self,
rel: str | pystac.RelType | None = None,
media_type: str | pystac.MediaType | None = None,
media_type: OptionalMediaType | Iterable[OptionalMediaType] = None,
) -> list[Link]:
"""Gets the :class:`~pystac.Link` instances associated with this object.

Args:
rel : If set, filter links such that only those
matching this relationship are returned.
media_type: If set, filter the links such that only
those matching media_type are returned
those matching media_type are returned. media_type can
be a single value or a list of values.

Returns:
List[:class:`~pystac.Link`]: A list of links that match ``rel`` and/
Expand All @@ -225,13 +226,14 @@ def get_links(
"""
if rel is None and media_type is None:
return self.links
else:
return [
link
for link in self.links
if (rel is None or link.rel == rel)
and (media_type is None or link.media_type == media_type)
]
if media_type and isinstance(media_type, (str, pystac.MediaType)):
media_type = [media_type]
return [
link
for link in self.links
if (rel is None or link.rel == rel)
and (media_type is None or link.media_type in media_type)
]

def clear_links(self, rel: str | pystac.RelType | None = None) -> None:
"""Clears all :class:`~pystac.Link` instances associated with this object.
Expand All @@ -252,7 +254,10 @@ def get_root_link(self) -> Link | None:
:class:`~pystac.Link` or None: The root link for this object,
or ``None`` if no root link is set.
"""
return self.get_single_link(pystac.RelType.ROOT)
return self.get_single_link(
rel=pystac.RelType.ROOT,
media_type=pystac.media_type.STAC_JSON,
)

@property
def self_href(self) -> str:
Expand Down
48 changes: 48 additions & 0 deletions tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -1409,6 +1409,10 @@ def test_get_links(self) -> None:
len(catalog.get_links(rel="search", media_type="application/geo+json")) == 1
)
assert len(catalog.get_links(media_type="text/html")) == 1
assert (
len(catalog.get_links(media_type=["text/html", "application/geo+json"]))
== 2
)
assert len(catalog.get_links(rel="search")) == 2
assert len(catalog.get_links(rel="via")) == 0
assert len(catalog.get_links()) == 6
Expand Down Expand Up @@ -1982,3 +1986,47 @@ def test_APILayoutStrategy_requires_root_to_be_url(
match="When using APILayoutStrategy the root_href must be a URL",
):
catalog.normalize_hrefs(root_href="issues-1486", strategy=APILayoutStrategy())


def test_get_child_links_cares_about_media_type(catalog: pystac.Catalog) -> None:
    """Check that ``get_child_links`` leaves out a child link typed as HTML."""
    json_child = pystac.Link(
        rel="child", target="./child-1.json", media_type="application/json"
    )
    geojson_child = pystac.Link(
        rel="child", target="./child-2.json", media_type="application/geo+json"
    )
    untyped_child = pystac.Link(rel="child", target="./child-3.json")
    # Wrong media_type, so this link is expected to be left out of the count.
    html_child = pystac.Link(
        rel="child", target="./child.html", media_type="text/html"
    )

    catalog.links.extend([json_child, geojson_child, untyped_child, html_child])

    child_links = catalog.get_child_links()
    assert len(child_links) == 3


def test_get_item_links_cares_about_media_type(catalog: pystac.Catalog) -> None:
    """Check that ``get_item_links`` leaves out an item link typed as HTML."""
    json_item = pystac.Link(
        rel="item", target="./item-1.json", media_type="application/json"
    )
    geojson_item = pystac.Link(
        rel="item", target="./item-2.json", media_type="application/geo+json"
    )
    untyped_item = pystac.Link(rel="item", target="./item-3.json")
    # Wrong media_type, so this link is expected to be left out of the count.
    html_item = pystac.Link(
        rel="item", target="./item.html", media_type="text/html"
    )

    catalog.links.extend([json_item, geojson_item, untyped_item, html_item])

    item_links = catalog.get_item_links()
    assert len(item_links) == 3


def test_get_root_link_cares_about_media_type(catalog: pystac.Catalog) -> None:
    """Check that ``get_root_link`` does not return a root link typed as HTML."""
    # Put the HTML-typed root link first so a lookup that ignored media type
    # would return it ahead of any other root link.
    html_root = pystac.Link(rel="root", target="./self.json", media_type="text/html")
    catalog.links.insert(0, html_root)

    found = catalog.get_root_link()
    assert found
    assert found.target != "./self.json"
Loading