diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ac11a08..29a56265 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - uses: actions/checkout@v4 - uses: Swatinem/rust-cache@v2 - name: Test - run: cargo test -p stac --all-features + run: cargo test -p stac -p stac-types --all-features check-features-core: name: Check stac features runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index 4c17d35b..f0cf7b99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,6 +62,7 @@ object_store = "0.11.0" openssl = { version = "0.10.68", features = ["vendored"] } openssl-src = "=300.3.1" # joinked from https://github.com/iopsystems/rpc-perf/commit/705b290d2105af6f33150da04b217422c6d68701#diff-2e9d962a08321605940b5a657135052fbcef87b5e360662bb527c96d9a615542R41 to cross-compile Python parquet = { version = "52.2", default-features = false } +path-slash = "0.2.1" pgstac = { version = "0.2.1", path = "crates/pgstac" } pyo3 = "0.22.3" pythonize = "0.22.0" diff --git a/crates/cli/src/subcommand/serve.rs b/crates/cli/src/subcommand/serve.rs index 55d0160c..0644e825 100644 --- a/crates/cli/src/subcommand/serve.rs +++ b/crates/cli/src/subcommand/serve.rs @@ -93,7 +93,7 @@ impl Run for Args { } Value::Collection(mut collection) => { if self.load_collection_items { - collection.make_relative_links_absolute()?; + collection.make_links_absolute()?; for link in collection.iter_item_links() { let href = link.href.to_string(); let input = input.with_href(href); diff --git a/crates/core/CHANGELOG.md b/crates/core/CHANGELOG.md index 4dbd1053..d7c066f7 100644 --- a/crates/core/CHANGELOG.md +++ b/crates/core/CHANGELOG.md @@ -9,6 +9,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Added - `version` ([#476](https://github.com/stac-utils/stac-rs/pull/476)) +- `Node` and friends ([#504](https://github.com/stac-utils/stac-rs/pull/504)) + +### Changed + +- `make_links_absolute` instead of
`make_relative_links_absolute`, `make_links_relative` instead of `make_absolute_links_relative` ([#504](https://github.com/stac-utils/stac-rs/pull/504)) - Permissive deserialization ([#505](https://github.com/stac-utils/stac-rs/pull/505)) ### Removed diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index d8f351d2..0bd79c2a 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -63,6 +63,7 @@ jsonschema = { workspace = true, optional = true } log.workspace = true object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true } +path-slash.workspace = true reqwest = { workspace = true, features = ["json", "blocking"], optional = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = ["preserve_order"] } diff --git a/crates/core/src/item.rs b/crates/core/src/item.rs index 4035a69b..c61dd8b8 100644 --- a/crates/core/src/item.rs +++ b/crates/core/src/item.rs @@ -3,10 +3,11 @@ use crate::{Asset, Assets, Bbox, Error, Fields, Link, Result, Version, STAC_VERSION}; use chrono::{DateTime, FixedOffset, Utc}; use geojson::{feature::Id, Feature, Geometry}; +use path_slash::PathBufExt; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; use stac_derive::{Href, Links, Migrate}; -use std::{collections::HashMap, path::Path}; +use std::{collections::HashMap, path::PathBuf}; use url::Url; const TOP_LEVEL_ATTRIBUTES: [&str; 8] = [ @@ -285,7 +286,7 @@ impl Builder { let mut item = Item::new(self.id); for (key, mut asset) in self.assets { if Url::parse(&asset.href).is_err() && self.canonicalize_paths { - asset.href = Path::new(&asset.href) + asset.href = PathBuf::from_slash(&asset.href) .canonicalize()? 
.to_string_lossy() .into_owned(); diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index 65f7e2a6..cae2fb0b 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -168,6 +168,7 @@ mod item_asset; mod item_collection; mod json; mod ndjson; +mod node; mod statistics; #[cfg(feature = "validate")] mod validate; @@ -196,6 +197,7 @@ pub use { item_collection::ItemCollection, json::{FromJson, ToJson}, ndjson::{FromNdjson, ToNdjson}, + node::Node, statistics::Statistics, value::Value, }; diff --git a/crates/core/src/node.rs b/crates/core/src/node.rs new file mode 100644 index 00000000..de71bc15 --- /dev/null +++ b/crates/core/src/node.rs @@ -0,0 +1,240 @@ +use crate::{Catalog, Collection, Error, Href, Item, Link, Links, Result, Value}; +use std::collections::VecDeque; + +/// A node in a STAC tree. +#[derive(Debug)] +pub struct Node { + /// The value of the node. + pub value: Container, + + /// The child nodes. + pub children: VecDeque, + + /// The node's items. + pub items: VecDeque, +} + +/// A STAC container, i.e. a [Catalog] or a [Collection]. +#[derive(Debug)] +pub enum Container { + /// A [Collection]. + Collection(Collection), + + /// A [Catalog]. + Catalog(Catalog), +} + +/// An iterator over a node and all of its descendants. +#[derive(Debug)] +pub struct IntoValues { + node: Option, + children: VecDeque, + items: VecDeque, +} + +impl Node { + /// Resolves all child and item links in this node. 
+ /// + /// # Examples + /// + /// ``` + /// use stac::{Catalog, Node}; + /// + /// let mut node: Node = stac::read::("examples/catalog.json").unwrap().into(); + /// node.resolve().unwrap(); + /// ``` + pub fn resolve(&mut self) -> Result<()> { + let links = std::mem::take(self.value.links_mut()); + let href = self.value.href().map(String::from); + for mut link in links { + if link.is_child() { + link.make_absolute(href.as_deref())?; + // TODO enable object store + tracing::debug!("resolving child: {}", link.href); + let child: Container = crate::read::(link.href)?.try_into()?; + self.children.push_back(child.into()); + } else if link.is_item() { + link.make_absolute(href.as_deref())?; + tracing::debug!("resolving item: {}", link.href); + let item = crate::read::(link.href)?; + self.items.push_back(item); + } else { + self.value.links_mut().push(link); + } + } + Ok(()) + } + + /// Creates a consuming iterator over this node and its children and items. + /// + /// This iterator will visit all children (catalogs and collections) first, + /// then visit all the items. 
+ /// + /// # Examples + /// + /// ``` + /// use stac::{Node, Catalog}; + /// + /// let mut node: Node = Catalog::new("an-id", "a description").into(); + /// node.children + /// .push_back(Catalog::new("child", "child catalog").into()); + /// let values: Vec<_> = node.into_values().collect::>().unwrap(); + /// assert_eq!(values.len(), 2); + /// ``` + pub fn into_values(self) -> IntoValues { + IntoValues { + node: Some(self), + children: VecDeque::new(), + items: VecDeque::new(), + } + } +} + +impl Iterator for IntoValues { + type Item = Result; + + fn next(&mut self) -> Option { + if let Some(mut node) = self.node.take() { + self.children.append(&mut node.children); + self.items.append(&mut node.items); + Some(Ok(node.value.into())) + } else if let Some(child) = self.children.pop_front() { + self.node = Some(child); + self.next() + } else { + self.items.pop_front().map(|item| Ok(item.into())) + } + } +} + +impl From for Node { + fn from(value: Catalog) -> Self { + Container::from(value).into() + } +} + +impl From for Container { + fn from(value: Catalog) -> Self { + Container::Catalog(value) + } +} + +impl From for Node { + fn from(value: Collection) -> Self { + Container::from(value).into() + } +} + +impl From for Container { + fn from(value: Collection) -> Self { + Container::Collection(value) + } +} + +impl From for Node { + fn from(value: Container) -> Self { + Node { + value, + children: VecDeque::new(), + items: VecDeque::new(), + } + } +} + +impl TryFrom for Container { + type Error = Error; + + fn try_from(value: Value) -> std::result::Result { + match value { + Value::Catalog(c) => Ok(c.into()), + Value::Collection(c) => Ok(c.into()), + _ => Err(stac_types::Error::IncorrectType { + actual: value.type_name().to_string(), + expected: "Catalog or Collection".to_string(), + } + .into()), + } + } +} + +impl From for Value { + fn from(value: Container) -> Self { + match value { + Container::Catalog(c) => Value::Catalog(c), + Container::Collection(c) => 
Value::Collection(c), + } + } +} + +impl Links for Container { + fn links(&self) -> &[Link] { + match self { + Container::Catalog(c) => c.links(), + Container::Collection(c) => c.links(), + } + } + + fn links_mut(&mut self) -> &mut Vec { + match self { + Container::Catalog(c) => c.links_mut(), + Container::Collection(c) => c.links_mut(), + } + } +} + +impl Href for Container { + fn href(&self) -> Option<&str> { + match self { + Container::Catalog(c) => c.href(), + Container::Collection(c) => c.href(), + } + } + + fn set_href(&mut self, href: impl ToString) { + match self { + Container::Catalog(c) => c.set_href(href), + Container::Collection(c) => c.set_href(href), + } + } + + fn clear_href(&mut self) { + match self { + Container::Catalog(c) => c.clear_href(), + Container::Collection(c) => c.clear_href(), + } + } +} + +#[cfg(test)] +mod tests { + use super::Node; + use crate::{Catalog, Collection, Links}; + + #[test] + fn into_node() { + let _ = Node::from(Catalog::new("an-id", "a description")); + let _ = Node::from(Collection::new("an-id", "a description")); + } + + #[test] + fn resolve() { + let mut node: Node = crate::read::("examples/catalog.json") + .unwrap() + .into(); + node.resolve().unwrap(); + assert_eq!(node.children.len(), 3); + assert_eq!(node.items.len(), 1); + assert_eq!(node.value.links().len(), 2); + } + + #[test] + fn into_values() { + let mut node: Node = Catalog::new("an-id", "a description").into(); + node.children + .push_back(Catalog::new("child", "child catalog").into()); + let mut iter = node.into_values(); + let _root = iter.next().unwrap().unwrap(); + let _child = iter.next().unwrap().unwrap(); + assert!(iter.next().is_none()); + } +} diff --git a/crates/types/Cargo.toml b/crates/types/Cargo.toml index df0c1ed7..3cda5689 100644 --- a/crates/types/Cargo.toml +++ b/crates/types/Cargo.toml @@ -11,6 +11,7 @@ rust-version.workspace = true [dependencies] mime.workspace = true +path-slash.workspace = true serde = { workspace = true, features = 
["derive"] } serde_json.workspace = true thiserror.workspace = true diff --git a/crates/types/src/link.rs b/crates/types/src/link.rs index 95438ac5..4f749690 100644 --- a/crates/types/src/link.rs +++ b/crates/types/src/link.rs @@ -2,8 +2,10 @@ use crate::{mime::APPLICATION_GEOJSON, Error, Href, Result}; use mime::APPLICATION_JSON; +use path_slash::PathBufExt; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; +use std::path::PathBuf; use url::Url; /// Child links. @@ -226,19 +228,15 @@ pub trait Links: Href { /// /// let mut catalog: stac::Catalog = stac::read("examples/catalog.json").unwrap(); /// assert!(!catalog.root_link().unwrap().is_absolute()); - /// catalog.make_relative_links_absolute().unwrap(); + /// catalog.make_links_absolute().unwrap(); /// assert!(catalog.root_link().unwrap().is_absolute()); /// ``` - fn make_relative_links_absolute(&mut self) -> Result<()> { - if let Some(href) = self.href() { - let href = make_absolute(href.to_string(), None)?; - for link in self.links_mut() { - link.href = make_absolute(std::mem::take(&mut link.href), Some(&href))?; - } - Ok(()) - } else { - Err(Error::NoHref) + fn make_links_absolute(&mut self) -> Result<()> { + let href = self.href().map(String::from); + for link in self.links_mut() { + link.make_absolute(href.as_deref())?; } + Ok(()) } /// Makes all absolute links relative with respect to an href. 
@@ -252,12 +250,12 @@ pub trait Links: Href { /// /// let mut catalog: stac::Catalog = stac::read("examples/catalog.json").unwrap(); /// assert!(!catalog.root_link().unwrap().is_absolute()); - /// catalog.make_relative_links_absolute().unwrap(); + /// catalog.make_links_absolute().unwrap(); /// assert!(catalog.root_link().unwrap().is_absolute()); - /// catalog.make_absolute_links_relative("examples/catalog.json").unwrap(); + /// catalog.make_links_relative("examples/catalog.json").unwrap(); /// assert!(catalog.root_link().unwrap().is_relative()); /// ``` - fn make_absolute_links_relative(&mut self, href: impl ToString) -> Result<()> { + fn make_links_relative(&mut self, href: impl ToString) -> Result<()> { let href = make_absolute(href.to_string(), None)?; for link in self.links_mut() { let absolute_link_href = make_absolute(std::mem::take(&mut link.href), Some(&href))?; @@ -688,14 +686,39 @@ impl Link { }), } } + + /// Makes this link absolute. + /// + /// If the href is relative, use the passed in value as a base. 
+ /// + /// # Examples + /// + /// ``` + /// use stac::Link; + /// + /// let mut link = Link::new("./b/item.json", "rel"); + /// link.make_absolute("/a/base/catalog.json").unwrap(); + /// assert_eq!(link.href, "/a/base/b/item.json") + /// ``` + #[allow(single_use_lifetimes)] + pub fn make_absolute<'a>(&mut self, base: impl Into>) -> Result<()> { + if let Some(base) = base.into() { + let base = make_absolute(base, None)?; + self.href = make_absolute(std::mem::take(&mut self.href), Some(&base))?; + } else { + self.href = make_absolute(std::mem::take(&mut self.href), None)?; + } + Ok(()) + } } fn is_absolute(href: &str) -> bool { - Url::parse(href).is_ok() || href.starts_with('/') + href.starts_with('/') || Url::parse(href).is_ok() } -fn make_absolute(href: String, base: Option<&str>) -> Result { +fn make_absolute(href: impl ToString, base: Option<&str>) -> Result { // TODO if we make this interface public, make this an impl Option + let href = href.to_string(); if is_absolute(&href) { Ok(href) } else if let Some(base) = base { @@ -712,8 +735,8 @@ fn make_absolute(href: String, base: Option<&str>) -> Result { } } } else { - std::fs::canonicalize(href) - .map(|p| p.to_string_lossy().into_owned()) + std::fs::canonicalize(PathBuf::from_slash(href)) + .map(|p| p.to_slash_lossy().into_owned()) .map_err(Error::from) } } @@ -820,6 +843,17 @@ mod tests { assert!(value.get("title").is_none()); } + #[test] + fn absolute() { + let mut link = Link::new("examples/simple-item.json", "rel"); + link.make_absolute(None).unwrap(); + assert!( + link.href.ends_with("examples/simple-item.json"), + "the absolute href should keep its relative tail, got: {}", + link.href + ); + } + mod links { use stac::{Catalog, Href, Item, Link, Links}; @@ -850,7 +884,7 @@ mod tests { #[test] fn make_relative_links_absolute_path() { let mut catalog: Catalog = stac::read("examples/catalog.json").unwrap(); - catalog.make_relative_links_absolute().unwrap(); + catalog.make_links_absolute().unwrap(); for link in catalog.links() {
assert!(link.is_absolute()); } @@ -860,7 +894,7 @@ mod tests { fn make_relative_links_absolute_url() { let mut catalog: Catalog = stac::read("examples/catalog.json").unwrap(); catalog.set_href("http://stac-rs.test/catalog.json"); - catalog.make_relative_links_absolute().unwrap(); + catalog.make_links_absolute().unwrap(); for link in catalog.links() { assert!(link.is_absolute()); } @@ -873,8 +907,8 @@ mod tests { #[test] fn make_absolute_links_relative_path() { let mut catalog: Catalog = stac::read("examples/catalog.json").unwrap(); - catalog.make_relative_links_absolute().unwrap(); - catalog.make_absolute_links_relative("examples/").unwrap(); + catalog.make_links_absolute().unwrap(); + catalog.make_links_relative("examples/").unwrap(); for link in catalog.links() { if !link.is_self() { assert!(link.is_relative(), "{}", link.href); @@ -886,10 +920,8 @@ mod tests { fn make_absolute_links_relative_url() { let mut catalog: Catalog = stac::read("examples/catalog.json").unwrap(); catalog.set_href("http://stac-rs.test/catalog.json"); - catalog.make_relative_links_absolute().unwrap(); - catalog - .make_absolute_links_relative("http://stac-rs.test/") - .unwrap(); + catalog.make_links_absolute().unwrap(); + catalog.make_links_relative("http://stac-rs.test/").unwrap(); assert_eq!(catalog.root_link().unwrap().href, "./catalog.json"); }