Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preliminary rdflib resource for ISMI CIDOC-CRM time spans #117

Draft
wants to merge 13 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python: ["3.9", "3.10", "3.11", "3.12", "3.13"]
python: ["3.10", "3.11", "3.12", "3.13"]
defaults:
run:
working-directory: .
Expand Down
5 changes: 2 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ name = "undate"
description = "library for working with uncertain, fuzzy, or partially unknown dates and date intervals"
readme = "README.md"
license = { text = "Apache-2" }
requires-python = ">= 3.9"
requires-python = ">= 3.10"
dynamic = ["version"]
dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"]
dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'", "rdflib"]
authors = [
{ name = "Rebecca Sutton Koeser" },
{ name = "Cole Crawford" },
Expand All @@ -31,7 +31,6 @@ keywords = [
classifiers = [
"Development Status :: 2 - Pre-Alpha",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
Expand Down
73 changes: 73 additions & 0 deletions src/undate/converters/cidoc_crm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import rdflib

from undate import Undate

#: CIDOC-CRM namespace
CIDOC_CRM = rdflib.Namespace("http://www.cidoc-crm.org/cidoc-crm/")
#: ISMI namespace for date types (e.g. ``day``, used for day-precision time spans)
ISMI_DATE_TYPE = rdflib.Namespace(
    "http://content.mpiwg-berlin.mpg.de/ns/ismi/type/date/"
)
#: ISMI namespace for calendar types (e.g. ``islamic``)
ISMI_CALENDAR_TYPE = rdflib.Namespace(
    "http://content.mpiwg-berlin.mpg.de/ns/ismi/type/calendar/"
)


class TimeSpan(rdflib.resource.Resource):
@property
def identified_by(self):
    """The node linked to this time span by CIDOC-CRM ``P1_is_identified_by``.

    The rdflib resource ``value`` method returns another Resource by
    default, so the result can be queried further.
    """
    identifier = self.value(CIDOC_CRM.P1_is_identified_by)
    return identifier

@property
def label(self):
# for ISMI records, label is under the crm identifier/appellation
# other examples have it directly under the time span as RDFS.label
Copy link
Contributor

@robcast robcast Mar 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is that the Time-Span is the processable Gregorian xsd:date, while the appellation represents the date in its original calendar; therefore the appellation has the calendar type, and its label has the date rendered in e.g. Hijri (the Time-Span label would rather have the date rendered in Gregorian).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for clarifying, I sort of got what you were doing here but not fully.

How would this ideally map to an undate? Would it be two different undate objects? (they should sort the same). Or for this case would you ignore/bypass calendar conversion?

return self.identified_by.value(rdflib.RDFS.label)

@property
def calendar(self):
    """Identifier (URI) of the calendar type for this time span, or ``None``.

    For ISMI records the calendar type is attached to the identifier
    (appellation) node rather than to the time span itself. Returns
    ``None`` when the time span has no identifier node, or when that node
    has no ``P2_has_type`` value, instead of failing with an
    :class:`AttributeError`.
    """
    # for ISMI records, calendar type is associated with identifier
    identifier = self.identified_by
    if identifier is None:
        return None
    calendar_type = identifier.value(CIDOC_CRM.P2_has_type)
    if calendar_type is None:
        return None
    return calendar_type.identifier

@property
def type(self):
    """Identifier (URI) of the CIDOC-CRM ``P2_has_type`` value, or ``None``.

    Returns ``None`` when the time span has no type, so that callers
    (e.g. a ``match`` on the type) can treat an untyped time span as
    unsupported rather than crashing with an :class:`AttributeError`.
    """
    # CIDOC-CRM type
    type_resource = self.value(CIDOC_CRM.P2_has_type)
    if type_resource is None:
        return None
    return type_resource.identifier

@property
def at_some_time_within(self):
    """Value of CIDOC-CRM ``P82_at_some_time_within`` for this time span."""
    within = self.value(CIDOC_CRM.P82_at_some_time_within)
    return within

@property
def begin_of_the_begin(self):
    """Value of CIDOC-CRM ``P82a_begin_of_the_begin`` for this time span."""
    begin = self.value(CIDOC_CRM.P82a_begin_of_the_begin)
    return begin

@property
def end_of_the_end(self):
    """Value of CIDOC-CRM ``P82b_end_of_the_end`` for this time span."""
    end = self.value(CIDOC_CRM.P82b_end_of_the_end)
    return end

@property
def note(self):
    """Value of CIDOC-CRM ``P3_has_note`` for this time span."""
    note_value = self.value(CIDOC_CRM.P3_has_note)
    return note_value

def to_undate(self):
# convert to an undate object, if possible
match self.type:
# day precision
case ISMI_DATE_TYPE.day:
# at_some_time_within is xsd:date; use toPython method
# to convert to datetime.date and then convert to undate
return Undate.to_undate(self.at_some_time_within.toPython())
# TODO: should we set label before returning?

# for ISMI dates, could we parse the label and preserve calendar information?

@classmethod
def time_spans_from_graph(cls, graph):
    """Yield a :class:`TimeSpan` resource for every entity in the rdflib
    graph whose RDF type is CIDOC-CRM E52 Time-Span."""
    time_span_class = CIDOC_CRM["E52_Time-Span"]
    yield from (
        cls(graph, subject)
        for subject in graph.subjects(
            predicate=rdflib.RDF.type, object=time_span_class
        )
    )
95 changes: 77 additions & 18 deletions src/undate/interval.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import datetime

# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
from typing import Optional, Union

Expand All @@ -25,31 +23,30 @@ class UndateInterval:
latest: Union[Undate, None]
label: Union[str, None]

# TODO: let's think about adding an optional precision / length /size field
# using DatePrecision
# TODO: think about adding an optional precision / length /size field
# using DatePrecision for intervals of any standard duration (decade, century)

def __init__(
self,
earliest: Optional[Undate] = None,
latest: Optional[Undate] = None,
label: Optional[str] = None,
):
# for now, assume takes two undate objects;
# support conversion from datetime
if earliest and not isinstance(earliest, Undate):
# NOTE: some overlap with Undate._comparison_type method
# maybe support conversion from other formats later
if isinstance(earliest, datetime.date):
earliest = Undate.from_datetime_date(earliest)
else:
# takes two undate objects; allows conversion from supported types
if earliest:
try:
earliest = Undate.to_undate(earliest)
except TypeError as err:
raise ValueError(
f"earliest date {earliest} cannot be converted to Undate"
)
if latest and not isinstance(latest, Undate):
if isinstance(latest, datetime.date):
latest = Undate.from_datetime_date(latest)
else:
raise ValueError(f"latest date {latest} cannot be converted to Undate")
) from err
if latest:
try:
latest = Undate.to_undate(latest)
except TypeError as err:
raise ValueError(
f"latest date {latest} cannot be converted to Undate"
) from err

# check that the interval is valid
if latest and earliest and latest <= earliest:
Expand Down Expand Up @@ -78,6 +75,9 @@ def __repr__(self) -> str:
return "<UndateInterval %s>" % self

def __eq__(self, other) -> bool:
    """Two intervals are equal when both their earliest and latest dates
    are equal. Comparison with any other type is not supported."""
    if isinstance(other, UndateInterval):
        return self.earliest == other.earliest and self.latest == other.latest
    # let Python try the reflected comparison / fall back to identity
    return NotImplemented

Expand Down Expand Up @@ -122,3 +122,62 @@ def duration(self) -> Timedelta:
# is there any meaningful way to calculate duration
# if one year is known and the other is not?
raise NotImplementedError

def __contains__(self, other: object) -> bool:
    """Check whether another interval or date falls within this interval.

    ``other`` may be an :class:`UndateInterval` or anything that
    :meth:`Undate.to_undate` can convert; a :class:`TypeError` from that
    conversion propagates to the caller. An interval is never considered
    to contain an interval equal to itself.
    """
    if isinstance(other, UndateInterval):
        # strictly equal intervals do not contain each other
        if self == other:
            return False
        lower, upper = other.earliest, other.latest
    else:
        # a single date is treated as both bounds; unsupported types
        # raise TypeError from the conversion
        lower = upper = Undate.to_undate(other)

    # an unset bound on this interval means it is open on that side and
    # needs no check; otherwise the other value must be set and in range
    if self.earliest is not None and (
        lower is None or not (lower >= self.earliest)
    ):
        return False
    return self.latest is None or (upper is not None and upper <= self.latest)

def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]:
    """Return a new :class:`UndateInterval` for the overlap between this
    interval and another one, or None if they do not overlap.
    """
    try:
        # lower bound: the later of the two starts when both are set,
        # otherwise whichever one is set (or None)
        if self.earliest and other.earliest:
            start = max(self.earliest, other.earliest)
        else:
            start = self.earliest or other.earliest

        # upper bound: the earlier of the two ends when both are set,
        # otherwise whichever one is set (or None)
        if self.latest and other.latest:
            end = min(self.latest, other.latest)
        else:
            end = self.latest or other.latest

        # initialization raises ValueError for an invalid interval,
        # which means there is no overlap
        return UndateInterval(start, end)
    except ValueError:
        return None
49 changes: 33 additions & 16 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

import datetime
from enum import auto

import re
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from undate.interval import UndateInterval

try:
# StrEnum was only added in python 3.11
from enum import StrEnum
Expand Down Expand Up @@ -72,6 +74,10 @@ def __init__(
label: Optional[str] = None,
calendar: Optional[Union[str, Calendar]] = None,
):
# everything is optional but something is required
if all([val is None for val in [year, month, day]]):
raise ValueError("At least one of year, month, or day must be specified")

# keep track of initial values and which values are known
# TODO: add validation: if str, must be expected length
self.initial_values: Dict[str, Optional[Union[int, str]]] = {
Expand Down Expand Up @@ -242,23 +248,19 @@ def format(self, format) -> str:

raise ValueError(f"Unsupported format '{format}'")

def _comparison_type(self, other: object) -> "Undate":
@classmethod
def _comparison_type(cls, other: object) -> "Undate":
"""Common logic for type handling in comparison methods.
Converts to Undate object if possible, otherwise raises
NotImplemented error. Currently only supports conversion
from :class:`datetime.date`
NotImplementedError exception. Uses :meth:`to_undate` for conversion.
"""

# support datetime.date by converting to undate
if isinstance(other, datetime.date):
other = Undate.from_datetime_date(other)

# recommended to support comparison with arbitrary objects
if not isinstance(other, Undate):
# convert if possible; return NotImplemented if not
try:
return cls.to_undate(other)
except TypeError:
# recommended to support comparison with arbitrary objects
return NotImplemented

return other

def __eq__(self, other: object) -> bool:
# Note: assumes label differences don't matter for comparing dates

Expand All @@ -268,6 +270,8 @@ def __eq__(self, other: object) -> bool:

other = self._comparison_type(other)
if other is NotImplemented:
# return NotImplemented to indicate comparison is not supported
# with this type
return NotImplemented

# if both dates are fully known, then earliest/latest check
Expand Down Expand Up @@ -359,10 +363,23 @@ def __contains__(self, other: object) -> bool:
]
)

@staticmethod
def from_datetime_date(dt_date: datetime.date):
    """Build an :class:`Undate` from the year, month and day of a
    :class:`datetime.date`."""
    year, month, day = dt_date.year, dt_date.month, dt_date.day
    return Undate(year, month, day)
@classmethod
def to_undate(cls, other: object) -> "Undate":
    """Convert an arbitrary object to an :class:`Undate`, if possible.
    Raises :class:`TypeError` if conversion is not possible.

    Currently supports:
        - :class:`Undate` (returned unchanged)
        - :class:`datetime.date` or :class:`datetime.datetime`

    """
    if isinstance(other, Undate):
        return other
    # datetime.datetime is a subclass of datetime.date, so this single
    # check covers both
    if isinstance(other, datetime.date):
        return Undate(other.year, other.month, other.day)
    raise TypeError(f"Conversion from {type(other)} is not supported")

@property
def known_year(self) -> bool:
Expand Down
73 changes: 73 additions & 0 deletions tests/test_converters/test_cidoc_crm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import pathlib
import types

import pytest
import rdflib

from undate import Undate, DatePrecision
from undate.converters import cidoc_crm


# TODO: move or copy example ismi data to test for use as a fixture
# Path to the ISMI sample data under the repository's examples directory.
# Walk up from this file's directory (test module -> test_converters ->
# tests -> repository root) instead of appending ".." segments after the
# file name: a file is not a directory, so such a path only works when
# something normalizes it lexically before it is opened.
ISMI_DATA_PATH = (
    pathlib.Path(__file__).resolve().parent.parent.parent
    / "examples"
    / "use-cases"
    / "ismi"
    / "data"
    / "ismi-crm-date-samples.ttl"
)

#: URI of the ``date1`` time span in the ISMI sample data
DATE1_URI = rdflib.URIRef("http://content.mpiwg-berlin.mpg.de/ns/ismi/date1")


@pytest.fixture
def ismi_data():
    """rdflib graph loaded from the ISMI sample data file."""
    graph = rdflib.Graph()
    graph.parse(ISMI_DATA_PATH)
    return graph


class TestTimeSpan:
    """Tests for the rdflib-based CIDOC-CRM ``TimeSpan`` resource."""

    def test_properties(self, ismi_data):
        # time span resource for date1 in the sample data
        date1 = cidoc_crm.TimeSpan(ismi_data, DATE1_URI)

        assert date1.type == cidoc_crm.ISMI_DATE_TYPE.day
        assert date1.label == rdflib.term.Literal("901 Rabīʿ I 14 (islamic)")
        assert date1.calendar == cidoc_crm.ISMI_CALENDAR_TYPE.islamic

        expected_date = rdflib.term.Literal("1495-12-11", datatype=rdflib.XSD.date)
        assert date1.at_some_time_within == expected_date

        expected_note = rdflib.term.Literal("day-precision date in islamic calendar")
        assert date1.note == expected_note

    def test_time_spans_from_graph(self, ismi_data):
        found = cidoc_crm.TimeSpan.time_spans_from_graph(ismi_data)
        # results are generated lazily
        assert isinstance(found, types.GeneratorType)

        time_spans = list(found)
        # fixture has 9 time spans
        assert len(time_spans) == 9
        first = time_spans[0]
        assert isinstance(first, cidoc_crm.TimeSpan)
        assert first.identifier == DATE1_URI

    def test_to_undate(self, ismi_data):
        date1 = cidoc_crm.TimeSpan(ismi_data, DATE1_URI)
        converted = date1.to_undate()
        assert isinstance(converted, Undate)

        # sample data value is "1495-12-11"^^xsd:date
        assert converted.year == "1495"
        assert converted.month == "12"
        assert converted.day == "11"
        assert converted.precision == DatePrecision.DAY

        # if we round trip the date it comes out the same
        assert converted.format("ISO8601") == str(date1.at_some_time_within)
Loading
Loading